X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..cecc3f9394f261e71def48cf396d137687dbd0a7:/icuSources/common/uts46.cpp diff --git a/icuSources/common/uts46.cpp b/icuSources/common/uts46.cpp index 8e6c8ff1..5a23572e 100644 --- a/icuSources/common/uts46.cpp +++ b/icuSources/common/uts46.cpp @@ -1,10 +1,12 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines +* Copyright (C) 2010-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: uts46.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -27,8 +29,6 @@ #include "ubidi_props.h" #include "ustr_imp.h" -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: // // The domain name length limit is 255 octets in an internal DNS representation @@ -70,7 +70,7 @@ isASCIIOkBiDi(const char *s, int32_t length); IDNA::~IDNA() {} void -IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, +IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { if(U_SUCCESS(errorCode)) { UnicodeString destString; @@ -80,7 +80,7 @@ IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, } void -IDNA::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, +IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { if(U_SUCCESS(errorCode)) { UnicodeString destString; @@ -90,7 +90,7 @@ IDNA::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, } void -IDNA::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, +IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { if(U_SUCCESS(errorCode)) { UnicodeString destString; @@ -100,7 +100,7 @@ IDNA::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, } void -IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, +IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { if(U_SUCCESS(errorCode)) { UnicodeString destString; @@ -133,19 +133,19 @@ public: IDNAInfo &info, UErrorCode &errorCode) const; virtual void - labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, + labelToASCII_UTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const; virtual void - labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, + labelToUnicodeUTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const; virtual void - nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, + nameToASCII_UTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const; virtual void - nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, + nameToUnicodeUTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const; private: @@ -156,7 +156,7 @@ private: IDNAInfo &info, UErrorCode &errorCode) const; void - processUTF8(const StringPiece &src, + processUTF8(StringPiece src, UBool isLabel, UBool toASCII, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const; @@ -182,7 +182,7 @@ private: int32_t markBadACELabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info) const; + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; void checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; @@ -253,25 +253,25 @@ UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, } void -UTS46::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, +UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { processUTF8(label, TRUE, TRUE, dest, info, errorCode); } void -UTS46::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, +UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { processUTF8(label, TRUE, FALSE, dest, info, errorCode); } void -UTS46::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, +UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { processUTF8(name, FALSE, TRUE, dest, info, errorCode); } void -UTS46::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, +UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { processUTF8(name, FALSE, FALSE, dest, info, errorCode); } @@ -321,9 +321,7 @@ UTS46::process(const UnicodeString &src, info.reset(); int32_t srcLength=src.length(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; return dest; } UChar *destArray=dest.getBuffer(srcLength); @@ -381,13 +379,11 @@ UTS46::process(const UnicodeString &src, ++i; // '.' was copied to dest already break; } - if(toASCII) { - // Permit an empty label at the end but not elsewhere. - if(i==labelStart && i<(srcLength-1)) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -407,7 +403,7 @@ UTS46::process(const UnicodeString &src, } void -UTS46::processUTF8(const StringPiece &src, +UTS46::processUTF8(StringPiece src, UBool isLabel, UBool toASCII, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { @@ -423,9 +419,7 @@ UTS46::processUTF8(const StringPiece &src, // Arguments are fine, reset output values. info.reset(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; dest.Flush(); return; } @@ -436,7 +430,7 @@ UTS46::processUTF8(const StringPiece &src, char stackArray[256]; int32_t destCapacity; char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, - stackArray, LENGTHOF(stackArray), &destCapacity); + stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; int32_t i; for(i=0;; ++i) { @@ -483,13 +477,11 @@ UTS46::processUTF8(const StringPiece &src, if(isLabel) { break; // Replacing with U+FFFD can be complicated for toASCII. } - if(toASCII) { - // Permit an empty label at the end but not elsewhere. - if(i==labelStart && i<(srcLength-1)) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -597,6 +589,9 @@ UTS46::processUnicode(const UnicodeString &src, int32_t UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } int32_t length=dest.length(); UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); if(s==NULL) { @@ -654,6 +649,9 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); if(U_SUCCESS(errorCode)) { dest.replace(labelStart, 0x7fffffff, normalized); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } return dest.length(); } } @@ -675,9 +673,16 @@ isNonASCIIDisallowedSTD3Valid(UChar32 c) { // Returns labelLength (= the new label length). static int32_t replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, - const UnicodeString &label, int32_t labelLength) { + const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } if(&label!=&dest) { dest.replace(destLabelStart, destLabelLength, label); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } } return labelLength; } @@ -687,6 +692,9 @@ UTS46::processLabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } UnicodeString fromPunycode; UnicodeString *labelString; const UChar *label=dest.getBuffer()+labelStart; @@ -721,7 +729,7 @@ UTS46::processLabel(UnicodeString &dest, fromPunycode.releaseBuffer(unicodeLength); if(U_FAILURE(punycodeErrorCode)) { info.labelErrors|=UIDNA_ERROR_PUNYCODE; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info); + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); } // Check for NFC, and for characters that are not // valid or deviation characters according to the normalizer. @@ -736,7 +744,7 @@ UTS46::processLabel(UnicodeString &dest, } if(!isValid) { info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info); + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); } labelString=&fromPunycode; label=fromPunycode.getBuffer(); @@ -748,10 +756,9 @@ UTS46::processLabel(UnicodeString &dest, } // Validity check if(labelLength==0) { - if(toASCII) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } - return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); } // labelLength>0 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { @@ -873,7 +880,7 @@ UTS46::processLabel(UnicodeString &dest, info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } return replaceLabel(dest, destLabelStart, destLabelLength, - punycode, punycodeLength); + punycode, punycodeLength, errorCode); } else { // all-ASCII label if(labelLength>63) { @@ -886,10 +893,11 @@ UTS46::processLabel(UnicodeString &dest, // then leave it but make sure it does not look valid. if(wasPunycode) { info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); + return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); } } - return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); } // Make sure an ACE label does not look valid. @@ -898,7 +906,10 @@ UTS46::processLabel(UnicodeString &dest, int32_t UTS46::markBadACELabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info) const { + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; UBool isASCII=TRUE; UBool onlyLDH=TRUE; @@ -926,6 +937,10 @@ UTS46::markBadACELabel(UnicodeString &dest, } while(++s63) { @@ -1000,8 +1015,8 @@ UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) c ) { info.isOkBiDi=FALSE; } - // Get the directionalities of the intervening characters. - uint32_t mask=0; + // Add the directionalities of the intervening characters. + uint32_t mask=firstMask|lastMask; while(i(uprv_strlen(label)) : length); CheckedArrayByteSink sink(dest, capacity); IDNAInfo info; reinterpret_cast(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); @@ -1416,7 +1430,7 @@ uidna_labelToUnicodeUTF8(const UIDNA *idna, if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { return 0; } - StringPiece src(label, length<0 ? uprv_strlen(label) : length); + StringPiece src(label, length<0 ? static_cast(uprv_strlen(label)) : length); CheckedArrayByteSink sink(dest, capacity); IDNAInfo info; reinterpret_cast(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); @@ -1432,7 +1446,7 @@ uidna_nameToASCII_UTF8(const UIDNA *idna, if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { return 0; } - StringPiece src(name, length<0 ? uprv_strlen(name) : length); + StringPiece src(name, length<0 ? static_cast(uprv_strlen(name)) : length); CheckedArrayByteSink sink(dest, capacity); IDNAInfo info; reinterpret_cast(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); @@ -1448,7 +1462,7 @@ uidna_nameToUnicodeUTF8(const UIDNA *idna, if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { return 0; } - StringPiece src(name, length<0 ? uprv_strlen(name) : length); + StringPiece src(name, length<0 ? static_cast(uprv_strlen(name)) : length); CheckedArrayByteSink sink(dest, capacity); IDNAInfo info; reinterpret_cast(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);