X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..ef6cf650f4a75c3f97de06b51fa104f2069b9ea2:/icuSources/common/uts46.cpp?ds=inline diff --git a/icuSources/common/uts46.cpp b/icuSources/common/uts46.cpp index e08ee356..13a1f246 100644 --- a/icuSources/common/uts46.cpp +++ b/icuSources/common/uts46.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: uts46.cpp @@ -18,14 +18,15 @@ #include "unicode/idna.h" #include "unicode/normalizer2.h" +#include "unicode/uscript.h" #include "unicode/ustring.h" +#include "unicode/utf16.h" #include "cmemory.h" #include "cstring.h" #include "punycode.h" +#include "ubidi_props.h" #include "ustr_imp.h" -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: // // The domain name length limit is 255 octets in an internal DNS representation @@ -64,6 +65,8 @@ isASCIIOkBiDi(const char *s, int32_t length); // IDNA class default implementations -------------------------------------- *** +IDNA::~IDNA() {} + void IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, IDNAInfo &info, UErrorCode &errorCode) const { @@ -104,8 +107,6 @@ IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, } } -UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(IDNA) - // UTS46 class declaration ------------------------------------------------- *** class UTS46 : public IDNA { @@ -179,7 +180,7 @@ private: int32_t markBadACELabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info) const; + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; void checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; @@ -187,6 +188,9 @@ private: UBool isLabelOkContextJ(const UChar *label, int32_t labelLength) const; + void + checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; + const Normalizer2 &uts46Norm2; // uts46.nrm uint32_t options; }; @@ -315,9 +319,7 @@ UTS46::process(const UnicodeString &src, info.reset(); int32_t srcLength=src.length(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; return dest; } UChar *destArray=dest.getBuffer(srcLength); @@ -375,13 +377,11 @@ UTS46::process(const UnicodeString &src, ++i; // '.' was copied to dest already break; } - if(toASCII) { - // Permit an empty label at the end but not elsewhere. - if(i==labelStart && i<(srcLength-1)) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -417,9 +417,7 @@ UTS46::processUTF8(const StringPiece &src, // Arguments are fine, reset output values. info.reset(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; dest.Flush(); return; } @@ -430,7 +428,7 @@ UTS46::processUTF8(const StringPiece &src, char stackArray[256]; int32_t destCapacity; char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, - stackArray, LENGTHOF(stackArray), &destCapacity); + stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; int32_t i; for(i=0;; ++i) { @@ -477,13 +475,11 @@ UTS46::processUTF8(const StringPiece &src, if(isLabel) { break; // Replacing with U+FFFD can be complicated for toASCII. } - if(toASCII) { - // Permit an empty label at the end but not elsewhere. - if(i==labelStart && i<(srcLength-1)) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -591,6 +587,9 @@ UTS46::processUnicode(const UnicodeString &src, int32_t UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } int32_t length=dest.length(); UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); if(s==NULL) { @@ -648,6 +647,9 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); if(U_SUCCESS(errorCode)) { dest.replace(labelStart, 0x7fffffff, normalized); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } return dest.length(); } } @@ -669,9 +671,16 @@ isNonASCIIDisallowedSTD3Valid(UChar32 c) { // Returns labelLength (= the new label length). static int32_t replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, - const UnicodeString &label, int32_t labelLength) { + const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } if(&label!=&dest) { dest.replace(destLabelStart, destLabelLength, label); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } } return labelLength; } @@ -681,6 +690,9 @@ UTS46::processLabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } UnicodeString fromPunycode; UnicodeString *labelString; const UChar *label=dest.getBuffer()+labelStart; @@ -715,7 +727,7 @@ UTS46::processLabel(UnicodeString &dest, fromPunycode.releaseBuffer(unicodeLength); if(U_FAILURE(punycodeErrorCode)) { info.labelErrors|=UIDNA_ERROR_PUNYCODE; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info); + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); } // Check for NFC, and for characters that are not // valid or deviation characters according to the normalizer. @@ -730,7 +742,7 @@ UTS46::processLabel(UnicodeString &dest, } if(!isValid) { info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info); + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); } labelString=&fromPunycode; label=fromPunycode.getBuffer(); @@ -742,10 +754,9 @@ UTS46::processLabel(UnicodeString &dest, } // Validity check if(labelLength==0) { - if(toASCII) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } - return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); } // labelLength>0 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { @@ -821,6 +832,9 @@ UTS46::processLabel(UnicodeString &dest, ) { info.labelErrors|=UIDNA_ERROR_CONTEXTJ; } + if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { + checkLabelContextO(label, labelLength, info); + } if(toASCII) { if(wasPunycode) { // Leave a Punycode label unchanged if it has no severe errors. @@ -864,7 +878,7 @@ UTS46::processLabel(UnicodeString &dest, info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } return replaceLabel(dest, destLabelStart, destLabelLength, - punycode, punycodeLength); + punycode, punycodeLength, errorCode); } else { // all-ASCII label if(labelLength>63) { @@ -877,10 +891,11 @@ UTS46::processLabel(UnicodeString &dest, // then leave it but make sure it does not look valid. if(wasPunycode) { info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); + return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); } } - return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); } // Make sure an ACE label does not look valid. @@ -889,7 +904,10 @@ UTS46::processLabel(UnicodeString &dest, int32_t UTS46::markBadACELabel(UnicodeString &dest, int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info) const { + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; UBool isASCII=TRUE; UBool onlyLDH=TRUE; @@ -917,6 +935,10 @@ UTS46::markBadACELabel(UnicodeString &dest, } while(++s63) { @@ -1102,6 +1124,7 @@ isASCIIOkBiDi(const char *s, int32_t length) { UBool UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { + const UBiDiProps *bdp=ubidi_getSingleton(); // [IDNA2008-Tables] // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER for(int32_t i=0; i0) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; + } + arabicDigits=-1; + } else if(0x6f0<=c) { + if(arabicDigits<0) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; + } + arabicDigits=1; + } + } + } else if(c==0x30fb) { + // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) + // Rule Set: + // False; + // For All Characters: + // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; + // End For; + UErrorCode errorCode=U_ZERO_ERROR; + for(int j=0;;) { + if(j>labelEnd) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; + break; + } + U16_NEXT(label, j, labelLength, c); + UScriptCode script=uscript_getScript(c, &errorCode); + if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { + break; + } + } + } + } +} + U_NAMESPACE_END // C API ------------------------------------------------------------------- *** U_NAMESPACE_USE -U_DRAFT UIDNA * U_EXPORT2 +U_CAPI UIDNA * U_EXPORT2 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { return reinterpret_cast(IDNA::createUTS46Instance(options, *pErrorCode)); } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 uidna_close(UIDNA *idna) { delete reinterpret_cast(idna); } @@ -1215,7 +1341,7 @@ idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { pInfo->errors=info.getErrors(); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_labelToASCII(const UIDNA *idna, const UChar *label, int32_t length, UChar *dest, int32_t capacity, @@ -1231,7 +1357,7 @@ uidna_labelToASCII(const UIDNA *idna, return destString.extract(dest, capacity, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_labelToUnicode(const UIDNA *idna, const UChar *label, int32_t length, UChar *dest, int32_t capacity, @@ -1247,7 +1373,7 @@ uidna_labelToUnicode(const UIDNA *idna, return destString.extract(dest, capacity, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_nameToASCII(const UIDNA *idna, const UChar *name, int32_t length, UChar *dest, int32_t capacity, @@ -1263,7 +1389,7 @@ uidna_nameToASCII(const UIDNA *idna, return destString.extract(dest, capacity, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_nameToUnicode(const UIDNA *idna, const UChar *name, int32_t length, UChar *dest, int32_t capacity, @@ -1279,7 +1405,7 @@ uidna_nameToUnicode(const UIDNA *idna, return destString.extract(dest, capacity, *pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_labelToASCII_UTF8(const UIDNA *idna, const char *label, int32_t length, char *dest, int32_t capacity, @@ -1295,7 +1421,7 @@ uidna_labelToASCII_UTF8(const UIDNA *idna, return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_labelToUnicodeUTF8(const UIDNA *idna, const char *label, int32_t length, char *dest, int32_t capacity, @@ -1311,7 +1437,7 @@ uidna_labelToUnicodeUTF8(const UIDNA *idna, return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_nameToASCII_UTF8(const UIDNA *idna, const char *name, int32_t length, char *dest, int32_t capacity, @@ -1327,7 +1453,7 @@ uidna_nameToASCII_UTF8(const UIDNA *idna, return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uidna_nameToUnicodeUTF8(const UIDNA *idna, const char *name, int32_t length, char *dest, int32_t capacity,