]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/uts46.cpp
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / common / uts46.cpp
index 70a97845e150b2cee689b3e724840f98a1572ad6..13a1f246c21cc91ee67a2aa2bafe76db85c5bd14 100644 (file)
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
 /*
 *******************************************************************************
-*   Copyright (C) 2010-2011, International Business Machines
+*   Copyright (C) 2010-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   file name:  uts46.cpp
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   file name:  uts46.cpp
@@ -27,8 +27,6 @@
 #include "ubidi_props.h"
 #include "ustr_imp.h"
 
 #include "ubidi_props.h"
 #include "ustr_imp.h"
 
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
 //
 // The domain name length limit is 255 octets in an internal DNS representation
 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
 //
 // The domain name length limit is 255 octets in an internal DNS representation
@@ -109,8 +107,6 @@ IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
     }
 }
 
     }
 }
 
-UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(IDNA)
-
 // UTS46 class declaration ------------------------------------------------- ***
 
 class UTS46 : public IDNA {
 // UTS46 class declaration ------------------------------------------------- ***
 
 class UTS46 : public IDNA {
@@ -184,7 +180,7 @@ private:
     int32_t
     markBadACELabel(UnicodeString &dest,
                     int32_t labelStart, int32_t labelLength,
     int32_t
     markBadACELabel(UnicodeString &dest,
                     int32_t labelStart, int32_t labelLength,
-                    UBool toASCII, IDNAInfo &info) const;
+                    UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const;
 
     void
     checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
 
     void
     checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
@@ -323,9 +319,7 @@ UTS46::process(const UnicodeString &src,
     info.reset();
     int32_t srcLength=src.length();
     if(srcLength==0) {
     info.reset();
     int32_t srcLength=src.length();
     if(srcLength==0) {
-        if(toASCII) {
-            info.errors|=UIDNA_ERROR_EMPTY_LABEL;
-        }
+        info.errors|=UIDNA_ERROR_EMPTY_LABEL;
         return dest;
     }
     UChar *destArray=dest.getBuffer(srcLength);
         return dest;
     }
     UChar *destArray=dest.getBuffer(srcLength);
@@ -383,13 +377,11 @@ UTS46::process(const UnicodeString &src,
                     ++i;  // '.' was copied to dest already
                     break;
                 }
                     ++i;  // '.' was copied to dest already
                     break;
                 }
-                if(toASCII) {
-                    // Permit an empty label at the end but not elsewhere.
-                    if(i==labelStart && i<(srcLength-1)) {
-                        info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
-                    } else if((i-labelStart)>63) {
-                        info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
-                    }
+                if(i==labelStart) {
+                    info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+                }
+                if(toASCII && (i-labelStart)>63) {
+                    info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                 }
                 info.errors|=info.labelErrors;
                 info.labelErrors=0;
                 }
                 info.errors|=info.labelErrors;
                 info.labelErrors=0;
@@ -425,9 +417,7 @@ UTS46::processUTF8(const StringPiece &src,
     // Arguments are fine, reset output values.
     info.reset();
     if(srcLength==0) {
     // Arguments are fine, reset output values.
     info.reset();
     if(srcLength==0) {
-        if(toASCII) {
-            info.errors|=UIDNA_ERROR_EMPTY_LABEL;
-        }
+        info.errors|=UIDNA_ERROR_EMPTY_LABEL;
         dest.Flush();
         return;
     }
         dest.Flush();
         return;
     }
@@ -438,7 +428,7 @@ UTS46::processUTF8(const StringPiece &src,
         char stackArray[256];
         int32_t destCapacity;
         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
         char stackArray[256];
         int32_t destCapacity;
         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
-                                             stackArray, LENGTHOF(stackArray), &destCapacity);
+                                             stackArray, UPRV_LENGTHOF(stackArray), &destCapacity);
         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
         int32_t i;
         for(i=0;; ++i) {
         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
         int32_t i;
         for(i=0;; ++i) {
@@ -485,13 +475,11 @@ UTS46::processUTF8(const StringPiece &src,
                     if(isLabel) {
                         break;  // Replacing with U+FFFD can be complicated for toASCII.
                     }
                     if(isLabel) {
                         break;  // Replacing with U+FFFD can be complicated for toASCII.
                     }
-                    if(toASCII) {
-                        // Permit an empty label at the end but not elsewhere.
-                        if(i==labelStart && i<(srcLength-1)) {
-                            info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
-                        } else if((i-labelStart)>63) {
-                            info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
-                        }
+                    if(i==labelStart) {
+                        info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+                    }
+                    if(toASCII && (i-labelStart)>63) {
+                        info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                     }
                     info.errors|=info.labelErrors;
                     info.labelErrors=0;
                     }
                     info.errors|=info.labelErrors;
                     info.labelErrors=0;
@@ -599,6 +587,9 @@ UTS46::processUnicode(const UnicodeString &src,
 int32_t
 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
                    UErrorCode &errorCode) const {
 int32_t
 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
                    UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
     int32_t length=dest.length();
     UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
     if(s==NULL) {
     int32_t length=dest.length();
     UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
     if(s==NULL) {
@@ -656,6 +647,9 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart
         uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
         if(U_SUCCESS(errorCode)) {
             dest.replace(labelStart, 0x7fffffff, normalized);
         uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
         if(U_SUCCESS(errorCode)) {
             dest.replace(labelStart, 0x7fffffff, normalized);
+            if(dest.isBogus()) {
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
+            }
             return dest.length();
         }
     }
             return dest.length();
         }
     }
@@ -677,9 +671,16 @@ isNonASCIIDisallowedSTD3Valid(UChar32 c) {
 // Returns labelLength (= the new label length).
 static int32_t
 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
 // Returns labelLength (= the new label length).
 static int32_t
 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
-             const UnicodeString &label, int32_t labelLength) {
+             const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
     if(&label!=&dest) {
         dest.replace(destLabelStart, destLabelLength, label);
     if(&label!=&dest) {
         dest.replace(destLabelStart, destLabelLength, label);
+        if(dest.isBogus()) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
     }
     return labelLength;
 }
     }
     return labelLength;
 }
@@ -689,6 +690,9 @@ UTS46::processLabel(UnicodeString &dest,
                     int32_t labelStart, int32_t labelLength,
                     UBool toASCII,
                     IDNAInfo &info, UErrorCode &errorCode) const {
                     int32_t labelStart, int32_t labelLength,
                     UBool toASCII,
                     IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
     UnicodeString fromPunycode;
     UnicodeString *labelString;
     const UChar *label=dest.getBuffer()+labelStart;
     UnicodeString fromPunycode;
     UnicodeString *labelString;
     const UChar *label=dest.getBuffer()+labelStart;
@@ -723,7 +727,7 @@ UTS46::processLabel(UnicodeString &dest,
         fromPunycode.releaseBuffer(unicodeLength);
         if(U_FAILURE(punycodeErrorCode)) {
             info.labelErrors|=UIDNA_ERROR_PUNYCODE;
         fromPunycode.releaseBuffer(unicodeLength);
         if(U_FAILURE(punycodeErrorCode)) {
             info.labelErrors|=UIDNA_ERROR_PUNYCODE;
-            return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
+            return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
         }
         // Check for NFC, and for characters that are not
         // valid or deviation characters according to the normalizer.
         }
         // Check for NFC, and for characters that are not
         // valid or deviation characters according to the normalizer.
@@ -738,7 +742,7 @@ UTS46::processLabel(UnicodeString &dest,
         }
         if(!isValid) {
             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
         }
         if(!isValid) {
             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
-            return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
+            return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
         }
         labelString=&fromPunycode;
         label=fromPunycode.getBuffer();
         }
         labelString=&fromPunycode;
         label=fromPunycode.getBuffer();
@@ -750,10 +754,9 @@ UTS46::processLabel(UnicodeString &dest,
     }
     // Validity check
     if(labelLength==0) {
     }
     // Validity check
     if(labelLength==0) {
-        if(toASCII) {
-            info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
-        }
-        return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength);
+        info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
+        return replaceLabel(dest, destLabelStart, destLabelLength,
+                            *labelString, labelLength, errorCode);
     }
     // labelLength>0
     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
     }
     // labelLength>0
     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
@@ -875,7 +878,7 @@ UTS46::processLabel(UnicodeString &dest,
                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                 }
                 return replaceLabel(dest, destLabelStart, destLabelLength,
                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                 }
                 return replaceLabel(dest, destLabelStart, destLabelLength,
-                                    punycode, punycodeLength);
+                                    punycode, punycodeLength, errorCode);
             } else {
                 // all-ASCII label
                 if(labelLength>63) {
             } else {
                 // all-ASCII label
                 if(labelLength>63) {
@@ -888,10 +891,11 @@ UTS46::processLabel(UnicodeString &dest,
         // then leave it but make sure it does not look valid.
         if(wasPunycode) {
             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
         // then leave it but make sure it does not look valid.
         if(wasPunycode) {
             info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
-            return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info);
+            return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode);
         }
     }
         }
     }
-    return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength);
+    return replaceLabel(dest, destLabelStart, destLabelLength,
+                        *labelString, labelLength, errorCode);
 }
 
 // Make sure an ACE label does not look valid.
 }
 
 // Make sure an ACE label does not look valid.
@@ -900,7 +904,10 @@ UTS46::processLabel(UnicodeString &dest,
 int32_t
 UTS46::markBadACELabel(UnicodeString &dest,
                        int32_t labelStart, int32_t labelLength,
 int32_t
 UTS46::markBadACELabel(UnicodeString &dest,
                        int32_t labelStart, int32_t labelLength,
-                       UBool toASCII, IDNAInfo &info) const {
+                       UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     UBool isASCII=TRUE;
     UBool onlyLDH=TRUE;
     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
     UBool isASCII=TRUE;
     UBool onlyLDH=TRUE;
@@ -928,6 +935,10 @@ UTS46::markBadACELabel(UnicodeString &dest,
     } while(++s<limit);
     if(onlyLDH) {
         dest.insert(labelStart+labelLength, (UChar)0xfffd);
     } while(++s<limit);
     if(onlyLDH) {
         dest.insert(labelStart+labelLength, (UChar)0xfffd);
+        if(dest.isBogus()) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
         ++labelLength;
     } else {
         if(toASCII && isASCII && labelLength>63) {
         ++labelLength;
     } else {
         if(toASCII && isASCII && labelLength>63) {
@@ -1290,12 +1301,12 @@ U_NAMESPACE_END
 
 U_NAMESPACE_USE
 
 
 U_NAMESPACE_USE
 
-U_DRAFT UIDNA * U_EXPORT2
+U_CAPI UIDNA * U_EXPORT2
 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) {
     return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));
 }
 
 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) {
     return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode));
 }
 
-U_DRAFT void U_EXPORT2
+U_CAPI void U_EXPORT2
 uidna_close(UIDNA *idna) {
     delete reinterpret_cast<IDNA *>(idna);
 }
 uidna_close(UIDNA *idna) {
     delete reinterpret_cast<IDNA *>(idna);
 }
@@ -1330,7 +1341,7 @@ idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) {
     pInfo->errors=info.getErrors();
 }
 
     pInfo->errors=info.getErrors();
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_labelToASCII(const UIDNA *idna,
                    const UChar *label, int32_t length,
                    UChar *dest, int32_t capacity,
 uidna_labelToASCII(const UIDNA *idna,
                    const UChar *label, int32_t length,
                    UChar *dest, int32_t capacity,
@@ -1346,7 +1357,7 @@ uidna_labelToASCII(const UIDNA *idna,
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_labelToUnicode(const UIDNA *idna,
                      const UChar *label, int32_t length,
                      UChar *dest, int32_t capacity,
 uidna_labelToUnicode(const UIDNA *idna,
                      const UChar *label, int32_t length,
                      UChar *dest, int32_t capacity,
@@ -1362,7 +1373,7 @@ uidna_labelToUnicode(const UIDNA *idna,
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_nameToASCII(const UIDNA *idna,
                   const UChar *name, int32_t length,
                   UChar *dest, int32_t capacity,
 uidna_nameToASCII(const UIDNA *idna,
                   const UChar *name, int32_t length,
                   UChar *dest, int32_t capacity,
@@ -1378,7 +1389,7 @@ uidna_nameToASCII(const UIDNA *idna,
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_nameToUnicode(const UIDNA *idna,
                     const UChar *name, int32_t length,
                     UChar *dest, int32_t capacity,
 uidna_nameToUnicode(const UIDNA *idna,
                     const UChar *name, int32_t length,
                     UChar *dest, int32_t capacity,
@@ -1394,7 +1405,7 @@ uidna_nameToUnicode(const UIDNA *idna,
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
     return destString.extract(dest, capacity, *pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_labelToASCII_UTF8(const UIDNA *idna,
                         const char *label, int32_t length,
                         char *dest, int32_t capacity,
 uidna_labelToASCII_UTF8(const UIDNA *idna,
                         const char *label, int32_t length,
                         char *dest, int32_t capacity,
@@ -1410,7 +1421,7 @@ uidna_labelToASCII_UTF8(const UIDNA *idna,
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_labelToUnicodeUTF8(const UIDNA *idna,
                          const char *label, int32_t length,
                          char *dest, int32_t capacity,
 uidna_labelToUnicodeUTF8(const UIDNA *idna,
                          const char *label, int32_t length,
                          char *dest, int32_t capacity,
@@ -1426,7 +1437,7 @@ uidna_labelToUnicodeUTF8(const UIDNA *idna,
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_nameToASCII_UTF8(const UIDNA *idna,
                        const char *name, int32_t length,
                        char *dest, int32_t capacity,
 uidna_nameToASCII_UTF8(const UIDNA *idna,
                        const char *name, int32_t length,
                        char *dest, int32_t capacity,
@@ -1442,7 +1453,7 @@ uidna_nameToASCII_UTF8(const UIDNA *idna,
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
 }
 
-U_DRAFT int32_t U_EXPORT2
+U_CAPI int32_t U_EXPORT2
 uidna_nameToUnicodeUTF8(const UIDNA *idna,
                         const char *name, int32_t length,
                         char *dest, int32_t capacity,
 uidna_nameToUnicodeUTF8(const UIDNA *idna,
                         const char *name, int32_t length,
                         char *dest, int32_t capacity,