]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/unormcmp.cpp
ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / common / unormcmp.cpp
index 83fbc58ecf9d6d5395f8cf1aa3959fcfa29df22b..689b0b53b2d5f503b8c56b3174d0318b34abf684 100644 (file)
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2001-2010, International Business Machines
+*   Copyright (C) 2001-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  unormcmp.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
@@ -32,8 +34,6 @@
 
 U_NAMESPACE_USE
 
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
 /* compare canonically equivalent ------------------------------------------- */
 
 /*
@@ -145,7 +145,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
                    uint32_t options,
                    UErrorCode *pErrorCode) {
     const Normalizer2Impl *nfcImpl;
-    const UCaseProps *csp;
 
     /* current-level start/limit - s1/s2 as current */
     const UChar *start1, *start2, *limit1, *limit2;
@@ -183,11 +182,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
     } else {
         nfcImpl=NULL;
     }
-    if((options&U_COMPARE_IGNORE_CASE)!=0) {
-        csp=ucase_getSingleton();
-    } else {
-        csp=NULL;
-    }
     if(U_FAILURE(*pErrorCode)) {
         return 0;
     }
@@ -233,10 +227,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
                 /* reached end of level buffer, pop one level */
                 do {
                     --level1;
-                    start1=stack1[level1].start;
+                    start1=stack1[level1].start;    /*Not uninitialized*/
                 } while(start1==NULL);
-                s1=stack1[level1].s;
-                limit1=stack1[level1].limit;
+                s1=stack1[level1].s;                /*Not uninitialized*/
+                limit1=stack1[level1].limit;        /*Not uninitialized*/
             }
         }
 
@@ -256,10 +250,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
                 /* reached end of level buffer, pop one level */
                 do {
                     --level2;
-                    start2=stack2[level2].start;
+                    start2=stack2[level2].start;    /*Not uninitialized*/
                 } while(start2==NULL);
-                s2=stack2[level2].s;
-                limit2=stack2[level2].limit;
+                s2=stack2[level2].s;                /*Not uninitialized*/
+                limit2=stack2[level2].limit;        /*Not uninitialized*/
             }
         }
 
@@ -319,7 +313,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
          */
 
         if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
-            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
+            (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
         ) {
             /* cp1 case-folds to the code point "length" or to p[length] */
             if(U_IS_SURROGATE(c1)) {
@@ -364,7 +358,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
         }
 
         if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
-            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
+            (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
         ) {
             /* cp2 case-folds to the code point "length" or to p[length] */
             if(U_IS_SURROGATE(c2)) {
@@ -534,6 +528,35 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
     }
 }
 
+static
+UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
+                UnicodeString &normalized, UErrorCode *pErrorCode) {
+    UnicodeString str(length<0, s, length);    /* wraps s; length<0 is passed as the "terminated" flag */
+    // Returns TRUE only when s had to be normalized by n2 and `normalized` now holds the result;
+    // returns FALSE when all of s lies inside n2's quick-check-yes span, or *pErrorCode is a failure.
+    int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode);
+    if (U_FAILURE(*pErrorCode)) {
+        return FALSE;
+    }
+    /*
+     * ICU 2.4 had a further optimization:
+     * If both strings were not in FCD, then they were both NFD'ed,
+     * and the _COMPARE_EQUIV option was turned off.
+     * It is not entirely clear that this is valid with the current
+     * definition of the canonical caseless match.
+     * Therefore, ICU 2.6 removes that optimization.
+     */
+    if(spanQCYes<str.length()) {    /* some tail of s lies beyond the quick-check-yes span */
+        UnicodeString unnormalized=str.tempSubString(spanQCYes);
+        normalized.setTo(FALSE, str.getBuffer(), spanQCYes);    /* start from the prefix [0, spanQCYes) */
+        n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode);
+        if (U_SUCCESS(*pErrorCode)) {
+            return TRUE;
+        }
+    }
+    return FALSE;    /* unorm_compare then keeps its original s/length for this string */
+}
+
 U_CAPI int32_t U_EXPORT2
 unorm_compare(const UChar *s1, int32_t length1,
               const UChar *s2, int32_t length2,
@@ -576,7 +599,7 @@ unorm_compare(const UChar *s1, int32_t length1,
     if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
         const Normalizer2 *n2;
         if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
-            n2=Normalizer2Factory::getNFDInstance(*pErrorCode);
+            n2=Normalizer2::getNFDInstance(*pErrorCode);
         } else {
             n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
         }
@@ -584,48 +607,26 @@ unorm_compare(const UChar *s1, int32_t length1,
             return 0;
         }
 
-        // check if s1 and/or s2 fulfill the FCD conditions
-        const UnicodeSet *uni32;
         if(normOptions&UNORM_UNICODE_3_2) {
-            uni32=uniset_getUnicode32Instance(*pErrorCode);
+            const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+            FilteredNormalizer2 fn2(*n2, *uni32);
+            if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
+                s1=fcd1.getBuffer();
+                length1=fcd1.length();
+            }
+            if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
+                s2=fcd2.getBuffer();
+                length2=fcd2.length();
+            }
         } else {
-            uni32=NULL;  // unused
-        }
-        FilteredNormalizer2 fn2(*n2, *uni32);
-        if(normOptions&UNORM_UNICODE_3_2) {
-            n2=&fn2;
-        }
-
-        UnicodeString str1(length1<0, s1, length1);
-        UnicodeString str2(length2<0, s2, length2);
-        int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode);
-        int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            return 0;
-        }
-
-        /*
-         * ICU 2.4 had a further optimization:
-         * If both strings were not in FCD, then they were both NFD'ed,
-         * and the _COMPARE_EQUIV option was turned off.
-         * It is not entirely clear that this is valid with the current
-         * definition of the canonical caseless match.
-         * Therefore, ICU 2.6 removes that optimization.
-         */
-
-        if(spanQCYes1<str1.length()) {
-            UnicodeString unnormalized=str1.tempSubString(spanQCYes1);
-            fcd1.setTo(FALSE, str1.getBuffer(), spanQCYes1);
-            n2->normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode);
-            s1=fcd1.getBuffer();
-            length1=fcd1.length();
-        }
-        if(spanQCYes2<str2.length()) {
-            UnicodeString unnormalized=str2.tempSubString(spanQCYes2);
-            fcd2.setTo(FALSE, str2.getBuffer(), spanQCYes2);
-            n2->normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode);
-            s2=fcd2.getBuffer();
-            length2=fcd2.length();
+            if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
+                s1=fcd1.getBuffer();
+                length1=fcd1.length();
+            }
+            if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
+                s2=fcd2.getBuffer();
+                length2=fcd2.length();
+            }
         }
     }