+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2001-2010, International Business Machines
+* Copyright (C) 2001-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unormcmp.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
U_NAMESPACE_USE
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
/* compare canonically equivalent ------------------------------------------- */
/*
uint32_t options,
UErrorCode *pErrorCode) {
const Normalizer2Impl *nfcImpl;
- const UCaseProps *csp;
/* current-level start/limit - s1/s2 as current */
const UChar *start1, *start2, *limit1, *limit2;
} else {
nfcImpl=NULL;
}
- if((options&U_COMPARE_IGNORE_CASE)!=0) {
- csp=ucase_getSingleton();
- } else {
- csp=NULL;
- }
if(U_FAILURE(*pErrorCode)) {
return 0;
}
/* reached end of level buffer, pop one level */
do {
--level1;
- start1=stack1[level1].start;
+ start1=stack1[level1].start; /*Not uninitialized*/
} while(start1==NULL);
- s1=stack1[level1].s;
- limit1=stack1[level1].limit;
+ s1=stack1[level1].s; /*Not uninitialized*/
+ limit1=stack1[level1].limit; /*Not uninitialized*/
}
}
/* reached end of level buffer, pop one level */
do {
--level2;
- start2=stack2[level2].start;
+ start2=stack2[level2].start; /*Not uninitialized*/
} while(start2==NULL);
- s2=stack2[level2].s;
- limit2=stack2[level2].limit;
+ s2=stack2[level2].s; /*Not uninitialized*/
+ limit2=stack2[level2].limit; /*Not uninitialized*/
}
}
*/
if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
- (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
+ (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
) {
/* cp1 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c1)) {
}
if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
- (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
+ (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
) {
/* cp2 case-folds to the code point "length" or to p[length] */
if(U_IS_SURROGATE(c2)) {
}
}
+/**
+ * Helper for unorm_compare(): builds a normalized form of s only when one is needed.
+ *
+ * The input is wrapped in a UnicodeString and n2->spanQuickCheckYes() is asked
+ * where the leading span that needs no work ends. If that span does not cover
+ * the whole string, "normalized" is set to that leading span and the rest of
+ * the input is appended through n2->normalizeSecondAndAppend().
+ *
+ * @param n2          Normalizer2 used for both the quick check and the normalization.
+ * @param s           Input text.
+ * @param length      Length of s; (length<0) is passed as the first argument of the
+ *                    UnicodeString constructor (presumably meaning NUL-terminated
+ *                    input -- see the UnicodeString API).
+ * @param normalized  Output string; modified only when the quick-check span is
+ *                    shorter than the input.
+ * @param pErrorCode  In/out ICU error code, updated by the Normalizer2 calls.
+ * @return TRUE if "normalized" was produced without error and the caller should
+ *         use it instead of s; FALSE if the whole input passed the quick check
+ *         or if an error was reported.
+ */
+static
+UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
+                UnicodeString &normalized, UErrorCode *pErrorCode) {
+    UnicodeString src(length<0, s, length);
+
+    // Find out how much of the start of s already fulfills n2's conditions.
+    const int32_t okLength=n2->spanQuickCheckYes(src, *pErrorCode);
+    if(U_FAILURE(*pErrorCode) || okLength>=src.length()) {
+        // The check failed, or there is nothing left that needs normalizing.
+        return FALSE;
+    }
+
+    /*
+     * Historical note:
+     * ICU 2.4 had a further optimization here. If both strings were not
+     * in FCD, then they were both NFD'ed, and the _COMPARE_EQUIV option
+     * was turned off. It is not entirely clear that this is valid with
+     * the current definition of the canonical caseless match, so
+     * ICU 2.6 removed that optimization.
+     */
+
+    // Keep the already-acceptable prefix and append the normalized remainder.
+    UnicodeString rest=src.tempSubString(okLength);
+    normalized.setTo(FALSE, src.getBuffer(), okLength);
+    n2->normalizeSecondAndAppend(normalized, rest, *pErrorCode);
+    return U_SUCCESS(*pErrorCode) ? TRUE : FALSE;
+}
+
+
U_CAPI int32_t U_EXPORT2
unorm_compare(const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
const Normalizer2 *n2;
if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
- n2=Normalizer2Factory::getNFDInstance(*pErrorCode);
+ n2=Normalizer2::getNFDInstance(*pErrorCode);
} else {
n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
}
return 0;
}
- // check if s1 and/or s2 fulfill the FCD conditions
- const UnicodeSet *uni32;
if(normOptions&UNORM_UNICODE_3_2) {
- uni32=uniset_getUnicode32Instance(*pErrorCode);
+ const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
+ FilteredNormalizer2 fn2(*n2, *uni32);
+ if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
+ s1=fcd1.getBuffer();
+ length1=fcd1.length();
+ }
+ if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
+ s2=fcd2.getBuffer();
+ length2=fcd2.length();
+ }
} else {
- uni32=NULL; // unused
- }
- FilteredNormalizer2 fn2(*n2, *uni32);
- if(normOptions&UNORM_UNICODE_3_2) {
- n2=&fn2;
- }
-
- UnicodeString str1(length1<0, s1, length1);
- UnicodeString str2(length2<0, s2, length2);
- int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode);
- int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /*
- * ICU 2.4 had a further optimization:
- * If both strings were not in FCD, then they were both NFD'ed,
- * and the _COMPARE_EQUIV option was turned off.
- * It is not entirely clear that this is valid with the current
- * definition of the canonical caseless match.
- * Therefore, ICU 2.6 removes that optimization.
- */
-
- if(spanQCYes1<str1.length()) {
- UnicodeString unnormalized=str1.tempSubString(spanQCYes1);
- fcd1.setTo(FALSE, str1.getBuffer(), spanQCYes1);
- n2->normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode);
- s1=fcd1.getBuffer();
- length1=fcd1.length();
- }
- if(spanQCYes2<str2.length()) {
- UnicodeString unnormalized=str2.tempSubString(spanQCYes2);
- fcd2.setTo(FALSE, str2.getBuffer(), spanQCYes2);
- n2->normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode);
- s2=fcd2.getBuffer();
- length2=fcd2.length();
+ if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
+ s1=fcd1.getBuffer();
+ length1=fcd1.length();
+ }
+ if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
+ s2=fcd2.getBuffer();
+ length2=fcd2.length();
+ }
}
}