X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..c5116b9f5a666b9d59f443b3770acd6ef64dc6c3:/icuSources/common/unormcmp.cpp diff --git a/icuSources/common/unormcmp.cpp b/icuSources/common/unormcmp.cpp index 83fbc58e..689b0b53 100644 --- a/icuSources/common/unormcmp.cpp +++ b/icuSources/common/unormcmp.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2001-2010, International Business Machines +* Copyright (C) 2001-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: unormcmp.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -32,8 +34,6 @@ U_NAMESPACE_USE -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - /* compare canonically equivalent ------------------------------------------- */ /* @@ -145,7 +145,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, uint32_t options, UErrorCode *pErrorCode) { const Normalizer2Impl *nfcImpl; - const UCaseProps *csp; /* current-level start/limit - s1/s2 as current */ const UChar *start1, *start2, *limit1, *limit2; @@ -183,11 +182,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } else { nfcImpl=NULL; } - if((options&U_COMPARE_IGNORE_CASE)!=0) { - csp=ucase_getSingleton(); - } else { - csp=NULL; - } if(U_FAILURE(*pErrorCode)) { return 0; } @@ -233,10 +227,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level1; - start1=stack1[level1].start; + start1=stack1[level1].start; /*Not uninitialized*/ } while(start1==NULL); - s1=stack1[level1].s; - limit1=stack1[level1].limit; + s1=stack1[level1].s; /*Not uninitialized*/ + 
limit1=stack1[level1].limit; /*Not uninitialized*/ } } @@ -256,10 +250,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level2; - start2=stack2[level2].start; + start2=stack2[level2].start; /*Not uninitialized*/ } while(start2==NULL); - s2=stack2[level2].s; - limit2=stack2[level2].limit; + s2=stack2[level2].s; /*Not uninitialized*/ + limit2=stack2[level2].limit; /*Not uninitialized*/ } } @@ -319,7 +313,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, */ if( level1==0 && (options&U_COMPARE_IGNORE_CASE) && - (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 ) { /* cp1 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c1)) { @@ -364,7 +358,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } if( level2==0 && (options&U_COMPARE_IGNORE_CASE) && - (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 ) { /* cp2 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c2)) { @@ -534,6 +528,35 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } } +static +UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length, + UnicodeString &normalized, UErrorCode *pErrorCode) { + UnicodeString str(length<0, s, length); + + // check if s fulfill the conditions + int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return FALSE; + } + /* + * ICU 2.4 had a further optimization: + * If both strings were not in FCD, then they were both NFD'ed, + * and the _COMPARE_EQUIV option was turned off. + * It is not entirely clear that this is valid with the current + * definition of the canonical caseless match. + * Therefore, ICU 2.6 removes that optimization. 
+ */ + if(spanQCYes<str.length()) { + UnicodeString unnormalized=str.tempSubString(spanQCYes); + normalized.setTo(FALSE, str.getBuffer(), spanQCYes); + n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + return TRUE; + } + } + return FALSE; +} + U_CAPI int32_t U_EXPORT2 unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, @@ -576,7 +599,7 @@ unorm_compare(const UChar *s1, int32_t length1, if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { const Normalizer2 *n2; if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) { - n2=Normalizer2Factory::getNFDInstance(*pErrorCode); + n2=Normalizer2::getNFDInstance(*pErrorCode); } else { n2=Normalizer2Factory::getFCDInstance(*pErrorCode); } @@ -584,48 +607,26 @@ unorm_compare(const UChar *s1, int32_t length1, return 0; } - // check if s1 and/or s2 fulfill the FCD conditions - const UnicodeSet *uni32; if(normOptions&UNORM_UNICODE_3_2) { - uni32=uniset_getUnicode32Instance(*pErrorCode); + const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); + FilteredNormalizer2 fn2(*n2, *uni32); + if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } } else { - uni32=NULL; // unused - } - FilteredNormalizer2 fn2(*n2, *uni32); - if(normOptions&UNORM_UNICODE_3_2) { - n2=&fn2; - } - - UnicodeString str1(length1<0, s1, length1); - UnicodeString str2(length2<0, s2, length2); - int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode); - int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* - * ICU 2.4 had a further optimization: - * If both strings were not in FCD, then they were both NFD'ed, - * and the _COMPARE_EQUIV option was turned off. - * It is not entirely clear that this is valid with the current - * definition of the canonical caseless match. - * Therefore, ICU 2.6 removes that optimization. 
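- */ - - if(spanQCYes1<str1.length()) { - UnicodeString unnormalized=str1.tempSubString(spanQCYes1); - fcd1.setTo(FALSE, str1.getBuffer(), spanQCYes1); - n2->normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode); - s1=fcd1.getBuffer(); - length1=fcd1.length(); - } - if(spanQCYes2<str2.length()) { - UnicodeString unnormalized=str2.tempSubString(spanQCYes2); - fcd2.setTo(FALSE, str2.getBuffer(), spanQCYes2); - n2->normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode); - s2=fcd2.getBuffer(); - length2=fcd2.length(); + if(_normalize(n2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(n2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } } }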