X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b25be06635768807f8f693286fa73bb2297bb06c..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/common/ustrcase.c diff --git a/icuSources/common/ustrcase.c b/icuSources/common/ustrcase.c deleted file mode 100644 index 4b62fb95..00000000 --- a/icuSources/common/ustrcase.c +++ /dev/null @@ -1,987 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2001-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ustrcase.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb20 -* created by: Markus W. Scherer -* -* Implementation file for string casing C API functions. -* Uses functions from uchar.c for basic functionality that requires access -* to the Unicode Character Database (uprops.dat). -*/ - -#include "unicode/utypes.h" -#include "unicode/uloc.h" -#include "unicode/ustring.h" -#include "unicode/ucasemap.h" -#include "unicode/ubrk.h" -#include "cmemory.h" -#include "ucase.h" -#include "ustr_imp.h" - -/* string casing ------------------------------------------------------------ */ - -/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ -static U_INLINE int32_t -appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, - int32_t result, const UChar *s) { - UChar32 c; - int32_t length; - - /* decode the result */ - if(result<0) { - /* (not) original code point */ - c=~result; - length=-1; - } else if(result<=UCASE_MAX_STRING_LENGTH) { - c=U_SENTINEL; - length=result; - } else { - c=result; - length=-1; - } - - if(destIndex0) { - dest[destIndex++]=*s++; - --length; - } - } else { - /* overflow */ - destIndex+=length; - } - } - } else { - /* preflight */ - if(length<0) { - destIndex+=U16_LENGTH(c); - } else { - destIndex+=length; - } - } - return destIndex; -} - -static UChar32 U_CALLCONV -utf16_caseContextIterator(void *context, int8_t dir) { - UCaseContext *csc=(UCaseContext *)context; - UChar32 c; - - if(dir<0) { - /* reset for backward iteration */ - csc->index=csc->cpStart; - csc->dir=dir; - } else if(dir>0) { - /* reset for forward iteration */ - csc->index=csc->cpLimit; - csc->dir=dir; - } else { - /* continue current iteration direction */ - dir=csc->dir; - } - - if(dir<0) { - if(csc->startindex) { - U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); - return c; - } - } else { - if(csc->indexlimit) { - U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); - return c; - } - } - return U_SENTINEL; -} - -/* - * Case-maps [srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. - */ -static int32_t -_caseMap(const UCaseMap *csm, UCaseMapFull *map, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - UErrorCode *pErrorCode) { - const UChar *s; - UChar32 c, c2 = 0; - int32_t srcIndex, destIndex; - int32_t locCache; - - locCache=csm->locCache; - - /* case mapping loop */ - srcIndex=srcStart; - destIndex=0; - while(srcIndexcpStart=srcIndex; - U16_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; - c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); - if((destIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; -} - -static void -setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - /* - * We could call ucasemap_setLocale(), but here we really only care about - * the initial language subtag, we need not return the real string via - * ucasemap_getLocale(), and we don't care about only getting "x" from - * "x-some-thing" etc. - * - * We ignore locales with a longer-than-3 initial subtag. - * - * We also do not fill in the locCache because it is rarely used, - * and not worth setting unless we reuse it for many case mapping operations. - * (That's why UCaseMap was created.) - */ - int i; - char c; - - /* the internal functions require locale!=NULL */ - if(locale==NULL) { - locale=uloc_getDefault(); - } - for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { - csm->locale[i]=c; - } - if(i<=3) { - csm->locale[i]=0; /* Up to 3 non-separator characters. */ - } else { - csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ - } -} - -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. - */ -static U_INLINE void -setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - setTempCaseMapLocale(csm, locale, pErrorCode); - } -} - -#if !UCONFIG_NO_BREAK_ITERATION - -/* - * Internal titlecasing function. - */ -static int32_t -_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *s; - UChar32 c; - int32_t prev, titleStart, titleLimit, idx, destIndex, length; - UBool isFirstIndex; - - if(csm->iter!=NULL) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - } else { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, - src, srcLength, - pErrorCode); - } - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* set up local variables */ - destIndex=0; - prev=0; - isFirstIndex=TRUE; - - /* titlecasing loop */ - while(previter); - } else { - idx=ubrk_next(csm->iter); - } - if(idx==UBRK_DONE || idx>srcLength) { - idx=srcLength; - } - - /* - * Unicode 4 & 5 section 3.13 Default Case Operations: - * - * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex - * #29, "Text Boundaries." Between each pair of word boundaries, find the first - * cased character F. If F exists, map F to default_title(F); then map each - * subsequent character C to default_lower(C). - * - * In this implementation, segment [prev..index[ into 3 parts: - * a) uncased characters (copy as-is) [prev..titleStart[ - * b) first case letter (titlecase) [titleStart..titleLimit[ - * c) subsequent characters (lowercase) [titleLimit..index[ - */ - if(prevoptions&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { - /* Adjust the titlecasing index (titleStart) to the next cased character. */ - for(;;) { - titleStart=titleLimit; - if(titleLimit==idx) { - /* - * only uncased characters in [prev..index[ - * stop with titleStart==titleLimit==index - */ - break; - } - U16_NEXT(src, titleLimit, idx, c); - if(UCASE_NONE!=ucase_getType(csm->csp, c)) { - break; /* cased letter at [titleStart..titleLimit[ */ - } - } - length=titleStart-prev; - if(length>0) { - if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); - } - destIndex+=length; - } - } - - if(titleStartcpStart=titleStart; - csc->cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); - destIndex=appendResult(dest, destIndex, destCapacity, c, s); - - /* Special case Dutch IJ titlecasing */ - if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && - ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && - ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { - c=(UChar32) 0x004A; - destIndex=appendResult(dest, destIndex, destCapacity, c, s); - titleLimit++; - } - - /* lowercase [titleLimit..index[ */ - if(titleLimitoptions&U_TITLECASE_NO_LOWERCASE)==0) { - /* Normal operation: Lowercase the rest of the word. */ - destIndex+= - _caseMap( - csm, ucase_toFullLower, - dest+destIndex, destCapacity-destIndex, - src, csc, - titleLimit, idx, - pErrorCode); - } else { - /* Optionally just copy the rest of the word unchanged. */ - length=idx-titleLimit; - if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); - } - destIndex+=length; - } - } - } - } - - prev=idx; - } - - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; -} - -#endif - -/* functions available in the common library (for unistr_case.cpp) */ - -U_CFUNC int32_t -ustr_toLower(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - UCaseContext csc={ NULL }; - - csm.csp=csp; - setTempCaseMap(&csm, locale, pErrorCode); - csc.p=(void *)src; - csc.limit=srcLength; - - return _caseMap(&csm, ucase_toFullLower, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); -} - -U_CFUNC int32_t -ustr_toUpper(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - UCaseContext csc={ NULL }; - - csm.csp=csp; - setTempCaseMap(&csm, locale, pErrorCode); - csc.p=(void *)src; - csc.limit=srcLength; - - return _caseMap(&csm, ucase_toFullUpper, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -U_CFUNC int32_t -ustr_toTitle(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, uint32_t options, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - UCaseContext csc={ NULL }; - int32_t length; - - csm.csp=csp; - csm.iter=titleIter; - csm.options=options; - setTempCaseMap(&csm, locale, pErrorCode); - csc.p=(void *)src; - csc.limit=srcLength; - - length=_toTitle(&csm, - dest, destCapacity, - src, &csc, srcLength, - pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); - } - return length; -} - -#endif - -U_CFUNC int32_t -ustr_foldCase(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { - int32_t srcIndex, destIndex; - - const UChar *s; - UChar32 c, c2 = 0; - - /* case mapping loop */ - srcIndex=destIndex=0; - while(srcIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; -} - -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. - */ - -/* common internal function for public API functions */ - -static int32_t -caseMap(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - int32_t toWhichCase, - UErrorCode *pErrorCode) { - UChar buffer[300]; - UChar *temp; - - int32_t destLength; - - /* check argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if( destCapacity<0 || - (dest==NULL && destCapacity>0) || - src==NULL || - srcLength<-1 - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* get the string length */ - if(srcLength==-1) { - srcLength=u_strlen(src); - } - - /* check for overlapping source and destination */ - if( dest!=NULL && - ((src>=dest && src<(dest+destCapacity)) || - (dest>=src && dest<(src+srcLength))) - ) { - /* overlap: provide a temporary destination buffer and later copy the result */ - if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) { - /* the stack buffer is large enough */ - temp=buffer; - } else { - /* allocate a buffer */ - temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); - if(temp==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - } - } else { - temp=dest; - } - - destLength=0; - - if(toWhichCase==FOLD_CASE) { - destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength, - csm->options, pErrorCode); - } else { - UCaseContext csc={ NULL }; - - csc.p=(void *)src; - csc.limit=srcLength; - - if(toWhichCase==TO_LOWER) { - destLength=_caseMap(csm, ucase_toFullLower, - temp, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else if(toWhichCase==TO_UPPER) { - destLength=_caseMap(csm, ucase_toFullUpper, - temp, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else /* if(toWhichCase==TO_TITLE) */ { -#if UCONFIG_NO_BREAK_ITERATION - *pErrorCode=U_UNSUPPORTED_ERROR; -#else - /* UCaseMap is actually non-const in toTitle() APIs. */ - destLength=_toTitle((UCaseMap *)csm, temp, destCapacity, - src, &csc, srcLength, - pErrorCode); -#endif - } - } - if(temp!=dest) { - /* copy the result string to the destination buffer */ - if(destLength>0) { - int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; - if(copyLength>0) { - uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); - } - } - if(temp!=buffer) { - uprv_free(temp); - } - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); -} - -/* public API functions */ - -U_CAPI int32_t U_EXPORT2 -u_strToLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - setTempCaseMap(&csm, locale, pErrorCode); - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_LOWER, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_strToUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - setTempCaseMap(&csm, locale, pErrorCode); - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_UPPER, pErrorCode); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -U_CAPI int32_t U_EXPORT2 -u_strToTitle(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - int32_t length; - - csm.iter=titleIter; - setTempCaseMap(&csm, locale, pErrorCode); - length=caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_TITLE, pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); - } - return length; -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return caseMap(csm, - dest, destCapacity, - src, srcLength, - TO_TITLE, pErrorCode); -} - -#endif - -U_CAPI int32_t U_EXPORT2 -u_strFoldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { - UCaseMap csm={ NULL }; - csm.csp=ucase_getSingleton(); - csm.options=options; - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - FOLD_CASE, pErrorCode); -} - -/* case-insensitive string comparisons -------------------------------------- */ - -/* - * This function is a copy of unorm_cmpEquivFold() minus the parts for - * canonical equivalence. - * Keep the functions in sync, and see there for how this works. - * The duplication is for modularization: - * It makes caseless (but not canonical caseless) matches independent of - * the normalization code. - */ - -/* stack element for previous-level source/decomposition pointers */ -struct CmpEquivLevel { - const UChar *start, *s, *limit; -}; -typedef struct CmpEquivLevel CmpEquivLevel; - -/* internal function */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { - const UCaseProps *csp; - - /* current-level start/limit - s1/s2 as current */ - const UChar *start1, *start2, *limit1, *limit2; - - /* case folding variables */ - const UChar *p; - int32_t length; - - /* stacks of previous-level start/current/limit */ - CmpEquivLevel stack1[2], stack2[2]; - - /* case folding buffers, only use current-level start/limit */ - UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; - - /* track which is the current level per string */ - int32_t level1, level2; - - /* current code units, and code points for lookups */ - UChar32 c1, c2, cp1, cp2; - - /* no argument error checking because this itself is not an API */ - - /* - * assume that at least the option U_COMPARE_IGNORE_CASE is set - * otherwise this function would have to behave exactly as uprv_strCompare() - */ - csp=ucase_getSingleton(); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* initialize */ - start1=s1; - if(length1==-1) { - limit1=NULL; - } else { - limit1=s1+length1; - } - - start2=s2; - if(length2==-1) { - limit2=NULL; - } else { - limit2=s2+length2; - } - - level1=level2=0; - c1=c2=-1; - - /* comparison loop */ - for(;;) { - /* - * here a code unit value of -1 means "get another code unit" - * below it will mean "this source is finished" - */ - - if(c1<0) { - /* get next code unit from string 1, post-increment */ - for(;;) { - if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { - if(level1==0) { - c1=-1; - break; - } - } else { - ++s1; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level1; - start1=stack1[level1].start; - } while(start1==NULL); - s1=stack1[level1].s; - limit1=stack1[level1].limit; - } - } - - if(c2<0) { - /* get next code unit from string 2, post-increment */ - for(;;) { - if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { - if(level2==0) { - c2=-1; - break; - } - } else { - ++s2; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level2; - start2=stack2[level2].start; - } while(start2==NULL); - s2=stack2[level2].s; - limit2=stack2[level2].limit; - } - } - - /* - * compare c1 and c2 - * either variable c1, c2 is -1 only if the corresponding string is finished - */ - if(c1==c2) { - if(c1<0) { - return 0; /* c1==c2==-1 indicating end of strings */ - } - c1=c2=-1; /* make us fetch new code units */ - continue; - } else if(c1<0) { - return -1; /* string 1 ends before string 2 */ - } else if(c2<0) { - return 1; /* string 2 ends before string 1 */ - } - /* c1!=c2 && c1>=0 && c2>=0 */ - - /* get complete code points for c1, c2 for lookups if either is a surrogate */ - cp1=c1; - if(U_IS_SURROGATE(c1)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c1)) { - if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { - /* advance ++s1; only below if cp1 decomposes/case-folds */ - cp1=U16_GET_SUPPLEMENTARY(c1, c); - } - } else /* isTrail(c1) */ { - if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { - cp1=U16_GET_SUPPLEMENTARY(c, c1); - } - } - } - - cp2=c2; - if(U_IS_SURROGATE(c2)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c2)) { - if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { - /* advance ++s2; only below if cp2 decomposes/case-folds */ - cp2=U16_GET_SUPPLEMENTARY(c2, c); - } - } else /* isTrail(c2) */ { - if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { - cp2=U16_GET_SUPPLEMENTARY(c, c2); - } - } - } - - /* - * go down one level for each string - * continue with the main loop as soon as there is a real change - */ - - if( level1==0 && - (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 - ) { - /* cp1 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c1)) { - if(U_IS_SURROGATE_LEAD(c1)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s1; - } else /* isTrail(c1) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s2; - c2=*(s2-1); - } - } - - /* push current level pointers */ - stack1[0].start=start1; - stack1[0].s=s1; - stack1[0].limit=limit1; - ++level1; - - /* copy the folding result to fold1[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold1, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold1, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start1=s1=fold1; - limit1=fold1+length; - - /* get ready to read from decomposition, continue with loop */ - c1=-1; - continue; - } - - if( level2==0 && - (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 - ) { - /* cp2 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c2)) { - if(U_IS_SURROGATE_LEAD(c2)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s2; - } else /* isTrail(c2) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s1; - c1=*(s1-1); - } - } - - /* push current level pointers */ - stack2[0].start=start2; - stack2[0].s=s2; - stack2[0].limit=limit2; - ++level2; - - /* copy the folding result to fold2[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold2, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold2, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start2=s2=fold2; - limit2=fold2+length; - - /* get ready to read from decomposition, continue with loop */ - c2=-1; - continue; - } - - /* - * no decomposition/case folding, max level for both sides: - * return difference result - * - * code point order comparison must not just return cp1-cp2 - * because when single surrogates are present then the surrogate pairs - * that formed cp1 and cp2 may be from different string indexes - * - * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units - * c1=d800 cp1=10001 c2=dc00 cp2=10000 - * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } - * - * therefore, use same fix-up as in ustring.c/uprv_strCompare() - * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ - * so we have slightly different pointer/start/limit comparisons here - */ - - if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { - /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ - if( - (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || - (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code point - may be surrogate code point - make =d800 */ - } else { - /* BMP code point - may be surrogate code point - make