X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..f59164e3d128c7675a4d3934206346a3384e53a5:/icuSources/common/ustrcase.cpp?ds=sidebyside diff --git a/icuSources/common/ustrcase.cpp b/icuSources/common/ustrcase.cpp index fce05c8e..13f148df 100644 --- a/icuSources/common/ustrcase.cpp +++ b/icuSources/common/ustrcase.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2011, International Business Machines +* Copyright (C) 2001-2015, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -28,8 +28,7 @@ #include "cmemory.h" #include "ucase.h" #include "ustr_imp.h" - -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#include "uassert.h" U_NAMESPACE_USE @@ -46,24 +45,27 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, if(result<0) { /* (not) original code point */ c=~result; - length=-1; + length=U16_LENGTH(c); } else if(result<=UCASE_MAX_STRING_LENGTH) { c=U_SENTINEL; length=result; } else { c=result; - length=-1; + length=U16_LENGTH(c); + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow } if(destIndex=0) { /* code point */ UBool isError=FALSE; U16_APPEND(dest, destIndex, destCapacity, c, isError); if(isError) { /* overflow, nothing written */ - destIndex+=U16_LENGTH(c); + destIndex+=length; } } else { /* string */ @@ -79,15 +81,21 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, } } else { /* preflight */ - if(length<0) { - destIndex+=U16_LENGTH(c); - } else { - destIndex+=length; - } + destIndex+=length; } return destIndex; } +static inline int32_t +appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { + if(destIndex0) { if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); + u_memcpy(dest+destIndex, src+prev, length); } destIndex+=length; } @@ -249,15 +261,22 @@ ustrcase_internalToTitle(const UCaseMap *csm, csc.cpLimit=titleLimit; c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } /* Special case Dutch IJ titlecasing */ - if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && - ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && - ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { - c=(UChar32) 0x004A; - destIndex=appendResult(dest, destIndex, destCapacity, c, s); - titleLimit++; + if (titleStart+1 < idx && + ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && + (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + titleLimit++; } /* lowercase [titleLimit..index[ */ @@ -271,11 +290,18 @@ ustrcase_internalToTitle(const UCaseMap *csm, src, &csc, titleLimit, idx, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return destIndex; + } } else { /* Optionally just copy the rest of the word unchanged. */ length=idx-titleLimit; + if(length>(INT32_MAX-destIndex)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } if((destIndex+length)<=destCapacity) { - uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); + u_memcpy(dest+destIndex, src+titleLimit, length); } destIndex+=length; } @@ -347,6 +373,10 @@ ustr_foldCase(const UCaseProps *csp, dest[destIndex++]=(UChar)c2; } else { destIndex=appendResult(dest, destIndex, destCapacity, c, s); + if(destIndex<0) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } } @@ -399,7 +429,7 @@ ustrcase_map(const UCaseMap *csm, (dest>=src && dest<(src+srcLength))) ) { /* overlap: provide a temporary destination buffer and later copy the result */ - if(destCapacity<=LENGTHOF(buffer)) { + if(destCapacity<=UPRV_LENGTHOF(buffer)) { /* the stack buffer is large enough */ temp=buffer; } else { @@ -420,7 +450,7 @@ ustrcase_map(const UCaseMap *csm, if(destLength>0) { int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; if(copyLength>0) { - uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); + u_memmove(dest, temp, copyLength); } } if(temp!=buffer) { @@ -465,17 +495,39 @@ struct CmpEquivLevel { }; typedef struct CmpEquivLevel CmpEquivLevel; -/* internal function */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { +/** + * Internal implementation code comparing string with case fold. + * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch(). + * + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + * @return The result of comparison + */ +static int32_t _cmpFold( + const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + int32_t cmpRes = 0; + const UCaseProps *csp; /* current-level start/limit - s1/s2 as current */ const UChar *start1, *start2, *limit1, *limit2; + /* points to the original start address */ + const UChar *org1, *org2; + + /* points to the end of match + 1 */ + const UChar *m1, *m2; + /* case folding variables */ const UChar *p; int32_t length; @@ -504,14 +556,20 @@ u_strcmpFold(const UChar *s1, int32_t length1, } /* initialize */ - start1=s1; + if(matchLen1) { + U_ASSERT(matchLen2 !=NULL); + *matchLen1=0; + *matchLen2=0; + } + + start1=m1=org1=s1; if(length1==-1) { limit1=NULL; } else { limit1=s1+length1; } - start2=s2; + start2=m2=org2=s2; if(length2==-1) { limit2=NULL; } else { @@ -579,15 +637,59 @@ u_strcmpFold(const UChar *s1, int32_t length1, * either variable c1, c2 is -1 only if the corresponding string is finished */ if(c1==c2) { + const UChar *next1, *next2; + if(c1<0) { - return 0; /* c1==c2==-1 indicating end of strings */ + cmpRes=0; /* c1==c2==-1 indicating end of strings */ + break; + } + + /* + * Note: Move the match positions in both strings at the same time + * only when corresponding code point(s) in the original strings + * are fully consumed. For example, when comparing s1="Fust" and + * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches + * the first code point in the case-folded data. But the second "s" + * has no matching code point in s1, so this implementation returns + * 2 as the prefix match length ("Fu"). + */ + next1=next2=NULL; + if(level1==0) { + next1=s1; + } else if(s1==limit1) { + /* Note: This implementation only use a single level of stack. + * If this code needs to be changed to use multiple levels + * of stacks, the code above should check if the current + * code is at the end of all stacks. + */ + U_ASSERT(level1==1); + + /* is s1 at the end of the current stack? */ + next1=stack1[0].s; + } + + if (next1!=NULL) { + if(level2==0) { + next2=s2; + } else if(s2==limit2) { + U_ASSERT(level2==1); + + /* is s2 at the end of the current stack? */ + next2=stack2[0].s; + } + if(next2!=NULL) { + m1=next1; + m2=next2; + } } c1=c2=-1; /* make us fetch new code units */ continue; } else if(c1<0) { - return -1; /* string 1 ends before string 2 */ + cmpRes=-1; /* string 1 ends before string 2 */ + break; } else if(c2<0) { - return 1; /* string 2 ends before string 1 */ + cmpRes=1; /* string 2 ends before string 1 */ + break; } /* c1!=c2 && c1>=0 && c2>=0 */ @@ -646,6 +748,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, * the decomposition would replace the entire code point */ --s2; + --m2; c2=*(s2-1); } } @@ -691,6 +794,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, * the decomposition would replace the entire code point */ --s1; + --m2; c1=*(s1-1); } } @@ -759,8 +863,24 @@ u_strcmpFold(const UChar *s1, int32_t length1, } } - return c1-c2; + cmpRes=c1-c2; + break; } + + if(matchLen1) { + *matchLen1=m1-org1; + *matchLen2=m2-org2; + } + return cmpRes; +} + +/* internal function */ +U_CFUNC int32_t +u_strcmpFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); } /* public API functions */ @@ -806,3 +926,14 @@ u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), &errorCode); } + +/* internal API - detect length of shared prefix */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + _cmpFold(s1, length1, s2, length2, options, + matchLen1, matchLen2, pErrorCode); +}