X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..a01113dcd0f39d5da295ef82785beff9ed86fe38:/icuSources/common/unistr_case.cpp diff --git a/icuSources/common/unistr_case.cpp b/icuSources/common/unistr_case.cpp index dd4a0819..2138d60c 100644 --- a/icuSources/common/unistr_case.cpp +++ b/icuSources/common/unistr_case.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 1999-2004, International Business Machines +* Copyright (C) 1999-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: unistr_case.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * @@ -17,16 +19,18 @@ */ #include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" #include "unicode/putil.h" -#include "unicode/locid.h" #include "cstring.h" #include "cmemory.h" #include "unicode/ustring.h" #include "unicode/unistr.h" #include "unicode/uchar.h" -#include "unicode/ubrk.h" -#include "ustr_imp.h" -#include "unormimp.h" +#include "uassert.h" +#include "ucasemap_imp.h" +#include "uelement.h" U_NAMESPACE_BEGIN @@ -59,7 +63,9 @@ UnicodeString::doCaseCompare(int32_t start, const UChar *chars = getArrayStart(); chars += start; - srcChars += srcStart; + if(srcStart!=0) { + srcChars += srcStart; + } if(chars != srcChars) { UErrorCode errorCode=U_ZERO_ERROR; @@ -84,161 +90,161 @@ UnicodeString::doCaseCompare(int32_t start, // Write implementation //======================================== -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. - */ -enum { - TO_LOWER, - TO_UPPER, - TO_TITLE, - FOLD_CASE -}; - UnicodeString & -UnicodeString::caseMap(BreakIterator *titleIter, - const char *locale, - uint32_t options, - int32_t toWhichCase) { - if(fLength <= 0) { +UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UStringCaseMapper *stringCaseMapper) { + if(isEmpty() || !isWritable()) { // nothing to do return *this; } - UErrorCode errorCode; + UChar oldBuffer[2 * US_STACKBUF_SIZE]; + UChar *oldArray; + int32_t oldLength = length(); + int32_t newLength; + UBool writable = isBufferWritable(); + UErrorCode errorCode = U_ZERO_ERROR; - errorCode = U_ZERO_ERROR; - UCaseProps *csp=ucase_getSingleton(&errorCode); - if(U_FAILURE(errorCode)) { - setToBogus(); - return *this; - } - - // We need to allocate a new buffer for the internal string case mapping function. - // This is very similar to how doReplace() below keeps the old array pointer - // and deletes the old array itself after it is done. - // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. - UChar *oldArray = fArray; - int32_t oldLength = fLength; - int32_t *bufferToDelete = 0; +#if !UCONFIG_NO_BREAK_ITERATION + // Read-only alias to the original string contents for the titlecasing BreakIterator. + // We cannot set the iterator simply to *this because *this is being modified. + UnicodeString oldString; +#endif - // Make sure that if the string is in fStackBuffer we do not overwrite it! - int32_t capacity; - if(fLength <= US_STACKBUF_SIZE) { - if(fArray == fStackBuffer) { - capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer + // Try to avoid heap-allocating a new character array for this string. + if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) { + // Short string: Copy the contents into a temporary buffer and + // case-map back into the current array, or into the stack buffer. + UChar *buffer = getArrayStart(); + int32_t capacity; + oldArray = oldBuffer; + u_memcpy(oldBuffer, buffer, oldLength); + if (writable) { + capacity = getCapacity(); } else { + // Switch from the read-only alias or shared heap buffer to the stack buffer. + if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) { + return *this; + } + U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer); + buffer = fUnion.fStackFields.fBuffer; capacity = US_STACKBUF_SIZE; } - } else { - capacity = fLength + 20; - } - if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { - return *this; - } - #if !UCONFIG_NO_BREAK_ITERATION - // set up the titlecasing break iterator - UBreakIterator *cTitleIter = 0; - - if(toWhichCase == TO_TITLE) { - errorCode = U_ZERO_ERROR; - if(titleIter != 0) { - cTitleIter = (UBreakIterator *)titleIter; - ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode); - } else { - cTitleIter = ubrk_open(UBRK_WORD, locale, - oldArray, oldLength, - &errorCode); + if (iter != nullptr) { + oldString.setTo(FALSE, oldArray, oldLength); + iter->setText(oldString); } - if(U_FAILURE(errorCode)) { - uprv_free(bufferToDelete); +#endif + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + buffer, capacity, + oldArray, oldLength, NULL, errorCode); + if (U_SUCCESS(errorCode)) { + setLength(newLength); + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + } else { setToBogus(); return *this; } - } -#endif - - // Case-map, and if the result is too long, then reallocate and repeat. - do { - errorCode = U_ZERO_ERROR; - if(toWhichCase==TO_LOWER) { - fLength = ustr_toLower(csp, fArray, fCapacity, - oldArray, oldLength, - locale, &errorCode); - } else if(toWhichCase==TO_UPPER) { - fLength = ustr_toUpper(csp, fArray, fCapacity, - oldArray, oldLength, - locale, &errorCode); - } else if(toWhichCase==TO_TITLE) { -#if UCONFIG_NO_BREAK_ITERATION - errorCode=U_UNSUPPORTED_ERROR; -#else - fLength = ustr_toTitle(csp, fArray, fCapacity, - oldArray, oldLength, - cTitleIter, locale, &errorCode); + } else { + // Longer string or read-only buffer: + // Collect only changes and then apply them to this string. + // Case mapping often changes only small parts of a string, + // and often does not change its length. + oldArray = getArrayStart(); + Edits edits; + UChar replacementChars[200]; +#if !UCONFIG_NO_BREAK_ITERATION + if (iter != nullptr) { + oldString.setTo(FALSE, oldArray, oldLength); + iter->setText(oldString); + } #endif + stringCaseMapper(caseLocale, options | U_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR + replacementChars, UPRV_LENGTHOF(replacementChars), + oldArray, oldLength, &edits, errorCode); + if (U_SUCCESS(errorCode)) { + // Grow the buffer at most once, not for multiple doReplace() calls. + newLength = oldLength + edits.lengthDelta(); + if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { + return *this; + } + for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { + doReplace(ei.destinationIndex(), ei.oldLength(), + replacementChars, ei.replacementIndex(), ei.newLength()); + } + if (U_FAILURE(errorCode)) { + setToBogus(); + } + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + newLength = oldLength + edits.lengthDelta(); } else { - fLength = ustr_foldCase(csp, fArray, fCapacity, - oldArray, oldLength, - options, - &errorCode); + setToBogus(); + return *this; } - } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE)); - -#if !UCONFIG_NO_BREAK_ITERATION - if(cTitleIter != 0 && titleIter == 0) { - ubrk_close(cTitleIter); } -#endif + // Handle buffer overflow, newLength is known. + // We need to allocate a new buffer for the internal string case mapping function. + // This is very similar to how doReplace() keeps the old array pointer + // and deletes the old array itself after it is done. + // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. + int32_t *bufferToDelete = 0; + if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) { + return *this; + } + errorCode = U_ZERO_ERROR; + // No need to iter->setText() again: The case mapper restarts via iter->first(). + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + getArrayStart(), getCapacity(), + oldArray, oldLength, NULL, errorCode); if (bufferToDelete) { uprv_free(bufferToDelete); } - if(U_FAILURE(errorCode)) { + if (U_SUCCESS(errorCode)) { + setLength(newLength); + } else { setToBogus(); } return *this; } UnicodeString & -UnicodeString::toLower() { - return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toLower(const Locale &locale) { - return caseMap(0, locale.getName(), 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toUpper() { - return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER); -} - -UnicodeString & -UnicodeString::toUpper(const Locale &locale) { - return caseMap(0, locale.getName(), 0, TO_UPPER); +UnicodeString::foldCase(uint32_t options) { + return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); } -#if !UCONFIG_NO_BREAK_ITERATION - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter) { - return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); -} +U_NAMESPACE_END -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { - return caseMap(titleIter, locale.getName(), 0, TO_TITLE); +// Defined here to reduce dependencies on break iterator +U_CAPI int32_t U_EXPORT2 +uhash_hashCaselessUnicodeString(const UElement key) { + U_NAMESPACE_USE + const UnicodeString *str = (const UnicodeString*) key.pointer; + if (str == NULL) { + return 0; + } + // Inefficient; a better way would be to have a hash function in + // UnicodeString that does case folding on the fly. + UnicodeString copy(*str); + return copy.foldCase().hashCode(); } -#endif - -UnicodeString & -UnicodeString::foldCase(uint32_t options) { - /* The Locale parameter isn't used. Use "" instead. */ - return caseMap(0, "", options, FOLD_CASE); +// Defined here to reduce dependencies on break iterator +U_CAPI UBool U_EXPORT2 +uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { + U_NAMESPACE_USE + const UnicodeString *str1 = (const UnicodeString*) key1.pointer; + const UnicodeString *str2 = (const UnicodeString*) key2.pointer; + if (str1 == str2) { + return TRUE; + } + if (str1 == NULL || str2 == NULL) { + return FALSE; + } + return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; } - -U_NAMESPACE_END