icuSources/common/unistr_case.cpp

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2007, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  unistr_case.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:2
  12 *
  13 *   created on: 2004aug19
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Case-mapping functions moved here from unistr.cpp
  17 */
  18
  19 #include "unicode/utypes.h"
  20 #include "unicode/putil.h"
  21 #include "unicode/locid.h"
  22 #include "cstring.h"
  23 #include "cmemory.h"
  24 #include "unicode/ustring.h"
  25 #include "unicode/unistr.h"
  26 #include "unicode/uchar.h"
  27 #include "unicode/ubrk.h"
  28 #include "ustr_imp.h"
  29 #include "unormimp.h"
  30 #include "uhash.h"
  31
  32 U_NAMESPACE_BEGIN
  33
  34 //========================================
  35 // Read-only implementation
  36 //========================================
  37
  38 int8_t
  39 UnicodeString::doCaseCompare(int32_t start,
  40                              int32_t length,
  41                              const UChar *srcChars,
  42                              int32_t srcStart,
  43                              int32_t srcLength,
  44                              uint32_t options) const
  45 {
  46   // compare illegal string values
  47   // treat const UChar *srcChars==NULL as an empty string
  48   if(isBogus()) {
  49     return -1;
  50   }
  51
  52   // pin indices to legal values
  53   pinIndices(start, length);
  54
  55   if(srcChars == NULL) {
  56     srcStart = srcLength = 0;
  57   }
  58
  59   // get the correct pointer
  60   const UChar *chars = getArrayStart();
  61
  62   chars += start;
  63   srcChars += srcStart;
  64
  65   if(chars != srcChars) {
  66     UErrorCode errorCode=U_ZERO_ERROR;
  67     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
  68                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
  69     if(result!=0) {
  70       return (int8_t)(result >> 24 | 1);
  71     }
  72   } else {
  73     // get the srcLength if necessary
  74     if(srcLength < 0) {
  75       srcLength = u_strlen(srcChars + srcStart);
  76     }
  77     if(length != srcLength) {
  78       return (int8_t)((length - srcLength) >> 24 | 1);
  79     }
  80   }
  81   return 0;
  82 }
  83
  84 //========================================
  85 // Write implementation
  86 //========================================
  87
  88 /*
  89  * Implement argument checking and buffer handling
  90  * for string case mapping as a common function.
  91  */
  92
  93 UnicodeString &
  94 UnicodeString::caseMap(BreakIterator *titleIter,
  95                        const char *locale,
  96                        uint32_t options,
  97                        int32_t toWhichCase) {
  98   if(isEmpty() || !isWritable()) {
  99     // nothing to do
 100     return *this;
 101   }
 102
 103   UErrorCode errorCode;
 104
 105   errorCode = U_ZERO_ERROR;
 106   const UCaseProps *csp=ucase_getSingleton(&errorCode);
 107   if(U_FAILURE(errorCode)) {
 108     setToBogus();
 109     return *this;
 110   }
 111
 112   // We need to allocate a new buffer for the internal string case mapping function.
 113   // This is very similar to how doReplace() keeps the old array pointer
 114   // and deletes the old array itself after it is done.
 115   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
 116   UChar oldStackBuffer[US_STACKBUF_SIZE];
 117   UChar *oldArray;
 118   int32_t oldLength;
 119
 120   if(fFlags&kUsingStackBuffer) {
 121     // copy the stack buffer contents because it will be overwritten
 122     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
 123     oldArray = oldStackBuffer;
 124     oldLength = fShortLength;
 125   } else {
 126     oldArray = getArrayStart();
 127     oldLength = length();
 128   }
 129
 130   int32_t capacity;
 131   if(oldLength <= US_STACKBUF_SIZE) {
 132     capacity = US_STACKBUF_SIZE;
 133   } else {
 134     capacity = oldLength + 20;
 135   }
 136   int32_t *bufferToDelete = 0;
 137   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
 138     return *this;
 139   }
 140
 141   // Case-map, and if the result is too long, then reallocate and repeat.
 142   int32_t newLength;
 143   do {
 144     errorCode = U_ZERO_ERROR;
 145     if(toWhichCase==TO_LOWER) {
 146       newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
 147                                oldArray, oldLength,
 148                                locale, &errorCode);
 149     } else if(toWhichCase==TO_UPPER) {
 150       newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
 151                                oldArray, oldLength,
 152                                locale, &errorCode);
 153     } else if(toWhichCase==TO_TITLE) {
 154 #if UCONFIG_NO_BREAK_ITERATION
 155         errorCode=U_UNSUPPORTED_ERROR;
 156 #else
 157       newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
 158                                oldArray, oldLength,
 159                                (UBreakIterator *)titleIter, locale, options, &errorCode);
 160 #endif
 161     } else {
 162       newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
 163                                 oldArray, oldLength,
 164                                 options,
 165                                 &errorCode);
 166     }
 167     setLength(newLength);
 168   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
 169
 170   if (bufferToDelete) {
 171     uprv_free(bufferToDelete);
 172   }
 173   if(U_FAILURE(errorCode)) {
 174     setToBogus();
 175   }
 176   return *this;
 177 }
 178
 179 UnicodeString &
 180 UnicodeString::toLower() {
 181   return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
 182 }
 183
 184 UnicodeString &
 185 UnicodeString::toLower(const Locale &locale) {
 186   return caseMap(0, locale.getName(), 0, TO_LOWER);
 187 }
 188
 189 UnicodeString &
 190 UnicodeString::toUpper() {
 191   return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
 192 }
 193
 194 UnicodeString &
 195 UnicodeString::toUpper(const Locale &locale) {
 196   return caseMap(0, locale.getName(), 0, TO_UPPER);
 197 }
 198
 199 #if !UCONFIG_NO_BREAK_ITERATION
 200
 201 UnicodeString &
 202 UnicodeString::toTitle(BreakIterator *titleIter) {
 203   return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
 204 }
 205
 206 UnicodeString &
 207 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
 208   return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
 209 }
 210
 211 UnicodeString &
 212 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
 213   return caseMap(titleIter, locale.getName(), options, TO_TITLE);
 214 }
 215
 216 #endif
 217
 218 UnicodeString &
 219 UnicodeString::foldCase(uint32_t options) {
 220     /* The Locale parameter isn't used. Use "" instead. */
 221     return caseMap(0, "", options, FOLD_CASE);
 222 }
 223
 224 U_NAMESPACE_END
 225
 226 // Defined here to reduce dependencies on break iterator
 227 U_CAPI int32_t U_EXPORT2
 228 uhash_hashCaselessUnicodeString(const UHashTok key) {
 229     U_NAMESPACE_USE
 230     const UnicodeString *str = (const UnicodeString*) key.pointer;
 231     if (str == NULL) {
 232         return 0;
 233     }
 234     // Inefficient; a better way would be to have a hash function in
 235     // UnicodeString that does case folding on the fly.
 236     UnicodeString copy(*str);
 237     return copy.foldCase().hashCode();
 238 }
 239
 240 // Defined here to reduce dependencies on break iterator
 241 U_CAPI UBool U_EXPORT2
 242 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
 243     U_NAMESPACE_USE
 244     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
 245     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
 246     if (str1 == str2) {
 247         return TRUE;
 248     }
 249     if (str1 == NULL || str2 == NULL) {
 250         return FALSE;
 251     }
 252     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
 253 }
 254