icuSources/common/unistr_case.cpp

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2005, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  unistr_case.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:2
  12 *
  13 *   created on: 2004aug19
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Case-mapping functions moved here from unistr.cpp
  17 */
  18
  19 #include "unicode/utypes.h"
  20 #include "unicode/putil.h"
  21 #include "unicode/locid.h"
  22 #include "cstring.h"
  23 #include "cmemory.h"
  24 #include "unicode/ustring.h"
  25 #include "unicode/unistr.h"
  26 #include "unicode/uchar.h"
  27 #include "unicode/ubrk.h"
  28 #include "ustr_imp.h"
  29 #include "unormimp.h"
  30 #include "uhash.h"
  31
  32 U_NAMESPACE_BEGIN
  33
  34 //========================================
  35 // Read-only implementation
  36 //========================================
  37
  38 int8_t
  39 UnicodeString::doCaseCompare(int32_t start,
  40                              int32_t length,
  41                              const UChar *srcChars,
  42                              int32_t srcStart,
  43                              int32_t srcLength,
  44                              uint32_t options) const
  45 {
  46   // compare illegal string values
  47   // treat const UChar *srcChars==NULL as an empty string
  48   if(isBogus()) {
  49     return -1;
  50   }
  51
  52   // pin indices to legal values
  53   pinIndices(start, length);
  54
  55   if(srcChars == NULL) {
  56     srcStart = srcLength = 0;
  57   }
  58
  59   // get the correct pointer
  60   const UChar *chars = getArrayStart();
  61
  62   chars += start;
  63   srcChars += srcStart;
  64
  65   if(chars != srcChars) {
  66     UErrorCode errorCode=U_ZERO_ERROR;
  67     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
  68                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
  69     if(result!=0) {
  70       return (int8_t)(result >> 24 | 1);
  71     }
  72   } else {
  73     // get the srcLength if necessary
  74     if(srcLength < 0) {
  75       srcLength = u_strlen(srcChars + srcStart);
  76     }
  77     if(length != srcLength) {
  78       return (int8_t)((length - srcLength) >> 24 | 1);
  79     }
  80   }
  81   return 0;
  82 }
  83
  84 //========================================
  85 // Write implementation
  86 //========================================
  87
  88 /*
  89  * Implement argument checking and buffer handling
  90  * for string case mapping as a common function.
  91  */
  92 enum {
  93     TO_LOWER,
  94     TO_UPPER,
  95     TO_TITLE,
  96     FOLD_CASE
  97 };
  98
  99 UnicodeString &
 100 UnicodeString::caseMap(BreakIterator *titleIter,
 101                        const char *locale,
 102                        uint32_t options,
 103                        int32_t toWhichCase) {
 104   if(fLength <= 0) {
 105     // nothing to do
 106     return *this;
 107   }
 108
 109   UErrorCode errorCode;
 110
 111   errorCode = U_ZERO_ERROR;
 112   const UCaseProps *csp=ucase_getSingleton(&errorCode);
 113   if(U_FAILURE(errorCode)) {
 114     setToBogus();
 115     return *this;
 116   }
 117
 118   // We need to allocate a new buffer for the internal string case mapping function.
 119   // This is very similar to how doReplace() below keeps the old array pointer
 120   // and deletes the old array itself after it is done.
 121   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
 122   UChar *oldArray = fArray;
 123   int32_t oldLength = fLength;
 124   int32_t *bufferToDelete = 0;
 125
 126   // Make sure that if the string is in fStackBuffer we do not overwrite it!
 127   int32_t capacity;
 128   if(fLength <= US_STACKBUF_SIZE) {
 129     if(fArray == fStackBuffer) {
 130       capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
 131     } else {
 132       capacity = US_STACKBUF_SIZE;
 133     }
 134   } else {
 135     capacity = fLength + 20;
 136   }
 137   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
 138     return *this;
 139   }
 140
 141 #if !UCONFIG_NO_BREAK_ITERATION
 142   // set up the titlecasing break iterator
 143   UBreakIterator *cTitleIter = 0;
 144
 145   if(toWhichCase == TO_TITLE) {
 146     errorCode = U_ZERO_ERROR;
 147     if(titleIter != 0) {
 148       cTitleIter = (UBreakIterator *)titleIter;
 149       ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
 150     } else {
 151       cTitleIter = ubrk_open(UBRK_WORD, locale,
 152                              oldArray, oldLength,
 153                              &errorCode);
 154     }
 155     if(U_FAILURE(errorCode)) {
 156       uprv_free(bufferToDelete);
 157       setToBogus();
 158       return *this;
 159     }
 160   }
 161 #endif
 162
 163   // Case-map, and if the result is too long, then reallocate and repeat.
 164   do {
 165     errorCode = U_ZERO_ERROR;
 166     if(toWhichCase==TO_LOWER) {
 167       fLength = ustr_toLower(csp, fArray, fCapacity,
 168                              oldArray, oldLength,
 169                              locale, &errorCode);
 170     } else if(toWhichCase==TO_UPPER) {
 171       fLength = ustr_toUpper(csp, fArray, fCapacity,
 172                              oldArray, oldLength,
 173                              locale, &errorCode);
 174     } else if(toWhichCase==TO_TITLE) {
 175 #if UCONFIG_NO_BREAK_ITERATION
 176         errorCode=U_UNSUPPORTED_ERROR;
 177 #else
 178       fLength = ustr_toTitle(csp, fArray, fCapacity,
 179                              oldArray, oldLength,
 180                              cTitleIter, locale, &errorCode);
 181 #endif
 182     } else {
 183       fLength = ustr_foldCase(csp, fArray, fCapacity,
 184                               oldArray, oldLength,
 185                               options,
 186                               &errorCode);
 187     }
 188   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
 189
 190 #if !UCONFIG_NO_BREAK_ITERATION
 191   if(cTitleIter != 0 && titleIter == 0) {
 192     ubrk_close(cTitleIter);
 193   }
 194 #endif
 195
 196   if (bufferToDelete) {
 197     uprv_free(bufferToDelete);
 198   }
 199   if(U_FAILURE(errorCode)) {
 200     setToBogus();
 201   }
 202   return *this;
 203 }
 204
 205 UnicodeString &
 206 UnicodeString::toLower() {
 207   return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
 208 }
 209
 210 UnicodeString &
 211 UnicodeString::toLower(const Locale &locale) {
 212   return caseMap(0, locale.getName(), 0, TO_LOWER);
 213 }
 214
 215 UnicodeString &
 216 UnicodeString::toUpper() {
 217   return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
 218 }
 219
 220 UnicodeString &
 221 UnicodeString::toUpper(const Locale &locale) {
 222   return caseMap(0, locale.getName(), 0, TO_UPPER);
 223 }
 224
 225 #if !UCONFIG_NO_BREAK_ITERATION
 226
 227 UnicodeString &
 228 UnicodeString::toTitle(BreakIterator *titleIter) {
 229   return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
 230 }
 231
 232 UnicodeString &
 233 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
 234   return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
 235 }
 236
 237 #endif
 238
 239 UnicodeString &
 240 UnicodeString::foldCase(uint32_t options) {
 241     /* The Locale parameter isn't used. Use "" instead. */
 242     return caseMap(0, "", options, FOLD_CASE);
 243 }
 244
 245 U_NAMESPACE_END
 246
 247 // Defined here to reduce dependencies on break iterator
 248 U_CAPI int32_t U_EXPORT2
 249 uhash_hashCaselessUnicodeString(const UHashTok key) {
 250     U_NAMESPACE_USE
 251     const UnicodeString *str = (const UnicodeString*) key.pointer;
 252     if (str == NULL) {
 253         return 0;
 254     }
 255     // Inefficient; a better way would be to have a hash function in
 256     // UnicodeString that does case folding on the fly.
 257     UnicodeString copy(*str);
 258     return copy.foldCase().hashCode();
 259 }
 260
 261 // Defined here to reduce dependencies on break iterator
 262 U_CAPI UBool U_EXPORT2
 263 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
 264     U_NAMESPACE_USE
 265     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
 266     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
 267     if (str1 == str2) {
 268         return TRUE;
 269     }
 270     if (str1 == NULL || str2 == NULL) {
 271         return FALSE;
 272     }
 273     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
 274 }
 275