icuSources/common/unistr_case.cpp

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2010, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  unistr_case.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:2
  12 *
  13 *   created on: 2004aug19
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Case-mapping functions moved here from unistr.cpp
  17 */
  18
  19 #include "unicode/utypes.h"
  20 #include "unicode/putil.h"
  21 #include "unicode/locid.h"
  22 #include "cstring.h"
  23 #include "cmemory.h"
  24 #include "unicode/ustring.h"
  25 #include "unicode/unistr.h"
  26 #include "unicode/uchar.h"
  27 #include "unicode/ubrk.h"
  28 #include "ustr_imp.h"
  29 #include "uhash.h"
  30
  31 U_NAMESPACE_BEGIN
  32
  33 //========================================
  34 // Read-only implementation
  35 //========================================
  36
  37 int8_t
  38 UnicodeString::doCaseCompare(int32_t start,
  39                              int32_t length,
  40                              const UChar *srcChars,
  41                              int32_t srcStart,
  42                              int32_t srcLength,
  43                              uint32_t options) const
  44 {
  45   // compare illegal string values
  46   // treat const UChar *srcChars==NULL as an empty string
  47   if(isBogus()) {
  48     return -1;
  49   }
  50
  51   // pin indices to legal values
  52   pinIndices(start, length);
  53
  54   if(srcChars == NULL) {
  55     srcStart = srcLength = 0;
  56   }
  57
  58   // get the correct pointer
  59   const UChar *chars = getArrayStart();
  60
  61   chars += start;
  62   srcChars += srcStart;
  63
  64   if(chars != srcChars) {
  65     UErrorCode errorCode=U_ZERO_ERROR;
  66     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
  67                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
  68     if(result!=0) {
  69       return (int8_t)(result >> 24 | 1);
  70     }
  71   } else {
  72     // get the srcLength if necessary
  73     if(srcLength < 0) {
  74       srcLength = u_strlen(srcChars + srcStart);
  75     }
  76     if(length != srcLength) {
  77       return (int8_t)((length - srcLength) >> 24 | 1);
  78     }
  79   }
  80   return 0;
  81 }
  82
  83 //========================================
  84 // Write implementation
  85 //========================================
  86
  87 /*
  88  * Implement argument checking and buffer handling
  89  * for string case mapping as a common function.
  90  */
  91
  92 UnicodeString &
  93 UnicodeString::caseMap(BreakIterator *titleIter,
  94                        const char *locale,
  95                        uint32_t options,
  96                        int32_t toWhichCase) {
  97   if(isEmpty() || !isWritable()) {
  98     // nothing to do
  99     return *this;
 100   }
 101
 102   const UCaseProps *csp=ucase_getSingleton();
 103
 104   // We need to allocate a new buffer for the internal string case mapping function.
 105   // This is very similar to how doReplace() keeps the old array pointer
 106   // and deletes the old array itself after it is done.
 107   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
 108   UChar oldStackBuffer[US_STACKBUF_SIZE];
 109   UChar *oldArray;
 110   int32_t oldLength;
 111
 112   if(fFlags&kUsingStackBuffer) {
 113     // copy the stack buffer contents because it will be overwritten
 114     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
 115     oldArray = oldStackBuffer;
 116     oldLength = fShortLength;
 117   } else {
 118     oldArray = getArrayStart();
 119     oldLength = length();
 120   }
 121
 122   int32_t capacity;
 123   if(oldLength <= US_STACKBUF_SIZE) {
 124     capacity = US_STACKBUF_SIZE;
 125   } else {
 126     capacity = oldLength + 20;
 127   }
 128   int32_t *bufferToDelete = 0;
 129   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
 130     return *this;
 131   }
 132
 133   // Case-map, and if the result is too long, then reallocate and repeat.
 134   UErrorCode errorCode;
 135   int32_t newLength;
 136   do {
 137     errorCode = U_ZERO_ERROR;
 138     if(toWhichCase==TO_LOWER) {
 139       newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
 140                                oldArray, oldLength,
 141                                locale, &errorCode);
 142     } else if(toWhichCase==TO_UPPER) {
 143       newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
 144                                oldArray, oldLength,
 145                                locale, &errorCode);
 146     } else if(toWhichCase==TO_TITLE) {
 147 #if UCONFIG_NO_BREAK_ITERATION
 148         errorCode=U_UNSUPPORTED_ERROR;
 149 #else
 150       newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
 151                                oldArray, oldLength,
 152                                (UBreakIterator *)titleIter, locale, options, &errorCode);
 153 #endif
 154     } else {
 155       newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
 156                                 oldArray, oldLength,
 157                                 options,
 158                                 &errorCode);
 159     }
 160     setLength(newLength);
 161   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
 162
 163   if (bufferToDelete) {
 164     uprv_free(bufferToDelete);
 165   }
 166   if(U_FAILURE(errorCode)) {
 167     setToBogus();
 168   }
 169   return *this;
 170 }
 171
 172 UnicodeString &
 173 UnicodeString::toLower() {
 174   return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
 175 }
 176
 177 UnicodeString &
 178 UnicodeString::toLower(const Locale &locale) {
 179   return caseMap(0, locale.getName(), 0, TO_LOWER);
 180 }
 181
 182 UnicodeString &
 183 UnicodeString::toUpper() {
 184   return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
 185 }
 186
 187 UnicodeString &
 188 UnicodeString::toUpper(const Locale &locale) {
 189   return caseMap(0, locale.getName(), 0, TO_UPPER);
 190 }
 191
 192 #if !UCONFIG_NO_BREAK_ITERATION
 193
 194 UnicodeString &
 195 UnicodeString::toTitle(BreakIterator *titleIter) {
 196   return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
 197 }
 198
 199 UnicodeString &
 200 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
 201   return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
 202 }
 203
 204 UnicodeString &
 205 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
 206   return caseMap(titleIter, locale.getName(), options, TO_TITLE);
 207 }
 208
 209 #endif
 210
 211 UnicodeString &
 212 UnicodeString::foldCase(uint32_t options) {
 213     /* The Locale parameter isn't used. Use "" instead. */
 214     return caseMap(0, "", options, FOLD_CASE);
 215 }
 216
 217 U_NAMESPACE_END
 218
 219 // Defined here to reduce dependencies on break iterator
 220 U_CAPI int32_t U_EXPORT2
 221 uhash_hashCaselessUnicodeString(const UHashTok key) {
 222     U_NAMESPACE_USE
 223     const UnicodeString *str = (const UnicodeString*) key.pointer;
 224     if (str == NULL) {
 225         return 0;
 226     }
 227     // Inefficient; a better way would be to have a hash function in
 228     // UnicodeString that does case folding on the fly.
 229     UnicodeString copy(*str);
 230     return copy.foldCase().hashCode();
 231 }
 232
 233 // Defined here to reduce dependencies on break iterator
 234 U_CAPI UBool U_EXPORT2
 235 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
 236     U_NAMESPACE_USE
 237     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
 238     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
 239     if (str1 == str2) {
 240         return TRUE;
 241     }
 242     if (str1 == NULL || str2 == NULL) {
 243         return FALSE;
 244     }
 245     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
 246 }
 247