icuSources/common/cstring.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2011, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 * File CSTRING.C
  10 *
  11 * @author       Helena Shih
  12 *
  13 * Modification History:
  14 *
  15 *   Date        Name        Description
  16 *   6/18/98     hshih       Created
  17 *   09/08/98    stephen     Added include for ctype, for Mac Port
  18 *   11/15/99    helena      Integrated S/390 IEEE changes.
  19 ******************************************************************************
  20 */
  21
  22
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include "unicode/utypes.h"
  27 #include "cmemory.h"
  28 #include "cstring.h"
  29 #include "uassert.h"
  30
  31 /*
  32  * We hardcode case conversion for invariant characters to match our expectation
  33  * and the compiler execution charset.
  34  * This prevents problems on systems
  35  * - with non-default casing behavior, like Turkish system locales where
  36  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
  37  * - where there are no lowercase Latin characters at all, or using different
  38  *   codes (some old EBCDIC codepages)
  39  *
  40  * This works because the compiler usually runs on a platform where the execution
  41  * charset includes all of the invariant characters at their expected
  42  * code positions, so that the char * string literals in ICU code match
  43  * the char literals here.
  44  *
  45  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
  46  * and the set of uppercase Latin letters is discontiguous as well.
  47  */
  48
  49 U_CAPI UBool U_EXPORT2
  50 uprv_isASCIILetter(char c) {
  51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  52     return
  53         ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
  54         ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
  55 #else
  56     return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
  57 #endif
  58 }
  59
  60 U_CAPI char U_EXPORT2
  61 uprv_toupper(char c) {
  62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  63     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
  64         c=(char)(c+('A'-'a'));
  65     }
  66 #else
  67     if('a'<=c && c<='z') {
  68         c=(char)(c+('A'-'a'));
  69     }
  70 #endif
  71     return c;
  72 }
  73
  74
  75 #if 0
  76 /*
  77  * Commented out because cstring.h defines uprv_tolower() to be
  78  * the same as either uprv_asciitolower() or uprv_ebcdictolower()
  79  * to reduce the amount of code to cover with tests.
  80  *
  81  * Note that this uprv_tolower() definition is likely to work for most
  82  * charset families, not just ASCII and EBCDIC, because its #else branch
  83  * is written generically.
  84  */
  85 U_CAPI char U_EXPORT2
  86 uprv_tolower(char c) {
  87 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  88     if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
  89         c=(char)(c+('a'-'A'));
  90     }
  91 #else
  92     if('A'<=c && c<='Z') {
  93         c=(char)(c+('a'-'A'));
  94     }
  95 #endif
  96     return c;
  97 }
  98 #endif
  99
 100 U_CAPI char U_EXPORT2
 101 uprv_asciitolower(char c) {
 102     if(0x41<=c && c<=0x5a) {
 103         c=(char)(c+0x20);
 104     }
 105     return c;
 106 }
 107
 108 U_CAPI char U_EXPORT2
 109 uprv_ebcdictolower(char c) {
 110     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
 111         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
 112         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
 113     ) {
 114         c=(char)(c-0x40);
 115     }
 116     return c;
 117 }
 118
 119
 120 U_CAPI char* U_EXPORT2
 121 T_CString_toLowerCase(char* str)
 122 {
 123     char* origPtr = str;
 124
 125     if (str) {
 126         do
 127             *str = (char)uprv_tolower(*str);
 128         while (*(str++));
 129     }
 130
 131     return origPtr;
 132 }
 133
 134 U_CAPI char* U_EXPORT2
 135 T_CString_toUpperCase(char* str)
 136 {
 137     char* origPtr = str;
 138
 139     if (str) {
 140         do
 141             *str = (char)uprv_toupper(*str);
 142         while (*(str++));
 143     }
 144
 145     return origPtr;
 146 }
 147
 148 /*
 149  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
 150  * Does not handle negative values (makes an empty string for them).
 151  * Writes at most 12 chars ("-2147483647" plus NUL).
 152  * Returns the length of the string (not including the NUL).
 153  */
 154 U_CAPI int32_t U_EXPORT2
 155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
 156 {
 157     char      tbuf[30];
 158     int32_t   tbx    = sizeof(tbuf);
 159     uint8_t   digit;
 160     int32_t   length = 0;
 161     uint32_t  uval;
 162
 163     U_ASSERT(radix>=2 && radix<=16);
 164     uval = (uint32_t) v;
 165     if(v<0 && radix == 10) {
 166         /* Only in base 10 do we conside numbers to be signed. */
 167         uval = (uint32_t)(-v);
 168         buffer[length++] = '-';
 169     }
 170
 171     tbx = sizeof(tbuf)-1;
 172     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 173     do {
 174         digit = (uint8_t)(uval % radix);
 175         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 176         uval  = uval / radix;
 177     } while (uval != 0);
 178
 179     /* copy converted number into user buffer  */
 180     uprv_strcpy(buffer+length, tbuf+tbx);
 181     length += sizeof(tbuf) - tbx -1;
 182     return length;
 183 }
 184
 185
 186
 187 /*
 188  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
 189  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
 190  * Returns the length of the string, not including the terminating NULL.
 191  */
 192 U_CAPI int32_t U_EXPORT2
 193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
 194 {
 195     char      tbuf[30];
 196     int32_t   tbx    = sizeof(tbuf);
 197     uint8_t   digit;
 198     int32_t   length = 0;
 199     uint64_t  uval;
 200
 201     U_ASSERT(radix>=2 && radix<=16);
 202     uval = (uint64_t) v;
 203     if(v<0 && radix == 10) {
 204         /* Only in base 10 do we conside numbers to be signed. */
 205         uval = (uint64_t)(-v);
 206         buffer[length++] = '-';
 207     }
 208
 209     tbx = sizeof(tbuf)-1;
 210     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 211     do {
 212         digit = (uint8_t)(uval % radix);
 213         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 214         uval  = uval / radix;
 215     } while (uval != 0);
 216
 217     /* copy converted number into user buffer  */
 218     uprv_strcpy(buffer+length, tbuf+tbx);
 219     length += sizeof(tbuf) - tbx -1;
 220     return length;
 221 }
 222
 223
 224 U_CAPI int32_t U_EXPORT2
 225 T_CString_stringToInteger(const char *integerString, int32_t radix)
 226 {
 227     char *end;
 228     return uprv_strtoul(integerString, &end, radix);
 229
 230 }
 231
 232 U_CAPI int U_EXPORT2
 233 uprv_stricmp(const char *str1, const char *str2) {
 234     if(str1==NULL) {
 235         if(str2==NULL) {
 236             return 0;
 237         } else {
 238             return -1;
 239         }
 240     } else if(str2==NULL) {
 241         return 1;
 242     } else {
 243         /* compare non-NULL strings lexically with lowercase */
 244         int rc;
 245         unsigned char c1, c2;
 246
 247         for(;;) {
 248             c1=(unsigned char)*str1;
 249             c2=(unsigned char)*str2;
 250             if(c1==0) {
 251                 if(c2==0) {
 252                     return 0;
 253                 } else {
 254                     return -1;
 255                 }
 256             } else if(c2==0) {
 257                 return 1;
 258             } else {
 259                 /* compare non-zero characters with lowercase */
 260                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 261                 if(rc!=0) {
 262                     return rc;
 263                 }
 264             }
 265             ++str1;
 266             ++str2;
 267         }
 268     }
 269 }
 270
 271 U_CAPI int U_EXPORT2
 272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
 273     if(str1==NULL) {
 274         if(str2==NULL) {
 275             return 0;
 276         } else {
 277             return -1;
 278         }
 279     } else if(str2==NULL) {
 280         return 1;
 281     } else {
 282         /* compare non-NULL strings lexically with lowercase */
 283         int rc;
 284         unsigned char c1, c2;
 285
 286         for(; n--;) {
 287             c1=(unsigned char)*str1;
 288             c2=(unsigned char)*str2;
 289             if(c1==0) {
 290                 if(c2==0) {
 291                     return 0;
 292                 } else {
 293                     return -1;
 294                 }
 295             } else if(c2==0) {
 296                 return 1;
 297             } else {
 298                 /* compare non-zero characters with lowercase */
 299                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 300                 if(rc!=0) {
 301                     return rc;
 302                 }
 303             }
 304             ++str1;
 305             ++str2;
 306         }
 307     }
 308
 309     return 0;
 310 }
 311
 312 U_CAPI char* U_EXPORT2
 313 uprv_strdup(const char *src) {
 314     size_t len = uprv_strlen(src) + 1;
 315     char *dup = (char *) uprv_malloc(len);
 316
 317     if (dup) {
 318         uprv_memcpy(dup, src, len);
 319     }
 320
 321     return dup;
 322 }
 323
 324 U_CAPI char* U_EXPORT2
 325 uprv_strndup(const char *src, int32_t n) {
 326     char *dup;
 327
 328     if(n < 0) {
 329         dup = uprv_strdup(src);
 330     } else {
 331         dup = (char*)uprv_malloc(n+1);
 332         if (dup) {
 333             uprv_memcpy(dup, src, n);
 334             dup[n] = 0;
 335         }
 336     }
 337
 338     return dup;
 339 }