icuSources/common/cstring.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2003, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 * File CSTRING.C
  10 *
  11 * @author       Helena Shih
  12 *
  13 * Modification History:
  14 *
  15 *   Date        Name        Description
  16 *   6/18/98     hshih       Created
  17 *   09/08/98    stephen     Added include for ctype, for Mac Port
  18 *   11/15/99    helena      Integrated S/390 IEEE changes.
  19 ******************************************************************************
  20 */
  21
  22
  23
  24 #include <stdlib.h>
  25 #include <stdio.h>
  26 #include "unicode/utypes.h"
  27 #include "cmemory.h"
  28 #include "cstring.h"
  29 #include "uassert.h"
  30
  31 /*
  32  * We hardcode case conversion for invariant characters to match our expectation
  33  * and the compiler execution charset.
  34  * This prevents problems on systems
  35  * - with non-default casing behavior, like Turkish system locales where
  36  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
  37  * - where there are no lowercase Latin characters at all, or using different
  38  *   codes (some old EBCDIC codepages)
  39  *
  40  * This works because the compiler usually runs on a platform where the execution
  41  * charset includes all of the invariant characters at their expected
  42  * code positions, so that the char * string literals in ICU code match
  43  * the char literals here.
  44  *
  45  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
  46  * and the set of uppercase Latin letters is discontiguous as well.
  47  */
  48
  49 U_CAPI char U_EXPORT2
  50 uprv_toupper(char c) {
  51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  52     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
  53         c=(char)(c+('A'-'a'));
  54     }
  55 #else
  56     if('a'<=c && c<='z') {
  57         c=(char)(c+('A'-'a'));
  58     }
  59 #endif
  60     return c;
  61 }
  62
  63
  64 #if 0
  65 /*
  66  * Commented out because cstring.h defines uprv_tolower() to be
  67  * the same as either uprv_asciitolower() or uprv_ebcdictolower()
  68  * to reduce the amount of code to cover with tests.
  69  *
  70  * Note that this uprv_tolower() definition is likely to work for most
  71  * charset families, not just ASCII and EBCDIC, because its #else branch
  72  * is written generically.
  73  */
  74 U_CAPI char U_EXPORT2
  75 uprv_tolower(char c) {
  76 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  77     if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
  78         c=(char)(c+('a'-'A'));
  79     }
  80 #else
  81     if('A'<=c && c<='Z') {
  82         c=(char)(c+('a'-'A'));
  83     }
  84 #endif
  85     return c;
  86 }
  87 #endif
  88
  89 U_CAPI char U_EXPORT2
  90 uprv_asciitolower(char c) {
  91     if(0x41<=c && c<=0x5a) {
  92         c=(char)(c+0x20);
  93     }
  94     return c;
  95 }
  96
  97 U_CAPI char U_EXPORT2
  98 uprv_ebcdictolower(char c) {
  99     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
 100         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
 101         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
 102     ) {
 103         c=(char)(c-0x40);
 104     }
 105     return c;
 106 }
 107
 108
 109 U_CAPI char* U_EXPORT2
 110 T_CString_toLowerCase(char* str)
 111 {
 112     char* origPtr = str;
 113
 114     if (str) {
 115         do
 116             *str = (char)uprv_tolower(*str);
 117         while (*(str++));
 118     }
 119
 120     return origPtr;
 121 }
 122
 123 U_CAPI char* U_EXPORT2
 124 T_CString_toUpperCase(char* str)
 125 {
 126     char* origPtr = str;
 127
 128     if (str) {
 129         do
 130             *str = (char)uprv_toupper(*str);
 131         while (*(str++));
 132     }
 133
 134     return origPtr;
 135 }
 136
 137 /*
 138  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
 139  * Does not handle negative values (makes an empty string for them).
 140  * Writes at most 12 chars ("-2147483647" plus NUL).
 141  * Returns the length of the string (not including the NUL).
 142  */
 143 U_CAPI int32_t U_EXPORT2
 144 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
 145 {
 146     char      tbuf[30];
 147     int32_t   tbx    = sizeof(tbuf);
 148     uint8_t   digit;
 149     int32_t   length = 0;
 150     uint32_t  uval;
 151
 152     U_ASSERT(radix>=2 && radix<=16);
 153     uval = (uint32_t) v;
 154     if(v<0 && radix == 10) {
 155         /* Only in base 10 do we conside numbers to be signed. */
 156         uval = (uint32_t)(-v);
 157         buffer[length++] = '-';
 158     }
 159
 160     tbx = sizeof(tbuf)-1;
 161     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 162     do {
 163         digit = (uint8_t)(uval % radix);
 164         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 165         uval  = uval / radix;
 166     } while (uval != 0);
 167
 168     /* copy converted number into user buffer  */
 169     uprv_strcpy(buffer+length, tbuf+tbx);
 170     length += sizeof(tbuf) - tbx -1;
 171     return length;
 172 }
 173
 174
 175
 176 /*
 177  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
 178  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
 179  * Returns the length of the string, not including the terminating NULL.
 180  */
 181 U_CAPI int32_t U_EXPORT2
 182 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
 183 {
 184     char      tbuf[30];
 185     int32_t   tbx    = sizeof(tbuf);
 186     uint8_t   digit;
 187     int32_t   length = 0;
 188     uint64_t  uval;
 189
 190     U_ASSERT(radix>=2 && radix<=16);
 191     uval = (uint64_t) v;
 192     if(v<0 && radix == 10) {
 193         /* Only in base 10 do we conside numbers to be signed. */
 194         uval = (uint64_t)(-v);
 195         buffer[length++] = '-';
 196     }
 197
 198     tbx = sizeof(tbuf)-1;
 199     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 200     do {
 201         digit = (uint8_t)(uval % radix);
 202         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 203         uval  = uval / radix;
 204     } while (uval != 0);
 205
 206     /* copy converted number into user buffer  */
 207     uprv_strcpy(buffer+length, tbuf+tbx);
 208     length += sizeof(tbuf) - tbx -1;
 209     return length;
 210 }
 211
 212
 213 U_CAPI int32_t U_EXPORT2
 214 T_CString_stringToInteger(const char *integerString, int32_t radix)
 215 {
 216     char *end;
 217     return uprv_strtoul(integerString, &end, radix);
 218
 219 }
 220
 221 U_CAPI int U_EXPORT2
 222 T_CString_stricmp(const char *str1, const char *str2) {
 223     if(str1==NULL) {
 224         if(str2==NULL) {
 225             return 0;
 226         } else {
 227             return -1;
 228         }
 229     } else if(str2==NULL) {
 230         return 1;
 231     } else {
 232         /* compare non-NULL strings lexically with lowercase */
 233         int rc;
 234         unsigned char c1, c2;
 235
 236         for(;;) {
 237             c1=(unsigned char)*str1;
 238             c2=(unsigned char)*str2;
 239             if(c1==0) {
 240                 if(c2==0) {
 241                     return 0;
 242                 } else {
 243                     return -1;
 244                 }
 245             } else if(c2==0) {
 246                 return 1;
 247             } else {
 248                 /* compare non-zero characters with lowercase */
 249                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 250                 if(rc!=0) {
 251                     return rc;
 252                 }
 253             }
 254             ++str1;
 255             ++str2;
 256         }
 257     }
 258 }
 259
 260 U_CAPI int U_EXPORT2
 261 T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) {
 262     if(str1==NULL) {
 263         if(str2==NULL) {
 264             return 0;
 265         } else {
 266             return -1;
 267         }
 268     } else if(str2==NULL) {
 269         return 1;
 270     } else {
 271         /* compare non-NULL strings lexically with lowercase */
 272         int rc;
 273         unsigned char c1, c2;
 274
 275         for(; n--;) {
 276             c1=(unsigned char)*str1;
 277             c2=(unsigned char)*str2;
 278             if(c1==0) {
 279                 if(c2==0) {
 280                     return 0;
 281                 } else {
 282                     return -1;
 283                 }
 284             } else if(c2==0) {
 285                 return 1;
 286             } else {
 287                 /* compare non-zero characters with lowercase */
 288                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 289                 if(rc!=0) {
 290                     return rc;
 291                 }
 292             }
 293             ++str1;
 294             ++str2;
 295         }
 296     }
 297
 298     return 0;
 299 }
 300
 301 U_CAPI char* U_EXPORT2
 302 uprv_strdup(const char *src) {
 303     size_t len = uprv_strlen(src) + 1;
 304     char *dup = (char *) uprv_malloc(len);
 305
 306     if (dup) {
 307         uprv_memcpy(dup, src, len);
 308     }
 309
 310     return dup;
 311 }
 312
 313 U_CAPI char* U_EXPORT2
 314 uprv_strndup(const char *src, int32_t n) {
 315     char *dup;
 316
 317     if(n < 0) {
 318         dup = uprv_strdup(src);
 319     } else {
 320         dup = (char*)uprv_malloc(n+1);
 321         if (dup) {
 322             uprv_memcpy(dup, src, n);
 323             dup[n] = 0;
 324         }
 325     }
 326
 327     return dup;
 328 }