icuSources/common/cstring.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 ******************************************************************************
   5 *
   6 *   Copyright (C) 1997-2011, International Business Machines
   7 *   Corporation and others.  All Rights Reserved.
   8 *
   9 ******************************************************************************
  10 *
  11 * File CSTRING.C
  12 *
  13 * @author       Helena Shih
  14 *
  15 * Modification History:
  16 *
  17 *   Date        Name        Description
  18 *   6/18/98     hshih       Created
  19 *   09/08/98    stephen     Added include for ctype, for Mac Port
  20 *   11/15/99    helena      Integrated S/390 IEEE changes.
  21 ******************************************************************************
  22 */
  23
  24
  25
  26 #include <stdlib.h>
  27 #include <stdio.h>
  28 #include "unicode/utypes.h"
  29 #include "cmemory.h"
  30 #include "cstring.h"
  31 #include "uassert.h"
  32
  33 /*
  34  * We hardcode case conversion for invariant characters to match our expectation
  35  * and the compiler execution charset.
  36  * This prevents problems on systems
  37  * - with non-default casing behavior, like Turkish system locales where
  38  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
  39  * - where there are no lowercase Latin characters at all, or using different
  40  *   codes (some old EBCDIC codepages)
  41  *
  42  * This works because the compiler usually runs on a platform where the execution
  43  * charset includes all of the invariant characters at their expected
  44  * code positions, so that the char * string literals in ICU code match
  45  * the char literals here.
  46  *
  47  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
  48  * and the set of uppercase Latin letters is discontiguous as well.
  49  */
  50
  51 U_CAPI UBool U_EXPORT2
  52 uprv_isASCIILetter(char c) {
  53 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  54     return
  55         ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
  56         ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
  57 #else
  58     return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
  59 #endif
  60 }
  61
  62 U_CAPI char U_EXPORT2
  63 uprv_toupper(char c) {
  64 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  65     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
  66         c=(char)(c+('A'-'a'));
  67     }
  68 #else
  69     if('a'<=c && c<='z') {
  70         c=(char)(c+('A'-'a'));
  71     }
  72 #endif
  73     return c;
  74 }
  75
  76
  77 #if 0
  78 /*
  79  * Commented out because cstring.h defines uprv_tolower() to be
  80  * the same as either uprv_asciitolower() or uprv_ebcdictolower()
  81  * to reduce the amount of code to cover with tests.
  82  *
  83  * Note that this uprv_tolower() definition is likely to work for most
  84  * charset families, not just ASCII and EBCDIC, because its #else branch
  85  * is written generically.
  86  */
  87 U_CAPI char U_EXPORT2
  88 uprv_tolower(char c) {
  89 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  90     if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
  91         c=(char)(c+('a'-'A'));
  92     }
  93 #else
  94     if('A'<=c && c<='Z') {
  95         c=(char)(c+('a'-'A'));
  96     }
  97 #endif
  98     return c;
  99 }
 100 #endif
 101
 102 U_CAPI char U_EXPORT2
 103 uprv_asciitolower(char c) {
 104     if(0x41<=c && c<=0x5a) {
 105         c=(char)(c+0x20);
 106     }
 107     return c;
 108 }
 109
 110 U_CAPI char U_EXPORT2
 111 uprv_ebcdictolower(char c) {
 112     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
 113         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
 114         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
 115     ) {
 116         c=(char)(c-0x40);
 117     }
 118     return c;
 119 }
 120
 121
 122 U_CAPI char* U_EXPORT2
 123 T_CString_toLowerCase(char* str)
 124 {
 125     char* origPtr = str;
 126
 127     if (str) {
 128         do
 129             *str = (char)uprv_tolower(*str);
 130         while (*(str++));
 131     }
 132
 133     return origPtr;
 134 }
 135
 136 U_CAPI char* U_EXPORT2
 137 T_CString_toUpperCase(char* str)
 138 {
 139     char* origPtr = str;
 140
 141     if (str) {
 142         do
 143             *str = (char)uprv_toupper(*str);
 144         while (*(str++));
 145     }
 146
 147     return origPtr;
 148 }
 149
 150 /*
 151  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
 152  * Does not handle negative values (makes an empty string for them).
 153  * Writes at most 12 chars ("-2147483647" plus NUL).
 154  * Returns the length of the string (not including the NUL).
 155  */
 156 U_CAPI int32_t U_EXPORT2
 157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
 158 {
 159     char      tbuf[30];
 160     int32_t   tbx    = sizeof(tbuf);
 161     uint8_t   digit;
 162     int32_t   length = 0;
 163     uint32_t  uval;
 164
 165     U_ASSERT(radix>=2 && radix<=16);
 166     uval = (uint32_t) v;
 167     if(v<0 && radix == 10) {
 168         /* Only in base 10 do we conside numbers to be signed. */
 169         uval = (uint32_t)(-v);
 170         buffer[length++] = '-';
 171     }
 172
 173     tbx = sizeof(tbuf)-1;
 174     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 175     do {
 176         digit = (uint8_t)(uval % radix);
 177         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 178         uval  = uval / radix;
 179     } while (uval != 0);
 180
 181     /* copy converted number into user buffer  */
 182     uprv_strcpy(buffer+length, tbuf+tbx);
 183     length += sizeof(tbuf) - tbx -1;
 184     return length;
 185 }
 186
 187
 188
 189 /*
 190  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
 191  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
 192  * Returns the length of the string, not including the terminating NULL.
 193  */
 194 U_CAPI int32_t U_EXPORT2
 195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
 196 {
 197     char      tbuf[30];
 198     int32_t   tbx    = sizeof(tbuf);
 199     uint8_t   digit;
 200     int32_t   length = 0;
 201     uint64_t  uval;
 202
 203     U_ASSERT(radix>=2 && radix<=16);
 204     uval = (uint64_t) v;
 205     if(v<0 && radix == 10) {
 206         /* Only in base 10 do we conside numbers to be signed. */
 207         uval = (uint64_t)(-v);
 208         buffer[length++] = '-';
 209     }
 210
 211     tbx = sizeof(tbuf)-1;
 212     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
 213     do {
 214         digit = (uint8_t)(uval % radix);
 215         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
 216         uval  = uval / radix;
 217     } while (uval != 0);
 218
 219     /* copy converted number into user buffer  */
 220     uprv_strcpy(buffer+length, tbuf+tbx);
 221     length += sizeof(tbuf) - tbx -1;
 222     return length;
 223 }
 224
 225
 226 U_CAPI int32_t U_EXPORT2
 227 T_CString_stringToInteger(const char *integerString, int32_t radix)
 228 {
 229     char *end;
 230     return uprv_strtoul(integerString, &end, radix);
 231
 232 }
 233
 234 U_CAPI int U_EXPORT2
 235 uprv_stricmp(const char *str1, const char *str2) {
 236     if(str1==NULL) {
 237         if(str2==NULL) {
 238             return 0;
 239         } else {
 240             return -1;
 241         }
 242     } else if(str2==NULL) {
 243         return 1;
 244     } else {
 245         /* compare non-NULL strings lexically with lowercase */
 246         int rc;
 247         unsigned char c1, c2;
 248
 249         for(;;) {
 250             c1=(unsigned char)*str1;
 251             c2=(unsigned char)*str2;
 252             if(c1==0) {
 253                 if(c2==0) {
 254                     return 0;
 255                 } else {
 256                     return -1;
 257                 }
 258             } else if(c2==0) {
 259                 return 1;
 260             } else {
 261                 /* compare non-zero characters with lowercase */
 262                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 263                 if(rc!=0) {
 264                     return rc;
 265                 }
 266             }
 267             ++str1;
 268             ++str2;
 269         }
 270     }
 271 }
 272
 273 U_CAPI int U_EXPORT2
 274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
 275     if(str1==NULL) {
 276         if(str2==NULL) {
 277             return 0;
 278         } else {
 279             return -1;
 280         }
 281     } else if(str2==NULL) {
 282         return 1;
 283     } else {
 284         /* compare non-NULL strings lexically with lowercase */
 285         int rc;
 286         unsigned char c1, c2;
 287
 288         for(; n--;) {
 289             c1=(unsigned char)*str1;
 290             c2=(unsigned char)*str2;
 291             if(c1==0) {
 292                 if(c2==0) {
 293                     return 0;
 294                 } else {
 295                     return -1;
 296                 }
 297             } else if(c2==0) {
 298                 return 1;
 299             } else {
 300                 /* compare non-zero characters with lowercase */
 301                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
 302                 if(rc!=0) {
 303                     return rc;
 304                 }
 305             }
 306             ++str1;
 307             ++str2;
 308         }
 309     }
 310
 311     return 0;
 312 }
 313
 314 U_CAPI char* U_EXPORT2
 315 uprv_strdup(const char *src) {
 316     size_t len = uprv_strlen(src) + 1;
 317     char *dup = (char *) uprv_malloc(len);
 318
 319     if (dup) {
 320         uprv_memcpy(dup, src, len);
 321     }
 322
 323     return dup;
 324 }
 325
 326 U_CAPI char* U_EXPORT2
 327 uprv_strndup(const char *src, int32_t n) {
 328     char *dup;
 329
 330     if(n < 0) {
 331         dup = uprv_strdup(src);
 332     } else {
 333         dup = (char*)uprv_malloc(n+1);
 334         if (dup) {
 335             uprv_memcpy(dup, src, n);
 336             dup[n] = 0;
 337         }
 338     }
 339
 340     return dup;
 341 }