icuSources/common/putil.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2007, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
  10 *
  11 *   Date        Name        Description
  12 *   04/14/97    aliu        Creation.
  13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
  14 *                            getDefaultLocaleID().
  15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
  16 *                            for assumed case.  Non-UNIX platforms must be
  17 *                            special-cased.  Rewrote numeric methods dealing
  18 *                            with NaN and Infinity to be platform independent
  19 *                             over all IEEE 754 platforms.
  20 *   05/13/97    aliu        Restored sign of timezone
  21 *                            (semantics are hours West of GMT)
  22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
  23 *                             nextDouble..
  24 *   07/22/98    stephen     Added remainder, max, min, trunc
  25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
  26 *   08/24/98    stephen     Added longBitsFromDouble
  27 *   09/08/98    stephen     Minor changes for Mac Port
  28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
  29 *                            Fixed EBCDIC tables
  30 *   04/15/99    stephen     Converted to C.
  31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
  32 *   08/04/99    jeffrey R.  Added OS/2 changes
  33 *   11/15/99    helena      Integrated S/390 IEEE support.
  34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
  35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
  36 ******************************************************************************
  37 */
  38
  39 /* Define _XOPEN_SOURCE for Solaris and friends. */
  40 /* NetBSD needs it to be >= 4 */
  41 #ifndef _XOPEN_SOURCE
  42 #if __STDC_VERSION__ >= 199901L
  43 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 */
  44 #define _XOPEN_SOURCE 600
  45 #else
  46 #define _XOPEN_SOURCE 4
  47 #endif
  48 #endif
  49
  50 /* Make sure things like readlink and such functions work. */
  51 #ifndef _XOPEN_SOURCE_EXTENDED
  52 #define _XOPEN_SOURCE_EXTENDED 1
  53 #endif
  54
  55 /* include ICU headers */
  56 #include "unicode/utypes.h"
  57 #include "unicode/putil.h"
  58 #include "unicode/ustring.h"
  59 #include "putilimp.h"
  60 #include "uassert.h"
  61 #include "umutex.h"
  62 #include "cmemory.h"
  63 #include "cstring.h"
  64 #include "locmap.h"
  65 #include "ucln_cmn.h"
  66
  67 /* Include standard headers. */
  68 #include <stdio.h>
  69 #include <stdlib.h>
  70 #include <string.h>
  71 #include <math.h>
  72 #include <locale.h>
  73 #include <float.h>
  74 #include <time.h>
  75
  76 /* include system headers */
  77 #ifdef U_WINDOWS
  78 #   define WIN32_LEAN_AND_MEAN
  79 #   define VC_EXTRALEAN
  80 #   define NOUSER
  81 #   define NOSERVICE
  82 #   define NOIME
  83 #   define NOMCX
  84 #   include <windows.h>
  85 #   include "wintz.h"
  86 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
  87 /* tzset isn't defined in strict ANSI on Cygwin. */
  88 #   undef __STRICT_ANSI__
  89 #elif defined(OS400)
  90 #   include <float.h>
  91 #   include <qusec.h>       /* error code structure */
  92 #   include <qusrjobi.h>
  93 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
  94 #elif defined(XP_MAC)
  95 #   include <Files.h>
  96 #   include <IntlResources.h>
  97 #   include <Script.h>
  98 #   include <Folders.h>
  99 #   include <MacTypes.h>
 100 #   include <TextUtils.h>
 101 #   define ICU_NO_USER_DATA_OVERRIDE 1
 102 #elif defined(OS390)
 103 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
 104 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
 105 #include <limits.h>
 106 #include <unistd.h>
 107 #elif defined(U_QNX)
 108 #include <sys/neutrino.h>
 109 #endif
 110
 111 #ifndef U_WINDOWS
 112 #include <sys/time.h>
 113 #endif
 114
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 */
 120
 121 #if U_HAVE_NL_LANGINFO_CODESET
 122 #include <langinfo.h>
 123 #endif
 124
/* Define the extension for data files, again... (the ICU data file suffix). */
#define DATA_TYPE "dat"

/*
 * Leave this copyright notice here!
 * The text comes from U_COPYRIGHT_STRING and is stored in a file-local array.
 */
static const char copyright[] = U_COPYRIGHT_STRING;
 130
 131 /* floating point implementations ------------------------------------------- */
 132
 133 /* We return QNAN rather than SNAN*/
 134 #define SIGN 0x80000000U
 135
 136 /* Make it easy to define certain types of constants */
 137 typedef union {
 138     int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
 139     double d64;
 140 } BitPatternConversion;
 141 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
 142 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
 143
 144 /*---------------------------------------------------------------------------
 145   Platform utilities
 146   Our general strategy is to assume we're on a POSIX platform.  Platforms which
 147   are non-POSIX must declare themselves so.  The default POSIX implementation
 148   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
 149   functions).
 150   ---------------------------------------------------------------------------*/
 151
 152 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
 153 #   undef U_POSIX_LOCALE
 154 #else
 155 #   define U_POSIX_LOCALE    1
 156 #endif
 157
 158 /*
 159     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
 160     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
 161 */
 162 #if !IEEE_754
 163 static char*
 164 u_topNBytesOfDouble(double* d, int n)
 165 {
 166 #if U_IS_BIG_ENDIAN
 167     return (char*)d;
 168 #else
 169     return (char*)(d + 1) - n;
 170 #endif
 171 }
 172 #endif
 173
 174 static char*
 175 u_bottomNBytesOfDouble(double* d, int n)
 176 {
 177 #if U_IS_BIG_ENDIAN
 178     return (char*)(d + 1) - n;
 179 #else
 180     return (char*)d;
 181 #endif
 182 }
 183
 184 #if defined(U_WINDOWS)
 185 typedef union {
 186     int64_t int64;
 187     FILETIME fileTime;
 188 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
 189
 190 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
 191 #define EPOCH_BIAS  INT64_C(116444736000000000)
 192 #define HECTONANOSECOND_PER_MILLISECOND   10000
 193
 194 #endif
 195
 196 /*---------------------------------------------------------------------------
 197   Universal Implementations
 198   These are designed to work on all platforms.  Try these, and if they
 199   don't work on your platform, then special case your platform with new
 200   implementations.
 201 ---------------------------------------------------------------------------*/
 202
 203 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
 204 U_CAPI UDate U_EXPORT2
 205 uprv_getUTCtime()
 206 {
 207 #ifdef XP_MAC
 208     time_t t, t1, t2;
 209     struct tm tmrec;
 210
 211     uprv_memset( &tmrec, 0, sizeof(tmrec) );
 212     tmrec.tm_year = 70;
 213     tmrec.tm_mon = 0;
 214     tmrec.tm_mday = 1;
 215     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
 216
 217     time(&t);
 218     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 219     t2 = mktime(&tmrec);    /* seconds of current GMT*/
 220     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
 221 #elif defined(U_WINDOWS)
 222
 223     FileTimeConversion winTime;
 224     GetSystemTimeAsFileTime(&winTime.fileTime);
 225     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
 226 #else
 227 /*
 228     struct timeval posixTime;
 229     gettimeofday(&posixTime, NULL);
 230     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
 231 */
 232     time_t epochtime;
 233     time(&epochtime);
 234     return (UDate)epochtime * U_MILLIS_PER_SECOND;
 235 #endif
 236 }
 237
 238 /*-----------------------------------------------------------------------------
 239   IEEE 754
 240   These methods detect and return NaN and infinity values for doubles
 241   conforming to IEEE 754.  Platforms which support this standard include X86,
 242   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
 243   If this doesn't work on your platform, you have non-IEEE floating-point, and
 244   will need to code your own versions.  A naive implementation is to return 0.0
 245   for getNaN and getInfinity, and false for isNaN and isInfinite.
 246   ---------------------------------------------------------------------------*/
 247
 248 U_CAPI UBool U_EXPORT2
 249 uprv_isNaN(double number)
 250 {
 251 #if IEEE_754
 252     BitPatternConversion convertedNumber;
 253     convertedNumber.d64 = number;
 254     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
 255     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
 256
 257 #elif defined(OS390)
 258     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 259                         sizeof(uint32_t));
 260     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 261                         sizeof(uint32_t));
 262
 263     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
 264       (lowBits == 0x00000000L);
 265
 266 #else
 267     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 268     /* you'll need to replace this default implementation with what's correct*/
 269     /* for your platform.*/
 270     return number != number;
 271 #endif
 272 }
 273
 274 U_CAPI UBool U_EXPORT2
 275 uprv_isInfinite(double number)
 276 {
 277 #if IEEE_754
 278     BitPatternConversion convertedNumber;
 279     convertedNumber.d64 = number;
 280     /* Infinity is exactly 0x7FF0000000000000U. */
 281     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
 282 #elif defined(OS390)
 283     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 284                         sizeof(uint32_t));
 285     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 286                         sizeof(uint32_t));
 287
 288     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
 289
 290 #else
 291     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 292     /* value, you'll need to replace this default implementation with what's*/
 293     /* correct for your platform.*/
 294     return number == (2.0 * number);
 295 #endif
 296 }
 297
 298 U_CAPI UBool U_EXPORT2
 299 uprv_isPositiveInfinity(double number)
 300 {
 301 #if IEEE_754 || defined(OS390)
 302     return (UBool)(number > 0 && uprv_isInfinite(number));
 303 #else
 304     return uprv_isInfinite(number);
 305 #endif
 306 }
 307
 308 U_CAPI UBool U_EXPORT2
 309 uprv_isNegativeInfinity(double number)
 310 {
 311 #if IEEE_754 || defined(OS390)
 312     return (UBool)(number < 0 && uprv_isInfinite(number));
 313
 314 #else
 315     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 316                         sizeof(uint32_t));
 317     return((highBits & SIGN) && uprv_isInfinite(number));
 318
 319 #endif
 320 }
 321
 322 U_CAPI double U_EXPORT2
 323 uprv_getNaN()
 324 {
 325 #if IEEE_754 || defined(OS390)
 326     return gNan.d64;
 327 #else
 328     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 329     /* you'll need to replace this default implementation with what's correct*/
 330     /* for your platform.*/
 331     return 0.0;
 332 #endif
 333 }
 334
 335 U_CAPI double U_EXPORT2
 336 uprv_getInfinity()
 337 {
 338 #if IEEE_754 || defined(OS390)
 339     return gInf.d64;
 340 #else
 341     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 342     /* value, you'll need to replace this default implementation with what's*/
 343     /* correct for your platform.*/
 344     return 0.0;
 345 #endif
 346 }
 347
 348 U_CAPI double U_EXPORT2
 349 uprv_floor(double x)
 350 {
 351     return floor(x);
 352 }
 353
 354 U_CAPI double U_EXPORT2
 355 uprv_ceil(double x)
 356 {
 357     return ceil(x);
 358 }
 359
 360 U_CAPI double U_EXPORT2
 361 uprv_round(double x)
 362 {
 363     return uprv_floor(x + 0.5);
 364 }
 365
 366 U_CAPI double U_EXPORT2
 367 uprv_fabs(double x)
 368 {
 369     return fabs(x);
 370 }
 371
 372 U_CAPI double U_EXPORT2
 373 uprv_modf(double x, double* y)
 374 {
 375     return modf(x, y);
 376 }
 377
 378 U_CAPI double U_EXPORT2
 379 uprv_fmod(double x, double y)
 380 {
 381     return fmod(x, y);
 382 }
 383
 384 U_CAPI double U_EXPORT2
 385 uprv_pow(double x, double y)
 386 {
 387     /* This is declared as "double pow(double x, double y)" */
 388     return pow(x, y);
 389 }
 390
 391 U_CAPI double U_EXPORT2
 392 uprv_pow10(int32_t x)
 393 {
 394     return pow(10.0, (double)x);
 395 }
 396
 397 U_CAPI double U_EXPORT2
 398 uprv_fmax(double x, double y)
 399 {
 400 #if IEEE_754
 401     int32_t lowBits;
 402
 403     /* first handle NaN*/
 404     if(uprv_isNaN(x) || uprv_isNaN(y))
 405         return uprv_getNaN();
 406
 407     /* check for -0 and 0*/
 408     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
 409     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 410         return y;
 411
 412 #endif
 413
 414     /* this should work for all flt point w/o NaN and Infpecial cases */
 415     return (x > y ? x : y);
 416 }
 417
 418 U_CAPI double U_EXPORT2
 419 uprv_fmin(double x, double y)
 420 {
 421 #if IEEE_754
 422     int32_t lowBits;
 423
 424     /* first handle NaN*/
 425     if(uprv_isNaN(x) || uprv_isNaN(y))
 426         return uprv_getNaN();
 427
 428     /* check for -0 and 0*/
 429     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
 430     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 431         return y;
 432
 433 #endif
 434
 435     /* this should work for all flt point w/o NaN and Inf special cases */
 436     return (x > y ? y : x);
 437 }
 438
 439 /**
 440  * Truncates the given double.
 441  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
 442  * This is different than calling floor() or ceil():
 443  * floor(3.3) = 3, floor(-3.3) = -4
 444  * ceil(3.3) = 4, ceil(-3.3) = -3
 445  */
 446 U_CAPI double U_EXPORT2
 447 uprv_trunc(double d)
 448 {
 449 #if IEEE_754
 450     int32_t lowBits;
 451
 452     /* handle error cases*/
 453     if(uprv_isNaN(d))
 454         return uprv_getNaN();
 455     if(uprv_isInfinite(d))
 456         return uprv_getInfinity();
 457
 458     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
 459     if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
 460         return ceil(d);
 461     else
 462         return floor(d);
 463
 464 #else
 465     return d >= 0 ? floor(d) : ceil(d);
 466
 467 #endif
 468 }
 469
 470 /**
 471  * Return the largest positive number that can be represented by an integer
 472  * type of arbitrary bit length.
 473  */
 474 U_CAPI double U_EXPORT2
 475 uprv_maxMantissa(void)
 476 {
 477     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
 478 }
 479
 480 U_CAPI double U_EXPORT2
 481 uprv_log(double d)
 482 {
 483     return log(d);
 484 }
 485
 486 #if 0
 487 /* This isn't used. If it's readded, readd putiltst.c tests */
 488 U_CAPI int32_t U_EXPORT2
 489 uprv_digitsAfterDecimal(double x)
 490 {
 491     char buffer[20];
 492     int32_t numDigits, bytesWritten;
 493     char *p = buffer;
 494     int32_t ptPos, exponent;
 495
 496     /* cheat and use the string-format routine to get a string representation*/
 497     /* (it handles mathematical inaccuracy better than we can), then find out */
 498     /* many characters are to the right of the decimal point */
 499     bytesWritten = sprintf(buffer, "%+.9g", x);
 500     while (isdigit(*(++p))) {
 501     }
 502
 503     ptPos = (int32_t)(p - buffer);
 504     numDigits = (int32_t)(bytesWritten - ptPos - 1);
 505
 506     /* if the number's string representation is in scientific notation, find */
 507     /* the exponent and take it into account*/
 508     exponent = 0;
 509     p = uprv_strchr(buffer, 'e');
 510     if (p != 0) {
 511         int16_t expPos = (int16_t)(p - buffer);
 512         numDigits -= bytesWritten - expPos;
 513         exponent = (int32_t)(atol(p + 1));
 514     }
 515
 516     /* the string representation may still have spurious decimal digits in it, */
 517     /* so we cut off at the ninth digit to the right of the decimal, and have */
 518     /* to search backward from there to the first non-zero digit*/
 519     if (numDigits > 9) {
 520         numDigits = 9;
 521         while (numDigits > 0 && buffer[ptPos + numDigits] == '0')
 522             --numDigits;
 523     }
 524     numDigits -= exponent;
 525     if (numDigits < 0) {
 526         return 0;
 527     }
 528     return numDigits;
 529 }
 530 #endif
 531
 532 /*---------------------------------------------------------------------------
 533   Platform-specific Implementations
 534   Try these, and if they don't work on your platform, then special case your
 535   platform with new implementations.
 536   ---------------------------------------------------------------------------*/
 537
 538 /* Generic time zone layer -------------------------------------------------- */
 539
 540 /* Time zone utilities */
 541 U_CAPI void U_EXPORT2
 542 uprv_tzset()
 543 {
 544 #ifdef U_TZSET
 545     U_TZSET();
 546 #else
 547     /* no initialization*/
 548 #endif
 549 }
 550
 551 U_CAPI int32_t U_EXPORT2
 552 uprv_timezone()
 553 {
 554 #ifdef U_TIMEZONE
 555     return U_TIMEZONE;
 556 #else
 557     time_t t, t1, t2;
 558     struct tm tmrec;
 559     UBool dst_checked;
 560     int32_t tdiff = 0;
 561
 562     time(&t);
 563     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
 564     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
 565     t1 = mktime(&tmrec);                 /* local time in seconds*/
 566     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 567     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
 568     tdiff = t2 - t1;
 569     /* imitate NT behaviour, which returns same timezone offset to GMT for
 570        winter and summer*/
 571     if (dst_checked)
 572         tdiff += 3600;
 573     return tdiff;
 574 #endif
 575 }
 576
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
   some platforms need to have it declared here. */

#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
/* RS6000 and others reject char **tzname.  */
extern U_IMPORT char *U_TZNAME[];
#endif

#if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
/* These platforms are likely to use Olson timezone IDs. */
/* CHECK_LOCALTIME_LINK enables the readlink(TZDEFAULT) lookup in uprv_tzname(). */
#define CHECK_LOCALTIME_LINK 1
#include <tzfile.h>
/* Prefix expected at the start of the link target: TZDIR followed by a slash. */
#define TZZONEINFO      (TZDIR "/")
/* Receives the link target; gTimeZoneBufferPtr points into it once an ID was found. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
#endif
 593
 594 #ifndef U_WINDOWS
 595 #define isNonDigit(ch) (ch < '0' || '9' < ch)
 596 static UBool isValidOlsonID(const char *id) {
 597     int32_t idx = 0;
 598
 599     /* Determine if this is something like Iceland (Olson ID)
 600     or AST4ADT (non-Olson ID) */
 601     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
 602         idx++;
 603     }
 604
 605     /* If we went through the whole string, then it might be okay.
 606     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
 607     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
 608     The rest of the time it could be an Olson ID. George */
 609     return (UBool)(id[idx] == 0
 610         || uprv_strcmp(id, "PST8PDT") == 0
 611         || uprv_strcmp(id, "MST7MDT") == 0
 612         || uprv_strcmp(id, "CST6CDT") == 0
 613         || uprv_strcmp(id, "EST5EDT") == 0);
 614 }
 615 #endif
 616
 617 U_CAPI const char* U_EXPORT2
 618 uprv_tzname(int n)
 619 {
 620 #ifdef U_WINDOWS
 621     const char *id = uprv_detectWindowsTimeZone();
 622
 623     if (id != NULL) {
 624         return id;
 625     }
 626 #else
 627     const char *tzenv = NULL;
 628
 629 /*#if defined(U_DARWIN)
 630     int ret;
 631
 632     tzenv = getenv("TZFILE");
 633     if (tzenv != NULL) {
 634         return tzenv;
 635     }
 636 #endif*/
 637
 638     tzenv = getenv("TZ");
 639     if (tzenv != NULL && isValidOlsonID(tzenv))
 640     {
 641         /* This might be a good Olson ID. */
 642         if (uprv_strncmp(tzenv, "posix/", 6) == 0
 643             || uprv_strncmp(tzenv, "right/", 6) == 0)
 644         {
 645             /* Remove the posix/ or right/ prefix. */
 646             tzenv += 6;
 647         }
 648         return tzenv;
 649     }
 650     /* else U_TZNAME will give a better result. */
 651
 652 #if defined(CHECK_LOCALTIME_LINK)
 653     /* Caller must handle threading issues */
 654     if (gTimeZoneBufferPtr == NULL) {
 655         /*
 656         This is a trick to look at the name of the link to get the Olson ID
 657         because the tzfile contents is underspecified.
 658         This isn't guaranteed to work because it may not be a symlink.
 659         */
 660         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
 661         if (0 < ret) {
 662             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
 663             gTimeZoneBuffer[ret] = 0;
 664             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
 665                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
 666             {
 667                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
 668             }
 669         }
 670     }
 671     else {
 672         return gTimeZoneBufferPtr;
 673     }
 674 #endif
 675 #endif
 676
 677 #ifdef U_TZNAME
 678     /*
 679     U_TZNAME is usually a non-unique abbreviation,
 680     which isn't normally usable.
 681     */
 682     return U_TZNAME[n];
 683 #else
 684     return "";
 685 #endif
 686 }
 687
/* Get and set the ICU data directory --------------------------------------- */

/*
 * Current data directory.  NULL until it is first set; afterwards either the
 * literal "" or a heap-allocated copy made by u_setDataDirectory().
 */
static char *gDataDirectory = NULL;
#if U_POSIX_LOCALE
 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
 694
 695 static UBool U_CALLCONV putil_cleanup(void)
 696 {
 697     if (gDataDirectory && *gDataDirectory) {
 698         uprv_free(gDataDirectory);
 699     }
 700     gDataDirectory = NULL;
 701 #if U_POSIX_LOCALE
 702     if (gCorrectedPOSIXLocale) {
 703         uprv_free(gCorrectedPOSIXLocale);
 704         gCorrectedPOSIXLocale = NULL;
 705     }
 706 #endif
 707     return TRUE;
 708 }
 709
 710 /*
 711  * Set the data directory.
 712  *    Make a copy of the passed string, and set the global data dir to point to it.
 713  *    TODO:  see bug #2849, regarding thread safety.
 714  */
 715 U_CAPI void U_EXPORT2
 716 u_setDataDirectory(const char *directory) {
 717     char *newDataDir;
 718     int32_t length;
 719
 720     if(directory==NULL || *directory==0) {
 721         /* A small optimization to prevent the malloc and copy when the
 722         shared library is used, and this is a way to make sure that NULL
 723         is never returned.
 724         */
 725         newDataDir = (char *)"";
 726     }
 727     else {
 728         length=(int32_t)uprv_strlen(directory);
 729         newDataDir = (char *)uprv_malloc(length + 2);
 730         uprv_strcpy(newDataDir, directory);
 731
 732 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
 733         {
 734             char *p;
 735             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
 736                 *p = U_FILE_SEP_CHAR;
 737             }
 738         }
 739 #endif
 740     }
 741
 742     umtx_lock(NULL);
 743     if (gDataDirectory && *gDataDirectory) {
 744         uprv_free(gDataDirectory);
 745     }
 746     gDataDirectory = newDataDir;
 747     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
 748     umtx_unlock(NULL);
 749 }
 750
 751 U_CAPI UBool U_EXPORT2
 752 uprv_pathIsAbsolute(const char *path)
 753 {
 754   if(!path || !*path) {
 755     return FALSE;
 756   }
 757
 758   if(*path == U_FILE_SEP_CHAR) {
 759     return TRUE;
 760   }
 761
 762 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
 763   if(*path == U_FILE_ALT_SEP_CHAR) {
 764     return TRUE;
 765   }
 766 #endif
 767
 768 #if defined(U_WINDOWS)
 769   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
 770        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
 771       path[1] == ':' ) {
 772     return TRUE;
 773   }
 774 #endif
 775
 776   return FALSE;
 777 }
 778
 779 U_CAPI const char * U_EXPORT2
 780 u_getDataDirectory(void) {
 781     const char *path = NULL;
 782
 783     /* if we have the directory, then return it immediately */
 784     umtx_lock(NULL);
 785     path = gDataDirectory;
 786     umtx_unlock(NULL);
 787
 788     if(path) {
 789         return path;
 790     }
 791
 792     /*
 793     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
 794     override ICU's data with the ICU_DATA environment variable. This prevents
 795     problems where multiple custom copies of ICU's specific version of data
 796     are installed on a system. Either the application must define the data
 797     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
 798     ICU, set the data with udata_setCommonData or trust that all of the
 799     required data is contained in ICU's data library that contains
 800     the entry point defined by U_ICUDATA_ENTRY_POINT.
 801
 802     There may also be some platforms where environment variables
 803     are not allowed.
 804     */
 805 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
 806     /* First try to get the environment variable */
 807     path=getenv("ICU_DATA");
 808 #   endif
 809
 810     /* ICU_DATA_DIR may be set as a compile option */
 811 #   ifdef ICU_DATA_DIR
 812     if(path==NULL || *path==0) {
 813         path=ICU_DATA_DIR;
 814     }
 815 #   endif
 816
 817     if(path==NULL) {
 818         /* It looks really bad, set it to something. */
 819         path = "";
 820     }
 821
 822     u_setDataDirectory(path);
 823     return gDataDirectory;
 824 }
 825
 826
 827
 828
 829
 830 /* Macintosh-specific locale information ------------------------------------ */
 831 #ifdef XP_MAC
 832
 833 typedef struct {
 834     int32_t script;
 835     int32_t region;
 836     int32_t lang;
 837     int32_t date_region;
 838     const char* posixID;
 839 } mac_lc_rec;
 840
 841 /* Todo: This will be updated with a newer version from www.unicode.org web
 842    page when it's available.*/
 843 #define MAC_LC_MAGIC_NUMBER -5
 844 #define MAC_LC_INIT_NUMBER -9
 845
 846 static const mac_lc_rec mac_lc_recs[] = {
 847     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
 848     /* United States*/
 849     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
 850     /* France*/
 851     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
 852     /* Great Britain*/
 853     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
 854     /* Germany*/
 855     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
 856     /* Italy*/
 857     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
 858     /* Metherlands*/
 859     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
 860     /* French for Belgium or Lxembourg*/
 861     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
 862     /* Sweden*/
 863     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
 864     /* Denmark*/
 865     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
 866     /* Portugal*/
 867     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
 868     /* French Canada*/
 869     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
 870     /* Israel*/
 871     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
 872     /* Japan*/
 873     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
 874     /* Australia*/
 875     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
 876     /* the Arabic world (?)*/
 877     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
 878     /* Finland*/
 879     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
 880     /* French for Switzerland*/
 881     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
 882     /* German for Switzerland*/
 883     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
 884     /* Greece*/
 885     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
 886     /* Iceland ===*/
 887     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
 888     /* Malta ===*/
 889     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
 890     /* Cyprus ===*/
 891     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
 892     /* Turkey ===*/
 893     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
 894     /* Croatian system for Yugoslavia*/
 895     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
 896     /* Hindi system for India*/
 897     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
 898     /* Pakistan*/
 899     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
 900     /* Lithuania*/
 901     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
 902     /* Poland*/
 903     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
 904     /* Hungary*/
 905     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
 906     /* Estonia*/
 907     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
 908     /* Latvia*/
 909     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
 910     /* Lapland  [Ask Rich for the data. HS]*/
 911     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
 912     /* Faeroe Islands*/
 913     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
 914     /* Iran*/
 915     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
 916     /* Russia*/
 917     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
 918     /* Ireland*/
 919     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
 920     /* Korea*/
 921     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
 922     /* People's Republic of China*/
 923     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
 924     /* Taiwan*/
 925     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
 926     /* Thailand*/
 927
 928     /* fallback is en_US*/
 929     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
 930     MAC_LC_MAGIC_NUMBER, "en_US"
 931 };
 932
 933 #endif
 934
 935 #if U_POSIX_LOCALE
 936 /* Return just the POSIX id, whatever happens to be in it */
 937 static const char *uprv_getPOSIXID(void)
 938 {
 939     static const char* posixID = NULL;
 940     if (posixID == 0) {
 941         /*
 942         * On Solaris two different calls to setlocale can result in
 943         * different values. Only get this value once.
 944         *
 945         * We must check this first because an application can set this.
 946         *
 947         * LC_ALL can't be used because it's platform dependent. The LANG
 948         * environment variable seems to affect LC_CTYPE variable by default.
 949         * Here is what setlocale(LC_ALL, NULL) can return.
 950         * HPUX can return 'C C C C C C C'
 951         * Solaris can return /en_US/C/C/C/C/C on the second try.
 952         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
 953         *
 954         * The default codepage detection also needs to use LC_CTYPE.
 955         *
 956         * Do not call setlocale(LC_*, "")! Using an empty string instead
 957         * of NULL, will modify the libc behavior.
 958         */
 959         posixID = setlocale(LC_CTYPE, NULL);
 960         if ((posixID == 0)
 961             || (uprv_strcmp("C", posixID) == 0)
 962             || (uprv_strcmp("POSIX", posixID) == 0))
 963         {
 964             /* Maybe we got some garbage.  Try something more reasonable */
 965             posixID = getenv("LC_ALL");
 966             if (posixID == 0) {
 967                 posixID = getenv("LC_CTYPE");
 968                 if (posixID == 0) {
 969                     posixID = getenv("LANG");
 970                 }
 971             }
 972         }
 973
 974         if ((posixID==0)
 975             || (uprv_strcmp("C", posixID) == 0)
 976             || (uprv_strcmp("POSIX", posixID) == 0))
 977         {
 978             /* Nothing worked.  Give it a nice POSIX default value. */
 979             posixID = "en_US_POSIX";
 980         }
 981     }
 982
 983     return posixID;
 984 }
 985 #endif
 986
 987 /* NOTE: The caller should handle thread safety */
 988 U_CAPI const char* U_EXPORT2
 989 uprv_getDefaultLocaleID()
 990 {
 991 #if U_POSIX_LOCALE
 992 /*
 993   Note that:  (a '!' means the ID is improper somehow)
 994      LC_ALL  ---->     default_loc          codepage
 995 --------------------------------------------------------
 996      ab.CD             ab                   CD
 997      ab@CD             ab__CD               -
 998      ab@CD.EF          ab__CD               EF
 999
1000      ab_CD.EF@GH       ab_CD_GH             EF
1001
1002 Some 'improper' ways to do the same as above:
1003   !  ab_CD@GH.EF       ab_CD_GH             EF
1004   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1005   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1006
1007      _CD@GH            _CD_GH               -
1008      _CD.EF@GH         _CD_GH               EF
1009
1010 The variant cannot have dots in it.
1011 The 'rightmost' variant (@xxx) wins.
1012 The leftmost codepage (.xxx) wins.
1013 */
1014     char *correctedPOSIXLocale = 0;
1015     const char* posixID = uprv_getPOSIXID();
1016     const char *p;
1017     const char *q;
1018     int32_t len;
1019
1020     /* Format: (no spaces)
1021     ll [ _CC ] [ . MM ] [ @ VV]
1022
1023       l = lang, C = ctry, M = charmap, V = variant
1024     */
1025
1026     if (gCorrectedPOSIXLocale != NULL) {
1027         return gCorrectedPOSIXLocale;
1028     }
1029
1030     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1031         /* assume new locale can't be larger than old one? */
1032         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1033         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1034         correctedPOSIXLocale[p-posixID] = 0;
1035
1036         /* do not copy after the @ */
1037         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1038             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1039         }
1040     }
1041
1042     /* Note that we scan the *uncorrected* ID. */
1043     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1044         if (correctedPOSIXLocale == NULL) {
1045             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1046             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1047             correctedPOSIXLocale[p-posixID] = 0;
1048         }
1049         p++;
1050
1051         /* Take care of any special cases here.. */
1052         if (!uprv_strcmp(p, "nynorsk")) {
1053             p = "NY";
1054             /* Don't worry about no__NY. In practice, it won't appear. */
1055         }
1056
1057         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1058             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1059         }
1060         else {
1061             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1062         }
1063
1064         if ((q = uprv_strchr(p, '.')) != NULL) {
1065             /* How big will the resulting string be? */
1066             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1067             uprv_strncat(correctedPOSIXLocale, p, q-p);
1068             correctedPOSIXLocale[len] = 0;
1069         }
1070         else {
1071             /* Anything following the @ sign */
1072             uprv_strcat(correctedPOSIXLocale, p);
1073         }
1074
1075         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1076          * How about 'russian' -> 'ru'?
1077          * Many of the other locales using ISO codes will be handled by the
1078          * canonicalization functions in uloc_getDefault.
1079          */
1080     }
1081
1082     /* Was a correction made? */
1083     if (correctedPOSIXLocale != NULL) {
1084         posixID = correctedPOSIXLocale;
1085     }
1086     else {
1087         /* copy it, just in case the original pointer goes away.  See j2395 */
1088         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1089         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1090     }
1091
1092     if (gCorrectedPOSIXLocale == NULL) {
1093         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1094         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1095         correctedPOSIXLocale = NULL;
1096     }
1097
1098     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1099         uprv_free(correctedPOSIXLocale);
1100     }
1101
1102     return posixID;
1103
1104 #elif defined(U_WINDOWS)
1105     UErrorCode status = U_ZERO_ERROR;
1106     LCID id = GetThreadLocale();
1107     const char* locID = uprv_convertToPosix(id, &status);
1108
1109     if (U_FAILURE(status)) {
1110         locID = "en_US";
1111     }
1112     return locID;
1113
1114 #elif defined(XP_MAC)
1115     int32_t script = MAC_LC_INIT_NUMBER;
1116     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1117     int32_t region = MAC_LC_INIT_NUMBER;
1118     /* = GetScriptManagerVariable(smRegionCode);*/
1119     int32_t lang = MAC_LC_INIT_NUMBER;
1120     /* = GetScriptManagerVariable(smScriptLang);*/
1121     int32_t date_region = MAC_LC_INIT_NUMBER;
1122     const char* posixID = 0;
1123     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1124     int32_t i;
1125     Intl1Hndl ih;
1126
1127     ih = (Intl1Hndl) GetIntlResource(1);
1128     if (ih)
1129         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1130
1131     for (i = 0; i < count; i++) {
1132         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1133              || (mac_lc_recs[i].script == script))
1134             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1135              || (mac_lc_recs[i].region == region))
1136             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1137              || (mac_lc_recs[i].lang == lang))
1138             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1139              || (mac_lc_recs[i].date_region == date_region))
1140             )
1141         {
1142             posixID = mac_lc_recs[i].posixID;
1143             break;
1144         }
1145     }
1146
1147     return posixID;
1148
1149 #elif defined(OS400)
1150     /* locales are process scoped and are by definition thread safe */
1151     static char correctedLocale[64];
1152     const  char *localeID = getenv("LC_ALL");
1153            char *p;
1154
1155     if (localeID == NULL)
1156         localeID = getenv("LANG");
1157     if (localeID == NULL)
1158         localeID = setlocale(LC_ALL, NULL);
1159     /* Make sure we have something... */
1160     if (localeID == NULL)
1161         return "en_US_POSIX";
1162
1163     /* Extract the locale name from the path. */
1164     if((p = uprv_strrchr(localeID, '/')) != NULL)
1165     {
1166         /* Increment p to start of locale name. */
1167         p++;
1168         localeID = p;
1169     }
1170
1171     /* Copy to work location. */
1172     uprv_strcpy(correctedLocale, localeID);
1173
1174     /* Strip off the '.locale' extension. */
1175     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1176         *p = 0;
1177     }
1178
1179     /* Upper case the locale name. */
1180     T_CString_toUpperCase(correctedLocale);
1181
1182     /* See if we are using the POSIX locale.  Any of the
1183     * following are equivalent and use the same QLGPGCMA
1184     * (POSIX) locale.
1185     * QLGPGCMA2 means UCS2
1186     * QLGPGCMA_4 means UTF-32
1187     * QLGPGCMA_8 means UTF-8
1188     */
1189     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1190         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1191         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1192     {
1193         uprv_strcpy(correctedLocale, "en_US_POSIX");
1194     }
1195     else
1196     {
1197         int16_t LocaleLen;
1198
1199         /* Lower case the lang portion. */
1200         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1201         {
1202             *p = uprv_tolower(*p);
1203         }
1204
1205         /* Adjust for Euro.  After '_E' add 'URO'. */
1206         LocaleLen = uprv_strlen(correctedLocale);
1207         if (correctedLocale[LocaleLen - 2] == '_' &&
1208             correctedLocale[LocaleLen - 1] == 'E')
1209         {
1210             uprv_strcat(correctedLocale, "URO");
1211         }
1212
1213         /* If using Lotus-based locale then convert to
1214          * equivalent non Lotus.
1215          */
1216         else if (correctedLocale[LocaleLen - 2] == '_' &&
1217             correctedLocale[LocaleLen - 1] == 'L')
1218         {
1219             correctedLocale[LocaleLen - 2] = 0;
1220         }
1221
1222         /* There are separate simplified and traditional
1223          * locales called zh_HK_S and zh_HK_T.
1224          */
1225         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1226         {
1227             uprv_strcpy(correctedLocale, "zh_HK");
1228         }
1229
1230         /* A special zh_CN_GBK locale...
1231         */
1232         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1233         {
1234             uprv_strcpy(correctedLocale, "zh_CN");
1235         }
1236
1237     }
1238
1239     return correctedLocale;
1240 #endif
1241
1242 }
1243
1244 #if U_POSIX_LOCALE
/*
 * Platforms sometimes report one charset name when they really mean another.
 * This maps such platform-specific names onto names that ICU understands.
 *
 * locale - locale portion of the POSIX ID; NULL and "" are treated alike.
 * name   - charset name reported by the platform; may be NULL.
 *
 * Returns the (possibly replaced) charset name, or NULL when name is NULL
 * or ends up empty.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* Make sure that an empty locale is handled the same way as no locale. */
    if (locale != NULL && locale[0] == '\0') {
        locale = NULL;
    }

    if (name == NULL) {
        return NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    /* Solaris underspecifies the "EUC" name; the locale disambiguates it. */
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
#elif defined(U_DARWIN)
    /*
    No locale was specified, and an empty name was passed in.
    This usually indicates that nl_langinfo didn't return valid information.
    Mac OS X uses UTF-8 by default (especially the locale data and console).
    */
    if (locale == NULL && name[0] == '\0') {
        name = "UTF-8";
    }
#endif

    /* An empty name is reported to the caller as NULL. */
    return (name[0] == '\0') ? NULL : name;
}
1297
/*
 * Extracts the codepage part (the text after the first '.') of a POSIX
 * locale ID.
 *
 * localeName   - POSIX locale ID, e.g. "ab_CD.EF@GH"; may be NULL.
 * buffer       - receives the codepage name, cut off at any '@'.
 * buffCapacity - size of buffer in bytes.
 *
 * Returns the result of remapPlatformDependentCodepage() for the extracted
 * name, or NULL when localeName is NULL or contains no '.'.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    char *variantStart;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Keep the part before the '.', truncated to fit localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Copy what follows the '.' into the caller's buffer. */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */

    /* A trailing "@variant" is not part of the codepage name. */
    variantStart = uprv_strchr(buffer, '@');
    if (variantStart != NULL) {
        *variantStart = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, buffer);
}
1318 #endif
1319
/*
 * Works out the name of the platform's default codepage.
 * Each platform has its own branch; when no branch applies the function
 * returns "US-ASCII".  Most branches return a pointer to a function-local
 * static buffer.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    static char ccsidName[64];
    uint32_t ccsid = 37; /* Default to ibm-37 */
    Qwc_JOBI0400_t jobInfo;
    Qus_EC_t apiError = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobInfo, sizeof(jobInfo), "JOBI0400",
        "*                         ", "                ", &apiError);

    if (apiError.Bytes_Available == 0) {
        /* Prefer the job CCSID, then the job's default CCSID. */
        if (jobInfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobInfo.Coded_Char_Set_ID;
        }
        else if (jobInfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobInfo.Default_Coded_Char_Set_Id;
        }
        /* else keep the ibm-37 default */
    }
    sprintf(ccsidName,"ibm-%d", ccsid);
    return ccsidName;

#elif defined(OS390)
    static char codesetWithOption[64];
    sprintf(codesetWithOption,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET));
    return codesetWithOption;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char acpName[64];
    sprintf(acpName, "windows-%d", GetACP());
    return acpName;

#elif U_POSIX_LOCALE
    static char codesetBuf[100];
    const char *fromLocaleID;

    uprv_memset(codesetBuf, 0, sizeof(codesetBuf));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    fromLocaleID = getCodepageFromPOSIXID(uprv_getPOSIXID(), codesetBuf, sizeof(codesetBuf));
    if (fromLocaleID != NULL) {
        /* The locale ID itself named a codeset; use it. */
        return fromLocaleID;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (codesetBuf[0] != 0) {
        uprv_memset(codesetBuf, 0, sizeof(codesetBuf));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *langinfoCodeset =
            remapPlatformDependentCodepage(NULL, nl_langinfo(U_NL_LANGINFO_CODESET));
        if (langinfoCodeset != NULL) {
            uprv_strncpy(codesetBuf, langinfoCodeset, sizeof(codesetBuf));
            codesetBuf[sizeof(codesetBuf)-1] = 0;
            return codesetBuf;
        }
    }
#endif

    if (codesetBuf[0] == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        uprv_strcpy(codesetBuf, "US-ASCII");
    }
    return codesetBuf;
#else
    return "US-ASCII";
#endif
}
1403
1404
1405 U_CAPI const char*  U_EXPORT2
1406 uprv_getDefaultCodepage()
1407 {
1408     static char const  *name = NULL;
1409     umtx_lock(NULL);
1410     if (name == NULL) {
1411         name = int_getDefaultCodepage();
1412     }
1413     umtx_unlock(NULL);
1414     return name;
1415 }
1416
1417
1418 /* end of platform-specific implementation -------------- */
1419
1420 /* version handling --------------------------------------------------------- */
1421
1422 U_CAPI void U_EXPORT2
1423 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1424     char *end;
1425     uint16_t part=0;
1426
1427     if(versionArray==NULL) {
1428         return;
1429     }
1430
1431     if(versionString!=NULL) {
1432         for(;;) {
1433             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1434             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1435                 break;
1436             }
1437             versionString=end+1;
1438         }
1439     }
1440
1441     while(part<U_MAX_VERSION_LENGTH) {
1442         versionArray[part++]=0;
1443     }
1444 }
1445
1446 U_CAPI void U_EXPORT2
1447 u_versionToString(UVersionInfo versionArray, char *versionString) {
1448     uint16_t count, part;
1449     uint8_t field;
1450
1451     if(versionString==NULL) {
1452         return;
1453     }
1454
1455     if(versionArray==NULL) {
1456         versionString[0]=0;
1457         return;
1458     }
1459
1460     /* count how many fields need to be written */
1461     for(count=4; count>0 && versionArray[count-1]==0; --count) {
1462     }
1463
1464     if(count <= 1) {
1465         count = 2;
1466     }
1467
1468     /* write the first part */
1469     /* write the decimal field value */
1470     field=versionArray[0];
1471     if(field>=100) {
1472         *versionString++=(char)('0'+field/100);
1473         field%=100;
1474     }
1475     if(field>=10) {
1476         *versionString++=(char)('0'+field/10);
1477         field%=10;
1478     }
1479     *versionString++=(char)('0'+field);
1480
1481     /* write the following parts */
1482     for(part=1; part<count; ++part) {
1483         /* write a dot first */
1484         *versionString++=U_VERSION_DELIMITER;
1485
1486         /* write the decimal field value */
1487         field=versionArray[part];
1488         if(field>=100) {
1489             *versionString++=(char)('0'+field/100);
1490             field%=100;
1491         }
1492         if(field>=10) {
1493             *versionString++=(char)('0'+field/10);
1494             field%=10;
1495         }
1496         *versionString++=(char)('0'+field);
1497     }
1498
1499     /* NUL-terminate */
1500     *versionString=0;
1501 }
1502
1503 U_CAPI void U_EXPORT2
1504 u_getVersion(UVersionInfo versionArray) {
1505     u_versionFromString(versionArray, U_ICU_VERSION);
1506 }
1507
1508 /*
1509  * Hey, Emacs, please set the following:
1510  *
1511  * Local Variables:
1512  * indent-tabs-mode: nil
1513  * End:
1514  *
1515  */