icuSources/common/putil.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2004, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
  10 *
  11 *   Date        Name        Description
  12 *   04/14/97    aliu        Creation.
  13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
  14 *                            getDefaultLocaleID().
  15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
  16 *                            for assumed case.  Non-UNIX platforms must be
  17 *                            special-cased.  Rewrote numeric methods dealing
  18 *                            with NaN and Infinity to be platform independent
  19 *                             over all IEEE 754 platforms.
  20 *   05/13/97    aliu        Restored sign of timezone
  21 *                            (semantics are hours West of GMT)
  22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
  23 *                             nextDouble..
  24 *   07/22/98    stephen     Added remainder, max, min, trunc
  25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
  26 *   08/24/98    stephen     Added longBitsFromDouble
  27 *   09/08/98    stephen     Minor changes for Mac Port
  28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
  29 *                            Fixed EBCDIC tables
  30 *   04/15/99    stephen     Converted to C.
  31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
  32 *   08/04/99    jeffrey R.  Added OS/2 changes
  33 *   11/15/99    helena      Integrated S/390 IEEE support.
  34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
  35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
  36 ******************************************************************************
  37 */
  38
  39 #ifndef PTX
  40
  41 /* Define _XOPEN_SOURCE for Solaris and friends. */
  42 /* NetBSD needs it to be >= 4 */
  43 #ifndef _XOPEN_SOURCE
  44 #define _XOPEN_SOURCE 4
  45 #endif
  46
  47 /* Define __USE_POSIX and __USE_XOPEN for Linux and glibc. */
  48 #ifndef __USE_POSIX
  49 #define __USE_POSIX
  50 #endif
  51 #ifndef __USE_XOPEN
  52 #define __USE_XOPEN
  53 #endif
  54
  55 #endif /* PTX */
  56
  57 /* include ICU headers */
  58 #include "unicode/utypes.h"
  59 #include "unicode/putil.h"
  60 #include "unicode/ustring.h"
  61 #include "putilimp.h"
  62 #include "uassert.h"
  63 #include "umutex.h"
  64 #include "cmemory.h"
  65 #include "cstring.h"
  66 #include "locmap.h"
  67 #include "ucln_cmn.h"
  68 #include "udataswp.h"
  69
  70 /* include system headers */
  71 #ifdef WIN32
  72 #   define WIN32_LEAN_AND_MEAN
  73 #   define VC_EXTRALEAN
  74 #   define NOUSER
  75 #   define NOSERVICE
  76 #   define NOIME
  77 #   define NOMCX
  78 #   include <windows.h>
  79 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
  80 /* tzset isn't defined in strict ANSI on Cygwin. */
  81 #   undef __STRICT_ANSI__
  82 #elif defined(OS2)
  83 #   define INCL_DOSMISC
  84 #   define INCL_DOSERRORS
  85 #   define INCL_DOSMODULEMGR
  86 #   include <os2.h>
  87 #elif defined(OS400)
  88 #   include <float.h>
  89 #   include <qusec.h>       /* error code structure */
  90 #   include <qusrjobi.h>
  91 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
  92 #elif defined(XP_MAC)
  93 #   include <Files.h>
  94 #   include <IntlResources.h>
  95 #   include <Script.h>
  96 #   include <Folders.h>
  97 #   include <MacTypes.h>
  98 #   include <TextUtils.h>
  99 #elif defined(OS390)
 100 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
 101 #elif defined(U_AIX)
 102 #elif defined(U_SOLARIS) || defined(U_LINUX)
 103 #elif defined(U_HPUX)
 104 #elif defined(U_DARWIN)
 105 #include <sys/file.h>
 106 #include <sys/param.h>
 107 #elif defined(U_QNX)
 108 #include <sys/neutrino.h>
 109 #endif
 110
 111 /* Include standard headers. */
 112 #include <stdio.h>
 113 #include <stdlib.h>
 114 #include <string.h>
 115 #include <math.h>
 116 #include <locale.h>
 117 #include <float.h>
 118 #include <time.h>
 119
 120 /*
 121  * Only include langinfo.h if we have a way to get the codeset. If we later
 122  * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
 123  *
 124  */
 125
 126 #if U_HAVE_NL_LANGINFO_CODESET
 127 #include <langinfo.h>
 128 #endif
 129
 130 /* Define the extension for data files, again... */
 131 #define DATA_TYPE "dat"
 132
 133 /* Leave this copyright notice here! */
 134 static const char copyright[] = U_COPYRIGHT_STRING;
 135
 136 /* floating point implementations ------------------------------------------- */
 137
 138 /* We return QNAN rather than SNAN*/
 139 #define SIGN 0x80000000U
 140 #if defined(__GNUC__)
 141 /*
 142     This is an optimization for when u_topNBytesOfDouble
 143     and u_bottomNBytesOfDouble can't be properly optimized by the compiler.
 144 */
 145 #define USE_64BIT_DOUBLE_OPTIMIZATION 1
 146 #else
 147 #define USE_64BIT_DOUBLE_OPTIMIZATION 0
 148 #endif
 149
 150 #if USE_64BIT_DOUBLE_OPTIMIZATION
 151 /* gcc 3.2 has an optimization bug */
 152 static const int64_t gNan64 = 0x7FF8000000000000LL;
 153 static const int64_t gInf64 = 0x7FF0000000000000LL;
 154 static const double * const fgNan = (const double *)(&gNan64);
 155 static const double * const fgInf = (const double *)(&gInf64);
 156 #else
 157
 158 #if IEEE_754
 159 #define NAN_TOP ((int16_t)0x7FF8)
 160 #define INF_TOP ((int16_t)0x7FF0)
 161 #elif defined(OS390)
 162 #define NAN_TOP ((int16_t)0x7F08)
 163 #define INF_TOP ((int16_t)0x3F00)
 164 #endif
 165
 166 /* statics */
 167 static UBool fgNaNInitialized = FALSE;
 168 static UBool fgInfInitialized = FALSE;
 169 static double gNan;
 170 static double gInf;
 171 static double * const fgNan = &gNan;
 172 static double * const fgInf = &gInf;
 173 #endif
 174
 175 /*---------------------------------------------------------------------------
 176   Platform utilities
 177   Our general strategy is to assume we're on a POSIX platform.  Platforms which
 178   are non-POSIX must declare themselves so.  The default POSIX implementation
 179   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
 180   functions).
 181   ---------------------------------------------------------------------------*/
 182
 183 #if defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2)
 184 #   undef U_POSIX_LOCALE
 185 #else
 186 #   define U_POSIX_LOCALE    1
 187 #endif
 188
 189 /* Utilities to get the bits from a double */
 190 static char*
 191 u_topNBytesOfDouble(double* d, int n)
 192 {
 193 #if U_IS_BIG_ENDIAN
 194     return (char*)d;
 195 #else
 196     return (char*)(d + 1) - n;
 197 #endif
 198 }
 199
 200 static char*
 201 u_bottomNBytesOfDouble(double* d, int n)
 202 {
 203 #if U_IS_BIG_ENDIAN
 204     return (char*)(d + 1) - n;
 205 #else
 206     return (char*)d;
 207 #endif
 208 }
 209
 210 /*---------------------------------------------------------------------------
 211   Universal Implementations
 212   These are designed to work on all platforms.  Try these, and if they don't
 213   work on your platform, then special case your platform with new
 214   implementations.
 215   ---------------------------------------------------------------------------*/
 216
 217 /* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
 218 U_CAPI UDate U_EXPORT2
 219 uprv_getUTCtime()
 220 {
 221 #ifdef XP_MAC
 222     time_t t, t1, t2;
 223     struct tm tmrec;
 224
 225     uprv_memset( &tmrec, 0, sizeof(tmrec) );
 226     tmrec.tm_year = 70;
 227     tmrec.tm_mon = 0;
 228     tmrec.tm_mday = 1;
 229     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
 230
 231     time(&t);
 232     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 233     t2 = mktime(&tmrec);    /* seconds of current GMT*/
 234     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
 235 #else
 236     time_t epochtime;
 237     time(&epochtime);
 238     return (UDate)epochtime * U_MILLIS_PER_SECOND;
 239 #endif
 240 }
 241
 242 /*-----------------------------------------------------------------------------
 243   IEEE 754
 244   These methods detect and return NaN and infinity values for doubles
 245   conforming to IEEE 754.  Platforms which support this standard include X86,
 246   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
 247   If this doesn't work on your platform, you have non-IEEE floating-point, and
 248   will need to code your own versions.  A naive implementation is to return 0.0
 249   for getNaN and getInfinity, and false for isNaN and isInfinite.
 250   ---------------------------------------------------------------------------*/
 251
 252 U_CAPI UBool U_EXPORT2
 253 uprv_isNaN(double number)
 254 {
 255 #if IEEE_754
 256 #if USE_64BIT_DOUBLE_OPTIMIZATION
 257     /* gcc 3.2 has an optimization bug */
 258     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
 259     return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) > gInf64);
 260
 261 #else
 262     /* This should work in theory, but it doesn't, so we resort to the more*/
 263     /* complicated method below.*/
 264     /*  return number != number;*/
 265
 266     /* You can't return number == getNaN() because, by definition, NaN != x for*/
 267     /* all x, including NaN (that is, NaN != NaN).  So instead, we compare*/
 268     /* against the known bit pattern.  We must be careful of endianism here.*/
 269     /* The pattern we are looking for id:*/
 270
 271     /*   7FFy yyyy yyyy yyyy  (some y non-zero)*/
 272
 273     /* There are two different kinds of NaN, but we ignore the distinction*/
 274     /* here.  Note that the y value must be non-zero; if it is zero, then we*/
 275     /* have infinity.*/
 276
 277     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 278                               sizeof(uint32_t));
 279     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 280                              sizeof(uint32_t));
 281
 282     return (UBool)(((highBits & 0x7FF00000L) == 0x7FF00000L) &&
 283       (((highBits & 0x000FFFFFL) != 0) || (lowBits != 0)));
 284 #endif
 285
 286 #elif defined(OS390)
 287     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 288                         sizeof(uint32_t));
 289     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 290                         sizeof(uint32_t));
 291
 292     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
 293       (lowBits == 0x00000000L);
 294
 295 #else
 296     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 297     /* you'll need to replace this default implementation with what's correct*/
 298     /* for your platform.*/
 299     return number != number;
 300 #endif
 301 }
 302
 303 U_CAPI UBool U_EXPORT2
 304 uprv_isInfinite(double number)
 305 {
 306 #if IEEE_754
 307 #if USE_64BIT_DOUBLE_OPTIMIZATION
 308     /* gcc 3.2 has an optimization bug */
 309     return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) == gInf64);
 310 #else
 311
 312     /* We know the top bit is the sign bit, so we mask that off in a copy of */
 313     /* the number and compare against infinity. [LIU]*/
 314     /* The following approach doesn't work for some reason, so we go ahead and */
 315     /* scrutinize the pattern itself. */
 316     /*  double a = number; */
 317     /*  *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
 318     /*  return a == uprv_getInfinity();*/
 319     /* Instead, We want to see either:*/
 320
 321     /*   7FF0 0000 0000 0000*/
 322     /*   FFF0 0000 0000 0000*/
 323
 324     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 325                         sizeof(uint32_t));
 326     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 327                         sizeof(uint32_t));
 328
 329     return (UBool)(((highBits  & ~SIGN) == 0x7FF00000U) &&
 330       (lowBits == 0x00000000U));
 331 #endif
 332
 333 #elif defined(OS390)
 334     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 335                         sizeof(uint32_t));
 336     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 337                         sizeof(uint32_t));
 338
 339     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
 340
 341 #else
 342     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 343     /* value, you'll need to replace this default implementation with what's*/
 344     /* correct for your platform.*/
 345     return number == (2.0 * number);
 346 #endif
 347 }
 348
 349 U_CAPI UBool U_EXPORT2
 350 uprv_isPositiveInfinity(double number)
 351 {
 352 #if IEEE_754 || defined(OS390)
 353     return (UBool)(number > 0 && uprv_isInfinite(number));
 354 #else
 355     return uprv_isInfinite(number);
 356 #endif
 357 }
 358
 359 U_CAPI UBool U_EXPORT2
 360 uprv_isNegativeInfinity(double number)
 361 {
 362 #if IEEE_754 || defined(OS390)
 363     return (UBool)(number < 0 && uprv_isInfinite(number));
 364
 365 #else
 366     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 367                         sizeof(uint32_t));
 368     return((highBits & SIGN) && uprv_isInfinite(number));
 369
 370 #endif
 371 }
 372
 373 U_CAPI double U_EXPORT2
 374 uprv_getNaN()
 375 {
 376 #if IEEE_754 || defined(OS390)
 377 #if !USE_64BIT_DOUBLE_OPTIMIZATION
 378     if (!fgNaNInitialized) {
 379         /* This variable is always initialized with the same value,
 380         so a mutex isn't needed. */
 381         int i;
 382         int8_t* p = (int8_t*)fgNan;
 383         for(i = 0; i < sizeof(double); ++i)
 384             *p++ = 0;
 385         *(int16_t*)u_topNBytesOfDouble(fgNan, sizeof(NAN_TOP)) = NAN_TOP;
 386         fgNaNInitialized = TRUE;
 387     }
 388 #endif
 389     return *fgNan;
 390 #else
 391     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 392     /* you'll need to replace this default implementation with what's correct*/
 393     /* for your platform.*/
 394     return 0.0;
 395 #endif
 396 }
 397
 398 U_CAPI double U_EXPORT2
 399 uprv_getInfinity()
 400 {
 401 #if IEEE_754 || defined(OS390)
 402 #if !USE_64BIT_DOUBLE_OPTIMIZATION
 403     if (!fgInfInitialized)
 404     {
 405         /* This variable is always initialized with the same value,
 406         so a mutex isn't needed. */
 407         int i;
 408         int8_t* p = (int8_t*)fgInf;
 409         for(i = 0; i < sizeof(double); ++i)
 410             *p++ = 0;
 411         *(int16_t*)u_topNBytesOfDouble(fgInf, sizeof(INF_TOP)) = INF_TOP;
 412         fgInfInitialized = TRUE;
 413     }
 414 #endif
 415     return *fgInf;
 416 #else
 417     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 418     /* value, you'll need to replace this default implementation with what's*/
 419     /* correct for your platform.*/
 420     return 0.0;
 421 #endif
 422 }
 423
 424 U_CAPI double U_EXPORT2
 425 uprv_floor(double x)
 426 {
 427     return floor(x);
 428 }
 429
 430 U_CAPI double U_EXPORT2
 431 uprv_ceil(double x)
 432 {
 433     return ceil(x);
 434 }
 435
 436 U_CAPI double U_EXPORT2
 437 uprv_round(double x)
 438 {
 439     return uprv_floor(x + 0.5);
 440 }
 441
 442 U_CAPI double U_EXPORT2
 443 uprv_fabs(double x)
 444 {
 445     return fabs(x);
 446 }
 447
 448 U_CAPI double U_EXPORT2
 449 uprv_modf(double x, double* y)
 450 {
 451     return modf(x, y);
 452 }
 453
 454 U_CAPI double U_EXPORT2
 455 uprv_fmod(double x, double y)
 456 {
 457     return fmod(x, y);
 458 }
 459
 460 U_CAPI double U_EXPORT2
 461 uprv_pow(double x, double y)
 462 {
 463     /* This is declared as "double pow(double x, double y)" */
 464     return pow(x, y);
 465 }
 466
 467 U_CAPI double U_EXPORT2
 468 uprv_pow10(int32_t x)
 469 {
 470     return pow(10.0, (double)x);
 471 }
 472
 473 U_CAPI double U_EXPORT2
 474 uprv_fmax(double x, double y)
 475 {
 476 #if IEEE_754
 477     int32_t lowBits;
 478
 479     /* first handle NaN*/
 480     if(uprv_isNaN(x) || uprv_isNaN(y))
 481         return uprv_getNaN();
 482
 483     /* check for -0 and 0*/
 484     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
 485     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 486         return y;
 487
 488 #endif
 489
 490     /* this should work for all flt point w/o NaN and Infpecial cases */
 491     return (x > y ? x : y);
 492 }
 493
 494 U_CAPI int32_t U_EXPORT2
 495 uprv_max(int32_t x, int32_t y)
 496 {
 497     return (x > y ? x : y);
 498 }
 499
 500 U_CAPI double U_EXPORT2
 501 uprv_fmin(double x, double y)
 502 {
 503 #if IEEE_754
 504     int32_t lowBits;
 505
 506     /* first handle NaN*/
 507     if(uprv_isNaN(x) || uprv_isNaN(y))
 508         return uprv_getNaN();
 509
 510     /* check for -0 and 0*/
 511     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
 512     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 513         return y;
 514
 515 #endif
 516
 517     /* this should work for all flt point w/o NaN and Inf special cases */
 518     return (x > y ? y : x);
 519 }
 520
 521 U_CAPI int32_t U_EXPORT2
 522 uprv_min(int32_t x, int32_t y)
 523 {
 524     return (x > y ? y : x);
 525 }
 526
 527 /**
 528  * Truncates the given double.
 529  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
 530  * This is different than calling floor() or ceil():
 531  * floor(3.3) = 3, floor(-3.3) = -4
 532  * ceil(3.3) = 4, ceil(-3.3) = -3
 533  */
 534 U_CAPI double U_EXPORT2
 535 uprv_trunc(double d)
 536 {
 537 #if IEEE_754
 538     int32_t lowBits;
 539
 540     /* handle error cases*/
 541     if(uprv_isNaN(d))
 542         return uprv_getNaN();
 543     if(uprv_isInfinite(d))
 544         return uprv_getInfinity();
 545
 546     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
 547     if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
 548         return ceil(d);
 549     else
 550         return floor(d);
 551
 552 #else
 553     return d >= 0 ? floor(d) : ceil(d);
 554
 555 #endif
 556 }
 557
 558 /**
 559  * Return the largest positive number that can be represented by an integer
 560  * type of arbitrary bit length.
 561  */
 562 U_CAPI double U_EXPORT2
 563 uprv_maxMantissa(void)
 564 {
 565     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
 566 }
 567
 568 /**
 569  * Return the floor of the log base 10 of a given double.
 570  * This method compensates for inaccuracies which arise naturally when
 571  * computing logs, and always give the correct value.  The parameter
 572  * must be positive and finite.
 573  * (Thanks to Alan Liu for supplying this function.)
 574  */
 575 U_CAPI int16_t U_EXPORT2
 576 uprv_log10(double d)
 577 {
 578 #ifdef OS400
 579     /* We don't use the normal implementation because you can't underflow */
 580     /* a double otherwise an underflow exception occurs */
 581     return log10(d);
 582 #else
 583     /* The reason this routine is needed is that simply taking the*/
 584     /* log and dividing by log10 yields a result which may be off*/
 585     /* by 1 due to rounding errors.  For example, the naive log10*/
 586     /* of 1.0e300 taken this way is 299, rather than 300.*/
 587     double alog10 = log(d) / log(10.0);
 588     int16_t ailog10 = (int16_t) floor(alog10);
 589
 590     /* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
 591     if (alog10 > 0 && d >= pow(10.0, (double)(ailog10 + 1)))
 592         ++ailog10;
 593
 594     /* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
 595     else if (alog10 < 0 && d < pow(10.0, (double)(ailog10)))
 596         --ailog10;
 597
 598     return ailog10;
 599 #endif
 600 }
 601
 602 U_CAPI double U_EXPORT2
 603 uprv_log(double d)
 604 {
 605     return log(d);
 606 }
 607
 608 #if 0
 609 /* This isn't used. If it's readded, readd putiltst.c tests */
 610 U_CAPI int32_t U_EXPORT2
 611 uprv_digitsAfterDecimal(double x)
 612 {
 613     char buffer[20];
 614     int32_t numDigits, bytesWritten;
 615     char *p = buffer;
 616     int32_t ptPos, exponent;
 617
 618     /* cheat and use the string-format routine to get a string representation*/
 619     /* (it handles mathematical inaccuracy better than we can), then find out */
 620     /* many characters are to the right of the decimal point */
 621     bytesWritten = sprintf(buffer, "%+.9g", x);
 622     while (isdigit(*(++p))) {
 623     }
 624
 625     ptPos = (int32_t)(p - buffer);
 626     numDigits = (int32_t)(bytesWritten - ptPos - 1);
 627
 628     /* if the number's string representation is in scientific notation, find */
 629     /* the exponent and take it into account*/
 630     exponent = 0;
 631     p = uprv_strchr(buffer, 'e');
 632     if (p != 0) {
 633         int16_t expPos = (int16_t)(p - buffer);
 634         numDigits -= bytesWritten - expPos;
 635         exponent = (int32_t)(atol(p + 1));
 636     }
 637
 638     /* the string representation may still have spurious decimal digits in it, */
 639     /* so we cut off at the ninth digit to the right of the decimal, and have */
 640     /* to search backward from there to the first non-zero digit*/
 641     if (numDigits > 9) {
 642         numDigits = 9;
 643         while (numDigits > 0 && buffer[ptPos + numDigits] == '0')
 644             --numDigits;
 645     }
 646     numDigits -= exponent;
 647     if (numDigits < 0) {
 648         return 0;
 649     }
 650     return numDigits;
 651 }
 652 #endif
 653
 654 /*---------------------------------------------------------------------------
 655   Platform-specific Implementations
 656   Try these, and if they don't work on your platform, then special case your
 657   platform with new implementations.
 658   ---------------------------------------------------------------------------*/
 659
 660 /* Win32 time zone detection ------------------------------------------------ */
 661
 662 #ifdef WIN32
 663
 664 /*
 665   This code attempts to detect the Windows time zone, as set in the
 666   Windows Date and Time control panel.  It attempts to work on
 667   multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized
 668   installs.  It works by directly interrogating the registry and
 669   comparing the data there with the data returned by the
 670   GetTimeZoneInformation API, along with some other strategies.  The
 671   registry contains time zone data under one of two keys (depending on
 672   the flavor of Windows):
 673
 674     HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\
 675     HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\
 676
 677   Under this key are several subkeys, one for each time zone.  These
 678   subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time"
 679   on WinNT/2k/XP.  There are some other wrinkles; see the code for
 680   details.  The subkey name is NOT LOCALIZED, allowing us to support
 681   localized installs.
 682
 683   Under the subkey are data values.  We care about:
 684
 685     Std   Standard time display name, localized
 686     TZI   Binary block of data
 687
 688   The TZI data is of particular interest.  It contains the offset, two
 689   more offsets for standard and daylight time, and the start and end
 690   rules.  This is the same data returned by the GetTimeZoneInformation
 691   API.  The API may modify the data on the way out, so we have to be
 692   careful, but essentially we do a binary comparison against the TZI
 693   blocks of various registry keys.  When we find a match, we know what
 694   time zone Windows is set to.  Since the registry key is not
 695   localized, we can then translate the key through a simple table
 696   lookup into the corresponding ICU time zone.
 697
 698   This strategy doesn't always work because there are zones which
 699   share an offset and rules, so more than one TZI block will match.
 700   For example, both Tokyo and Seoul are at GMT+9 with no DST rules;
 701   their TZI blocks are identical.  For these cases, we fall back to a
 702   name lookup.  We attempt to match the display name as stored in the
 703   registry for the current zone to the display name stored in the
 704   registry for various Windows zones.  By comparing the registry data
 705   directly we avoid conversion complications.
 706
 707   Author: Alan Liu
 708   Since: ICU 2.6
 709   Based on original code by Carl Brown <cbrown@xnetinc.com>
 710 */
 711
 712 /**
 713  * Layout of the binary registry data under the "TZI" key.
 714  */
 715 typedef struct {
 716    LONG       Bias;
 717    LONG       StandardBias;
 718    LONG       DaylightBias; /* Tweaked by GetTimeZoneInformation */
 719    SYSTEMTIME StandardDate;
 720    SYSTEMTIME DaylightDate;
 721 } TZI;
 722
 723 typedef struct {
 724     const char* icuid;
 725     const char* winid;
 726 } WindowsICUMap;
 727
 728 /**
 729  * Mapping between Windows zone IDs and ICU zone IDs.  This list has
 730  * been mechanically checked; all zone offsets match (most important)
 731  * and city names match the display city names (where possible).  The
 732  * presence or absence of DST differs in some cases, but this is
 733  * acceptable as long as the zone is semantically the same (which has
 734  * been manually checked).
 735  *
 736  * Windows 9x/Me zone IDs are listed as "Pacific" rather than "Pacific
 737  * Standard Time", which is seen in NT/2k/XP.  This is fixed-up at
 738  * runtime as needed.  The one exception is "Mexico Standard Time 2",
 739  * which is not present on Windows 9x/Me.
 740  *
 741  * Zones that are not unique under Offset+Rules should be grouped
 742  * together for efficiency (see code below).  In addition, rules MUST
 743  * be grouped so that all zones of a single offset are together.
 744  *
 745  * Comments list S(tandard) or D(aylight), as declared by Windows,
 746  * followed by the display name (data from Windows XP).
 747  *
 748  * NOTE: Etc/GMT+12 is CORRECT for offset GMT-12:00.  Consult
 749  * documentation elsewhere for an explanation.
 750  */
 751 static const WindowsICUMap ZONE_MAP[] = {
 752     "Etc/GMT+12",           "Dateline", /* S (GMT-12:00) International Date Line West */
 753
 754     "Pacific/Apia",         "Samoa", /* S (GMT-11:00) Midway Island, Samoa */
 755
 756     "Pacific/Honolulu",     "Hawaiian", /* S (GMT-10:00) Hawaii */
 757
 758     "America/Anchorage",    "Alaskan", /* D (GMT-09:00) Alaska */
 759
 760     "America/Los_Angeles",  "Pacific", /* D (GMT-08:00) Pacific Time (US & Canada); Tijuana */
 761
 762     "America/Phoenix",      "US Mountain", /* S (GMT-07:00) Arizona */
 763     "America/Denver",       "Mountain", /* D (GMT-07:00) Mountain Time (US & Canada) */
 764     "America/Chihuahua",    "Mexico Standard Time 2", /* D (GMT-07:00) Chihuahua, La Paz, Mazatlan */
 765
 766     "America/Managua",      "Central America", /* S (GMT-06:00) Central America */
 767     "America/Regina",       "Canada Central", /* S (GMT-06:00) Saskatchewan */
 768     "America/Mexico_City",  "Mexico", /* D (GMT-06:00) Guadalajara, Mexico City, Monterrey */
 769     "America/Chicago",      "Central", /* D (GMT-06:00) Central Time (US & Canada) */
 770
 771     "America/Indianapolis", "US Eastern", /* S (GMT-05:00) Indiana (East) */
 772     "America/Bogota",       "SA Pacific", /* S (GMT-05:00) Bogota, Lima, Quito */
 773     "America/New_York",     "Eastern", /* D (GMT-05:00) Eastern Time (US & Canada) */
 774
 775     "America/Caracas",      "SA Western", /* S (GMT-04:00) Caracas, La Paz */
 776     "America/Santiago",     "Pacific SA", /* D (GMT-04:00) Santiago */
 777     "America/Halifax",      "Atlantic", /* D (GMT-04:00) Atlantic Time (Canada) */
 778
 779     "America/St_Johns",     "Newfoundland", /* D (GMT-03:30) Newfoundland */
 780
 781     "America/Buenos_Aires", "SA Eastern", /* S (GMT-03:00) Buenos Aires, Georgetown */
 782     "America/Godthab",      "Greenland", /* D (GMT-03:00) Greenland */
 783     "America/Sao_Paulo",    "E. South America", /* D (GMT-03:00) Brasilia */
 784
 785     "America/Noronha",      "Mid-Atlantic", /* D (GMT-02:00) Mid-Atlantic */
 786
 787     "Atlantic/Cape_Verde",  "Cape Verde", /* S (GMT-01:00) Cape Verde Is. */
 788     "Atlantic/Azores",      "Azores", /* D (GMT-01:00) Azores */
 789
 790     "Africa/Casablanca",    "Greenwich", /* S (GMT) Casablanca, Monrovia */
 791     "Europe/London",        "GMT", /* D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London */
 792
 793     "Africa/Lagos",         "W. Central Africa", /* S (GMT+01:00) West Central Africa */
 794     "Europe/Berlin",        "W. Europe", /* D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna */
 795     "Europe/Paris",         "Romance", /* D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris */
 796     "Europe/Sarajevo",      "Central European", /* D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb */
 797     "Europe/Belgrade",      "Central Europe", /* D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague */
 798
 799     "Africa/Johannesburg",  "South Africa", /* S (GMT+02:00) Harare, Pretoria */
 800     "Asia/Jerusalem",       "Israel", /* S (GMT+02:00) Jerusalem */
 801     "Europe/Istanbul",      "GTB", /* D (GMT+02:00) Athens, Istanbul, Minsk */
 802     "Europe/Helsinki",      "FLE", /* D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius */
 803     "Africa/Cairo",         "Egypt", /* D (GMT+02:00) Cairo */
 804     "Europe/Bucharest",     "E. Europe", /* D (GMT+02:00) Bucharest */
 805
 806     "Africa/Nairobi",       "E. Africa", /* S (GMT+03:00) Nairobi */
 807     "Asia/Riyadh",          "Arab", /* S (GMT+03:00) Kuwait, Riyadh */
 808     "Europe/Moscow",        "Russian", /* D (GMT+03:00) Moscow, St. Petersburg, Volgograd */
 809     "Asia/Baghdad",         "Arabic", /* D (GMT+03:00) Baghdad */
 810
 811     "Asia/Tehran",          "Iran", /* D (GMT+03:30) Tehran */
 812
 813     "Asia/Muscat",          "Arabian", /* S (GMT+04:00) Abu Dhabi, Muscat */
 814     "Asia/Tbilisi",         "Caucasus", /* D (GMT+04:00) Baku, Tbilisi, Yerevan */
 815
 816     "Asia/Kabul",           "Afghanistan", /* S (GMT+04:30) Kabul */
 817
 818     "Asia/Karachi",         "West Asia", /* S (GMT+05:00) Islamabad, Karachi, Tashkent */
 819     "Asia/Yekaterinburg",   "Ekaterinburg", /* D (GMT+05:00) Ekaterinburg */
 820
 821     "Asia/Calcutta",        "India", /* S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi */
 822
 823     "Asia/Katmandu",        "Nepal", /* S (GMT+05:45) Kathmandu */
 824
 825     "Asia/Colombo",         "Sri Lanka", /* S (GMT+06:00) Sri Jayawardenepura */
 826     "Asia/Dhaka",           "Central Asia", /* S (GMT+06:00) Astana, Dhaka */
 827     "Asia/Novosibirsk",     "N. Central Asia", /* D (GMT+06:00) Almaty, Novosibirsk */
 828
 829     "Asia/Rangoon",         "Myanmar", /* S (GMT+06:30) Rangoon */
 830
 831     "Asia/Bangkok",         "SE Asia", /* S (GMT+07:00) Bangkok, Hanoi, Jakarta */
 832     "Asia/Krasnoyarsk",     "North Asia", /* D (GMT+07:00) Krasnoyarsk */
 833
 834     "Australia/Perth",      "W. Australia", /* S (GMT+08:00) Perth */
 835     "Asia/Taipei",          "Taipei", /* S (GMT+08:00) Taipei */
 836     "Asia/Singapore",       "Singapore", /* S (GMT+08:00) Kuala Lumpur, Singapore */
 837     "Asia/Hong_Kong",       "China", /* S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi */
 838     "Asia/Irkutsk",         "North Asia East", /* D (GMT+08:00) Irkutsk, Ulaan Bataar */
 839
 840     "Asia/Tokyo",           "Tokyo", /* S (GMT+09:00) Osaka, Sapporo, Tokyo */
 841     "Asia/Seoul",           "Korea", /* S (GMT+09:00) Seoul */
 842     "Asia/Yakutsk",         "Yakutsk", /* D (GMT+09:00) Yakutsk */
 843
 844     "Australia/Darwin",     "AUS Central", /* S (GMT+09:30) Darwin */
 845     "Australia/Adelaide",   "Cen. Australia", /* D (GMT+09:30) Adelaide */
 846
 847     "Pacific/Guam",         "West Pacific", /* S (GMT+10:00) Guam, Port Moresby */
 848     "Australia/Brisbane",   "E. Australia", /* S (GMT+10:00) Brisbane */
 849     "Asia/Vladivostok",     "Vladivostok", /* D (GMT+10:00) Vladivostok */
 850     "Australia/Hobart",     "Tasmania", /* D (GMT+10:00) Hobart */
 851     "Australia/Sydney",     "AUS Eastern", /* D (GMT+10:00) Canberra, Melbourne, Sydney */
 852
 853     "Asia/Magadan",         "Central Pacific", /* S (GMT+11:00) Magadan, Solomon Is., New Caledonia */
 854
 855     "Pacific/Fiji",         "Fiji", /* S (GMT+12:00) Fiji, Kamchatka, Marshall Is. */
 856     "Pacific/Auckland",     "New Zealand", /* D (GMT+12:00) Auckland, Wellington */
 857
 858     "Pacific/Tongatapu",    "Tonga", /* S (GMT+13:00) Nuku'alofa */
 859     NULL,                   NULL
 860 };
 861
 862 typedef struct {
 863     const char* winid;
 864     const char* altwinid;
 865 } WindowsZoneRemap;
 866
 867 /**
 868  * If a lookup fails, we attempt to remap certain Windows ids to
 869  * alternate Windows ids.  If the alternate listed here begins with
 870  * '-', we use it as is (without the '-').  If it begins with '+', we
 871  * append a " Standard Time" if appropriate.
 872  */
 873 static const WindowsZoneRemap ZONE_REMAP[] = {
 874     "Central European",     "-Warsaw",
 875     "Central Europe",       "-Prague Bratislava",
 876     "China",                "-Beijing",
 877
 878     "Greenwich",            "+GMT",
 879     "GTB",                  "+GFT",
 880     "Arab",                 "+Saudi Arabia",
 881     "SE Asia",              "+Bangkok",
 882     "AUS Eastern",          "+Sydney",
 883     NULL,                   NULL,
 884 };
 885
 886 /**
 887  * Various registry keys and key fragments.
 888  */
 889 static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
 890 static const char STANDARD_NAME_REGKEY[] = "StandardName";
 891 static const char STANDARD_TIME_REGKEY[] = " Standard Time";
 892 static const char TZI_REGKEY[] = "TZI";
 893 static const char STD_REGKEY[] = "Std";
 894
 895 /**
 896  * HKLM subkeys used to probe for the flavor of Windows.  Note that we
 897  * specifically check for the "GMT" zone subkey; this is present on
 898  * NT, but on XP has become "GMT Standard Time".  We need to
 899  * discriminate between these cases.
 900  */
 901 static const char* const WIN_TYPE_PROBE_REGKEY[] = {
 902     /* WIN_9X_ME_TYPE */
 903     "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones",
 904
 905     /* WIN_NT_TYPE */
 906     "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT"
 907
 908     /* otherwise: WIN_2K_XP_TYPE */
 909 };
 910
 911 /**
 912  * The time zone root subkeys (under HKLM) for different flavors of
 913  * Windows.
 914  */
 915 static const char* const TZ_REGKEY[] = {
 916     /* WIN_9X_ME_TYPE */
 917     "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\",
 918
 919     /* WIN_NT_TYPE | WIN_2K_XP_TYPE */
 920     "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"
 921 };
 922
 923 /**
 924  * Flavor of Windows, from our perspective.  Not a real OS version,
 925  * but rather the flavor of the layout of the time zone information in
 926  * the registry.
 927  */
 928 enum {
 929     WIN_9X_ME_TYPE = 0,
 930     WIN_NT_TYPE = 1,
 931     WIN_2K_XP_TYPE = 2
 932 };
 933
 934 /**
 935  * Auxiliary Windows time zone function.  Attempts to open the given
 936  * Windows time zone ID as a registry key.  Returns ERROR_SUCCESS if
 937  * successful.  Caller must close the registry key.  Handles
 938  * variations in the resource layout in different flavors of Windows.
 939  *
 940  * @param hkey output parameter to receive opened registry key
 941  * @param winid Windows zone ID, e.g., "Pacific", without the
 942  * " Standard Time" suffix (if any).  Special case "Mexico Standard Time 2"
 943  * allowed.
 944  * @param winType Windows flavor (WIN_9X_ME_TYPE, etc.)
 945  * @return ERROR_SUCCESS upon success
 946  */
 947 static LONG openTZRegKey(HKEY *hkey, const char* winid, int winType) {
 948     LONG result;
 949     char subKeyName[96];
 950     char* name;
 951     int i;
 952
 953     uprv_strcpy(subKeyName, TZ_REGKEY[(winType == WIN_9X_ME_TYPE) ? 0 : 1]);
 954     name = &subKeyName[strlen(subKeyName)];
 955     uprv_strcat(subKeyName, winid);
 956     if (winType != WIN_9X_ME_TYPE) {
 957         /* Don't modify "Mexico Standard Time 2", which does not occur
 958            on WIN_9X_ME_TYPE.  Also, if the type is WIN_NT_TYPE, then
 959            in practice this means the GMT key is not followed by
 960            " Standard Time", so don't append in that case. */
 961         int isMexico2 = (winid[uprv_strlen(winid)- 1] == '2');
 962         if (!isMexico2 &&
 963             !(winType == WIN_NT_TYPE && uprv_strcmp(winid, "GMT") == 0)) {
 964             uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
 965         }
 966     }
 967     result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
 968                           subKeyName,
 969                           0,
 970                           KEY_QUERY_VALUE,
 971                           hkey);
 972
 973     if (result != ERROR_SUCCESS) {
 974         /* If the primary lookup fails, try to remap the Windows zone
 975            ID, according to the remapping table. */
 976         for (i=0; ZONE_REMAP[i].winid; ++i) {
 977             if (uprv_strcmp(winid, ZONE_REMAP[i].winid) == 0) {
 978                 uprv_strcpy(name, ZONE_REMAP[i].altwinid + 1);
 979                 if (*(ZONE_REMAP[i].altwinid) == '+' &&
 980                     winType != WIN_9X_ME_TYPE) {
 981                     uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
 982                 }
 983                 result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
 984                                       subKeyName,
 985                                       0,
 986                                       KEY_QUERY_VALUE,
 987                                       hkey);
 988                 break;
 989             }
 990         }
 991     }
 992
 993     return result;
 994 }
 995
 996 /**
 997  * Main Windows time zone detection function.  Returns the Windows
 998  * time zone, translated to an ICU time zone, or NULL upon failure.
 999  */
1000 static const char* detectWindowsTimeZone() {
1001     int winType;
1002     LONG result;
1003     HKEY hkey;
1004     TZI tziKey;
1005     TZI tziReg;
1006     DWORD cbData = sizeof(TZI);
1007     TIME_ZONE_INFORMATION apiTZI;
1008     char stdName[32];
1009     DWORD stdNameSize;
1010     char stdRegName[64];
1011     DWORD stdRegNameSize;
1012     int firstMatch, lastMatch;
1013     int j;
1014
1015     /* Detect the version of windows by trying to open a sequence of
1016        probe keys.  We don't use the OS version API because what we
1017        really want to know is how the registry is laid out.
1018        Specifically, is it 9x/Me or not, and is it "GMT" or "GMT
1019        Standard Time". */
1020     for (winType=0; winType<2; ++winType) {
1021         result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
1022                               WIN_TYPE_PROBE_REGKEY[winType],
1023                               0,
1024                               KEY_QUERY_VALUE,
1025                               &hkey);
1026         RegCloseKey(hkey);
1027         if (result == ERROR_SUCCESS) {
1028             break;
1029         }
1030     }
1031
1032     /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it
1033        to TZI.  We could also interrogate the registry directly; we do
1034        this below if needed. */
1035     uprv_memset(&apiTZI, 0, sizeof(apiTZI));
1036     GetTimeZoneInformation(&apiTZI);
1037     tziKey.Bias = apiTZI.Bias;
1038     uprv_memcpy((char *)&tziKey.StandardDate, (char*)&apiTZI.StandardDate,
1039            sizeof(apiTZI.StandardDate));
1040     uprv_memcpy((char *)&tziKey.DaylightDate, (char*)&apiTZI.DaylightDate,
1041            sizeof(apiTZI.DaylightDate));
1042
1043     /* For each zone that can be identified by Offset+Rules, see if we
1044        have a match.  Continue scanning after finding a match,
1045        recording the index of the first and the last match.  We have
1046        to do this because some zones are not unique under
1047        Offset+Rules. */
1048     firstMatch = lastMatch = -1;
1049     for (j=0; ZONE_MAP[j].icuid; j++) {
1050         result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
1051         if (result == ERROR_SUCCESS) {
1052             result = RegQueryValueEx(hkey,
1053                                      TZI_REGKEY,
1054                                      NULL,
1055                                      NULL,
1056                                      (LPBYTE)&tziReg,
1057                                      &cbData);
1058         }
1059         RegCloseKey(hkey);
1060         if (result == ERROR_SUCCESS) {
1061             /* Assume that offsets are grouped together, and bail out
1062                when we've scanned everything with a matching
1063                offset. */
1064             if (firstMatch >= 0 && tziKey.Bias != tziReg.Bias) {
1065                 break;
1066             }
1067             /* Windows alters the DaylightBias in some situations.
1068                Using the bias and the rules suffices, so overwrite
1069                these unreliable fields. */
1070             tziKey.StandardBias = tziReg.StandardBias;
1071             tziKey.DaylightBias = tziReg.DaylightBias;
1072             if (uprv_memcmp((char *)&tziKey, (char*)&tziReg,
1073                        sizeof(tziKey)) == 0) {
1074                 if (firstMatch < 0) {
1075                     firstMatch = j;
1076                 }
1077                 lastMatch = j;
1078             }
1079         }
1080     }
1081
1082     /* This should never happen; if it does it means our table doesn't
1083        match Windows AT ALL, perhaps because this is post-XP? */
1084     if (firstMatch < 0) {
1085         return NULL;
1086     }
1087
1088     if (firstMatch != lastMatch) {
1089         /* Offset+Rules lookup yielded >= 2 matches.  Try to match the
1090            localized display name.  Get the name from the registry
1091            (not the API). This avoids conversion issues.  Use the
1092            standard name, since Windows modifies the daylight name to
1093            match the standard name if there is no DST. */
1094         result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
1095                               CURRENT_ZONE_REGKEY,
1096                               0,
1097                               KEY_QUERY_VALUE,
1098                               &hkey);
1099         if (result == ERROR_SUCCESS) {
1100             stdNameSize = sizeof(stdName);
1101             result = RegQueryValueEx(hkey,
1102                                      (LPTSTR)STANDARD_NAME_REGKEY,
1103                                      NULL,
1104                                      NULL,
1105                                      (LPBYTE)stdName,
1106                                      &stdNameSize);
1107             RegCloseKey(hkey);
1108
1109             /* Scan through the Windows time zone data in the registry
1110                again (just the range of zones with matching TZIs) and
1111                look for a standard display name match. */
1112             for (j=firstMatch; j<=lastMatch; j++) {
1113                 result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
1114                 if (result == ERROR_SUCCESS) {
1115                     stdRegNameSize = sizeof(stdRegName);
1116                     result = RegQueryValueEx(hkey,
1117                                              (LPTSTR)STD_REGKEY,
1118                                              NULL,
1119                                              NULL,
1120                                              (LPBYTE)stdRegName,
1121                                              &stdRegNameSize);
1122                 }
1123                 RegCloseKey(hkey);
1124                 if (result == ERROR_SUCCESS &&
1125                     stdRegNameSize == stdNameSize &&
1126                     uprv_memcmp(stdName, stdRegName, stdNameSize) == 0) {
1127                     firstMatch = j; /* record the match */
1128                     break;
1129                 }
1130             }
1131         } else {
1132             RegCloseKey(hkey); /* should never get here */
1133         }
1134     }
1135
1136     return ZONE_MAP[firstMatch].icuid;
1137 }
1138
1139 #endif /*WIN32*/
1140
1141 /* Generic time zone layer -------------------------------------------------- */
1142
1143 /* Time zone utilities */
1144 U_CAPI void U_EXPORT2
1145 uprv_tzset()
1146 {
1147 #ifdef U_TZSET
1148     U_TZSET();
1149 #else
1150     /* no initialization*/
1151 #endif
1152 }
1153
1154 U_CAPI int32_t U_EXPORT2
1155 uprv_timezone()
1156 {
1157 #ifdef U_TIMEZONE
1158     return U_TIMEZONE;
1159 #else
1160     time_t t, t1, t2;
1161     struct tm tmrec;
1162     UBool dst_checked;
1163     int32_t tdiff = 0;
1164
1165     time(&t);
1166     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
1167     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
1168     t1 = mktime(&tmrec);                 /* local time in seconds*/
1169     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
1170     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
1171     tdiff = t2 - t1;
1172     /* imitate NT behaviour, which returns same timezone offset to GMT for
1173        winter and summer*/
1174     if (dst_checked)
1175         tdiff += 3600;
1176     return tdiff;
1177 #endif
1178 }
1179
1180 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
1181    some platforms need to have it declared here. */
1182
1183 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
1184 /* RS6000 and others reject char **tzname.  */
1185 extern U_IMPORT char *U_TZNAME[];
1186 #endif
1187
1188 #if defined(U_DARWIN)   /* For Mac OS X */
1189 #define TZZONELINK      "/etc/localtime"
1190 #define TZZONEINFO      "/usr/share/zoneinfo/"
1191 static char *gTimeZoneBuffer = NULL; /* Heap allocated */
1192 #endif
1193
1194 U_CAPI const char* U_EXPORT2
1195 uprv_tzname(int n)
1196 {
1197 #ifdef WIN32
1198     char* id = (char*) detectWindowsTimeZone();
1199     if (id != NULL) {
1200         return id;
1201     }
1202 #endif
1203
1204 #if defined(U_DARWIN)
1205     int ret;
1206
1207     char *tzenv;
1208
1209     tzenv = getenv("TZFILE");
1210     if (tzenv != NULL) {
1211         return tzenv;
1212     }
1213
1214 #if 0
1215     /* TZ is often set to "PST8PDT" or similar, so we cannot use it. Alan */
1216     tzenv = getenv("TZ");
1217     if (tzenv != NULL) {
1218         return tzenv;
1219     }
1220 #endif
1221
1222     /* Caller must handle threading issues */
1223     if (gTimeZoneBuffer == NULL) {
1224         gTimeZoneBuffer = (char *) uprv_malloc(MAXPATHLEN + 2);
1225
1226         ret = readlink(TZZONELINK, gTimeZoneBuffer, MAXPATHLEN + 2);
1227         if (0 < ret) {
1228             gTimeZoneBuffer[ret] = '\0';
1229             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, sizeof(TZZONEINFO) - 1) == 0) {
1230                 return (gTimeZoneBuffer += sizeof(TZZONEINFO) - 1);
1231             }
1232         }
1233
1234         uprv_free(gTimeZoneBuffer);
1235         gTimeZoneBuffer = NULL;
1236     }
1237 #endif
1238
1239 #ifdef U_TZNAME
1240     return U_TZNAME[n];
1241 #else
1242     return "";
1243 #endif
1244 }
1245
1246 /* Get and set the ICU data directory --------------------------------------- */
1247
1248 static char *gDataDirectory = NULL;
1249 #if U_POSIX_LOCALE
1250  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1251 #endif
1252
1253 static UBool U_CALLCONV putil_cleanup(void)
1254 {
1255     if (gDataDirectory) {
1256         uprv_free(gDataDirectory);
1257         gDataDirectory = NULL;
1258     }
1259 #if U_POSIX_LOCALE
1260     if (gCorrectedPOSIXLocale) {
1261         uprv_free(gCorrectedPOSIXLocale);
1262         gCorrectedPOSIXLocale = NULL;
1263     }
1264 #endif
1265     return TRUE;
1266 }
1267
1268 /*
1269  * Set the data directory.
1270  *    Make a copy of the passed string, and set the global data dir to point to it.
1271  *    TODO:  see bug #2849, regarding thread safety.
1272  */
1273 U_CAPI void U_EXPORT2
1274 u_setDataDirectory(const char *directory) {
1275     char *newDataDir;
1276 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1277     char *p;
1278 #endif
1279     int32_t length;
1280
1281     if(directory==NULL) {
1282         directory = "";
1283     }
1284     length=(int32_t)uprv_strlen(directory);
1285     newDataDir = (char *)uprv_malloc(length + 2);
1286     uprv_strcpy(newDataDir, directory);
1287
1288 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1289     while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1290        *p = U_FILE_SEP_CHAR;
1291     }
1292 #endif
1293
1294     umtx_lock(NULL);
1295     if (gDataDirectory) {
1296         uprv_free(gDataDirectory);
1297     }
1298     gDataDirectory = newDataDir;
1299     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1300     umtx_unlock(NULL);
1301 }
1302
1303 U_CAPI UBool U_EXPORT2
1304 uprv_pathIsAbsolute(const char *path)
1305 {
1306   if(!path || !*path) {
1307     return FALSE;
1308   }
1309
1310   if(*path == U_FILE_SEP_CHAR) {
1311     return TRUE;
1312   }
1313
1314 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1315   if(*path == U_FILE_ALT_SEP_CHAR) {
1316     return TRUE;
1317   }
1318 #endif
1319
1320 #if defined(WIN32)
1321   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1322        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1323       path[1] == ':' ) {
1324     return TRUE;
1325   }
1326 #endif
1327
1328   return FALSE;
1329 }
1330
1331 U_CAPI const char * U_EXPORT2
1332 u_getDataDirectory(void) {
1333     const char *path = NULL;
1334     char pathBuffer[1024];
1335     const char *dataDir;
1336
1337     /* if we have the directory, then return it immediately */
1338     umtx_lock(NULL);
1339     dataDir = gDataDirectory;
1340     umtx_unlock(NULL);
1341
1342     if(dataDir) {
1343         return dataDir;
1344     }
1345
1346     /* we need to look for it */
1347     pathBuffer[0] = 0;                     /* Shuts up compiler warnings about unreferenced */
1348                                            /*   variables when the code using it is ifdefed out */
1349 #   if !defined(XP_MAC)
1350     /* first try to get the environment variable */
1351     path=getenv("ICU_DATA");
1352 #   else    /* XP_MAC */
1353     {
1354         OSErr myErr;
1355         short vRef;
1356         long  dir,newDir;
1357         int16_t volNum;
1358         Str255 xpath;
1359         FSSpec spec;
1360         short  len;
1361         Handle full;
1362
1363         xpath[0]=0;
1364
1365         myErr = HGetVol(xpath, &volNum, &dir);
1366
1367         if(myErr == noErr) {
1368             myErr = FindFolder(volNum, kApplicationSupportFolderType, TRUE, &vRef, &dir);
1369             newDir=-1;
1370             if (myErr == noErr) {
1371                 myErr = DirCreate(volNum,
1372                     dir,
1373                     "\pICU",
1374                     &newDir);
1375                 if( (myErr == noErr) || (myErr == dupFNErr) ) {
1376                     spec.vRefNum = volNum;
1377                     spec.parID = dir;
1378                     uprv_memcpy(spec.name, "\pICU", 4);
1379
1380                     myErr = FSpGetFullPath(&spec, &len, &full);
1381                     if(full != NULL)
1382                     {
1383                         HLock(full);
1384                         uprv_memcpy(pathBuffer,  ((char*)(*full)), len);
1385                         pathBuffer[len] = 0;
1386                         path = pathBuffer;
1387                         DisposeHandle(full);
1388                     }
1389                 }
1390             }
1391         }
1392     }
1393 #       endif
1394
1395
1396 #       if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
1397     /* next, try to read the path from the registry */
1398     if(path==NULL || *path==0) {
1399         HKEY key;
1400
1401         if(ERROR_SUCCESS==RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE, &key)) {
1402             DWORD type=REG_EXPAND_SZ, size=sizeof(pathBuffer);
1403
1404             if(ERROR_SUCCESS==RegQueryValueEx(key, "Path", NULL, &type, (unsigned char *)pathBuffer, &size) && size>1) {
1405                 if(type==REG_EXPAND_SZ) {
1406                     /* replace environment variable references by their values */
1407                     char temporaryPath[1024];
1408
1409                     /* copy the path with variables to the temporary one */
1410                     uprv_memcpy(temporaryPath, pathBuffer, size);
1411
1412                     /* do the replacement and store it in the pathBuffer */
1413                     size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
1414                     if(size>0 && size<sizeof(pathBuffer)) {
1415                         path=pathBuffer;
1416                     }
1417                 } else if(type==REG_SZ) {
1418                     path=pathBuffer;
1419                 }
1420             }
1421             RegCloseKey(key);
1422         }
1423     }
1424 #       endif
1425
1426     /* ICU_DATA_DIR may be set as a compile option */
1427 #   ifdef ICU_DATA_DIR
1428     if(path==NULL || *path==0) {
1429         path=ICU_DATA_DIR;
1430     }
1431 #   endif
1432
1433     if(path==NULL) {
1434         /* It looks really bad, set it to something. */
1435         path = "";
1436     }
1437
1438     u_setDataDirectory(path);
1439     return gDataDirectory;
1440 }
1441
1442
1443
1444
1445
1446 /* Macintosh-specific locale information ------------------------------------ */
1447 #ifdef XP_MAC
1448
/* One row of the Mac OS locale table: selector values plus the POSIX
   locale ID they stand for. */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER (-5)
#define MAC_LC_INIT_NUMBER (-9)

/* In every row the script, region and lang fields hold
   MAC_LC_MAGIC_NUMBER; only date_region varies.  The final row, whose
   date_region is MAC_LC_MAGIC_NUMBER as well, is the en_US fallback.
   Commented-out rows are regions for which no POSIX ID has been
   assigned. */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (was "is_IS", which is the Icelandic locale listed at 21) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland ===*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "" },*/
    /* Malta ===*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "" },*/
    /* Cyprus ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "" },*/
    /* Hindi system for India*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "" },*/
    /* Pakistan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "" },*/
    /* Lapland  [Ask Rich for the data. HS]*/
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "" },*/
    /* Faeroe Islands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand*/

    /* fallback is en_US*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1548
1549 #endif
1550
1551 #if U_POSIX_LOCALE
/*
 * Return just the POSIX id, whatever happens to be in it.
 *
 * The id is looked up once and cached: LC_ALL first, then LANG, then
 * setlocale(LC_ALL, NULL).  An environment variable that is set but
 * empty counts as unset, so an empty LC_ALL does not hide LANG.
 * If nothing usable is found the result is "en_US".  The names "C" and
 * "POSIX", and values containing ' ' or '/' (composite or garbage
 * results from setlocale), are reported as "en_US_POSIX".
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    if (posixID == 0) {
        posixID = getenv("LC_ALL");
        if (posixID == 0 || *posixID == 0) {
            posixID = getenv("LANG");
            if (posixID == 0 || *posixID == 0) {
                /*
                * On Solaris two different calls to setlocale can result in
                * different values. Only get this value once.
                */
                posixID = setlocale(LC_ALL, NULL);
            }
        }
    }

    if (posixID == 0 || *posixID == 0)
    {
        /* Nothing worked.  Give it a nice value. */
        posixID = "en_US";
    }
    else if ((uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0)
        || (uprv_strchr(posixID, ' ') != NULL)
        || (uprv_strchr(posixID, '/') != NULL))
    {   /* HPUX returns 'C C C C C C C' */
        /* Solaris can return /en_US/C/C/C/C/C on the second try. */
        /* Maybe we got some garbage.  Give it a nice value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
1585 #endif
1586
1587 /* NOTE: The caller should handle thread safety */
1588 U_CAPI const char* U_EXPORT2
1589 uprv_getDefaultLocaleID()
1590 {
1591 #if U_POSIX_LOCALE
1592 /*
1593   Note that:  (a '!' means the ID is improper somehow)
1594      LC_ALL  ---->     default_loc          codepage
1595 --------------------------------------------------------
1596      ab.CD             ab                   CD
1597      ab@CD             ab__CD               -
1598      ab@CD.EF          ab__CD               EF
1599
1600      ab_CD.EF@GH       ab_CD_GH             EF
1601
1602 Some 'improper' ways to do the same as above:
1603   !  ab_CD@GH.EF       ab_CD_GH             EF
1604   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1605   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1606
1607      _CD@GH            _CD_GH               -
1608      _CD.EF@GH         _CD_GH               EF
1609
1610 The variant cannot have dots in it.
1611 The 'rightmost' variant (@xxx) wins.
1612 The leftmost codepage (.xxx) wins.
1613 */
1614     char *correctedPOSIXLocale = 0;
1615     const char* posixID = uprv_getPOSIXID();
1616     const char *p;
1617     const char *q;
1618     int32_t len;
1619
1620     /* Format: (no spaces)
1621     ll [ _CC ] [ . MM ] [ @ VV]
1622
1623       l = lang, C = ctry, M = charmap, V = variant
1624     */
1625
1626     if (gCorrectedPOSIXLocale != NULL) {
1627         return gCorrectedPOSIXLocale;
1628     }
1629
1630     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1631         /* assume new locale can't be larger than old one? */
1632         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1633         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1634         correctedPOSIXLocale[p-posixID] = 0;
1635
1636         /* do not copy after the @ */
1637         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1638             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1639         }
1640     }
1641
1642     /* Note that we scan the *uncorrected* ID. */
1643     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1644         if (correctedPOSIXLocale == NULL) {
1645             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1646             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1647             correctedPOSIXLocale[p-posixID] = 0;
1648         }
1649         p++;
1650
1651         /* Take care of any special cases here.. */
1652         if (!uprv_strcmp(p, "nynorsk")) {
1653             p = "NY";
1654
1655             /*      Should we assume no_NO_NY instead of possible no__NY?
1656             * if (!uprv_strcmp(correctedPOSIXLocale, "no")) {
1657             *     uprv_strcpy(correctedPOSIXLocale, "no_NO");
1658             * }
1659             */
1660         }
1661
1662         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1663             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1664         }
1665         else {
1666             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1667         }
1668
1669         if ((q = uprv_strchr(p, '.')) != NULL) {
1670             /* How big will the resulting string be? */
1671             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1672             uprv_strncat(correctedPOSIXLocale, p, q-p);
1673             correctedPOSIXLocale[len] = 0;
1674         }
1675         else {
1676             /* Anything following the @ sign */
1677             uprv_strcat(correctedPOSIXLocale, p);
1678         }
1679
1680         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1681          * How about 'russian' -> 'ru'?
1682          */
1683     }
1684
1685     /* Was a correction made? */
1686     if (correctedPOSIXLocale != NULL) {
1687         posixID = correctedPOSIXLocale;
1688     }
1689     else {
1690         /* copy it, just in case the original pointer goes away.  See j2395 */
1691         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1692         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1693     }
1694
1695     if (gCorrectedPOSIXLocale == NULL) {
1696         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1697         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1698         correctedPOSIXLocale = NULL;
1699     }
1700
1701     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1702         uprv_free(correctedPOSIXLocale);
1703     }
1704
1705     return posixID;
1706
1707 #elif defined(WIN32)
1708     UErrorCode status = U_ZERO_ERROR;
1709     LCID id = GetThreadLocale();
1710     const char* locID = uprv_convertToPosix(id, &status);
1711
1712     if (U_FAILURE(status)) {
1713         locID = "en_US";
1714     }
1715     return locID;
1716
1717 #elif defined(XP_MAC)
1718     int32_t script = MAC_LC_INIT_NUMBER;
1719     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1720     int32_t region = MAC_LC_INIT_NUMBER;
1721     /* = GetScriptManagerVariable(smRegionCode);*/
1722     int32_t lang = MAC_LC_INIT_NUMBER;
1723     /* = GetScriptManagerVariable(smScriptLang);*/
1724     int32_t date_region = MAC_LC_INIT_NUMBER;
1725     const char* posixID = 0;
1726     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1727     int32_t i;
1728     Intl1Hndl ih;
1729
1730     ih = (Intl1Hndl) GetIntlResource(1);
1731     if (ih)
1732         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1733
1734     for (i = 0; i < count; i++) {
1735         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1736              || (mac_lc_recs[i].script == script))
1737             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1738              || (mac_lc_recs[i].region == region))
1739             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1740              || (mac_lc_recs[i].lang == lang))
1741             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1742              || (mac_lc_recs[i].date_region == date_region))
1743             )
1744         {
1745             posixID = mac_lc_recs[i].posixID;
1746             break;
1747         }
1748     }
1749
1750     return posixID;
1751
1752 #elif defined(OS2)
1753     char * locID;
1754
1755     locID = getenv("LC_ALL");
1756     if (!locID || !*locID)
1757         locID = getenv("LANG");
1758     if (!locID || !*locID) {
1759         locID = "en_US";
1760     }
1761     if (!stricmp(locID, "c") || !stricmp(locID, "posix") ||
1762         !stricmp(locID, "univ"))
1763         locID = "en_US_POSIX";
1764     return locID;
1765
1766 #elif defined(OS400)
1767     /* locales are process scoped and are by definition thread safe */
1768     static char correctedLocale[64];
1769     const  char *localeID = getenv("LC_ALL");
1770            char *p;
1771
1772     if (localeID == NULL)
1773         localeID = getenv("LANG");
1774     if (localeID == NULL)
1775         localeID = setlocale(LC_ALL, NULL);
1776     /* Make sure we have something... */
1777     if (localeID == NULL)
1778         return "en_US_POSIX";
1779
1780     /* Extract the locale name from the path. */
1781     if((p = uprv_strrchr(localeID, '/')) != NULL)
1782     {
1783         /* Increment p to start of locale name. */
1784         p++;
1785         localeID = p;
1786     }
1787
1788     /* Copy to work location. */
1789     uprv_strcpy(correctedLocale, localeID);
1790
1791     /* Strip off the '.locale' extension. */
1792     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1793         *p = 0;
1794     }
1795
1796     /* Upper case the locale name. */
1797     T_CString_toUpperCase(correctedLocale);
1798
1799     /* See if we are using the POSIX locale.  Any of the
1800     * following are equivalent and use the same QLGPGCMA
1801     * (POSIX) locale.
1802     */
1803     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1804         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1805         (uprv_strcmp("QLGPGCMA", correctedLocale) == 0))
1806     {
1807         uprv_strcpy(correctedLocale, "en_US_POSIX");
1808     }
1809     else
1810     {
1811         int16_t LocaleLen;
1812
1813         /* Lower case the lang portion. */
1814         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1815         {
1816             *p = uprv_tolower(*p);
1817         }
1818
1819         /* Adjust for Euro.  After '_E' add 'URO'. */
1820         LocaleLen = uprv_strlen(correctedLocale);
1821         if (correctedLocale[LocaleLen - 2] == '_' &&
1822             correctedLocale[LocaleLen - 1] == 'E')
1823         {
1824             uprv_strcat(correctedLocale, "URO");
1825         }
1826
1827         /* If using Lotus-based locale then convert to
1828          * equivalent non Lotus.
1829          */
1830         else if (correctedLocale[LocaleLen - 2] == '_' &&
1831             correctedLocale[LocaleLen - 1] == 'L')
1832         {
1833             correctedLocale[LocaleLen - 2] = 0;
1834         }
1835
1836         /* There are separate simplified and traditional
1837          * locales called zh_HK_S and zh_HK_T.
1838          */
1839         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1840         {
1841             uprv_strcpy(correctedLocale, "zh_HK");
1842         }
1843
1844         /* A special zh_CN_GBK locale...
1845         */
1846         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1847         {
1848             uprv_strcpy(correctedLocale, "zh_CN");
1849         }
1850
1851     }
1852
1853     return correctedLocale;
1854 #endif
1855
1856 }
1857
1858
/*
 * Work out the name of the platform's default codepage.
 *
 * Exactly one platform branch is compiled in:
 *   OS400  - "ibm-<ccsid>" built from the job's CCSID as reported by QUSRJOBI
 *            (ibm-37 when the call reports an error or both CCSID fields
 *            are 0xFFFF).
 *   OS390  - nl_langinfo(CODESET) with UCNV_SWAP_LFNL_OPTION_STRING appended.
 *   XP_MAC - the fixed name "ibm-1275".
 *   WIN32  - "windows-<n>" where n is the value of GetACP().
 *   POSIX  - (U_POSIX_LOCALE) tried in this order:
 *              1. the part after '.' in the name returned by
 *                 setlocale(LC_CTYPE, ""), cut at any '@';
 *              2. nl_langinfo(U_NL_LANGINFO_CODESET), when
 *                 U_HAVE_NL_LANGINFO_CODESET is set;
 *              3. the part after '.' in uprv_getPOSIXID(), cut at any '@';
 *              4. "US-ASCII".
 *   other  - the fixed name "US-ASCII".
 *
 * Returns a pointer to a string literal or to a function-local static
 * buffer.  The static buffers are overwritten by every call, so callers
 * that need a stable value must not call this concurrently.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        /* 0xFFFF marks a field as unusable; try the job CCSID first and
           then the default CCSID. */
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    sprintf(codepage,"ibm-%d", ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    sprintf(codepage,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET));
    return codepage;

#elif defined(XP_MAC)
    return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(WIN32)
    static char codepage[64];
    sprintf(codepage, "windows-%d", GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    char *name = NULL;
    char *euro = NULL;
    const char *localeName = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Check setlocale before the environment variables
       because the application may have set it first */
    /* setlocale needs "" and not NULL for Linux and Solaris */
    localeName = setlocale(LC_CTYPE, "");
    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        /* strncpy does not terminate when the source fills the buffer */
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, '@'))) != NULL) {
           *euro = 0;
        }
        /* if we can find the codeset name from setlocale, return that. */
        if (*name) {
            return name;
        }
    }

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, ask nl_langinfo next because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        /* nl_langinfo() hands back an empty string when it has nothing to
           report.  An empty codepage name is of no use to the caller, so
           treat it like NULL and keep looking. */
        if (codeset != NULL && *codeset != 0) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Try a locale specified by the user.
       This is usually underspecified and usually checked by setlocale already. */
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    localeName = uprv_getPOSIXID();
    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, '@'))) != NULL) {
           *euro = 0;
        }
        /* if we can find the codeset name, return that. */
        if (*name) {
            return name;
        }
    }

    if (*codesetName == 0)
    {
        /* if every lookup failed, return US ASCII (ISO 646). */
        uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1966
1967
1968 U_CAPI const char*  U_EXPORT2
1969 uprv_getDefaultCodepage()
1970 {
1971     static char const  *name = NULL;
1972     umtx_lock(NULL);
1973     if (name == NULL) {
1974         name = int_getDefaultCodepage();
1975     }
1976     umtx_unlock(NULL);
1977     return name;
1978 }
1979
1980
1981 /* end of platform-specific implementation -------------- */
1982
1983 /* version handling --------------------------------------------------------- */
1984
1985 U_CAPI void U_EXPORT2
1986 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1987     char *end;
1988     uint16_t part=0;
1989
1990     if(versionArray==NULL) {
1991         return;
1992     }
1993
1994     if(versionString!=NULL) {
1995         for(;;) {
1996             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1997             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1998                 break;
1999             }
2000             versionString=end+1;
2001         }
2002     }
2003
2004     while(part<U_MAX_VERSION_LENGTH) {
2005         versionArray[part++]=0;
2006     }
2007 }
2008
2009 U_CAPI void U_EXPORT2
2010 u_versionToString(UVersionInfo versionArray, char *versionString) {
2011     uint16_t count, part;
2012     uint8_t field;
2013
2014     if(versionString==NULL) {
2015         return;
2016     }
2017
2018     if(versionArray==NULL) {
2019         versionString[0]=0;
2020         return;
2021     }
2022
2023     /* count how many fields need to be written */
2024     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2025     }
2026
2027     if(count <= 1) {
2028         count = 2;
2029     }
2030
2031     /* write the first part */
2032     /* write the decimal field value */
2033     field=versionArray[0];
2034     if(field>=100) {
2035         *versionString++=(char)('0'+field/100);
2036         field%=100;
2037     }
2038     if(field>=10) {
2039         *versionString++=(char)('0'+field/10);
2040         field%=10;
2041     }
2042     *versionString++=(char)('0'+field);
2043
2044     /* write the following parts */
2045     for(part=1; part<count; ++part) {
2046         /* write a dot first */
2047         *versionString++=U_VERSION_DELIMITER;
2048
2049         /* write the decimal field value */
2050         field=versionArray[part];
2051         if(field>=100) {
2052             *versionString++=(char)('0'+field/100);
2053             field%=100;
2054         }
2055         if(field>=10) {
2056             *versionString++=(char)('0'+field/10);
2057             field%=10;
2058         }
2059         *versionString++=(char)('0'+field);
2060     }
2061
2062     /* NUL-terminate */
2063     *versionString=0;
2064 }
2065
2066 U_CAPI void U_EXPORT2
2067 u_getVersion(UVersionInfo versionArray) {
2068     u_versionFromString(versionArray, U_ICU_VERSION);
2069 }
2070
2071 /*
2072  * Hey, Emacs, please set the following:
2073  *
2074  * Local Variables:
2075  * indent-tabs-mode: nil
2076  * End:
2077  *
2078  */