icuSources/common/putil.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2008, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
  10 *
  11 *   Date        Name        Description
  12 *   04/14/97    aliu        Creation.
  13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
  14 *                            getDefaultLocaleID().
  15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
  16 *                            for assumed case.  Non-UNIX platforms must be
  17 *                            special-cased.  Rewrote numeric methods dealing
  18 *                            with NaN and Infinity to be platform independent
  19 *                             over all IEEE 754 platforms.
  20 *   05/13/97    aliu        Restored sign of timezone
  21 *                            (semantics are hours West of GMT)
  22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
  23 *                             nextDouble..
  24 *   07/22/98    stephen     Added remainder, max, min, trunc
  25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
  26 *   08/24/98    stephen     Added longBitsFromDouble
  27 *   09/08/98    stephen     Minor changes for Mac Port
  28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
  29 *                            Fixed EBCDIC tables
  30 *   04/15/99    stephen     Converted to C.
  31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
  32 *   08/04/99    jeffrey R.  Added OS/2 changes
  33 *   11/15/99    helena      Integrated S/390 IEEE support.
  34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
  35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
  36 *   01/03/08    Steven L.   Fake Time Support
  37 ******************************************************************************
  38 */
  39
  40 /* Define _XOPEN_SOURCE for Solaris and friends. */
  41 /* NetBSD needs it to be >= 4 */
  42 #if !defined(_XOPEN_SOURCE)
  43 #if __STDC_VERSION__ >= 199901L
  44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
  45 #define _XOPEN_SOURCE 600
  46 #else
  47 #define _XOPEN_SOURCE 4
  48 #endif
  49 #endif
  50
  51 /* Make sure things like readlink and such functions work.
  52 Poorly upgraded Solaris machines can't have this defined.
  53 Cleanly installed Solaris can use this #define.
  54 */
  55 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
  56 #define _XOPEN_SOURCE_EXTENDED 1
  57 #endif
  58
  59 /* include ICU headers */
  60 #include "unicode/utypes.h"
  61 #include "unicode/putil.h"
  62 #include "unicode/ustring.h"
  63 #include "putilimp.h"
  64 #include "uassert.h"
  65 #include "umutex.h"
  66 #include "cmemory.h"
  67 #include "cstring.h"
  68 #include "locmap.h"
  69 #include "ucln_cmn.h"
  70
  71 /* Include standard headers. */
  72 #include <stdio.h>
  73 #include <stdlib.h>
  74 #include <string.h>
  75 #include <math.h>
  76 #include <locale.h>
  77 #include <float.h>
  78 #include <time.h>
  79
  80 /* include system headers */
  81 #ifdef U_WINDOWS
  82 #   define WIN32_LEAN_AND_MEAN
  83 #   define VC_EXTRALEAN
  84 #   define NOUSER
  85 #   define NOSERVICE
  86 #   define NOIME
  87 #   define NOMCX
  88 #   include <windows.h>
  89 #   include "wintz.h"
  90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
  91 /* tzset isn't defined in strict ANSI on Cygwin. */
  92 #   undef __STRICT_ANSI__
  93 #elif defined(OS400)
  94 #   include <float.h>
  95 #   include <qusec.h>       /* error code structure */
  96 #   include <qusrjobi.h>
  97 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
  98 #   include <mih/testptr.h> /* For uprv_maximumPtr */
  99 #elif defined(XP_MAC)
 100 #   include <Files.h>
 101 #   include <IntlResources.h>
 102 #   include <Script.h>
 103 #   include <Folders.h>
 104 #   include <MacTypes.h>
 105 #   include <TextUtils.h>
 106 #   define ICU_NO_USER_DATA_OVERRIDE 1
 107 #elif defined(OS390)
 108 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
 109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
 110 #include <limits.h>
 111 #include <unistd.h>
 112 #elif defined(U_QNX)
 113 #include <sys/neutrino.h>
 114 #endif
 115
 116 #if defined(U_DARWIN)
 117 #include <TargetConditionals.h>
 118 #endif
 119
 120 #ifndef U_WINDOWS
 121 #include <sys/time.h>
 122 #endif
 123
 124 /*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 127  *
 128  */
 129
 130 #if U_HAVE_NL_LANGINFO_CODESET
 131 #include <langinfo.h>
 132 #endif
 133
 134 /* Define the extension for data files, again... */
 135 #define DATA_TYPE "dat"
 136
 137 /* Leave this copyright notice here! */
 138 static const char copyright[] = U_COPYRIGHT_STRING;
 139
 140 /* floating point implementations ------------------------------------------- */
 141
 142 /* We return QNAN rather than SNAN*/
 143 #define SIGN 0x80000000U
 144
 145 /* Make it easy to define certain types of constants */
 146 typedef union {
 147     int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
 148     double d64;
 149 } BitPatternConversion;
 150 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
 151 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
 152
 153 /*---------------------------------------------------------------------------
 154   Platform utilities
 155   Our general strategy is to assume we're on a POSIX platform.  Platforms which
 156   are non-POSIX must declare themselves so.  The default POSIX implementation
 157   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
 158   functions).
 159   ---------------------------------------------------------------------------*/
 160
 161 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
 162 #   undef U_POSIX_LOCALE
 163 #else
 164 #   define U_POSIX_LOCALE    1
 165 #endif
 166
 167 /*
 168     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
 169     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
 170 */
 171 #if !IEEE_754
 172 static char*
 173 u_topNBytesOfDouble(double* d, int n)
 174 {
 175 #if U_IS_BIG_ENDIAN
 176     return (char*)d;
 177 #else
 178     return (char*)(d + 1) - n;
 179 #endif
 180 }
 181 #endif
 182
 183 static char*
 184 u_bottomNBytesOfDouble(double* d, int n)
 185 {
 186 #if U_IS_BIG_ENDIAN
 187     return (char*)(d + 1) - n;
 188 #else
 189     return (char*)d;
 190 #endif
 191 }
 192
 193 #if defined (U_DEBUG_FAKETIME)
 194 /* Override the clock to test things without having to move the system clock.
 195  * Assumes POSIX gettimeofday() will function
 196  */
 197 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
 198 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
 199 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
 200 static UMTX fakeClockMutex = NULL;
 201
 202 static UDate getUTCtime_real() {
 203     struct timeval posixTime;
 204     gettimeofday(&posixTime, NULL);
 205     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
 206 }
 207
 208 static UDate getUTCtime_fake() {
 209     umtx_lock(&fakeClockMutex);
 210     if(!fakeClock_set) {
 211         UDate real = getUTCtime_real();
 212         const char *fake_start = getenv("U_FAKETIME_START");
 213         if(fake_start!=NULL) {
 214             sscanf(fake_start,"%lf",&fakeClock_t0);
 215         }
 216         fakeClock_dt = fakeClock_t0 - real;
 217         fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
 218                        "U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
 219                             fakeClock_t0, fake_start, fakeClock_dt, real);
 220         fakeClock_set = TRUE;
 221     }
 222     umtx_unlock(&fakeClockMutex);
 223
 224     return getUTCtime_real() + fakeClock_dt;
 225 }
 226 #endif
 227
 228 #if defined(U_WINDOWS)
 229 typedef union {
 230     int64_t int64;
 231     FILETIME fileTime;
 232 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
 233
 234 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
 235 #define EPOCH_BIAS  INT64_C(116444736000000000)
 236 #define HECTONANOSECOND_PER_MILLISECOND   10000
 237
 238 #endif
 239
 240 /*---------------------------------------------------------------------------
 241   Universal Implementations
 242   These are designed to work on all platforms.  Try these, and if they
 243   don't work on your platform, then special case your platform with new
 244   implementations.
 245 ---------------------------------------------------------------------------*/
 246
 247 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
 248 U_CAPI UDate U_EXPORT2
 249 uprv_getUTCtime()
 250 {
 251 #if defined(U_DEBUG_FAKETIME)
 252     return getUTCtime_fake(); /* Hook for overriding the clock */
 253 #elif defined(XP_MAC)
 254     time_t t, t1, t2;
 255     struct tm tmrec;
 256
 257     uprv_memset( &tmrec, 0, sizeof(tmrec) );
 258     tmrec.tm_year = 70;
 259     tmrec.tm_mon = 0;
 260     tmrec.tm_mday = 1;
 261     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
 262
 263     time(&t);
 264     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 265     t2 = mktime(&tmrec);    /* seconds of current GMT*/
 266     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
 267 #elif defined(U_WINDOWS)
 268
 269     FileTimeConversion winTime;
 270     GetSystemTimeAsFileTime(&winTime.fileTime);
 271     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
 272 #else
 273 /*
 274     struct timeval posixTime;
 275     gettimeofday(&posixTime, NULL);
 276     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
 277 */
 278     time_t epochtime;
 279     time(&epochtime);
 280     return (UDate)epochtime * U_MILLIS_PER_SECOND;
 281 #endif
 282 }
 283
 284 /*-----------------------------------------------------------------------------
 285   IEEE 754
 286   These methods detect and return NaN and infinity values for doubles
 287   conforming to IEEE 754.  Platforms which support this standard include X86,
 288   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
 289   If this doesn't work on your platform, you have non-IEEE floating-point, and
 290   will need to code your own versions.  A naive implementation is to return 0.0
 291   for getNaN and getInfinity, and false for isNaN and isInfinite.
 292   ---------------------------------------------------------------------------*/
 293
 294 U_CAPI UBool U_EXPORT2
 295 uprv_isNaN(double number)
 296 {
 297 #if IEEE_754
 298     BitPatternConversion convertedNumber;
 299     convertedNumber.d64 = number;
 300     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
 301     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
 302
 303 #elif defined(OS390)
 304     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 305                         sizeof(uint32_t));
 306     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 307                         sizeof(uint32_t));
 308
 309     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
 310       (lowBits == 0x00000000L);
 311
 312 #else
 313     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 314     /* you'll need to replace this default implementation with what's correct*/
 315     /* for your platform.*/
 316     return number != number;
 317 #endif
 318 }
 319
 320 U_CAPI UBool U_EXPORT2
 321 uprv_isInfinite(double number)
 322 {
 323 #if IEEE_754
 324     BitPatternConversion convertedNumber;
 325     convertedNumber.d64 = number;
 326     /* Infinity is exactly 0x7FF0000000000000U. */
 327     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
 328 #elif defined(OS390)
 329     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 330                         sizeof(uint32_t));
 331     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 332                         sizeof(uint32_t));
 333
 334     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
 335
 336 #else
 337     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 338     /* value, you'll need to replace this default implementation with what's*/
 339     /* correct for your platform.*/
 340     return number == (2.0 * number);
 341 #endif
 342 }
 343
 344 U_CAPI UBool U_EXPORT2
 345 uprv_isPositiveInfinity(double number)
 346 {
 347 #if IEEE_754 || defined(OS390)
 348     return (UBool)(number > 0 && uprv_isInfinite(number));
 349 #else
 350     return uprv_isInfinite(number);
 351 #endif
 352 }
 353
 354 U_CAPI UBool U_EXPORT2
 355 uprv_isNegativeInfinity(double number)
 356 {
 357 #if IEEE_754 || defined(OS390)
 358     return (UBool)(number < 0 && uprv_isInfinite(number));
 359
 360 #else
 361     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 362                         sizeof(uint32_t));
 363     return((highBits & SIGN) && uprv_isInfinite(number));
 364
 365 #endif
 366 }
 367
 368 U_CAPI double U_EXPORT2
 369 uprv_getNaN()
 370 {
 371 #if IEEE_754 || defined(OS390)
 372     return gNan.d64;
 373 #else
 374     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 375     /* you'll need to replace this default implementation with what's correct*/
 376     /* for your platform.*/
 377     return 0.0;
 378 #endif
 379 }
 380
 381 U_CAPI double U_EXPORT2
 382 uprv_getInfinity()
 383 {
 384 #if IEEE_754 || defined(OS390)
 385     return gInf.d64;
 386 #else
 387     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 388     /* value, you'll need to replace this default implementation with what's*/
 389     /* correct for your platform.*/
 390     return 0.0;
 391 #endif
 392 }
 393
 394 U_CAPI double U_EXPORT2
 395 uprv_floor(double x)
 396 {
 397     return floor(x);
 398 }
 399
 400 U_CAPI double U_EXPORT2
 401 uprv_ceil(double x)
 402 {
 403     return ceil(x);
 404 }
 405
 406 U_CAPI double U_EXPORT2
 407 uprv_round(double x)
 408 {
 409     return uprv_floor(x + 0.5);
 410 }
 411
 412 U_CAPI double U_EXPORT2
 413 uprv_fabs(double x)
 414 {
 415     return fabs(x);
 416 }
 417
 418 U_CAPI double U_EXPORT2
 419 uprv_modf(double x, double* y)
 420 {
 421     return modf(x, y);
 422 }
 423
 424 U_CAPI double U_EXPORT2
 425 uprv_fmod(double x, double y)
 426 {
 427     return fmod(x, y);
 428 }
 429
 430 U_CAPI double U_EXPORT2
 431 uprv_pow(double x, double y)
 432 {
 433     /* This is declared as "double pow(double x, double y)" */
 434     return pow(x, y);
 435 }
 436
 437 U_CAPI double U_EXPORT2
 438 uprv_pow10(int32_t x)
 439 {
 440     return pow(10.0, (double)x);
 441 }
 442
 443 U_CAPI double U_EXPORT2
 444 uprv_fmax(double x, double y)
 445 {
 446 #if IEEE_754
 447     int32_t lowBits;
 448
 449     /* first handle NaN*/
 450     if(uprv_isNaN(x) || uprv_isNaN(y))
 451         return uprv_getNaN();
 452
 453     /* check for -0 and 0*/
 454     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
 455     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 456         return y;
 457
 458 #endif
 459
 460     /* this should work for all flt point w/o NaN and Infpecial cases */
 461     return (x > y ? x : y);
 462 }
 463
 464 U_CAPI double U_EXPORT2
 465 uprv_fmin(double x, double y)
 466 {
 467 #if IEEE_754
 468     int32_t lowBits;
 469
 470     /* first handle NaN*/
 471     if(uprv_isNaN(x) || uprv_isNaN(y))
 472         return uprv_getNaN();
 473
 474     /* check for -0 and 0*/
 475     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
 476     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
 477         return y;
 478
 479 #endif
 480
 481     /* this should work for all flt point w/o NaN and Inf special cases */
 482     return (x > y ? y : x);
 483 }
 484
 485 /**
 486  * Truncates the given double.
 487  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
 488  * This is different than calling floor() or ceil():
 489  * floor(3.3) = 3, floor(-3.3) = -4
 490  * ceil(3.3) = 4, ceil(-3.3) = -3
 491  */
 492 U_CAPI double U_EXPORT2
 493 uprv_trunc(double d)
 494 {
 495 #if IEEE_754
 496     int32_t lowBits;
 497
 498     /* handle error cases*/
 499     if(uprv_isNaN(d))
 500         return uprv_getNaN();
 501     if(uprv_isInfinite(d))
 502         return uprv_getInfinity();
 503
 504     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
 505     if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
 506         return ceil(d);
 507     else
 508         return floor(d);
 509
 510 #else
 511     return d >= 0 ? floor(d) : ceil(d);
 512
 513 #endif
 514 }
 515
 516 /**
 517  * Return the largest positive number that can be represented by an integer
 518  * type of arbitrary bit length.
 519  */
 520 U_CAPI double U_EXPORT2
 521 uprv_maxMantissa(void)
 522 {
 523     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
 524 }
 525
 526 U_CAPI double U_EXPORT2
 527 uprv_log(double d)
 528 {
 529     return log(d);
 530 }
 531
 532 U_CAPI void * U_EXPORT2
 533 uprv_maximumPtr(void * base)
 534 {
 535 #if defined(OS400)
 536     /*
 537      * With the provided function we should never be out of range of a given segment
 538      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
 539      * id and 3 bytes for the offset.  The key is that the casting takes care of
 540      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
 541      * seen in a program is x001000 and when casted to an int would be 0.
 542      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
 543      *
 544      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
 545      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
 546      * This function determines the activation based on the pointer that is passed in and
 547      * calculates the appropriate maximum available size for
 548      * each pointer type (TERASPACE and non-TERASPACE)
 549      *
 550      * Unlike other operating systems, the pointer model isn't determined at
 551      * compile time on i5/OS.
 552      */
 553     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
 554         /* if it is a TERASPACE pointer the max is 2GB - 4k */
 555         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
 556     }
 557     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
 558     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
 559
 560 #else
 561     return U_MAX_PTR(base);
 562 #endif
 563 }
 564
 565 /*---------------------------------------------------------------------------
 566   Platform-specific Implementations
 567   Try these, and if they don't work on your platform, then special case your
 568   platform with new implementations.
 569   ---------------------------------------------------------------------------*/
 570
 571 /* Generic time zone layer -------------------------------------------------- */
 572
 573 /* Time zone utilities */
 574 U_CAPI void U_EXPORT2
 575 uprv_tzset()
 576 {
 577 #ifdef U_TZSET
 578     U_TZSET();
 579 #else
 580     /* no initialization*/
 581 #endif
 582 }
 583
 584 U_CAPI int32_t U_EXPORT2
 585 uprv_timezone()
 586 {
 587 #ifdef U_TIMEZONE
 588     return U_TIMEZONE;
 589 #else
 590     time_t t, t1, t2;
 591     struct tm tmrec;
 592     UBool dst_checked;
 593     int32_t tdiff = 0;
 594
 595     time(&t);
 596     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
 597     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
 598     t1 = mktime(&tmrec);                 /* local time in seconds*/
 599     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 600     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
 601     tdiff = t2 - t1;
 602     /* imitate NT behaviour, which returns same timezone offset to GMT for
 603        winter and summer*/
 604     if (dst_checked)
 605         tdiff += 3600;
 606     return tdiff;
 607 #endif
 608 }
 609
 610 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
 611    some platforms need to have it declared here. */
 612
 613 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
 614 /* RS6000 and others reject char **tzname.  */
 615 extern U_IMPORT char *U_TZNAME[];
 616 #endif
 617
 618 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
 619 /* These platforms are likely to use Olson timezone IDs. */
 620 #define CHECK_LOCALTIME_LINK 1
 621 #if defined(U_DARWIN)
 622 #include <tzfile.h>
 623 #define TZZONEINFO      (TZDIR "/")
 624 #else
 625 #define TZDEFAULT       "/etc/localtime"
 626 #define TZZONEINFO      "/usr/share/zoneinfo/"
 627 #endif
 628 static char gTimeZoneBuffer[PATH_MAX];
 629 static char *gTimeZoneBufferPtr = NULL;
 630 #endif
 631
 632 #ifndef U_WINDOWS
 633 #define isNonDigit(ch) (ch < '0' || '9' < ch)
 634 static UBool isValidOlsonID(const char *id) {
 635     int32_t idx = 0;
 636
 637     /* Determine if this is something like Iceland (Olson ID)
 638     or AST4ADT (non-Olson ID) */
 639     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
 640         idx++;
 641     }
 642
 643     /* If we went through the whole string, then it might be okay.
 644     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
 645     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
 646     The rest of the time it could be an Olson ID. George */
 647     return (UBool)(id[idx] == 0
 648         || uprv_strcmp(id, "PST8PDT") == 0
 649         || uprv_strcmp(id, "MST7MDT") == 0
 650         || uprv_strcmp(id, "CST6CDT") == 0
 651         || uprv_strcmp(id, "EST5EDT") == 0);
 652 }
 653 #endif
 654
 655 #if defined(U_TZNAME) && !defined(U_WINDOWS)
 656
 657 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
 658 typedef struct OffsetZoneMapping {
 659     int32_t offsetSeconds;
 660     int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
 661     const char *stdID;
 662     const char *dstID;
 663     const char *olsonID;
 664 } OffsetZoneMapping;
 665
 666 /*
 667 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
 668 and maps it to an Olson ID.
 669 Before adding anything to this list, take a look at
 670 icu/source/tools/tzcode/tz.alias
 671 Sometimes no daylight savings (0) is important to define due to aliases.
 672 This list can be tested with icu/source/test/compat/tzone.pl
 673 More values could be added to daylightType to increase precision.
 674 */
 675 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
 676     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
 677     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
 678     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
 679     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
 680     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
 681     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
 682     {-36000, 2, "EST", "EST", "Australia/Sydney"},
 683     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
 684     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
 685     {-34200, 2, "CST", "CST", "Australia/South"},
 686     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
 687     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
 688     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
 689     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
 690     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
 691     {-28800, 2, "WST", "WST", "Australia/West"},
 692     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
 693     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
 694     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
 695     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
 696     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
 697     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
 698     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
 699     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
 700     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
 701     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
 702     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
 703     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
 704     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
 705     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
 706     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
 707     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
 708     {0, 1, "GMT", "IST", "Europe/Dublin"},
 709     {0, 1, "GMT", "BST", "Europe/London"},
 710     {0, 0, "WET", "WEST", "Africa/Casablanca"},
 711     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
 712     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
 713     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
 714     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
 715     {10800, 2, "UYT", "UYST", "America/Montevideo"},
 716     {10800, 1, "WGT", "WGST", "America/Godthab"},
 717     {10800, 2, "BRT", "BRST", "Brazil/East"},
 718     {12600, 1, "NST", "NDT", "America/St_Johns"},
 719     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
 720     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
 721     {14400, 2, "CLT", "CLST", "Chile/Continental"},
 722     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
 723     {14400, 2, "PYT", "PYST", "America/Asuncion"},
 724     {18000, 1, "CST", "CDT", "America/Havana"},
 725     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
 726     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
 727     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
 728     {21600, 0, "CST", "CDT", "America/Guatemala"},
 729     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
 730     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
 731     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
 732     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
 733     {32400, 1, "AKST", "AKDT", "US/Alaska"},
 734     {36000, 1, "HAST", "HADT", "US/Aleutian"}
 735 };
 736
 737 /*#define DEBUG_TZNAME*/
 738
 739 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
 740 {
 741     int32_t idx;
 742 #ifdef DEBUG_TZNAME
 743     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
 744 #endif
 745     for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
 746     {
 747         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
 748             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
 749             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
 750             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
 751         {
 752             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
 753         }
 754     }
 755     return NULL;
 756 }
 757 #endif
 758
 759 U_CAPI const char* U_EXPORT2
 760 uprv_tzname(int n)
 761 {
 762     const char *tzid = NULL;
 763 #ifdef U_WINDOWS
 764     tzid = uprv_detectWindowsTimeZone();
 765
 766     if (tzid != NULL) {
 767         return tzid;
 768     }
 769 #else
 770
 771 /*#if defined(U_DARWIN)
 772     int ret;
 773
 774     tzid = getenv("TZFILE");
 775     if (tzid != NULL) {
 776         return tzid;
 777     }
 778 #endif*/
 779
 780 /* This code can be temporarily disabled to test tzname resolution later on. */
 781 #ifndef DEBUG_TZNAME
 782     tzid = getenv("TZ");
 783     if (tzid != NULL && isValidOlsonID(tzid))
 784     {
 785         /* This might be a good Olson ID. */
 786         if (uprv_strncmp(tzid, "posix/", 6) == 0
 787             || uprv_strncmp(tzid, "right/", 6) == 0)
 788         {
 789             /* Remove the posix/ or right/ prefix. */
 790             tzid += 6;
 791         }
 792         return tzid;
 793     }
 794     /* else U_TZNAME will give a better result. */
 795 #endif
 796
 797 #if defined(CHECK_LOCALTIME_LINK)
 798     /* Caller must handle threading issues */
 799     if (gTimeZoneBufferPtr == NULL) {
 800         /*
 801         This is a trick to look at the name of the link to get the Olson ID
 802         because the tzfile contents is underspecified.
 803         This isn't guaranteed to work because it may not be a symlink.
 804         */
 805         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
 806         if (0 < ret) {
 807             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
 808             gTimeZoneBuffer[ret] = 0;
 809             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
 810                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
 811             {
 812                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
 813             }
 814         }
 815     }
 816     else {
 817         return gTimeZoneBufferPtr;
 818     }
 819 #endif
 820 #endif
 821
 822 #ifdef U_TZNAME
 823 #if !defined(U_WINDOWS)
 824     /*
 825     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
 826     So we remap the abbreviation to an olson ID.
 827
 828     Since Windows exposes a little more timezone information,
 829     we normally don't use this code on Windows because
 830     uprv_detectWindowsTimeZone should have already given the correct answer.
 831     */
 832     {
 833         struct tm juneSol, decemberSol;
 834         int daylightType;
 835         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
 836         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
 837
 838         /* This probing will tell us when daylight savings occurs.  */
 839         localtime_r(&juneSolstice, &juneSol);
 840         localtime_r(&decemberSolstice, &decemberSol);
 841         daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
 842         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
 843         if (tzid != NULL) {
 844             return tzid;
 845         }
 846     }
 847 #endif
 848     return U_TZNAME[n];
 849 #else
 850     return "";
 851 #endif
 852 }
 853
 854 /* Get and set the ICU data directory --------------------------------------- */
 855
 856 static char *gDataDirectory = NULL;
 857 #if U_POSIX_LOCALE
 858  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
 859 #endif
 860
 861 static UBool U_CALLCONV putil_cleanup(void)
 862 {
 863     if (gDataDirectory && *gDataDirectory) {
 864         uprv_free(gDataDirectory);
 865     }
 866     gDataDirectory = NULL;
 867 #if U_POSIX_LOCALE
 868     if (gCorrectedPOSIXLocale) {
 869         uprv_free(gCorrectedPOSIXLocale);
 870         gCorrectedPOSIXLocale = NULL;
 871     }
 872 #endif
 873     return TRUE;
 874 }
 875
 876 /*
 877  * Set the data directory.
 878  *    Make a copy of the passed string, and set the global data dir to point to it.
 879  *    TODO:  see bug #2849, regarding thread safety.
 880  */
 881 U_CAPI void U_EXPORT2
 882 u_setDataDirectory(const char *directory) {
 883     char *newDataDir;
 884     int32_t length;
 885
 886     if(directory==NULL || *directory==0) {
 887         /* A small optimization to prevent the malloc and copy when the
 888         shared library is used, and this is a way to make sure that NULL
 889         is never returned.
 890         */
 891         newDataDir = (char *)"";
 892     }
 893     else {
 894         length=(int32_t)uprv_strlen(directory);
 895         newDataDir = (char *)uprv_malloc(length + 2);
 896         /* Exit out if newDataDir could not be created. */
 897         if (newDataDir == NULL) {
 898             return;
 899         }
 900         uprv_strcpy(newDataDir, directory);
 901
 902 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
 903         {
 904             char *p;
 905             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
 906                 *p = U_FILE_SEP_CHAR;
 907             }
 908         }
 909 #endif
 910     }
 911
 912     umtx_lock(NULL);
 913     if (gDataDirectory && *gDataDirectory) {
 914         uprv_free(gDataDirectory);
 915     }
 916     gDataDirectory = newDataDir;
 917     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
 918     umtx_unlock(NULL);
 919 }
 920
 921 U_CAPI UBool U_EXPORT2
 922 uprv_pathIsAbsolute(const char *path)
 923 {
 924   if(!path || !*path) {
 925     return FALSE;
 926   }
 927
 928   if(*path == U_FILE_SEP_CHAR) {
 929     return TRUE;
 930   }
 931
 932 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
 933   if(*path == U_FILE_ALT_SEP_CHAR) {
 934     return TRUE;
 935   }
 936 #endif
 937
 938 #if defined(U_WINDOWS)
 939   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
 940        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
 941       path[1] == ':' ) {
 942     return TRUE;
 943   }
 944 #endif
 945
 946   return FALSE;
 947 }
 948
 949 U_CAPI const char * U_EXPORT2
 950 u_getDataDirectory(void) {
 951     const char *path = NULL;
 952 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
 953     const char *simulator_root = NULL;
 954     char datadir_path_buffer[PATH_MAX];
 955 #endif
 956
 957     /* if we have the directory, then return it immediately */
 958     UMTX_CHECK(NULL, gDataDirectory, path);
 959
 960     if(path) {
 961         return path;
 962     }
 963
 964     /*
 965     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
 966     override ICU's data with the ICU_DATA environment variable. This prevents
 967     problems where multiple custom copies of ICU's specific version of data
 968     are installed on a system. Either the application must define the data
 969     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
 970     ICU, set the data with udata_setCommonData or trust that all of the
 971     required data is contained in ICU's data library that contains
 972     the entry point defined by U_ICUDATA_ENTRY_POINT.
 973
 974     There may also be some platforms where environment variables
 975     are not allowed.
 976     */
 977 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
 978     /* First try to get the environment variable */
 979     path=getenv("ICU_DATA");
 980 #   endif
 981
 982     /* ICU_DATA_DIR may be set as a compile option */
 983 #   ifdef ICU_DATA_DIR
 984     if(path==NULL || *path==0) {
 985         path=ICU_DATA_DIR;
 986 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
 987         simulator_root=getenv("IPHONE_SIMULATOR_ROOT");
 988         if (simulator_root != NULL) {
 989             (void) strlcpy(datadir_path_buffer, simulator_root, PATH_MAX);
 990             (void) strlcat(datadir_path_buffer, path, PATH_MAX);
 991             path=datadir_path_buffer;
 992         }
 993 #endif
 994     }
 995 #   endif
 996
 997     if(path==NULL) {
 998         /* It looks really bad, set it to something. */
 999         path = "";
1000     }
1001
1002     u_setDataDirectory(path);
1003     return gDataDirectory;
1004 }
1005
1006
1007
1008
1009
1010 /* Macintosh-specific locale information ------------------------------------ */
1011 #ifdef XP_MAC
1012
/*
 * One row of the Mac OS locale lookup table used by uprv_getDefaultLocaleID().
 * A field equal to MAC_LC_MAGIC_NUMBER matches any value during the lookup.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5  /* wildcard value in a table row */
#define MAC_LC_INIT_NUMBER -9   /* "not determined" value on the lookup side */

/*
 * Rows are tried in order and the first match wins, so the all-wildcard
 * fallback row must stay last. The number in the fourth column is the
 * date/region code the row applies to.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  0, "en_US" }, /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  1, "fr_FR" }, /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  2, "en_GB" }, /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  3, "de_DE" }, /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  4, "it_IT" }, /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  5, "nl_NL" }, /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  6, "fr_BE" }, /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  7, "sv_SE" }, /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  9, "da_DK" }, /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel is Hebrew/Israel; "is_IS" is Icelandic/Iceland (see code 21). */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland === */
    /* 22: Malta === (no POSIX ID assigned yet) */
    /* 23: Cyprus === (no POSIX ID assigned yet) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey === */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    /* 33: Hindi system for India (no POSIX ID assigned yet) */
    /* 34: Pakistan (no POSIX ID assigned yet) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    /* 46: Lapland  [Ask Rich for the data. HS] (no POSIX ID assigned yet) */
    /* 47: Faeroe Islands (no POSIX ID assigned yet) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1112
1113 #endif
1114
1115 #if U_POSIX_LOCALE
/*
 * Return the raw POSIX locale ID, whatever happens to be in it.
 * The value is determined on the first call and cached for later calls.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    const char *candidate;

    if (posixID != NULL) {
        return posixID;
    }

    /*
    * On Solaris two different calls to setlocale can result in
    * different values. Only get this value once.
    *
    * setlocale must be consulted first because an application can set it.
    *
    * LC_ALL can't be used because it's platform dependent. The LANG
    * environment variable seems to affect LC_CTYPE variable by default.
    * Here is what setlocale(LC_ALL, NULL) can return.
    * HPUX can return 'C C C C C C C'
    * Solaris can return /en_US/C/C/C/C/C on the second try.
    * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
    *
    * The default codepage detection also needs to use LC_CTYPE.
    *
    * Do not call setlocale(LC_*, "")! Using an empty string instead
    * of NULL, will modify the libc behavior.
    */
    candidate = setlocale(LC_CTYPE, NULL);
    if (candidate == NULL
        || uprv_strcmp("C", candidate) == 0
        || uprv_strcmp("POSIX", candidate) == 0)
    {
        /* Maybe we got some garbage.  Try the environment, most specific
           override first. */
        candidate = getenv("LC_ALL");
        if (candidate == NULL) {
            candidate = getenv("LC_CTYPE");
        }
        if (candidate == NULL) {
            candidate = getenv("LANG");
        }
    }

    if (candidate == NULL
        || uprv_strcmp("C", candidate) == 0
        || uprv_strcmp("POSIX", candidate) == 0)
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        candidate = "en_US_POSIX";
    }

    posixID = candidate;
    return posixID;
}
1165 #endif
1166
1167 /* NOTE: The caller should handle thread safety */
1168 U_CAPI const char* U_EXPORT2
1169 uprv_getDefaultLocaleID()
1170 {
1171 #if U_POSIX_LOCALE
1172 /*
1173   Note that:  (a '!' means the ID is improper somehow)
1174      LC_ALL  ---->     default_loc          codepage
1175 --------------------------------------------------------
1176      ab.CD             ab                   CD
1177      ab@CD             ab__CD               -
1178      ab@CD.EF          ab__CD               EF
1179
1180      ab_CD.EF@GH       ab_CD_GH             EF
1181
1182 Some 'improper' ways to do the same as above:
1183   !  ab_CD@GH.EF       ab_CD_GH             EF
1184   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1185   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1186
1187      _CD@GH            _CD_GH               -
1188      _CD.EF@GH         _CD_GH               EF
1189
1190 The variant cannot have dots in it.
1191 The 'rightmost' variant (@xxx) wins.
1192 The leftmost codepage (.xxx) wins.
1193 */
1194     char *correctedPOSIXLocale = 0;
1195     const char* posixID = uprv_getPOSIXID();
1196     const char *p;
1197     const char *q;
1198     int32_t len;
1199
1200     /* Format: (no spaces)
1201     ll [ _CC ] [ . MM ] [ @ VV]
1202
1203       l = lang, C = ctry, M = charmap, V = variant
1204     */
1205
1206     if (gCorrectedPOSIXLocale != NULL) {
1207         return gCorrectedPOSIXLocale;
1208     }
1209
1210     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1211         /* assume new locale can't be larger than old one? */
1212         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1213         /* Exit on memory allocation error. */
1214         if (correctedPOSIXLocale == NULL) {
1215             return NULL;
1216         }
1217         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1218         correctedPOSIXLocale[p-posixID] = 0;
1219
1220         /* do not copy after the @ */
1221         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1222             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1223         }
1224     }
1225
1226     /* Note that we scan the *uncorrected* ID. */
1227     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1228         if (correctedPOSIXLocale == NULL) {
1229             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1230             /* Exit on memory allocation error. */
1231             if (correctedPOSIXLocale == NULL) {
1232                 return NULL;
1233             }
1234             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1235             correctedPOSIXLocale[p-posixID] = 0;
1236         }
1237         p++;
1238
1239         /* Take care of any special cases here.. */
1240         if (!uprv_strcmp(p, "nynorsk")) {
1241             p = "NY";
1242             /* Don't worry about no__NY. In practice, it won't appear. */
1243         }
1244
1245         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1246             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1247         }
1248         else {
1249             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1250         }
1251
1252         if ((q = uprv_strchr(p, '.')) != NULL) {
1253             /* How big will the resulting string be? */
1254             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1255             uprv_strncat(correctedPOSIXLocale, p, q-p);
1256             correctedPOSIXLocale[len] = 0;
1257         }
1258         else {
1259             /* Anything following the @ sign */
1260             uprv_strcat(correctedPOSIXLocale, p);
1261         }
1262
1263         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1264          * How about 'russian' -> 'ru'?
1265          * Many of the other locales using ISO codes will be handled by the
1266          * canonicalization functions in uloc_getDefault.
1267          */
1268     }
1269
1270     /* Was a correction made? */
1271     if (correctedPOSIXLocale != NULL) {
1272         posixID = correctedPOSIXLocale;
1273     }
1274     else {
1275         /* copy it, just in case the original pointer goes away.  See j2395 */
1276         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1277         /* Exit on memory allocation error. */
1278         if (correctedPOSIXLocale == NULL) {
1279             return NULL;
1280         }
1281         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1282     }
1283
1284     if (gCorrectedPOSIXLocale == NULL) {
1285         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1286         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1287         correctedPOSIXLocale = NULL;
1288     }
1289
1290     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1291         uprv_free(correctedPOSIXLocale);
1292     }
1293
1294     return posixID;
1295
1296 #elif defined(U_WINDOWS)
1297     UErrorCode status = U_ZERO_ERROR;
1298     LCID id = GetThreadLocale();
1299     const char* locID = uprv_convertToPosix(id, &status);
1300
1301     if (U_FAILURE(status)) {
1302         locID = "en_US";
1303     }
1304     return locID;
1305
1306 #elif defined(XP_MAC)
1307     int32_t script = MAC_LC_INIT_NUMBER;
1308     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1309     int32_t region = MAC_LC_INIT_NUMBER;
1310     /* = GetScriptManagerVariable(smRegionCode);*/
1311     int32_t lang = MAC_LC_INIT_NUMBER;
1312     /* = GetScriptManagerVariable(smScriptLang);*/
1313     int32_t date_region = MAC_LC_INIT_NUMBER;
1314     const char* posixID = 0;
1315     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1316     int32_t i;
1317     Intl1Hndl ih;
1318
1319     ih = (Intl1Hndl) GetIntlResource(1);
1320     if (ih)
1321         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1322
1323     for (i = 0; i < count; i++) {
1324         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1325              || (mac_lc_recs[i].script == script))
1326             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1327              || (mac_lc_recs[i].region == region))
1328             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1329              || (mac_lc_recs[i].lang == lang))
1330             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1331              || (mac_lc_recs[i].date_region == date_region))
1332             )
1333         {
1334             posixID = mac_lc_recs[i].posixID;
1335             break;
1336         }
1337     }
1338
1339     return posixID;
1340
1341 #elif defined(OS400)
1342     /* locales are process scoped and are by definition thread safe */
1343     static char correctedLocale[64];
1344     const  char *localeID = getenv("LC_ALL");
1345            char *p;
1346
1347     if (localeID == NULL)
1348         localeID = getenv("LANG");
1349     if (localeID == NULL)
1350         localeID = setlocale(LC_ALL, NULL);
1351     /* Make sure we have something... */
1352     if (localeID == NULL)
1353         return "en_US_POSIX";
1354
1355     /* Extract the locale name from the path. */
1356     if((p = uprv_strrchr(localeID, '/')) != NULL)
1357     {
1358         /* Increment p to start of locale name. */
1359         p++;
1360         localeID = p;
1361     }
1362
1363     /* Copy to work location. */
1364     uprv_strcpy(correctedLocale, localeID);
1365
1366     /* Strip off the '.locale' extension. */
1367     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1368         *p = 0;
1369     }
1370
1371     /* Upper case the locale name. */
1372     T_CString_toUpperCase(correctedLocale);
1373
1374     /* See if we are using the POSIX locale.  Any of the
1375     * following are equivalent and use the same QLGPGCMA
1376     * (POSIX) locale.
1377     * QLGPGCMA2 means UCS2
1378     * QLGPGCMA_4 means UTF-32
1379     * QLGPGCMA_8 means UTF-8
1380     */
1381     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1382         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1383         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1384     {
1385         uprv_strcpy(correctedLocale, "en_US_POSIX");
1386     }
1387     else
1388     {
1389         int16_t LocaleLen;
1390
1391         /* Lower case the lang portion. */
1392         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1393         {
1394             *p = uprv_tolower(*p);
1395         }
1396
1397         /* Adjust for Euro.  After '_E' add 'URO'. */
1398         LocaleLen = uprv_strlen(correctedLocale);
1399         if (correctedLocale[LocaleLen - 2] == '_' &&
1400             correctedLocale[LocaleLen - 1] == 'E')
1401         {
1402             uprv_strcat(correctedLocale, "URO");
1403         }
1404
1405         /* If using Lotus-based locale then convert to
1406          * equivalent non Lotus.
1407          */
1408         else if (correctedLocale[LocaleLen - 2] == '_' &&
1409             correctedLocale[LocaleLen - 1] == 'L')
1410         {
1411             correctedLocale[LocaleLen - 2] = 0;
1412         }
1413
1414         /* There are separate simplified and traditional
1415          * locales called zh_HK_S and zh_HK_T.
1416          */
1417         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1418         {
1419             uprv_strcpy(correctedLocale, "zh_HK");
1420         }
1421
1422         /* A special zh_CN_GBK locale...
1423         */
1424         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1425         {
1426             uprv_strcpy(correctedLocale, "zh_CN");
1427         }
1428
1429     }
1430
1431     return correctedLocale;
1432 #endif
1433
1434 }
1435
1436 #if U_POSIX_LOCALE
/*
Different platforms sometimes report one charset name while really meaning
another. This function rewrites such names so that they are compatible with
ICU. Only conflicting/ambiguous aliases should be resolved here; before
adding anything, please consider adding unique names to the ICU alias table
in the data directory instead.

locale  POSIX locale the name belongs to; NULL and "" are treated alike.
name    charset name as reported by the platform; may be NULL.
Returns the possibly remapped name, or NULL when name is NULL or ends up
empty.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && locale[0] == 0) {
        /* Make sure that an empty locale is handled the same way. */
        locale = NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name; the locale disambiguates. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library
         * routines treat it as if it was ISO-8859-1 instead of US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name; the locale disambiguates. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#endif

    /* return NULL when "" is passed in */
    return (name[0] != 0) ? name : NULL;
}
1538
/*
 * Extract the charset part of a POSIX locale ID of the form
 * "locale.charset[@variant]".
 *
 * localeName    POSIX locale ID; may be NULL.
 * buffer        receives the charset text (cut at the first '@').
 * buffCapacity  size of buffer in bytes.
 * Returns the charset name after platform remapping, or NULL when
 * localeName is NULL, contains no '.', or the remapping yields NULL.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    char *variant;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Everything before the '.' is the locale, limited to localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Everything after the '.' is the charset, possibly with a variant. */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */

    variant = uprv_strchr(buffer, '@');
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, buffer);
}
1559 #endif
1560
/*
 * Determine the name of the platform's default codepage.
 *
 *  - OS400:   "ibm-<ccsid>" from the job's CCSID (ibm-37 when unavailable).
 *  - OS390:   nl_langinfo(CODESET) followed by UCNV_SWAP_LFNL_OPTION_STRING.
 *  - XP_MAC:  "macintosh".
 *  - Windows: "windows-<ACP>".
 *  - POSIX:   the charset part of the POSIX locale ID if there is one,
 *             otherwise nl_langinfo (when available), otherwise "US-ASCII".
 *  - other:   "US-ASCII".
 *
 * Except for constant results, the returned pointer refers to a static
 * buffer inside this function.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* %d expects an int; the value is at most 16 bits wide. */
    sprintf(codepage,"ibm-%d", (int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    const char *codeset = nl_langinfo(CODESET);
    /* Room left for the codeset once the option suffix and the
       terminator are accounted for. */
    const size_t room = sizeof(codepage) - 1 - uprv_strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    if (codeset == NULL) {
        codeset = "";
    }
    /* A "%63s" conversion would only set a MINIMUM width: it pads short
       names with blanks and never truncates, so the suffix would overrun
       the buffer. Copy with an explicit upper bound instead. */
    uprv_strncpy(codepage, codeset, room);
    codepage[room] = 0; /* NULL terminate */
    uprv_strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* %d expects an int; GetACP() returns an unsigned value. */
    sprintf(codepage, "windows-%d", (int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1645
1646
1647 U_CAPI const char*  U_EXPORT2
1648 uprv_getDefaultCodepage()
1649 {
1650     static char const  *name = NULL;
1651     umtx_lock(NULL);
1652     if (name == NULL) {
1653         name = int_getDefaultCodepage();
1654     }
1655     umtx_unlock(NULL);
1656     return name;
1657 }
1658
1659
1660 /* end of platform-specific implementation -------------- */
1661
1662 /* version handling --------------------------------------------------------- */
1663
1664 U_CAPI void U_EXPORT2
1665 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1666     char *end;
1667     uint16_t part=0;
1668
1669     if(versionArray==NULL) {
1670         return;
1671     }
1672
1673     if(versionString!=NULL) {
1674         for(;;) {
1675             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1676             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1677                 break;
1678             }
1679             versionString=end+1;
1680         }
1681     }
1682
1683     while(part<U_MAX_VERSION_LENGTH) {
1684         versionArray[part++]=0;
1685     }
1686 }
1687
1688 U_CAPI void U_EXPORT2
1689 u_versionToString(UVersionInfo versionArray, char *versionString) {
1690     uint16_t count, part;
1691     uint8_t field;
1692
1693     if(versionString==NULL) {
1694         return;
1695     }
1696
1697     if(versionArray==NULL) {
1698         versionString[0]=0;
1699         return;
1700     }
1701
1702     /* count how many fields need to be written */
1703     for(count=4; count>0 && versionArray[count-1]==0; --count) {
1704     }
1705
1706     if(count <= 1) {
1707         count = 2;
1708     }
1709
1710     /* write the first part */
1711     /* write the decimal field value */
1712     field=versionArray[0];
1713     if(field>=100) {
1714         *versionString++=(char)('0'+field/100);
1715         field%=100;
1716     }
1717     if(field>=10) {
1718         *versionString++=(char)('0'+field/10);
1719         field%=10;
1720     }
1721     *versionString++=(char)('0'+field);
1722
1723     /* write the following parts */
1724     for(part=1; part<count; ++part) {
1725         /* write a dot first */
1726         *versionString++=U_VERSION_DELIMITER;
1727
1728         /* write the decimal field value */
1729         field=versionArray[part];
1730         if(field>=100) {
1731             *versionString++=(char)('0'+field/100);
1732             field%=100;
1733         }
1734         if(field>=10) {
1735             *versionString++=(char)('0'+field/10);
1736             field%=10;
1737         }
1738         *versionString++=(char)('0'+field);
1739     }
1740
1741     /* NUL-terminate */
1742     *versionString=0;
1743 }
1744
1745 U_CAPI void U_EXPORT2
1746 u_getVersion(UVersionInfo versionArray) {
1747     u_versionFromString(versionArray, U_ICU_VERSION);
1748 }
1749
1750 /*
1751  * Hey, Emacs, please set the following:
1752  *
1753  * Local Variables:
1754  * indent-tabs-mode: nil
1755  * End:
1756  *
1757  */