icuSources/common/putil.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1997-2010, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
  10 *
  11 *   Date        Name        Description
  12 *   04/14/97    aliu        Creation.
  13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
  14 *                            getDefaultLocaleID().
  15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
  16 *                            for assumed case.  Non-UNIX platforms must be
  17 *                            special-cased.  Rewrote numeric methods dealing
  18 *                            with NaN and Infinity to be platform independent
  19 *                             over all IEEE 754 platforms.
  20 *   05/13/97    aliu        Restored sign of timezone
  21 *                            (semantics are hours West of GMT)
  22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
  23 *                             nextDouble..
  24 *   07/22/98    stephen     Added remainder, max, min, trunc
  25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
  26 *   08/24/98    stephen     Added longBitsFromDouble
  27 *   09/08/98    stephen     Minor changes for Mac Port
  28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
  29 *                            Fixed EBCDIC tables
  30 *   04/15/99    stephen     Converted to C.
  31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
  32 *   08/04/99    jeffrey R.  Added OS/2 changes
  33 *   11/15/99    helena      Integrated S/390 IEEE support.
  34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
  35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
  36 *   01/03/08    Steven L.   Fake Time Support
  37 ******************************************************************************
  38 */
  39
  40 /* Define _XOPEN_SOURCE for Solaris and friends. */
  41 /* NetBSD needs it to be >= 4 */
  42 #if !defined(_XOPEN_SOURCE)
  43 #if __STDC_VERSION__ >= 199901L
  44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
  45 #define _XOPEN_SOURCE 600
  46 #else
  47 #define _XOPEN_SOURCE 4
  48 #endif
  49 #endif
  50
/* Make sure that functions such as readlink are declared.
Poorly upgraded Solaris machines cannot have this macro defined;
cleanly installed Solaris machines can use this #define.
*/
  55 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
  56 #define _XOPEN_SOURCE_EXTENDED 1
  57 #endif
  58
  59 /* include ICU headers */
  60 #include "unicode/utypes.h"
  61 #include "unicode/putil.h"
  62 #include "unicode/ustring.h"
  63 #include "putilimp.h"
  64 #include "uassert.h"
  65 #include "umutex.h"
  66 #include "cmemory.h"
  67 #include "cstring.h"
  68 #include "locmap.h"
  69 #include "ucln_cmn.h"
  70
  71 /* Include standard headers. */
  72 #include <stdio.h>
  73 #include <stdlib.h>
  74 #include <string.h>
  75 #include <math.h>
  76 #include <locale.h>
  77 #include <float.h>
  78 #include <time.h>
  79
  80 /* include system headers */
  81 #ifdef U_WINDOWS
  82 #   define WIN32_LEAN_AND_MEAN
  83 #   define VC_EXTRALEAN
  84 #   define NOUSER
  85 #   define NOSERVICE
  86 #   define NOIME
  87 #   define NOMCX
  88 #   include <windows.h>
  89 #   include "wintz.h"
  90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
  91 /* tzset isn't defined in strict ANSI on Cygwin. */
  92 #   undef __STRICT_ANSI__
  93 #elif defined(OS400)
  94 #   include <float.h>
  95 #   include <qusec.h>       /* error code structure */
  96 #   include <qusrjobi.h>
  97 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
  98 #   include <mih/testptr.h> /* For uprv_maximumPtr */
  99 #elif defined(XP_MAC)
 100 #   include <Files.h>
 101 #   include <IntlResources.h>
 102 #   include <Script.h>
 103 #   include <Folders.h>
 104 #   include <MacTypes.h>
 105 #   include <TextUtils.h>
 106 #   define ICU_NO_USER_DATA_OVERRIDE 1
 107 #elif defined(OS390)
 108 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
 109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
 110 #include <limits.h>
 111 #include <unistd.h>
 112 #elif defined(U_QNX)
 113 #include <sys/neutrino.h>
 114 #elif defined(U_SOLARIS)
 115 # ifndef _XPG4_2
 116 #  define _XPG4_2
 117 # endif
 118 #endif
 119
 120
 121 #if defined(U_DARWIN)
 122 #include <TargetConditionals.h>
 123 #endif
 124
 125 #ifndef U_WINDOWS
 126 #include <sys/time.h>
 127 #endif
 128
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more of its features, we can test U_HAVE_NL_LANGINFO instead.
 */
 134
 135 #if U_HAVE_NL_LANGINFO_CODESET
 136 #include <langinfo.h>
 137 #endif
 138
 139 /**
 140  * Simple things (presence of functions, etc) should just go in configure.in and be added to
 141  * icucfg.h via autoheader.
 142  */
 143 #if defined(HAVE_CONFIG_H)
 144 #include "icucfg.h"
 145 #endif
 146
/* Define the extension for data files, again...
 * (The string is the bare extension, without a leading dot.) */
#define DATA_TYPE "dat"

/* Leave this copyright notice here!
 * NOTE(review): nothing in the visible part of this file reads this array;
 * presumably it exists so that the copyright string ends up in the compiled
 * object -- confirm before removing it. */
static const char copyright[] = U_COPYRIGHT_STRING;
 152
 153 /* floating point implementations ------------------------------------------- */
 154
 155 /* We return QNAN rather than SNAN*/
 156 #define SIGN 0x80000000U
 157
 158 /* Make it easy to define certain types of constants */
 159 typedef union {
 160     int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
 161     double d64;
 162 } BitPatternConversion;
 163 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
 164 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
 165
 166 /*---------------------------------------------------------------------------
 167   Platform utilities
 168   Our general strategy is to assume we're on a POSIX platform.  Platforms which
 169   are non-POSIX must declare themselves so.  The default POSIX implementation
 170   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
 171   functions).
 172   ---------------------------------------------------------------------------*/
 173
 174 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
 175 #   undef U_POSIX_LOCALE
 176 #else
 177 #   define U_POSIX_LOCALE    1
 178 #endif
 179
 180 /*
 181     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
 182     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
 183 */
 184 #if !IEEE_754
 185 static char*
 186 u_topNBytesOfDouble(double* d, int n)
 187 {
 188 #if U_IS_BIG_ENDIAN
 189     return (char*)d;
 190 #else
 191     return (char*)(d + 1) - n;
 192 #endif
 193 }
 194
 195 static char*
 196 u_bottomNBytesOfDouble(double* d, int n)
 197 {
 198 #if U_IS_BIG_ENDIAN
 199     return (char*)(d + 1) - n;
 200 #else
 201     return (char*)d;
 202 #endif
 203 }
 204 #endif   /* !IEEE_754 */
 205
 206 #if IEEE_754
 207 static UBool
 208 u_signBit(double d) {
 209     uint8_t hiByte;
 210 #if U_IS_BIG_ENDIAN
 211     hiByte = *(uint8_t *)&d;
 212 #else
 213     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
 214 #endif
 215     return (hiByte & 0x80) != 0;
 216 }
 217 #endif
 218
 219
 220
 221 #if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function
 *
 * The three fakeClock_* variables below are not static, so they have
 * external linkage. They are written by getUTCtime_fake() while
 * fakeClockMutex is held.
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
/* Guards the one-time initialization of the fake clock. */
static UMTX fakeClockMutex = NULL;
 229
 230 static UDate getUTCtime_real() {
 231     struct timeval posixTime;
 232     gettimeofday(&posixTime, NULL);
 233     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
 234 }
 235
 236 static UDate getUTCtime_fake() {
 237     umtx_lock(&fakeClockMutex);
 238     if(!fakeClock_set) {
 239         UDate real = getUTCtime_real();
 240         const char *fake_start = getenv("U_FAKETIME_START");
 241         if((fake_start!=NULL) && (fake_start[0]!=0)) {
 242             sscanf(fake_start,"%lf",&fakeClock_t0);
 243             fakeClock_dt = fakeClock_t0 - real;
 244             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
 245                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
 246                     fakeClock_t0, fake_start, fakeClock_dt, real);
 247         } else {
 248           fakeClock_dt = 0;
 249             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
 250                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
 251         }
 252         fakeClock_set = TRUE;
 253     }
 254     umtx_unlock(&fakeClockMutex);
 255
 256     return getUTCtime_real() + fakeClock_dt;
 257 }
 258 #endif
 259
 260 #if defined(U_WINDOWS)
/* Overlays a FILETIME with a 64-bit integer so that the time value can be
 * used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
 269
 270 #endif
 271
 272 /*---------------------------------------------------------------------------
 273   Universal Implementations
 274   These are designed to work on all platforms.  Try these, and if they
 275   don't work on your platform, then special case your platform with new
 276   implementations.
 277 ---------------------------------------------------------------------------*/
 278
 279 U_CAPI UDate U_EXPORT2
 280 uprv_getUTCtime()
 281 {
 282 #if defined(U_DEBUG_FAKETIME)
 283     return getUTCtime_fake(); /* Hook for overriding the clock */
 284 #else
 285     return uprv_getRawUTCtime();
 286 #endif
 287 }
 288
 289 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
 290 U_CAPI UDate U_EXPORT2
 291 uprv_getRawUTCtime()
 292 {
 293 #if defined(XP_MAC)
 294     time_t t, t1, t2;
 295     struct tm tmrec;
 296
 297     uprv_memset( &tmrec, 0, sizeof(tmrec) );
 298     tmrec.tm_year = 70;
 299     tmrec.tm_mon = 0;
 300     tmrec.tm_mday = 1;
 301     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
 302
 303     time(&t);
 304     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 305     t2 = mktime(&tmrec);    /* seconds of current GMT*/
 306     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
 307 #elif defined(U_WINDOWS)
 308
 309     FileTimeConversion winTime;
 310     GetSystemTimeAsFileTime(&winTime.fileTime);
 311     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
 312 #else
 313
 314 #if defined(HAVE_GETTIMEOFDAY)
 315     struct timeval posixTime;
 316     gettimeofday(&posixTime, NULL);
 317     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
 318 #else
 319     time_t epochtime;
 320     time(&epochtime);
 321     return (UDate)epochtime * U_MILLIS_PER_SECOND;
 322 #endif
 323
 324 #endif
 325 }
 326
 327 /*-----------------------------------------------------------------------------
 328   IEEE 754
 329   These methods detect and return NaN and infinity values for doubles
 330   conforming to IEEE 754.  Platforms which support this standard include X86,
 331   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
 332   If this doesn't work on your platform, you have non-IEEE floating-point, and
 333   will need to code your own versions.  A naive implementation is to return 0.0
 334   for getNaN and getInfinity, and false for isNaN and isInfinite.
 335   ---------------------------------------------------------------------------*/
 336
 337 U_CAPI UBool U_EXPORT2
 338 uprv_isNaN(double number)
 339 {
 340 #if IEEE_754
 341     BitPatternConversion convertedNumber;
 342     convertedNumber.d64 = number;
 343     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
 344     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
 345
 346 #elif defined(OS390)
 347     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 348                         sizeof(uint32_t));
 349     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 350                         sizeof(uint32_t));
 351
 352     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
 353       (lowBits == 0x00000000L);
 354
 355 #else
 356     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 357     /* you'll need to replace this default implementation with what's correct*/
 358     /* for your platform.*/
 359     return number != number;
 360 #endif
 361 }
 362
 363 U_CAPI UBool U_EXPORT2
 364 uprv_isInfinite(double number)
 365 {
 366 #if IEEE_754
 367     BitPatternConversion convertedNumber;
 368     convertedNumber.d64 = number;
 369     /* Infinity is exactly 0x7FF0000000000000U. */
 370     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
 371 #elif defined(OS390)
 372     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 373                         sizeof(uint32_t));
 374     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
 375                         sizeof(uint32_t));
 376
 377     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
 378
 379 #else
 380     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 381     /* value, you'll need to replace this default implementation with what's*/
 382     /* correct for your platform.*/
 383     return number == (2.0 * number);
 384 #endif
 385 }
 386
 387 U_CAPI UBool U_EXPORT2
 388 uprv_isPositiveInfinity(double number)
 389 {
 390 #if IEEE_754 || defined(OS390)
 391     return (UBool)(number > 0 && uprv_isInfinite(number));
 392 #else
 393     return uprv_isInfinite(number);
 394 #endif
 395 }
 396
 397 U_CAPI UBool U_EXPORT2
 398 uprv_isNegativeInfinity(double number)
 399 {
 400 #if IEEE_754 || defined(OS390)
 401     return (UBool)(number < 0 && uprv_isInfinite(number));
 402
 403 #else
 404     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
 405                         sizeof(uint32_t));
 406     return((highBits & SIGN) && uprv_isInfinite(number));
 407
 408 #endif
 409 }
 410
 411 U_CAPI double U_EXPORT2
 412 uprv_getNaN()
 413 {
 414 #if IEEE_754 || defined(OS390)
 415     return gNan.d64;
 416 #else
 417     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
 418     /* you'll need to replace this default implementation with what's correct*/
 419     /* for your platform.*/
 420     return 0.0;
 421 #endif
 422 }
 423
 424 U_CAPI double U_EXPORT2
 425 uprv_getInfinity()
 426 {
 427 #if IEEE_754 || defined(OS390)
 428     return gInf.d64;
 429 #else
 430     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
 431     /* value, you'll need to replace this default implementation with what's*/
 432     /* correct for your platform.*/
 433     return 0.0;
 434 #endif
 435 }
 436
 437 U_CAPI double U_EXPORT2
 438 uprv_floor(double x)
 439 {
 440     return floor(x);
 441 }
 442
 443 U_CAPI double U_EXPORT2
 444 uprv_ceil(double x)
 445 {
 446     return ceil(x);
 447 }
 448
 449 U_CAPI double U_EXPORT2
 450 uprv_round(double x)
 451 {
 452     return uprv_floor(x + 0.5);
 453 }
 454
 455 U_CAPI double U_EXPORT2
 456 uprv_fabs(double x)
 457 {
 458     return fabs(x);
 459 }
 460
 461 U_CAPI double U_EXPORT2
 462 uprv_modf(double x, double* y)
 463 {
 464     return modf(x, y);
 465 }
 466
 467 U_CAPI double U_EXPORT2
 468 uprv_fmod(double x, double y)
 469 {
 470     return fmod(x, y);
 471 }
 472
 473 U_CAPI double U_EXPORT2
 474 uprv_pow(double x, double y)
 475 {
 476     /* This is declared as "double pow(double x, double y)" */
 477     return pow(x, y);
 478 }
 479
 480 U_CAPI double U_EXPORT2
 481 uprv_pow10(int32_t x)
 482 {
 483     return pow(10.0, (double)x);
 484 }
 485
 486 U_CAPI double U_EXPORT2
 487 uprv_fmax(double x, double y)
 488 {
 489 #if IEEE_754
 490     /* first handle NaN*/
 491     if(uprv_isNaN(x) || uprv_isNaN(y))
 492         return uprv_getNaN();
 493
 494     /* check for -0 and 0*/
 495     if(x == 0.0 && y == 0.0 && u_signBit(x))
 496         return y;
 497
 498 #endif
 499
 500     /* this should work for all flt point w/o NaN and Inf special cases */
 501     return (x > y ? x : y);
 502 }
 503
 504 U_CAPI double U_EXPORT2
 505 uprv_fmin(double x, double y)
 506 {
 507 #if IEEE_754
 508     /* first handle NaN*/
 509     if(uprv_isNaN(x) || uprv_isNaN(y))
 510         return uprv_getNaN();
 511
 512     /* check for -0 and 0*/
 513     if(x == 0.0 && y == 0.0 && u_signBit(y))
 514         return y;
 515
 516 #endif
 517
 518     /* this should work for all flt point w/o NaN and Inf special cases */
 519     return (x > y ? y : x);
 520 }
 521
 522 /**
 523  * Truncates the given double.
 524  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
 525  * This is different than calling floor() or ceil():
 526  * floor(3.3) = 3, floor(-3.3) = -4
 527  * ceil(3.3) = 4, ceil(-3.3) = -3
 528  */
 529 U_CAPI double U_EXPORT2
 530 uprv_trunc(double d)
 531 {
 532 #if IEEE_754
 533     /* handle error cases*/
 534     if(uprv_isNaN(d))
 535         return uprv_getNaN();
 536     if(uprv_isInfinite(d))
 537         return uprv_getInfinity();
 538
 539     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
 540         return ceil(d);
 541     else
 542         return floor(d);
 543
 544 #else
 545     return d >= 0 ? floor(d) : ceil(d);
 546
 547 #endif
 548 }
 549
 550 /**
 551  * Return the largest positive number that can be represented by an integer
 552  * type of arbitrary bit length.
 553  */
 554 U_CAPI double U_EXPORT2
 555 uprv_maxMantissa(void)
 556 {
 557     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
 558 }
 559
 560 U_CAPI double U_EXPORT2
 561 uprv_log(double d)
 562 {
 563     return log(d);
 564 }
 565
 566 U_CAPI void * U_EXPORT2
 567 uprv_maximumPtr(void * base)
 568 {
 569 #if defined(OS400)
 570     /*
 571      * With the provided function we should never be out of range of a given segment
 572      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
 573      * id and 3 bytes for the offset.  The key is that the casting takes care of
 574      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
 575      * seen in a program is x001000 and when casted to an int would be 0.
 576      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
 577      *
 578      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
 579      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
 580      * This function determines the activation based on the pointer that is passed in and
 581      * calculates the appropriate maximum available size for
 582      * each pointer type (TERASPACE and non-TERASPACE)
 583      *
 584      * Unlike other operating systems, the pointer model isn't determined at
 585      * compile time on i5/OS.
 586      */
 587     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
 588         /* if it is a TERASPACE pointer the max is 2GB - 4k */
 589         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
 590     }
 591     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
 592     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
 593
 594 #else
 595     return U_MAX_PTR(base);
 596 #endif
 597 }
 598
 599 /*---------------------------------------------------------------------------
 600   Platform-specific Implementations
 601   Try these, and if they don't work on your platform, then special case your
 602   platform with new implementations.
 603   ---------------------------------------------------------------------------*/
 604
 605 /* Generic time zone layer -------------------------------------------------- */
 606
 607 /* Time zone utilities */
 608 U_CAPI void U_EXPORT2
 609 uprv_tzset()
 610 {
 611 #ifdef U_TZSET
 612     U_TZSET();
 613 #else
 614     /* no initialization*/
 615 #endif
 616 }
 617
 618 U_CAPI int32_t U_EXPORT2
 619 uprv_timezone()
 620 {
 621 #ifdef U_TIMEZONE
 622     return U_TIMEZONE;
 623 #else
 624     time_t t, t1, t2;
 625     struct tm tmrec;
 626     UBool dst_checked;
 627     int32_t tdiff = 0;
 628
 629     time(&t);
 630     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
 631     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
 632     t1 = mktime(&tmrec);                 /* local time in seconds*/
 633     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
 634     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
 635     tdiff = t2 - t1;
 636     /* imitate NT behaviour, which returns same timezone offset to GMT for
 637        winter and summer*/
 638     if (dst_checked)
 639         tdiff += 3600;
 640     return tdiff;
 641 #endif
 642 }
 643
 644 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
 645    some platforms need to have it declared here. */
 646
 647 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
 648 /* RS6000 and others reject char **tzname.  */
 649 extern U_IMPORT char *U_TZNAME[];
 650 #endif
 651
 652 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
 653 /* These platforms are likely to use Olson timezone IDs. */
 654 #define CHECK_LOCALTIME_LINK 1
 655 #if defined(U_DARWIN)
 656 #include <tzfile.h>
 657 #define TZZONEINFO      (TZDIR "/")
 658 #else
 659 #define TZDEFAULT       "/etc/localtime"
 660 #define TZZONEINFO      "/usr/share/zoneinfo/"
 661 #endif
 662 #if U_HAVE_DIRENT_H
 663 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
 664 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
 665    symlinked to /etc/localtime, which makes searchForTZFile return
 666    'localtime' when it's the first match. */
 667 #define TZFILE_SKIP2    "localtime"
 668 #define SEARCH_TZFILE
 669 #include <dirent.h>  /* Needed to search through system timezone files */
 670 #endif
/* File-local character buffer of PATH_MAX bytes and a pointer that starts
 * out NULL.
 * NOTE(review): the code using them lies outside the visible part of this
 * file; presumably they cache a detected time zone ID -- confirm. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
 673 #endif
 674
 675 #ifndef U_WINDOWS
 676 #define isNonDigit(ch) (ch < '0' || '9' < ch)
 677 static UBool isValidOlsonID(const char *id) {
 678     int32_t idx = 0;
 679
 680     /* Determine if this is something like Iceland (Olson ID)
 681     or AST4ADT (non-Olson ID) */
 682     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
 683         idx++;
 684     }
 685
 686     /* If we went through the whole string, then it might be okay.
 687     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
 688     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
 689     The rest of the time it could be an Olson ID. George */
 690     return (UBool)(id[idx] == 0
 691         || uprv_strcmp(id, "PST8PDT") == 0
 692         || uprv_strcmp(id, "MST7MDT") == 0
 693         || uprv_strcmp(id, "CST6CDT") == 0
 694         || uprv_strcmp(id, "EST5EDT") == 0);
 695 }
 696
 697 /* On some Unix-like OS, 'posix' subdirectory in
 698    /usr/share/zoneinfo replicates the top-level contents. 'right'
 699    subdirectory has the same set of files, but individual files
 700    are different from those in the top-level directory or 'posix'
 701    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
 702    has files for UTC.
 703    When the first match for /etc/localtime is in either of them
 704    (usually in posix because 'right' has different file contents),
 705    or TZ environment variable points to one of them, createTimeZone
 706    fails because, say, 'posix/America/New_York' is not an Olson
 707    timezone id ('America/New_York' is). So, we have to skip
 708    'posix/' and 'right/' at the beginning. */
 709 static void skipZoneIDPrefix(const char** id) {
 710     if (uprv_strncmp(*id, "posix/", 6) == 0
 711         || uprv_strncmp(*id, "right/", 6) == 0)
 712     {
 713         *id += 6;
 714     }
 715 }
 716 #endif
 717
 718 #if defined(U_TZNAME) && !defined(U_WINDOWS)
 719
 720 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
 721 typedef struct OffsetZoneMapping {
 722     int32_t offsetSeconds;
 723     int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
 724     const char *stdID;
 725     const char *dstID;
 726     const char *olsonID;
 727 } OffsetZoneMapping;
 728
 729 /*
 730 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
 731 and maps it to an Olson ID.
 732 Before adding anything to this list, take a look at
 733 icu/source/tools/tzcode/tz.alias
 734 Sometimes no daylight savings (0) is important to define due to aliases.
 735 This list can be tested with icu/source/test/compat/tzone.pl
 736 More values could be added to daylightType to increase precision.
 737 */
 738 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
 739     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
 740     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
 741     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
 742     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
 743     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
 744     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
 745     {-36000, 2, "EST", "EST", "Australia/Sydney"},
 746     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
 747     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
 748     {-34200, 2, "CST", "CST", "Australia/South"},
 749     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
 750     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
 751     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
 752     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
 753     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
 754     {-28800, 2, "WST", "WST", "Australia/West"},
 755     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
 756     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
 757     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
 758     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
 759     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
 760     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
 761     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
 762     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
 763     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
 764     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
 765     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
 766     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
 767     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
 768     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
 769     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
 770     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
 771     {0, 1, "GMT", "IST", "Europe/Dublin"},
 772     {0, 1, "GMT", "BST", "Europe/London"},
 773     {0, 0, "WET", "WEST", "Africa/Casablanca"},
 774     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
 775     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
 776     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
 777     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
 778     {10800, 2, "UYT", "UYST", "America/Montevideo"},
 779     {10800, 1, "WGT", "WGST", "America/Godthab"},
 780     {10800, 2, "BRT", "BRST", "Brazil/East"},
 781     {12600, 1, "NST", "NDT", "America/St_Johns"},
 782     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
 783     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
 784     {14400, 2, "CLT", "CLST", "Chile/Continental"},
 785     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
 786     {14400, 2, "PYT", "PYST", "America/Asuncion"},
 787     {18000, 1, "CST", "CDT", "America/Havana"},
 788     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
 789     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
 790     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
 791     {21600, 0, "CST", "CDT", "America/Guatemala"},
 792     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
 793     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
 794     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
 795     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
 796     {32400, 1, "AKST", "AKDT", "US/Alaska"},
 797     {36000, 1, "HAST", "HADT", "US/Aleutian"}
 798 };
 799
 800 /*#define DEBUG_TZNAME*/
 801
 802 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
 803 {
 804     int32_t idx;
 805 #ifdef DEBUG_TZNAME
 806     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
 807 #endif
 808     for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
 809     {
 810         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
 811             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
 812             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
 813             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
 814         {
 815             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
 816         }
 817     }
 818     return NULL;
 819 }
 820 #endif
 821
 822 #ifdef SEARCH_TZFILE
#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
#define MAX_READ_SIZE 512      /* Size of the chunk buffer used when comparing files. */

/*
 * State that compareBinaryFiles() keeps between calls so that the default
 * time zone file is opened, measured and read only once.
 */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; NULL until first loaded */
    int64_t defaultTZFileSize;  /* size reported by ftell(); 0 means "not measured yet" */
    FILE* defaultTZFilePtr;     /* stream for the default TZ file; NULL until first opened */
    UBool defaultTZstatus;      /* not touched by compareBinaryFiles() */
    int32_t defaultTZPosition;  /* offset into defaultTZBuffer reached by the current comparison */
} DefaultTZInfo;
 833
 834 /*
 835  * This method compares the two files given to see if they are a match.
 836  * It is currently use to compare two TZ files.
 837  */
 838 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
 839     FILE* file;
 840     int64_t sizeFile;
 841     int64_t sizeFileLeft;
 842     int32_t sizeFileRead;
 843     int32_t sizeFileToRead;
 844     char bufferFile[MAX_READ_SIZE];
 845     UBool result = TRUE;
 846
 847     if (tzInfo->defaultTZFilePtr == NULL) {
 848         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
 849     }
 850     file = fopen(TZFileName, "r");
 851
 852     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
 853
 854     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
 855         /* First check that the file size are equal. */
 856         if (tzInfo->defaultTZFileSize == 0) {
 857             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
 858             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
 859         }
 860         fseek(file, 0, SEEK_END);
 861         sizeFile = ftell(file);
 862         sizeFileLeft = sizeFile;
 863
 864         if (sizeFile != tzInfo->defaultTZFileSize) {
 865             result = FALSE;
 866         } else {
 867             /* Store the data from the files in seperate buffers and
 868              * compare each byte to determine equality.
 869              */
 870             if (tzInfo->defaultTZBuffer == NULL) {
 871                 rewind(tzInfo->defaultTZFilePtr);
 872                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
 873                 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
 874             }
 875             rewind(file);
 876             while(sizeFileLeft > 0) {
 877                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
 878                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
 879
 880                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
 881                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
 882                     result = FALSE;
 883                     break;
 884                 }
 885                 sizeFileLeft -= sizeFileRead;
 886                 tzInfo->defaultTZPosition += sizeFileRead;
 887             }
 888         }
 889     } else {
 890         result = FALSE;
 891     }
 892
 893     if (file != NULL) {
 894         fclose(file);
 895     }
 896
 897     return result;
 898 }
 899 /*
 900  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
 901  */
 902 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
 903 #define SKIP1 "."
 904 #define SKIP2 ".."
 905 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
 906 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
 907     char curpath[MAX_PATH_SIZE];
 908     DIR* dirp = opendir(path);
 909     DIR* subDirp = NULL;
 910     struct dirent* dirEntry = NULL;
 911
 912     char* result = NULL;
 913     if (dirp == NULL) {
 914         return result;
 915     }
 916
 917     /* Save the current path */
 918     uprv_memset(curpath, 0, MAX_PATH_SIZE);
 919     uprv_strcpy(curpath, path);
 920
 921     /* Check each entry in the directory. */
 922     while((dirEntry = readdir(dirp)) != NULL) {
 923         const char* dirName = dirEntry->d_name;
 924         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
 925             /* Create a newpath with the new entry to test each entry in the directory. */
 926             char newpath[MAX_PATH_SIZE];
 927             uprv_strcpy(newpath, curpath);
 928             uprv_strcat(newpath, dirName);
 929
 930             if ((subDirp = opendir(newpath)) != NULL) {
 931                 /* If this new path is a directory, make a recursive call with the newpath. */
 932                 closedir(subDirp);
 933                 uprv_strcat(newpath, "/");
 934                 result = searchForTZFile(newpath, tzInfo);
 935                 /*
 936                  Have to get out here. Otherwise, we'd keep looking
 937                  and return the first match in the top-level directory
 938                  if there's a match in the top-level. If not, this function
 939                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
 940                  It worked without this in most cases because we have a fallback of calling
 941                  localtime_r to figure out the default timezone.
 942                 */
 943                 if (result != NULL)
 944                     break;
 945             } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
 946                 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
 947                     const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
 948                     skipZoneIDPrefix(&zoneid);
 949                     uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
 950                     result = SEARCH_TZFILE_RESULT;
 951                     /* Get out after the first one found. */
 952                     break;
 953                 }
 954             }
 955         }
 956     }
 957     closedir(dirp);
 958     return result;
 959 }
 960 #endif
 961 U_CAPI const char* U_EXPORT2
 962 uprv_tzname(int n)
 963 {
 964     const char *tzid = NULL;
 965 #ifdef U_WINDOWS
 966     tzid = uprv_detectWindowsTimeZone();
 967
 968     if (tzid != NULL) {
 969         return tzid;
 970     }
 971 #else
 972
 973 /*#if defined(U_DARWIN)
 974     int ret;
 975
 976     tzid = getenv("TZFILE");
 977     if (tzid != NULL) {
 978         return tzid;
 979     }
 980 #endif*/
 981
 982 /* This code can be temporarily disabled to test tzname resolution later on. */
 983 #ifndef DEBUG_TZNAME
 984     tzid = getenv("TZ");
 985     if (tzid != NULL && isValidOlsonID(tzid))
 986     {
 987         /* This might be a good Olson ID. */
 988         skipZoneIDPrefix(&tzid);
 989         return tzid;
 990     }
 991     /* else U_TZNAME will give a better result. */
 992 #endif
 993
 994 #if defined(CHECK_LOCALTIME_LINK)
 995     /* Caller must handle threading issues */
 996     if (gTimeZoneBufferPtr == NULL) {
 997         /*
 998         This is a trick to look at the name of the link to get the Olson ID
 999         because the tzfile contents is underspecified.
1000         This isn't guaranteed to work because it may not be a symlink.
1001         */
1002         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1003         if (0 < ret) {
1004             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1005             gTimeZoneBuffer[ret] = 0;
1006             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1007                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1008             {
1009                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1010             }
1011         } else {
1012 #if defined(SEARCH_TZFILE)
1013             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1014             if (tzInfo != NULL) {
1015                 tzInfo->defaultTZBuffer = NULL;
1016                 tzInfo->defaultTZFileSize = 0;
1017                 tzInfo->defaultTZFilePtr = NULL;
1018                 tzInfo->defaultTZstatus = FALSE;
1019                 tzInfo->defaultTZPosition = 0;
1020
1021                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1022
1023                 /* Free previously allocated memory */
1024                 if (tzInfo->defaultTZBuffer != NULL) {
1025                     uprv_free(tzInfo->defaultTZBuffer);
1026                 }
1027                 if (tzInfo->defaultTZFilePtr != NULL) {
1028                     fclose(tzInfo->defaultTZFilePtr);
1029                 }
1030                 uprv_free(tzInfo);
1031             }
1032
1033             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1034                 return gTimeZoneBufferPtr;
1035             }
1036 #endif
1037         }
1038     }
1039     else {
1040         return gTimeZoneBufferPtr;
1041     }
1042 #endif
1043 #endif
1044
1045 #ifdef U_TZNAME
1046 #ifdef U_WINDOWS
1047     /* The return value is free'd in timezone.cpp on Windows because
1048      * the other code path returns a pointer to a heap location. */
1049     return uprv_strdup(U_TZNAME[n]);
1050 #else
1051     /*
1052     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1053     So we remap the abbreviation to an olson ID.
1054
1055     Since Windows exposes a little more timezone information,
1056     we normally don't use this code on Windows because
1057     uprv_detectWindowsTimeZone should have already given the correct answer.
1058     */
1059     {
1060         struct tm juneSol, decemberSol;
1061         int daylightType;
1062         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1063         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1064
1065         /* This probing will tell us when daylight savings occurs.  */
1066         localtime_r(&juneSolstice, &juneSol);
1067         localtime_r(&decemberSolstice, &decemberSol);
1068         daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1069         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1070         if (tzid != NULL) {
1071             return tzid;
1072         }
1073     }
1074     return U_TZNAME[n];
1075 #endif
1076 #else
1077     return "";
1078 #endif
1079 }
1080
1081 /* Get and set the ICU data directory --------------------------------------- */
1082
1083 static char *gDataDirectory = NULL;
1084 #if U_POSIX_LOCALE
1085  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1086 #endif
1087
1088 static UBool U_CALLCONV putil_cleanup(void)
1089 {
1090     if (gDataDirectory && *gDataDirectory) {
1091         uprv_free(gDataDirectory);
1092     }
1093     gDataDirectory = NULL;
1094 #if U_POSIX_LOCALE
1095     if (gCorrectedPOSIXLocale) {
1096         uprv_free(gCorrectedPOSIXLocale);
1097         gCorrectedPOSIXLocale = NULL;
1098     }
1099 #endif
1100     return TRUE;
1101 }
1102
1103 /*
1104  * Set the data directory.
1105  *    Make a copy of the passed string, and set the global data dir to point to it.
1106  *    TODO:  see bug #2849, regarding thread safety.
1107  */
1108 U_CAPI void U_EXPORT2
1109 u_setDataDirectory(const char *directory) {
1110     char *newDataDir;
1111     int32_t length;
1112
1113     if(directory==NULL || *directory==0) {
1114         /* A small optimization to prevent the malloc and copy when the
1115         shared library is used, and this is a way to make sure that NULL
1116         is never returned.
1117         */
1118         newDataDir = (char *)"";
1119     }
1120     else {
1121         length=(int32_t)uprv_strlen(directory);
1122         newDataDir = (char *)uprv_malloc(length + 2);
1123         /* Exit out if newDataDir could not be created. */
1124         if (newDataDir == NULL) {
1125             return;
1126         }
1127         uprv_strcpy(newDataDir, directory);
1128
1129 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1130         {
1131             char *p;
1132             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1133                 *p = U_FILE_SEP_CHAR;
1134             }
1135         }
1136 #endif
1137     }
1138
1139     umtx_lock(NULL);
1140     if (gDataDirectory && *gDataDirectory) {
1141         uprv_free(gDataDirectory);
1142     }
1143     gDataDirectory = newDataDir;
1144     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1145     umtx_unlock(NULL);
1146 }
1147
1148 U_CAPI UBool U_EXPORT2
1149 uprv_pathIsAbsolute(const char *path)
1150 {
1151   if(!path || !*path) {
1152     return FALSE;
1153   }
1154
1155   if(*path == U_FILE_SEP_CHAR) {
1156     return TRUE;
1157   }
1158
1159 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1160   if(*path == U_FILE_ALT_SEP_CHAR) {
1161     return TRUE;
1162   }
1163 #endif
1164
1165 #if defined(U_WINDOWS)
1166   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1167        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1168       path[1] == ':' ) {
1169     return TRUE;
1170   }
1171 #endif
1172
1173   return FALSE;
1174 }
1175
1176 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1177    until some client wrapper makefiles are updated */
1178 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1179 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1180 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1181 # endif
1182 #endif
1183
1184 U_CAPI const char * U_EXPORT2
1185 u_getDataDirectory(void) {
1186     const char *path = NULL;
1187 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1188     char datadir_path_buffer[PATH_MAX];
1189 #endif
1190
1191     /* if we have the directory, then return it immediately */
1192     UMTX_CHECK(NULL, gDataDirectory, path);
1193
1194     if(path) {
1195         return path;
1196     }
1197
1198     /*
1199     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1200     override ICU's data with the ICU_DATA environment variable. This prevents
1201     problems where multiple custom copies of ICU's specific version of data
1202     are installed on a system. Either the application must define the data
1203     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1204     ICU, set the data with udata_setCommonData or trust that all of the
1205     required data is contained in ICU's data library that contains
1206     the entry point defined by U_ICUDATA_ENTRY_POINT.
1207
1208     There may also be some platforms where environment variables
1209     are not allowed.
1210     */
1211 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1212     /* First try to get the environment variable */
1213     path=getenv("ICU_DATA");
1214 #   endif
1215
1216     /* ICU_DATA_DIR may be set as a compile option.
1217      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1218      * and is used only when data is built in archive mode eliminating the need
1219      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1220      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1221      * set their own path.
1222      */
1223 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1224     if(path==NULL || *path==0) {
1225 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1226         const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1227 # endif
1228 # ifdef ICU_DATA_DIR
1229         path=ICU_DATA_DIR;
1230 # else
1231         path=U_ICU_DATA_DEFAULT_DIR;
1232 # endif
1233 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1234         if (prefix != NULL) {
1235             snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1236             path=datadir_path_buffer;
1237         }
1238 # endif
1239     }
1240 #endif
1241
1242     if(path==NULL) {
1243         /* It looks really bad, set it to something. */
1244         path = "";
1245     }
1246
1247     u_setDataDirectory(path);
1248     return gDataDirectory;
1249 }
1250
1251
1252
1253
1254
1255 /* Macintosh-specific locale information ------------------------------------ */
1256 #ifdef XP_MAC
1257
/*
 * One row of the Macintosh-to-POSIX locale table. A field holding
 * MAC_LC_MAGIC_NUMBER acts as a wildcard when the table is searched.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER (-5)
#define MAC_LC_INIT_NUMBER (-9)

/*
 * Rows are tried in order and the first match wins, so the all-wildcard
 * fallback has to stay last. Regions without an entry (22 Malta, 23 Cyprus,
 * 33 Hindi system for India, 34 Pakistan, 46 Lapland, 47 Faeroe Islands)
 * have no mapping yet and end up on the fallback.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (was "is_IS", which is Icelandic for Iceland, see region 21) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1357
1358 #endif
1359
1360 #if U_POSIX_LOCALE
/* Shared implementation behind uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Looks up the POSIX locale ID for
 * LC_CTYPE or LC_MESSAGES; any other category simply yields the default
 * "en_US_POSIX". The result is never NULL.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;

    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * Ask the C library first, because an application may have set the
        * locale itself. On Solaris two calls to setlocale can give
        * different answers, so callers should fetch this value only once.
        *
        * LC_ALL is not queried through setlocale because the format of the
        * answer is platform dependent:
        *   HPUX can return 'C C C C C C C'
        *   Solaris can return /en_US/C/C/C/C/C on the second try.
        *   Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        * The LANG environment variable seems to affect LC_CTYPE by default,
        * and the default codepage detection needs LC_CTYPE as well.
        *
        * Do not call setlocale(LC_*, "")! Passing an empty string instead
        * of NULL modifies the libc behavior.
        */
        posixID = setlocale(category, NULL);

        if (posixID == NULL
            || uprv_strcmp("C", posixID) == 0
            || uprv_strcmp("POSIX", posixID) == 0)
        {
            /* Maybe we got some garbage.  Fall back on the environment:
             * LC_ALL, then the category's own variable, then LANG.
             */
            posixID = getenv("LC_ALL");
            if (posixID == NULL) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
            }
            if (posixID == NULL) {
                posixID = getenv("LANG");
            }
        }
    }

    if (posixID != NULL
        && uprv_strcmp("C", posixID) != 0
        && uprv_strcmp("POSIX", posixID) != 0)
    {
        return posixID;
    }

    /* Nothing worked.  Give it a nice POSIX default value. */
    return "en_US_POSIX";
}
1411
/* Returns the raw POSIX ID of the default locale, exactly as the system
 * reports it for LC_MESSAGES (and indirectly LC_ALL and LANG). The value is
 * looked up on the first call only and remembered afterwards.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1423
/* Returns the raw POSIX ID used for default codepage detection, exactly as
 * the system reports it for LC_CTYPE (and indirectly LC_ALL and LANG). The
 * value is looked up on the first call only and remembered afterwards.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1435 #endif
1436
1437 /* NOTE: The caller should handle thread safety */
1438 U_CAPI const char* U_EXPORT2
1439 uprv_getDefaultLocaleID()
1440 {
1441 #if U_POSIX_LOCALE
1442 /*
1443   Note that:  (a '!' means the ID is improper somehow)
1444      LC_ALL  ---->     default_loc          codepage
1445 --------------------------------------------------------
1446      ab.CD             ab                   CD
1447      ab@CD             ab__CD               -
1448      ab@CD.EF          ab__CD               EF
1449
1450      ab_CD.EF@GH       ab_CD_GH             EF
1451
1452 Some 'improper' ways to do the same as above:
1453   !  ab_CD@GH.EF       ab_CD_GH             EF
1454   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1455   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1456
1457      _CD@GH            _CD_GH               -
1458      _CD.EF@GH         _CD_GH               EF
1459
1460 The variant cannot have dots in it.
1461 The 'rightmost' variant (@xxx) wins.
1462 The leftmost codepage (.xxx) wins.
1463 */
1464     char *correctedPOSIXLocale = 0;
1465     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1466     const char *p;
1467     const char *q;
1468     int32_t len;
1469
1470     /* Format: (no spaces)
1471     ll [ _CC ] [ . MM ] [ @ VV]
1472
1473       l = lang, C = ctry, M = charmap, V = variant
1474     */
1475
1476     if (gCorrectedPOSIXLocale != NULL) {
1477         return gCorrectedPOSIXLocale;
1478     }
1479
1480     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1481         /* assume new locale can't be larger than old one? */
1482         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1483         /* Exit on memory allocation error. */
1484         if (correctedPOSIXLocale == NULL) {
1485             return NULL;
1486         }
1487         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1488         correctedPOSIXLocale[p-posixID] = 0;
1489
1490         /* do not copy after the @ */
1491         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1492             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1493         }
1494     }
1495
1496     /* Note that we scan the *uncorrected* ID. */
1497     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1498         if (correctedPOSIXLocale == NULL) {
1499             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1500             /* Exit on memory allocation error. */
1501             if (correctedPOSIXLocale == NULL) {
1502                 return NULL;
1503             }
1504             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1505             correctedPOSIXLocale[p-posixID] = 0;
1506         }
1507         p++;
1508
1509         /* Take care of any special cases here.. */
1510         if (!uprv_strcmp(p, "nynorsk")) {
1511             p = "NY";
1512             /* Don't worry about no__NY. In practice, it won't appear. */
1513         }
1514
1515         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1516             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1517         }
1518         else {
1519             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1520         }
1521
1522         if ((q = uprv_strchr(p, '.')) != NULL) {
1523             /* How big will the resulting string be? */
1524             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1525             uprv_strncat(correctedPOSIXLocale, p, q-p);
1526             correctedPOSIXLocale[len] = 0;
1527         }
1528         else {
1529             /* Anything following the @ sign */
1530             uprv_strcat(correctedPOSIXLocale, p);
1531         }
1532
1533         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1534          * How about 'russian' -> 'ru'?
1535          * Many of the other locales using ISO codes will be handled by the
1536          * canonicalization functions in uloc_getDefault.
1537          */
1538     }
1539
1540     /* Was a correction made? */
1541     if (correctedPOSIXLocale != NULL) {
1542         posixID = correctedPOSIXLocale;
1543     }
1544     else {
1545         /* copy it, just in case the original pointer goes away.  See j2395 */
1546         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1547         /* Exit on memory allocation error. */
1548         if (correctedPOSIXLocale == NULL) {
1549             return NULL;
1550         }
1551         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1552     }
1553
1554     if (gCorrectedPOSIXLocale == NULL) {
1555         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1556         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1557         correctedPOSIXLocale = NULL;
1558     }
1559
1560     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1561         uprv_free(correctedPOSIXLocale);
1562     }
1563
1564     return posixID;
1565
1566 #elif defined(U_WINDOWS)
1567     UErrorCode status = U_ZERO_ERROR;
1568     LCID id = GetThreadLocale();
1569     const char* locID = uprv_convertToPosix(id, &status);
1570
1571     if (U_FAILURE(status)) {
1572         locID = "en_US";
1573     }
1574     return locID;
1575
1576 #elif defined(XP_MAC)
1577     int32_t script = MAC_LC_INIT_NUMBER;
1578     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1579     int32_t region = MAC_LC_INIT_NUMBER;
1580     /* = GetScriptManagerVariable(smRegionCode);*/
1581     int32_t lang = MAC_LC_INIT_NUMBER;
1582     /* = GetScriptManagerVariable(smScriptLang);*/
1583     int32_t date_region = MAC_LC_INIT_NUMBER;
1584     const char* posixID = 0;
1585     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1586     int32_t i;
1587     Intl1Hndl ih;
1588
1589     ih = (Intl1Hndl) GetIntlResource(1);
1590     if (ih)
1591         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1592
1593     for (i = 0; i < count; i++) {
1594         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1595              || (mac_lc_recs[i].script == script))
1596             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1597              || (mac_lc_recs[i].region == region))
1598             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1599              || (mac_lc_recs[i].lang == lang))
1600             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1601              || (mac_lc_recs[i].date_region == date_region))
1602             )
1603         {
1604             posixID = mac_lc_recs[i].posixID;
1605             break;
1606         }
1607     }
1608
1609     return posixID;
1610
1611 #elif defined(OS400)
1612     /* locales are process scoped and are by definition thread safe */
1613     static char correctedLocale[64];
1614     const  char *localeID = getenv("LC_ALL");
1615            char *p;
1616
1617     if (localeID == NULL)
1618         localeID = getenv("LANG");
1619     if (localeID == NULL)
1620         localeID = setlocale(LC_ALL, NULL);
1621     /* Make sure we have something... */
1622     if (localeID == NULL)
1623         return "en_US_POSIX";
1624
1625     /* Extract the locale name from the path. */
1626     if((p = uprv_strrchr(localeID, '/')) != NULL)
1627     {
1628         /* Increment p to start of locale name. */
1629         p++;
1630         localeID = p;
1631     }
1632
1633     /* Copy to work location. */
1634     uprv_strcpy(correctedLocale, localeID);
1635
1636     /* Strip off the '.locale' extension. */
1637     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1638         *p = 0;
1639     }
1640
1641     /* Upper case the locale name. */
1642     T_CString_toUpperCase(correctedLocale);
1643
1644     /* See if we are using the POSIX locale.  Any of the
1645     * following are equivalent and use the same QLGPGCMA
1646     * (POSIX) locale.
1647     * QLGPGCMA2 means UCS2
1648     * QLGPGCMA_4 means UTF-32
1649     * QLGPGCMA_8 means UTF-8
1650     */
1651     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1652         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1653         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1654     {
1655         uprv_strcpy(correctedLocale, "en_US_POSIX");
1656     }
1657     else
1658     {
1659         int16_t LocaleLen;
1660
1661         /* Lower case the lang portion. */
1662         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1663         {
1664             *p = uprv_tolower(*p);
1665         }
1666
1667         /* Adjust for Euro.  After '_E' add 'URO'. */
1668         LocaleLen = uprv_strlen(correctedLocale);
1669         if (correctedLocale[LocaleLen - 2] == '_' &&
1670             correctedLocale[LocaleLen - 1] == 'E')
1671         {
1672             uprv_strcat(correctedLocale, "URO");
1673         }
1674
1675         /* If using Lotus-based locale then convert to
1676          * equivalent non Lotus.
1677          */
1678         else if (correctedLocale[LocaleLen - 2] == '_' &&
1679             correctedLocale[LocaleLen - 1] == 'L')
1680         {
1681             correctedLocale[LocaleLen - 2] = 0;
1682         }
1683
1684         /* There are separate simplified and traditional
1685          * locales called zh_HK_S and zh_HK_T.
1686          */
1687         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1688         {
1689             uprv_strcpy(correctedLocale, "zh_HK");
1690         }
1691
1692         /* A special zh_CN_GBK locale...
1693         */
1694         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1695         {
1696             uprv_strcpy(correctedLocale, "zh_CN");
1697         }
1698
1699     }
1700
1701     return correctedLocale;
1702 #endif
1703
1704 }
1705
1706 #if !U_CHARSET_IS_UTF8
1707 #if U_POSIX_LOCALE
/*
 * Platforms sometimes report one charset name while actually meaning another.
 * This function rewrites such names into ones ICU can resolve correctly.
 * Only conflicting or ambiguous aliases belong here; before extending it,
 * consider adding a unique name to the ICU alias table in the data directory.
 *
 * locale  POSIX locale ID used to disambiguate, or NULL. An empty string is
 *         treated exactly like NULL.
 * name    charset name as reported by the platform, or NULL.
 *
 * Returns the (possibly replaced) charset name. Returns NULL when name is
 * NULL or when the resulting name is the empty string.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && locale[0] == 0) {
        /* An empty locale carries no information; handle it like "no locale". */
        locale = NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Pick the ASCII compatible flavour of ibm-943. */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Pick the windows-1252 variant that includes the Euro sign. */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (uprv_strcmp(name, "EUC") == 0) {
        /* "EUC" alone is underspecified on Solaris; the locale decides. */
        if (locale != NULL) {
            if (uprv_strcmp(locale, "zh_CN") == 0) {
                name = "EUC-CN";
            }
            else if (uprv_strcmp(locale, "zh_TW") == 0) {
                name = "EUC-TW";
            }
            else if (uprv_strcmp(locale, "ko_KR") == 0) {
                name = "EUC-KR";
            }
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
         * ibm-954 is the best match.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * Solaris reports 646 as the default codepage, but its C library
         * routines behave as if it were ISO-8859-1 rather than US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (*name == 0 && locale == NULL) {
        /*
         * Neither a locale nor a charset name is available, which usually
         * means nl_langinfo returned nothing useful. Mac OS X uses UTF-8 by
         * default (especially the locale data and console).
         */
        name = "UTF-8";
    }
    else if (uprv_strcmp(name, "CP949") == 0) {
        /* A similar codepage avoids issues with backslash and the won symbol. */
        name = "EUC-KR";
    }
    else if (uprv_strcmp(name, "US-ASCII") == 0 &&
             locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0) {
        /* Outside the C/POSIX locale, prefer UTF-8 over US-ASCII. */
        name = "UTF-8";
    }
#elif defined(U_BSD)
    if (uprv_strcmp(name, "CP949") == 0) {
        /* A similar codepage avoids issues with backslash and the won symbol. */
        name = "EUC-KR";
    }
#elif defined(U_HPUX)
    if (uprv_strcmp(name, "big5") == 0 &&
        locale != NULL && uprv_strcmp(locale, "zh_HK") == 0) {
        /*
         * HP extended big5 as hkbig5 even though the two are not compatible:
         * zh_TW.big5 is not the same charset as zh_HK.big5.
         */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (uprv_strcmp(name, "euc") == 0) {
        /* "euc" alone is underspecified on Linux; the locale decides. */
        if (locale != NULL) {
            if (uprv_strcmp(locale, "korean") == 0) {
                name = "EUC-KR";
            }
            else if (uprv_strcmp(locale, "japanese") == 0) {
                /* Same choice as for "eucjp" below. */
                name = "eucjis";
            }
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    }
    else if ((uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0) &&
             locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0) {
        /* Outside the C/POSIX locale, prefer UTF-8 over US-ASCII. */
        name = "UTF-8";
    }
    /*
     * With no locale, or with en_US_POSIX, "ANSI_X3.4-1968" and "US-ASCII"
     * are intentionally passed through unchanged.
     */
#endif

    /* An empty name is reported to the caller as NULL. */
    return (*name != 0) ? name : NULL;
}
1836
1837 static const char*
1838 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1839 {
1840     char localeBuf[100];
1841     const char *name = NULL;
1842     char *variant = NULL;
1843
1844     if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1845         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1846         uprv_strncpy(localeBuf, localeName, localeCapacity);
1847         localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1848         name = uprv_strncpy(buffer, name+1, buffCapacity);
1849         buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1850         if ((variant = (uprv_strchr(name, '@'))) != NULL) {
1851             *variant = 0;
1852         }
1853         name = remapPlatformDependentCodepage(localeBuf, name);
1854     }
1855     return name;
1856 }
1857 #endif
1858
/*
 * Determines the name of the platform's default codepage.
 *
 * The result points either to a string literal or to a function-local static
 * buffer, so it stays valid after the call; the static buffers are
 * overwritten by a later call.
 *
 * Per platform:
 *   OS400          "ibm-<ccsid>" taken from the job information (37 if
 *                  unavailable).
 *   OS390          nl_langinfo(CODESET) followed by
 *                  UCNV_SWAP_LFNL_OPTION_STRING.
 *   XP_MAC         "macintosh".
 *   U_WINDOWS      "windows-<ACP>".
 *   U_POSIX_LOCALE nl_langinfo (when available), then the charset part of the
 *                  POSIX locale ID, then "US-ASCII".
 *   otherwise      "US-ASCII".
 */
static const char*
int_getDefaultCodepage(void)
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned, so format it with an unsigned conversion. */
    sprintf(codepage, "ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room left for the codeset name once the option suffix is accounted for. */
    size_t nameCapacity = 63-strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), nameCapacity);
    /*
     * strncpy does not terminate the destination when the source fills it
     * completely; terminate explicitly so that strcat appends at the right
     * place even if the buffer holds data from an earlier call.
     */
    codepage[nameCapacity] = 0;
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NUL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() yields an unsigned value, so format it with an unsigned conversion. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        /* Locale handed to the remapper; NULL means "do not consider the locale". */
        const char *remapLocale = NULL;
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII. The NULL check is defensive: both helpers called
         * with localeName accept NULL, so the comparison should too.
         */
        if (localeName != NULL && uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            remapLocale = localeName;
        }
#endif
        codeset = remapPlatformDependentCodepage(remapLocale, codeset);

        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
       codesetName is still all zeros here: the block above writes to it only
       on the path that returns. */
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1954
1955
1956 U_CAPI const char*  U_EXPORT2
1957 uprv_getDefaultCodepage()
1958 {
1959     static char const  *name = NULL;
1960     umtx_lock(NULL);
1961     if (name == NULL) {
1962         name = int_getDefaultCodepage();
1963     }
1964     umtx_unlock(NULL);
1965     return name;
1966 }
1967 #endif  /* !U_CHARSET_IS_UTF8 */
1968
1969
1970 /* end of platform-specific implementation -------------- */
1971
1972 /* version handling --------------------------------------------------------- */
1973
1974 U_CAPI void U_EXPORT2
1975 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1976     char *end;
1977     uint16_t part=0;
1978
1979     if(versionArray==NULL) {
1980         return;
1981     }
1982
1983     if(versionString!=NULL) {
1984         for(;;) {
1985             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1986             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1987                 break;
1988             }
1989             versionString=end+1;
1990         }
1991     }
1992
1993     while(part<U_MAX_VERSION_LENGTH) {
1994         versionArray[part++]=0;
1995     }
1996 }
1997
1998 U_CAPI void U_EXPORT2
1999 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2000     if(versionArray!=NULL && versionString!=NULL) {
2001         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2002         int32_t len = u_strlen(versionString);
2003         if(len>U_MAX_VERSION_STRING_LENGTH) {
2004             len = U_MAX_VERSION_STRING_LENGTH;
2005         }
2006         u_UCharsToChars(versionString, versionChars, len);
2007         versionChars[len]=0;
2008         u_versionFromString(versionArray, versionChars);
2009     }
2010 }
2011
2012 U_CAPI void U_EXPORT2
2013 u_versionToString(UVersionInfo versionArray, char *versionString) {
2014     uint16_t count, part;
2015     uint8_t field;
2016
2017     if(versionString==NULL) {
2018         return;
2019     }
2020
2021     if(versionArray==NULL) {
2022         versionString[0]=0;
2023         return;
2024     }
2025
2026     /* count how many fields need to be written */
2027     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2028     }
2029
2030     if(count <= 1) {
2031         count = 2;
2032     }
2033
2034     /* write the first part */
2035     /* write the decimal field value */
2036     field=versionArray[0];
2037     if(field>=100) {
2038         *versionString++=(char)('0'+field/100);
2039         field%=100;
2040     }
2041     if(field>=10) {
2042         *versionString++=(char)('0'+field/10);
2043         field%=10;
2044     }
2045     *versionString++=(char)('0'+field);
2046
2047     /* write the following parts */
2048     for(part=1; part<count; ++part) {
2049         /* write a dot first */
2050         *versionString++=U_VERSION_DELIMITER;
2051
2052         /* write the decimal field value */
2053         field=versionArray[part];
2054         if(field>=100) {
2055             *versionString++=(char)('0'+field/100);
2056             field%=100;
2057         }
2058         if(field>=10) {
2059             *versionString++=(char)('0'+field/10);
2060             field%=10;
2061         }
2062         *versionString++=(char)('0'+field);
2063     }
2064
2065     /* NUL-terminate */
2066     *versionString=0;
2067 }
2068
2069 U_CAPI void U_EXPORT2
2070 u_getVersion(UVersionInfo versionArray) {
2071     u_versionFromString(versionArray, U_ICU_VERSION);
2072 }
2073
2074 /**
2075  * icucfg.h dependent code
2076  */
2077
2078 #if U_ENABLE_DYLOAD
2079
2080 #if defined(U_CHECK_DYLOAD)
2081
2082 #if defined(HAVE_DLOPEN)
2083
2084 #ifdef HAVE_DLFCN_H
2085 #ifdef __MVS__
2086 #ifndef __SUSV3
2087 #define __SUSV3 1
2088 #endif
2089 #endif
2090 #include <dlfcn.h>
2091 #endif
2092
2093 U_INTERNAL void * U_EXPORT2
2094 uprv_dl_open(const char *libName, UErrorCode *status) {
2095   void *ret = NULL;
2096   if(U_FAILURE(*status)) return ret;
2097   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2098   if(ret==NULL) {
2099 #ifndef U_TRACE_DYLOAD
2100     perror("dlopen");
2101 #endif
2102     *status = U_MISSING_RESOURCE_ERROR;
2103   }
2104   return ret;
2105 }
2106
2107 U_INTERNAL void U_EXPORT2
2108 uprv_dl_close(void *lib, UErrorCode *status) {
2109   if(U_FAILURE(*status)) return;
2110   dlclose(lib);
2111 }
2112
2113 U_INTERNAL void* U_EXPORT2
2114 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2115   void *ret = NULL;
2116   if(U_FAILURE(*status)) return ret;
2117   ret = dlsym(lib, sym);
2118   if(ret == NULL) {
2119     *status = U_MISSING_RESOURCE_ERROR;
2120   }
2121   return ret;
2122 }
2123
2124 #else
2125
2126 /* null (nonexistent) implementation. */
2127
2128 U_INTERNAL void * U_EXPORT2
2129 uprv_dl_open(const char *libName, UErrorCode *status) {
2130   if(U_FAILURE(*status)) return NULL;
2131   *status = U_UNSUPPORTED_ERROR;
2132   return NULL;
2133 }
2134
2135 U_INTERNAL void U_EXPORT2
2136 uprv_dl_close(void *lib, UErrorCode *status) {
2137   if(U_FAILURE(*status)) return;
2138   *status = U_UNSUPPORTED_ERROR;
2139   return;
2140 }
2141
2142
2143 U_INTERNAL void* U_EXPORT2
2144 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2145   if(U_FAILURE(*status)) return NULL;
2146   *status = U_UNSUPPORTED_ERROR;
2147   return NULL;
2148 }
2149
2150
2151
2152 #endif
2153
2154 #elif defined U_WINDOWS
2155
2156 U_INTERNAL void * U_EXPORT2
2157 uprv_dl_open(const char *libName, UErrorCode *status) {
2158   HMODULE lib = NULL;
2159
2160   if(U_FAILURE(*status)) return NULL;
2161
2162   lib = LoadLibrary(libName);
2163
2164   if(lib==NULL) {
2165     *status = U_MISSING_RESOURCE_ERROR;
2166   }
2167
2168   return (void*)lib;
2169 }
2170
2171 U_INTERNAL void U_EXPORT2
2172 uprv_dl_close(void *lib, UErrorCode *status) {
2173   HMODULE handle = (HMODULE)lib;
2174   if(U_FAILURE(*status)) return;
2175
2176   FreeLibrary(handle);
2177
2178   return;
2179 }
2180
2181
2182 U_INTERNAL void* U_EXPORT2
2183 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2184   HMODULE handle = (HMODULE)lib;
2185   void * addr = NULL;
2186
2187   if(U_FAILURE(*status) || lib==NULL) return NULL;
2188
2189   addr = GetProcAddress(handle, sym);
2190
2191   if(addr==NULL) {
2192     DWORD lastError = GetLastError();
2193     if(lastError == ERROR_PROC_NOT_FOUND) {
2194       *status = U_MISSING_RESOURCE_ERROR;
2195     } else {
2196       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2197     }
2198   }
2199
2200   return addr;
2201 }
2202
2203
2204 #else
2205
2206 /* No dynamic loading set. */
2207
2208 U_INTERNAL void * U_EXPORT2
2209 uprv_dl_open(const char *libName, UErrorCode *status) {
2210     if(U_FAILURE(*status)) return NULL;
2211     *status = U_UNSUPPORTED_ERROR;
2212     return NULL;
2213 }
2214
2215 U_INTERNAL void U_EXPORT2
2216 uprv_dl_close(void *lib, UErrorCode *status) {
2217     if(U_FAILURE(*status)) return;
2218     *status = U_UNSUPPORTED_ERROR;
2219     return;
2220 }
2221
2222
2223 U_INTERNAL void* U_EXPORT2
2224 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2225     if(U_FAILURE(*status)) return NULL;
2226     *status = U_UNSUPPORTED_ERROR;
2227     return NULL;
2228 }
2229
2230
2231 #endif
2232
2233 #endif /* U_ENABLE_DYLOAD */
2234
2235 /*
2236  * Hey, Emacs, please set the following:
2237  *
2238  * Local Variables:
2239  * indent-tabs-mode: nil
2240  * End:
2241  *
2242  */