2 ******************************************************************************
4 * Copyright (C) 1997-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
40 // Defines _XOPEN_SOURCE for access to POSIX functions.
41 // Must be before any other #includes.
42 #include "uposixdefs.h"
44 /* include ICU headers */
45 #include "unicode/utypes.h"
46 #include "unicode/putil.h"
47 #include "unicode/ustring.h"
56 /* Include standard headers. */
64 #ifndef U_COMMON_IMPLEMENTATION
65 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
69 /* include system headers */
70 #if U_PLATFORM_USES_ONLY_WIN32_API
72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
74 * to use native APIs as much as possible?
76 # define WIN32_LEAN_AND_MEAN
84 #elif U_PLATFORM == U_PF_OS400
86 # include <qusec.h> /* error code structure */
87 # include <qusrjobi.h>
88 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
89 # include <mih/testptr.h> /* For uprv_maximumPtr */
90 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
92 # include <IntlResources.h>
95 # include <MacTypes.h>
96 # include <TextUtils.h>
97 # define ICU_NO_USER_DATA_OVERRIDE 1
98 #elif U_PLATFORM == U_PF_OS390
99 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
100 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
103 # if U_PLATFORM == U_PF_SOLARIS
108 #elif U_PLATFORM == U_PF_QNX
109 # include <sys/neutrino.h>
112 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
113 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
114 #undef __STRICT_ANSI__
118 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
122 #if !U_PLATFORM_USES_ONLY_WIN32_API
123 #include <sys/time.h>
127 * Only include langinfo.h if we have a way to get the codeset. If we later
128 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
132 #if U_HAVE_NL_LANGINFO_CODESET
133 #include <langinfo.h>
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
140 #if U_PLATFORM_IMPLEMENTS_POSIX
141 # if U_PLATFORM == U_PF_OS400
142 # define HAVE_DLFCN_H 0
143 # define HAVE_DLOPEN 0
145 # ifndef HAVE_DLFCN_H
146 # define HAVE_DLFCN_H 1
149 # define HAVE_DLOPEN 1
152 # ifndef HAVE_GETTIMEOFDAY
153 # define HAVE_GETTIMEOFDAY 1
156 # define HAVE_DLFCN_H 0
157 # define HAVE_DLOPEN 0
158 # define HAVE_GETTIMEOFDAY 0
161 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
163 /* Define the extension for data files, again... */
164 #define DATA_TYPE "dat"
166 /* Leave this copyright notice here! */
167 static const char copyright
[] = U_COPYRIGHT_STRING
;
169 /* floating point implementations ------------------------------------------- */
171 /* We return QNAN rather than SNAN*/
172 #define SIGN 0x80000000U
174 /* Make it easy to define certain types of constants */
176 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
178 } BitPatternConversion
;
179 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
180 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
182 /*---------------------------------------------------------------------------
184 Our general strategy is to assume we're on a POSIX platform. Platforms which
185 are non-POSIX must declare themselves so. The default POSIX implementation
186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
188 ---------------------------------------------------------------------------*/
190 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
191 # undef U_POSIX_LOCALE
193 # define U_POSIX_LOCALE 1
197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
202 u_topNBytesOfDouble(double* d
, int n
)
207 return (char*)(d
+ 1) - n
;
212 u_bottomNBytesOfDouble(double* d
, int n
)
215 return (char*)(d
+ 1) - n
;
220 #endif /* !IEEE_754 */
224 u_signBit(double d
) {
227 hiByte
= *(uint8_t *)&d
;
229 hiByte
= *(((uint8_t *)&d
) + sizeof(double) - 1);
231 return (hiByte
& 0x80) != 0;
237 #if defined (U_DEBUG_FAKETIME)
238 /* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
241 UDate fakeClock_t0
= 0; /** Time to start the clock from **/
242 UDate fakeClock_dt
= 0; /** Offset (fake time - real time) **/
243 UBool fakeClock_set
= FALSE
; /** True if fake clock has spun up **/
244 static UMutex fakeClockMutex
= U_MUTEX_INTIALIZER
;
246 static UDate
getUTCtime_real() {
247 struct timeval posixTime
;
248 gettimeofday(&posixTime
, NULL
);
249 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
252 static UDate
getUTCtime_fake() {
253 umtx_lock(&fakeClockMutex
);
255 UDate real
= getUTCtime_real();
256 const char *fake_start
= getenv("U_FAKETIME_START");
257 if((fake_start
!=NULL
) && (fake_start
[0]!=0)) {
258 sscanf(fake_start
,"%lf",&fakeClock_t0
);
259 fakeClock_dt
= fakeClock_t0
- real
;
260 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262 fakeClock_t0
, fake_start
, fakeClock_dt
, real
);
265 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
268 fakeClock_set
= TRUE
;
270 umtx_unlock(&fakeClockMutex
);
272 return getUTCtime_real() + fakeClock_dt
;
276 #if U_PLATFORM_USES_ONLY_WIN32_API
280 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
282 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
283 #define EPOCH_BIAS INT64_C(116444736000000000)
284 #define HECTONANOSECOND_PER_MILLISECOND 10000
288 /*---------------------------------------------------------------------------
289 Universal Implementations
290 These are designed to work on all platforms. Try these, and if they
291 don't work on your platform, then special case your platform with new
293 ---------------------------------------------------------------------------*/
295 U_CAPI UDate U_EXPORT2
298 #if defined(U_DEBUG_FAKETIME)
299 return getUTCtime_fake(); /* Hook for overriding the clock */
301 return uprv_getRawUTCtime();
305 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306 U_CAPI UDate U_EXPORT2
309 #if U_PLATFORM == U_PF_CLASSIC_MACOS
313 uprv_memset( &tmrec
, 0, sizeof(tmrec
) );
317 t1
= mktime(&tmrec
); /* seconds of 1/1/1970*/
320 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
321 t2
= mktime(&tmrec
); /* seconds of current GMT*/
322 return (UDate
)(t2
- t1
) * U_MILLIS_PER_SECOND
; /* GMT (or UTC) in seconds since 1970*/
323 #elif U_PLATFORM_USES_ONLY_WIN32_API
325 FileTimeConversion winTime
;
326 GetSystemTimeAsFileTime(&winTime
.fileTime
);
327 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
330 #if HAVE_GETTIMEOFDAY
331 struct timeval posixTime
;
332 gettimeofday(&posixTime
, NULL
);
333 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
337 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
343 /*-----------------------------------------------------------------------------
345 These methods detect and return NaN and infinity values for doubles
346 conforming to IEEE 754. Platforms which support this standard include X86,
347 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
348 If this doesn't work on your platform, you have non-IEEE floating-point, and
349 will need to code your own versions. A naive implementation is to return 0.0
350 for getNaN and getInfinity, and false for isNaN and isInfinite.
351 ---------------------------------------------------------------------------*/
353 U_CAPI UBool U_EXPORT2
354 uprv_isNaN(double number
)
357 BitPatternConversion convertedNumber
;
358 convertedNumber
.d64
= number
;
359 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
360 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
362 #elif U_PLATFORM == U_PF_OS390
363 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
365 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
368 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
369 (lowBits
== 0x00000000L
);
372 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
373 /* you'll need to replace this default implementation with what's correct*/
374 /* for your platform.*/
375 return number
!= number
;
379 U_CAPI UBool U_EXPORT2
380 uprv_isInfinite(double number
)
383 BitPatternConversion convertedNumber
;
384 convertedNumber
.d64
= number
;
385 /* Infinity is exactly 0x7FF0000000000000U. */
386 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
387 #elif U_PLATFORM == U_PF_OS390
388 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
390 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
393 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
396 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
397 /* value, you'll need to replace this default implementation with what's*/
398 /* correct for your platform.*/
399 return number
== (2.0 * number
);
403 U_CAPI UBool U_EXPORT2
404 uprv_isPositiveInfinity(double number
)
406 #if IEEE_754 || U_PLATFORM == U_PF_OS390
407 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
409 return uprv_isInfinite(number
);
413 U_CAPI UBool U_EXPORT2
414 uprv_isNegativeInfinity(double number
)
416 #if IEEE_754 || U_PLATFORM == U_PF_OS390
417 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
420 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
422 return((highBits
& SIGN
) && uprv_isInfinite(number
));
427 U_CAPI
double U_EXPORT2
430 #if IEEE_754 || U_PLATFORM == U_PF_OS390
433 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
434 /* you'll need to replace this default implementation with what's correct*/
435 /* for your platform.*/
440 U_CAPI
double U_EXPORT2
443 #if IEEE_754 || U_PLATFORM == U_PF_OS390
446 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
447 /* value, you'll need to replace this default implementation with what's*/
448 /* correct for your platform.*/
453 U_CAPI
double U_EXPORT2
459 U_CAPI
double U_EXPORT2
465 U_CAPI
double U_EXPORT2
468 return uprv_floor(x
+ 0.5);
471 U_CAPI
double U_EXPORT2
477 U_CAPI
double U_EXPORT2
478 uprv_modf(double x
, double* y
)
483 U_CAPI
double U_EXPORT2
484 uprv_fmod(double x
, double y
)
489 U_CAPI
double U_EXPORT2
490 uprv_pow(double x
, double y
)
492 /* This is declared as "double pow(double x, double y)" */
496 U_CAPI
double U_EXPORT2
497 uprv_pow10(int32_t x
)
499 return pow(10.0, (double)x
);
502 U_CAPI
double U_EXPORT2
503 uprv_fmax(double x
, double y
)
506 /* first handle NaN*/
507 if(uprv_isNaN(x
) || uprv_isNaN(y
))
508 return uprv_getNaN();
510 /* check for -0 and 0*/
511 if(x
== 0.0 && y
== 0.0 && u_signBit(x
))
516 /* this should work for all flt point w/o NaN and Inf special cases */
517 return (x
> y
? x
: y
);
520 U_CAPI
double U_EXPORT2
521 uprv_fmin(double x
, double y
)
524 /* first handle NaN*/
525 if(uprv_isNaN(x
) || uprv_isNaN(y
))
526 return uprv_getNaN();
528 /* check for -0 and 0*/
529 if(x
== 0.0 && y
== 0.0 && u_signBit(y
))
534 /* this should work for all flt point w/o NaN and Inf special cases */
535 return (x
> y
? y
: x
);
539 * Truncates the given double.
540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
541 * This is different than calling floor() or ceil():
542 * floor(3.3) = 3, floor(-3.3) = -4
543 * ceil(3.3) = 4, ceil(-3.3) = -3
545 U_CAPI
double U_EXPORT2
549 /* handle error cases*/
551 return uprv_getNaN();
552 if(uprv_isInfinite(d
))
553 return uprv_getInfinity();
555 if(u_signBit(d
)) /* Signbit() picks up -0.0; d<0 does not. */
561 return d
>= 0 ? floor(d
) : ceil(d
);
567 * Return the largest positive number that can be represented by an integer
568 * type of arbitrary bit length.
570 U_CAPI
double U_EXPORT2
571 uprv_maxMantissa(void)
573 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
576 U_CAPI
double U_EXPORT2
582 U_CAPI
void * U_EXPORT2
583 uprv_maximumPtr(void * base
)
585 #if U_PLATFORM == U_PF_OS400
587 * With the provided function we should never be out of range of a given segment
588 * (a traditional/typical segment that is). Our segments have 5 bytes for the
589 * id and 3 bytes for the offset. The key is that the casting takes care of
590 * only retrieving the offset portion minus x1000. Hence, the smallest offset
591 * seen in a program is x001000 and when casted to an int would be 0.
592 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
594 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
595 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
596 * This function determines the activation based on the pointer that is passed in and
597 * calculates the appropriate maximum available size for
598 * each pointer type (TERASPACE and non-TERASPACE)
600 * Unlike other operating systems, the pointer model isn't determined at
601 * compile time on i5/OS.
603 if ((base
!= NULL
) && (_TESTPTR(base
, _C_TERASPACE_CHECK
))) {
604 /* if it is a TERASPACE pointer the max is 2GB - 4k */
605 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0x7fffefff)));
607 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
608 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0xffefff)));
611 return U_MAX_PTR(base
);
615 /*---------------------------------------------------------------------------
616 Platform-specific Implementations
617 Try these, and if they don't work on your platform, then special case your
618 platform with new implementations.
619 ---------------------------------------------------------------------------*/
621 /* Generic time zone layer -------------------------------------------------- */
623 /* Time zone utilities */
624 U_CAPI
void U_EXPORT2
630 /* no initialization*/
634 U_CAPI
int32_t U_EXPORT2
646 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
647 dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
648 t1
= mktime(&tmrec
); /* local time in seconds*/
649 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
650 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
652 /* imitate NT behaviour, which returns same timezone offset to GMT for
654 This does not work on all platforms. For instance, on glibc on Linux
655 and on Mac OS 10.5, tdiff calculated above remains the same
656 regardless of whether DST is in effect or not. However, U_TIMEZONE
657 is defined on those platforms and this code is not reached so that
658 we can leave this alone. If there's a platform behaving
659 like glibc that uses this code, we need to add platform-dependent
660 preprocessor here. */
667 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
668 some platforms need to have it declared here. */
670 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
671 /* RS6000 and others reject char **tzname. */
672 extern U_IMPORT
char *U_TZNAME
[];
675 #if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
676 /* These platforms are likely to use Olson timezone IDs. */
677 #define CHECK_LOCALTIME_LINK 1
678 #if U_PLATFORM_IS_DARWIN_BASED
680 #define TZZONEINFO (TZDIR "/")
681 #elif U_PLATFORM == U_PF_SOLARIS
682 #define TZDEFAULT "/etc/localtime"
683 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
684 #define TZ_ENV_CHECK "localtime"
686 #define TZDEFAULT "/etc/localtime"
687 #define TZZONEINFO "/usr/share/zoneinfo/"
690 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
691 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
692 symlinked to /etc/localtime, which makes searchForTZFile return
693 'localtime' when it's the first match. */
694 #define TZFILE_SKIP2 "localtime"
695 #define SEARCH_TZFILE
696 #include <dirent.h> /* Needed to search through system timezone files */
698 static char gTimeZoneBuffer
[PATH_MAX
];
699 static char *gTimeZoneBufferPtr
= NULL
;
702 #if !U_PLATFORM_USES_ONLY_WIN32_API
/* True when ch is not an ASCII decimal digit. The argument is evaluated twice. */
#define isNonDigit(ch) ((ch) < '0' || '9' < (ch))
704 static UBool
isValidOlsonID(const char *id
) {
707 /* Determine if this is something like Iceland (Olson ID)
708 or AST4ADT (non-Olson ID) */
709 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
713 /* If we went through the whole string, then it might be okay.
714 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
715 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
716 The rest of the time it could be an Olson ID. George */
717 return (UBool
)(id
[idx
] == 0
718 || uprv_strcmp(id
, "PST8PDT") == 0
719 || uprv_strcmp(id
, "MST7MDT") == 0
720 || uprv_strcmp(id
, "CST6CDT") == 0
721 || uprv_strcmp(id
, "EST5EDT") == 0);
724 /* On some Unix-like OS, 'posix' subdirectory in
725 /usr/share/zoneinfo replicates the top-level contents. 'right'
726 subdirectory has the same set of files, but individual files
727 are different from those in the top-level directory or 'posix'
728 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
730 When the first match for /etc/localtime is in either of them
731 (usually in posix because 'right' has different file contents),
732 or TZ environment variable points to one of them, createTimeZone
733 fails because, say, 'posix/America/New_York' is not an Olson
734 timezone id ('America/New_York' is). So, we have to skip
735 'posix/' and 'right/' at the beginning. */
/*
 * Advances *id past a leading "posix/" or "right/" (6 characters each) so
 * that the remainder can be used as a plain zone ID. *id is left untouched
 * when neither prefix is present.
 */
static void skipZoneIDPrefix(const char** id) {
    if (uprv_strncmp(*id, "posix/", 6) == 0
        || uprv_strncmp(*id, "right/", 6) == 0)
    {
        *id += 6;
    }
}
745 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
/* Converts an offset in hours to seconds; the argument may be any arithmetic expression. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))
748 typedef struct OffsetZoneMapping
{
749 int32_t offsetSeconds
;
750 int32_t daylightType
; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
756 enum { U_DAYLIGHT_NONE
=0,U_DAYLIGHT_JUNE
=1,U_DAYLIGHT_DECEMBER
=2 };
759 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
760 and maps it to an Olson ID.
761 Before adding anything to this list, take a look at
762 icu/source/tools/tzcode/tz.alias
763 Sometimes no daylight savings (0) is important to define due to aliases.
764 This list can be tested with icu/source/test/compat/tzone.pl
765 More values could be added to daylightType to increase precision.
767 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS
[] = {
768 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
769 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
770 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
771 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
772 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
773 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
774 {-36000, 2, "EST", "EST", "Australia/Sydney"},
775 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
776 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
777 {-34200, 2, "CST", "CST", "Australia/South"},
778 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
779 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
780 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
781 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
782 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
783 {-28800, 2, "WST", "WST", "Australia/West"},
784 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
785 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
786 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
787 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
788 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
789 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
790 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
791 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
792 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
793 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
794 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
795 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
796 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
797 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
798 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
799 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
800 {0, 1, "GMT", "IST", "Europe/Dublin"},
801 {0, 1, "GMT", "BST", "Europe/London"},
802 {0, 0, "WET", "WEST", "Africa/Casablanca"},
803 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
804 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
805 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
806 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
807 {10800, 2, "UYT", "UYST", "America/Montevideo"},
808 {10800, 1, "WGT", "WGST", "America/Godthab"},
809 {10800, 2, "BRT", "BRST", "Brazil/East"},
810 {12600, 1, "NST", "NDT", "America/St_Johns"},
811 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
812 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
813 {14400, 2, "CLT", "CLST", "Chile/Continental"},
814 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
815 {14400, 2, "PYT", "PYST", "America/Asuncion"},
816 {18000, 1, "CST", "CDT", "America/Havana"},
817 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
818 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
819 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
820 {21600, 0, "CST", "CDT", "America/Guatemala"},
821 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
822 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
823 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
824 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
825 {32400, 1, "AKST", "AKDT", "US/Alaska"},
826 {36000, 1, "HAST", "HADT", "US/Aleutian"}
829 /*#define DEBUG_TZNAME*/
831 static const char* remapShortTimeZone(const char *stdID
, const char *dstID
, int32_t daylightType
, int32_t offset
)
835 fprintf(stderr
, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID
, dstID
, daylightType
, offset
);
837 for (idx
= 0; idx
< LENGTHOF(OFFSET_ZONE_MAPPINGS
); idx
++)
839 if (offset
== OFFSET_ZONE_MAPPINGS
[idx
].offsetSeconds
840 && daylightType
== OFFSET_ZONE_MAPPINGS
[idx
].daylightType
841 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].stdID
, stdID
) == 0
842 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].dstID
, dstID
) == 0)
844 return OFFSET_ZONE_MAPPINGS
[idx
].olsonID
;
852 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
853 #define MAX_READ_SIZE 512
855 typedef struct DefaultTZInfo
{
856 char* defaultTZBuffer
;
857 int64_t defaultTZFileSize
;
858 FILE* defaultTZFilePtr
;
859 UBool defaultTZstatus
;
860 int32_t defaultTZPosition
;
864 * This method compares the two files given to see if they are a match.
865 * It is currently used to compare two TZ files.
867 static UBool
compareBinaryFiles(const char* defaultTZFileName
, const char* TZFileName
, DefaultTZInfo
* tzInfo
) {
870 int64_t sizeFileLeft
;
871 int32_t sizeFileRead
;
872 int32_t sizeFileToRead
;
873 char bufferFile
[MAX_READ_SIZE
];
876 if (tzInfo
->defaultTZFilePtr
== NULL
) {
877 tzInfo
->defaultTZFilePtr
= fopen(defaultTZFileName
, "r");
879 file
= fopen(TZFileName
, "r");
881 tzInfo
->defaultTZPosition
= 0; /* reset position to begin search */
883 if (file
!= NULL
&& tzInfo
->defaultTZFilePtr
!= NULL
) {
884 /* First check that the file size are equal. */
885 if (tzInfo
->defaultTZFileSize
== 0) {
886 fseek(tzInfo
->defaultTZFilePtr
, 0, SEEK_END
);
887 tzInfo
->defaultTZFileSize
= ftell(tzInfo
->defaultTZFilePtr
);
889 fseek(file
, 0, SEEK_END
);
890 sizeFile
= ftell(file
);
891 sizeFileLeft
= sizeFile
;
893 if (sizeFile
!= tzInfo
->defaultTZFileSize
) {
896 /* Store the data from the files in seperate buffers and
897 * compare each byte to determine equality.
899 if (tzInfo
->defaultTZBuffer
== NULL
) {
900 rewind(tzInfo
->defaultTZFilePtr
);
901 tzInfo
->defaultTZBuffer
= (char*)uprv_malloc(sizeof(char) * tzInfo
->defaultTZFileSize
);
902 sizeFileRead
= fread(tzInfo
->defaultTZBuffer
, 1, tzInfo
->defaultTZFileSize
, tzInfo
->defaultTZFilePtr
);
905 while(sizeFileLeft
> 0) {
906 uprv_memset(bufferFile
, 0, MAX_READ_SIZE
);
907 sizeFileToRead
= sizeFileLeft
< MAX_READ_SIZE
? sizeFileLeft
: MAX_READ_SIZE
;
909 sizeFileRead
= fread(bufferFile
, 1, sizeFileToRead
, file
);
910 if (memcmp(tzInfo
->defaultTZBuffer
+ tzInfo
->defaultTZPosition
, bufferFile
, sizeFileRead
) != 0) {
914 sizeFileLeft
-= sizeFileRead
;
915 tzInfo
->defaultTZPosition
+= sizeFileRead
;
929 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
931 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
934 static char SEARCH_TZFILE_RESULT
[MAX_PATH_SIZE
] = "";
935 static char* searchForTZFile(const char* path
, DefaultTZInfo
* tzInfo
) {
936 char curpath
[MAX_PATH_SIZE
];
937 DIR* dirp
= opendir(path
);
939 struct dirent
* dirEntry
= NULL
;
946 /* Save the current path */
947 uprv_memset(curpath
, 0, MAX_PATH_SIZE
);
948 uprv_strcpy(curpath
, path
);
950 /* Check each entry in the directory. */
951 while((dirEntry
= readdir(dirp
)) != NULL
) {
952 const char* dirName
= dirEntry
->d_name
;
953 if (uprv_strcmp(dirName
, SKIP1
) != 0 && uprv_strcmp(dirName
, SKIP2
) != 0) {
954 /* Create a newpath with the new entry to test each entry in the directory. */
955 char newpath
[MAX_PATH_SIZE
];
956 uprv_strcpy(newpath
, curpath
);
957 uprv_strcat(newpath
, dirName
);
959 if ((subDirp
= opendir(newpath
)) != NULL
) {
960 /* If this new path is a directory, make a recursive call with the newpath. */
962 uprv_strcat(newpath
, "/");
963 result
= searchForTZFile(newpath
, tzInfo
);
965 Have to get out here. Otherwise, we'd keep looking
966 and return the first match in the top-level directory
967 if there's a match in the top-level. If not, this function
968 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
969 It worked without this in most cases because we have a fallback of calling
970 localtime_r to figure out the default timezone.
974 } else if (uprv_strcmp(TZFILE_SKIP
, dirName
) != 0 && uprv_strcmp(TZFILE_SKIP2
, dirName
) != 0) {
975 if(compareBinaryFiles(TZDEFAULT
, newpath
, tzInfo
)) {
976 const char* zoneid
= newpath
+ (sizeof(TZZONEINFO
)) - 1;
977 skipZoneIDPrefix(&zoneid
);
978 uprv_strcpy(SEARCH_TZFILE_RESULT
, zoneid
);
979 result
= SEARCH_TZFILE_RESULT
;
980 /* Get out after the first one found. */
990 U_CAPI
const char* U_EXPORT2
993 const char *tzid
= NULL
;
994 #if U_PLATFORM_USES_ONLY_WIN32_API
995 tzid
= uprv_detectWindowsTimeZone();
1002 /*#if U_PLATFORM_IS_DARWIN_BASED
1005 tzid = getenv("TZFILE");
1011 /* This code can be temporarily disabled to test tzname resolution later on. */
1012 #ifndef DEBUG_TZNAME
1013 tzid
= getenv("TZ");
1014 if (tzid
!= NULL
&& isValidOlsonID(tzid
)
1015 #if U_PLATFORM == U_PF_SOLARIS
1016 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1017 && uprv_strcmp(tzid
, TZ_ENV_CHECK
) != 0
1020 /* This might be a good Olson ID. */
1021 skipZoneIDPrefix(&tzid
);
1024 /* else U_TZNAME will give a better result. */
1027 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1028 /* Caller must handle threading issues */
1029 if (gTimeZoneBufferPtr
== NULL
) {
1031 This is a trick to look at the name of the link to get the Olson ID
1032 because the tzfile contents is underspecified.
1033 This isn't guaranteed to work because it may not be a symlink.
1035 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
));
1037 int32_t tzZoneInfoLen
= uprv_strlen(TZZONEINFO
);
1038 gTimeZoneBuffer
[ret
] = 0;
1039 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO
, tzZoneInfoLen
) == 0
1040 && isValidOlsonID(gTimeZoneBuffer
+ tzZoneInfoLen
))
1042 return (gTimeZoneBufferPtr
= gTimeZoneBuffer
+ tzZoneInfoLen
);
1045 #if defined(SEARCH_TZFILE)
1046 DefaultTZInfo
* tzInfo
= (DefaultTZInfo
*)uprv_malloc(sizeof(DefaultTZInfo
));
1047 if (tzInfo
!= NULL
) {
1048 tzInfo
->defaultTZBuffer
= NULL
;
1049 tzInfo
->defaultTZFileSize
= 0;
1050 tzInfo
->defaultTZFilePtr
= NULL
;
1051 tzInfo
->defaultTZstatus
= FALSE
;
1052 tzInfo
->defaultTZPosition
= 0;
1054 gTimeZoneBufferPtr
= searchForTZFile(TZZONEINFO
, tzInfo
);
1056 /* Free previously allocated memory */
1057 if (tzInfo
->defaultTZBuffer
!= NULL
) {
1058 uprv_free(tzInfo
->defaultTZBuffer
);
1060 if (tzInfo
->defaultTZFilePtr
!= NULL
) {
1061 fclose(tzInfo
->defaultTZFilePtr
);
1066 if (gTimeZoneBufferPtr
!= NULL
&& isValidOlsonID(gTimeZoneBufferPtr
)) {
1067 return gTimeZoneBufferPtr
;
1073 return gTimeZoneBufferPtr
;
1079 #if U_PLATFORM_USES_ONLY_WIN32_API
1080 /* The return value is free'd in timezone.cpp on Windows because
1081 * the other code path returns a pointer to a heap location. */
1082 return uprv_strdup(U_TZNAME
[n
]);
1085 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1086 So we remap the abbreviation to an olson ID.
1088 Since Windows exposes a little more timezone information,
1089 we normally don't use this code on Windows because
1090 uprv_detectWindowsTimeZone should have already given the correct answer.
1093 struct tm juneSol
, decemberSol
;
1095 static const time_t juneSolstice
=1182478260; /*2007-06-21 18:11 UT*/
1096 static const time_t decemberSolstice
=1198332540; /*2007-12-22 06:09 UT*/
1098 /* This probing will tell us when daylight savings occurs. */
1099 localtime_r(&juneSolstice
, &juneSol
);
1100 localtime_r(&decemberSolstice
, &decemberSol
);
1101 if(decemberSol
.tm_isdst
> 0) {
1102 daylightType
= U_DAYLIGHT_DECEMBER
;
1103 } else if(juneSol
.tm_isdst
> 0) {
1104 daylightType
= U_DAYLIGHT_JUNE
;
1106 daylightType
= U_DAYLIGHT_NONE
;
1108 tzid
= remapShortTimeZone(U_TZNAME
[0], U_TZNAME
[1], daylightType
, uprv_timezone());
1120 /* Get and set the ICU data directory --------------------------------------- */
1122 static char *gDataDirectory
= NULL
;
1124 static char *gCorrectedPOSIXLocale
= NULL
; /* Heap allocated */
1127 static UBool U_CALLCONV
putil_cleanup(void)
1129 if (gDataDirectory
&& *gDataDirectory
) {
1130 uprv_free(gDataDirectory
);
1132 gDataDirectory
= NULL
;
1134 if (gCorrectedPOSIXLocale
) {
1135 uprv_free(gCorrectedPOSIXLocale
);
1136 gCorrectedPOSIXLocale
= NULL
;
1143 * Set the data directory.
1144 * Make a copy of the passed string, and set the global data dir to point to it.
1145 * TODO: see bug #2849, regarding thread safety.
1147 U_CAPI
void U_EXPORT2
1148 u_setDataDirectory(const char *directory
) {
1152 if(directory
==NULL
|| *directory
==0) {
1153 /* A small optimization to prevent the malloc and copy when the
1154 shared library is used, and this is a way to make sure that NULL
1157 newDataDir
= (char *)"";
1160 length
=(int32_t)uprv_strlen(directory
);
1161 newDataDir
= (char *)uprv_malloc(length
+ 2);
1162 /* Exit out if newDataDir could not be created. */
1163 if (newDataDir
== NULL
) {
1166 uprv_strcpy(newDataDir
, directory
);
1168 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1171 while(p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) {
1172 *p
= U_FILE_SEP_CHAR
;
1179 if (gDataDirectory
&& *gDataDirectory
) {
1180 uprv_free(gDataDirectory
);
1182 gDataDirectory
= newDataDir
;
1183 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1187 U_CAPI UBool U_EXPORT2
1188 uprv_pathIsAbsolute(const char *path
)
1190 if(!path
|| !*path
) {
1194 if(*path
== U_FILE_SEP_CHAR
) {
1198 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1199 if(*path
== U_FILE_ALT_SEP_CHAR
) {
1204 #if U_PLATFORM_USES_ONLY_WIN32_API
1205 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
1206 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
1215 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1216 until some client wrapper makefiles are updated */
1217 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1218 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1219 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1223 U_CAPI
const char * U_EXPORT2
1224 u_getDataDirectory(void) {
1225 const char *path
= NULL
;
1226 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1227 char datadir_path_buffer
[PATH_MAX
];
1230 /* if we have the directory, then return it immediately */
1231 UMTX_CHECK(NULL
, gDataDirectory
, path
);
1238 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1239 override ICU's data with the ICU_DATA environment variable. This prevents
1240 problems where multiple custom copies of ICU's specific version of data
1241 are installed on a system. Either the application must define the data
1242 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1243 ICU, set the data with udata_setCommonData or trust that all of the
1244 required data is contained in ICU's data library that contains
1245 the entry point defined by U_ICUDATA_ENTRY_POINT.
1247 There may also be some platforms where environment variables
1250 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1251 /* First try to get the environment variable */
1252 path
=getenv("ICU_DATA");
1255 /* ICU_DATA_DIR may be set as a compile option.
1256 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1257 * and is used only when data is built in archive mode eliminating the need
1258 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1259 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1260 * set their own path.
1262 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1263 if(path
==NULL
|| *path
==0) {
1264 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1265 const char *prefix
= getenv(ICU_DATA_DIR_PREFIX_ENV_VAR
);
1267 # ifdef ICU_DATA_DIR
1270 path
=U_ICU_DATA_DEFAULT_DIR
;
1272 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1273 if (prefix
!= NULL
) {
1274 snprintf(datadir_path_buffer
, PATH_MAX
, "%s%s", prefix
, path
);
1275 path
=datadir_path_buffer
;
1282 /* It looks really bad, set it to something. */
1286 u_setDataDirectory(path
);
1287 return gDataDirectory
;
1294 /* Macintosh-specific locale information ------------------------------------ */
1295 #if U_PLATFORM == U_PF_CLASSIC_MACOS
1301 int32_t date_region
;
1302 const char* posixID
;
1305 /* Todo: This will be updated with a newer version from www.unicode.org web
1306 page when it's available.*/
1307 #define MAC_LC_MAGIC_NUMBER -5
1308 #define MAC_LC_INIT_NUMBER -9
1310 static const mac_lc_rec mac_lc_recs
[] = {
1311 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 0, "en_US",
1313 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 1, "fr_FR",
1315 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 2, "en_GB",
1317 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 3, "de_DE",
1319 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 4, "it_IT",
1321 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 5, "nl_NL",
1323 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 6, "fr_BE",
1324 /* French for Belgium or Luxembourg*/
1325 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 7, "sv_SE",
1327 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 9, "da_DK",
1329 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 10, "pt_PT",
1331 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 11, "fr_CA",
1333 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 13, "is_IS",
1335 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 14, "ja_JP",
1337 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 15, "en_AU",
1339 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 16, "ar_AE",
1340 /* the Arabic world (?)*/
1341 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 17, "fi_FI",
1343 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 18, "fr_CH",
1344 /* French for Switzerland*/
1345 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 19, "de_CH",
1346 /* German for Switzerland*/
1347 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 20, "el_GR",
1349 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 21, "is_IS",
1351 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1353 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1355 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 24, "tr_TR",
1357 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 25, "sh_YU",
1358 /* Croatian system for Yugoslavia*/
1359 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1360 /* Hindi system for India*/
1361 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1363 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 41, "lt_LT",
1365 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 42, "pl_PL",
1367 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 43, "hu_HU",
1369 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 44, "et_EE",
1371 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 45, "lv_LV",
1373 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1374 /* Lapland [Ask Rich for the data. HS]*/
1375 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1377 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 48, "fa_IR",
1379 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 49, "ru_RU",
1381 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 50, "en_IE",
1383 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 51, "ko_KR",
1385 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 52, "zh_CN",
1386 /* People's Republic of China*/
1387 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 53, "zh_TW",
1389 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 54, "th_TH",
1392 /* fallback is en_US*/
1393 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
,
1394 MAC_LC_MAGIC_NUMBER
, "en_US"
1400 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1404 static const char *uprv_getPOSIXIDForCategory(int category
)
1406 const char* posixID
= NULL
;
1407 if (category
== LC_MESSAGES
|| category
== LC_CTYPE
) {
1409 * On Solaris two different calls to setlocale can result in
1410 * different values. Only get this value once.
1412 * We must check this first because an application can set this.
1414 * LC_ALL can't be used because it's platform dependent. The LANG
1415 * environment variable seems to affect LC_CTYPE variable by default.
1416 * Here is what setlocale(LC_ALL, NULL) can return.
1417 * HPUX can return 'C C C C C C C'
1418 * Solaris can return /en_US/C/C/C/C/C on the second try.
1419 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1421 * The default codepage detection also needs to use LC_CTYPE.
1423 * Do not call setlocale(LC_*, "")! Using an empty string instead
1424 * of NULL, will modify the libc behavior.
1426 posixID
= setlocale(category
, NULL
);
1428 || (uprv_strcmp("C", posixID
) == 0)
1429 || (uprv_strcmp("POSIX", posixID
) == 0))
1431 /* Maybe we got some garbage. Try something more reasonable */
1432 posixID
= getenv("LC_ALL");
1434 posixID
= getenv(category
== LC_MESSAGES
? "LC_MESSAGES" : "LC_CTYPE");
1436 posixID
= getenv("LANG");
1442 || (uprv_strcmp("C", posixID
) == 0)
1443 || (uprv_strcmp("POSIX", posixID
) == 0))
1445 /* Nothing worked. Give it a nice POSIX default value. */
1446 posixID
= "en_US_POSIX";
1451 /* Return just the POSIX id for the default locale, whatever happens to be in
1452 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1454 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1456 static const char* posixID
= NULL
;
1458 posixID
= uprv_getPOSIXIDForCategory(LC_MESSAGES
);
1463 #if !U_CHARSET_IS_UTF8
1464 /* Return just the POSIX id for the default codepage, whatever happens to be in
1465 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1467 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1469 static const char* posixID
= NULL
;
1471 posixID
= uprv_getPOSIXIDForCategory(LC_CTYPE
);
1478 /* NOTE: The caller should handle thread safety */
1479 U_CAPI
const char* U_EXPORT2
1480 uprv_getDefaultLocaleID()
1484 Note that: (a '!' means the ID is improper somehow)
1485 LC_ALL ----> default_loc codepage
1486 --------------------------------------------------------
1491 ab_CD.EF@GH ab_CD_GH EF
1493 Some 'improper' ways to do the same as above:
1494 ! ab_CD@GH.EF ab_CD_GH EF
1495 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1496 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1501 The variant cannot have dots in it.
1502 The 'rightmost' variant (@xxx) wins.
1503 The leftmost codepage (.xxx) wins.
1505 char *correctedPOSIXLocale
= 0;
1506 const char* posixID
= uprv_getPOSIXIDForDefaultLocale();
1511 /* Format: (no spaces)
1512 ll [ _CC ] [ . MM ] [ @ VV]
1514 l = lang, C = ctry, M = charmap, V = variant
1517 if (gCorrectedPOSIXLocale
!= NULL
) {
1518 return gCorrectedPOSIXLocale
;
1521 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1522 /* assume new locale can't be larger than old one? */
1523 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1524 /* Exit on memory allocation error. */
1525 if (correctedPOSIXLocale
== NULL
) {
1528 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1529 correctedPOSIXLocale
[p
-posixID
] = 0;
1531 /* do not copy after the @ */
1532 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1533 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1537 /* Note that we scan the *uncorrected* ID. */
1538 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1539 if (correctedPOSIXLocale
== NULL
) {
1540 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1541 /* Exit on memory allocation error. */
1542 if (correctedPOSIXLocale
== NULL
) {
1545 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1546 correctedPOSIXLocale
[p
-posixID
] = 0;
1550 /* Take care of any special cases here.. */
1551 if (!uprv_strcmp(p
, "nynorsk")) {
1553 /* Don't worry about no__NY. In practice, it won't appear. */
1556 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1557 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1560 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1563 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1564 /* How big will the resulting string be? */
1565 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1566 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1567 correctedPOSIXLocale
[len
] = 0;
1570 /* Anything following the @ sign */
1571 uprv_strcat(correctedPOSIXLocale
, p
);
1574 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1575 * How about 'russian' -> 'ru'?
1576 * Many of the other locales using ISO codes will be handled by the
1577 * canonicalization functions in uloc_getDefault.
1581 /* Was a correction made? */
1582 if (correctedPOSIXLocale
!= NULL
) {
1583 posixID
= correctedPOSIXLocale
;
1586 /* copy it, just in case the original pointer goes away. See j2395 */
1587 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1588 /* Exit on memory allocation error. */
1589 if (correctedPOSIXLocale
== NULL
) {
1592 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1595 if (gCorrectedPOSIXLocale
== NULL
) {
1596 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1597 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1598 correctedPOSIXLocale
= NULL
;
1601 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1602 uprv_free(correctedPOSIXLocale
);
1607 #elif U_PLATFORM_USES_ONLY_WIN32_API
1608 UErrorCode status
= U_ZERO_ERROR
;
1609 LCID id
= GetThreadLocale();
1610 const char* locID
= uprv_convertToPosix(id
, &status
);
1612 if (U_FAILURE(status
)) {
1617 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1618 int32_t script
= MAC_LC_INIT_NUMBER
;
1619 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1620 int32_t region
= MAC_LC_INIT_NUMBER
;
1621 /* = GetScriptManagerVariable(smRegionCode);*/
1622 int32_t lang
= MAC_LC_INIT_NUMBER
;
1623 /* = GetScriptManagerVariable(smScriptLang);*/
1624 int32_t date_region
= MAC_LC_INIT_NUMBER
;
1625 const char* posixID
= 0;
1626 int32_t count
= sizeof(mac_lc_recs
) / sizeof(mac_lc_rec
);
1630 ih
= (Intl1Hndl
) GetIntlResource(1);
1632 date_region
= ((uint16_t)(*ih
)->intl1Vers
) >> 8;
1634 for (i
= 0; i
< count
; i
++) {
1635 if ( ((mac_lc_recs
[i
].script
== MAC_LC_MAGIC_NUMBER
)
1636 || (mac_lc_recs
[i
].script
== script
))
1637 && ((mac_lc_recs
[i
].region
== MAC_LC_MAGIC_NUMBER
)
1638 || (mac_lc_recs
[i
].region
== region
))
1639 && ((mac_lc_recs
[i
].lang
== MAC_LC_MAGIC_NUMBER
)
1640 || (mac_lc_recs
[i
].lang
== lang
))
1641 && ((mac_lc_recs
[i
].date_region
== MAC_LC_MAGIC_NUMBER
)
1642 || (mac_lc_recs
[i
].date_region
== date_region
))
1645 posixID
= mac_lc_recs
[i
].posixID
;
1652 #elif U_PLATFORM == U_PF_OS400
1653 /* locales are process scoped and are by definition thread safe */
1654 static char correctedLocale
[64];
1655 const char *localeID
= getenv("LC_ALL");
1658 if (localeID
== NULL
)
1659 localeID
= getenv("LANG");
1660 if (localeID
== NULL
)
1661 localeID
= setlocale(LC_ALL
, NULL
);
1662 /* Make sure we have something... */
1663 if (localeID
== NULL
)
1664 return "en_US_POSIX";
1666 /* Extract the locale name from the path. */
1667 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1669 /* Increment p to start of locale name. */
1674 /* Copy to work location. */
1675 uprv_strcpy(correctedLocale
, localeID
);
1677 /* Strip off the '.locale' extension. */
1678 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1682 /* Upper case the locale name. */
1683 T_CString_toUpperCase(correctedLocale
);
1685 /* See if we are using the POSIX locale. Any of the
1686 * following are equivalent and use the same QLGPGCMA
1688 * QLGPGCMA2 means UCS2
1689 * QLGPGCMA_4 means UTF-32
1690 * QLGPGCMA_8 means UTF-8
1692 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1693 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1694 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1696 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1702 /* Lower case the lang portion. */
1703 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1705 *p
= uprv_tolower(*p
);
1708 /* Adjust for Euro. After '_E' add 'URO'. */
1709 LocaleLen
= uprv_strlen(correctedLocale
);
1710 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1711 correctedLocale
[LocaleLen
- 1] == 'E')
1713 uprv_strcat(correctedLocale
, "URO");
1716 /* If using Lotus-based locale then convert to
1717 * equivalent non Lotus.
1719 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1720 correctedLocale
[LocaleLen
- 1] == 'L')
1722 correctedLocale
[LocaleLen
- 2] = 0;
1725 /* There are separate simplified and traditional
1726 * locales called zh_HK_S and zh_HK_T.
1728 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1730 uprv_strcpy(correctedLocale
, "zh_HK");
1733 /* A special zh_CN_GBK locale...
1735 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1737 uprv_strcpy(correctedLocale
, "zh_CN");
1742 return correctedLocale
;
1747 #if !U_CHARSET_IS_UTF8
1750 Due to various platform differences, one platform may specify a charset,
1751 when they really mean a different charset. Remap the names so that they are
1752 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1753 here. Before adding anything to this function, please consider adding unique
1754 names to the ICU alias table in the data directory.
1757 remapPlatformDependentCodepage(const char *locale
, const char *name
) {
1758 if (locale
!= NULL
&& *locale
== 0) {
1759 /* Make sure that an empty locale is handled the same way. */
1765 #if U_PLATFORM == U_PF_AIX
1766 if (uprv_strcmp(name
, "IBM-943") == 0) {
1767 /* Use the ASCII compatible ibm-943 */
1770 else if (uprv_strcmp(name
, "IBM-1252") == 0) {
1771 /* Use the windows-1252 that contains the Euro */
1774 #elif U_PLATFORM == U_PF_SOLARIS
1775 if (locale
!= NULL
&& uprv_strcmp(name
, "EUC") == 0) {
1776 /* Solaris underspecifies the "EUC" name. */
1777 if (uprv_strcmp(locale
, "zh_CN") == 0) {
1780 else if (uprv_strcmp(locale
, "zh_TW") == 0) {
1783 else if (uprv_strcmp(locale
, "ko_KR") == 0) {
1787 else if (uprv_strcmp(name
, "eucJP") == 0) {
1789 ibm-954 is the best match.
1790 ibm-33722 is the default for eucJP (similar to Windows).
1794 else if (uprv_strcmp(name
, "646") == 0) {
1796 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1797 * ISO-8859-1 instead of US-ASCII(646).
1799 name
= "ISO-8859-1";
1801 #elif U_PLATFORM_IS_DARWIN_BASED
1802 if (locale
== NULL
&& *name
== 0) {
1804 No locale was specified, and an empty name was passed in.
1805 This usually indicates that nl_langinfo didn't return valid information.
1806 Mac OS X uses UTF-8 by default (especially the locale data and console).
1810 else if (uprv_strcmp(name
, "CP949") == 0) {
1811 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1814 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 && uprv_strcmp(name
, "US-ASCII") == 0) {
1816 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1820 #elif U_PLATFORM == U_PF_BSD
1821 if (uprv_strcmp(name
, "CP949") == 0) {
1822 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1825 #elif U_PLATFORM == U_PF_HPUX
1826 if (locale
!= NULL
&& uprv_strcmp(locale
, "zh_HK") == 0 && uprv_strcmp(name
, "big5") == 0) {
1827 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1828 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1831 else if (uprv_strcmp(name
, "eucJP") == 0) {
1833 ibm-1350 is the best match, but unavailable.
1834 ibm-954 is mostly a superset of ibm-1350.
1835 ibm-33722 is the default for eucJP (similar to Windows).
1839 #elif U_PLATFORM == U_PF_LINUX
1840 if (locale
!= NULL
&& uprv_strcmp(name
, "euc") == 0) {
1841 /* Linux underspecifies the "EUC" name. */
1842 if (uprv_strcmp(locale
, "korean") == 0) {
1845 else if (uprv_strcmp(locale
, "japanese") == 0) {
1846 /* See comment below about eucJP */
1850 else if (uprv_strcmp(name
, "eucjp") == 0) {
1852 ibm-1350 is the best match, but unavailable.
1853 ibm-954 is mostly a superset of ibm-1350.
1854 ibm-33722 is the default for eucJP (similar to Windows).
1858 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 &&
1859 (uprv_strcmp(name
, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name
, "US-ASCII") == 0)) {
1861 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1866 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1867 * it by falling back to 'US-ASCII' when NULL is returned from this
1868 * function. So, we don't have to worry about it here.
1871 /* return NULL when "" is passed in */
1879 getCodepageFromPOSIXID(const char *localeName
, char * buffer
, int32_t buffCapacity
)
1881 char localeBuf
[100];
1882 const char *name
= NULL
;
1883 char *variant
= NULL
;
1885 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
1886 size_t localeCapacity
= uprv_min(sizeof(localeBuf
), (name
-localeName
)+1);
1887 uprv_strncpy(localeBuf
, localeName
, localeCapacity
);
1888 localeBuf
[localeCapacity
-1] = 0; /* ensure NULL termination */
1889 name
= uprv_strncpy(buffer
, name
+1, buffCapacity
);
1890 buffer
[buffCapacity
-1] = 0; /* ensure NULL termination */
1891 if ((variant
= const_cast<char *>(uprv_strchr(name
, '@'))) != NULL
) {
1894 name
= remapPlatformDependentCodepage(localeBuf
, name
);
1901 int_getDefaultCodepage()
1903 #if U_PLATFORM == U_PF_OS400
1904 uint32_t ccsid
= 37; /* Default to ibm-37 */
1905 static char codepage
[64];
1906 Qwc_JOBI0400_t jobinfo
;
1907 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
1909 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
1912 if (error
.Bytes_Available
== 0) {
1913 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
1914 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
1916 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
1917 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
1919 /* else use the default */
1921 sprintf(codepage
,"ibm-%d", ccsid
);
1924 #elif U_PLATFORM == U_PF_OS390
1925 static char codepage
[64];
1927 strncpy(codepage
, nl_langinfo(CODESET
),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING
));
1928 strcat(codepage
,UCNV_SWAP_LFNL_OPTION_STRING
);
1929 codepage
[63] = 0; /* NULL terminate */
1933 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1934 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1936 #elif U_PLATFORM_USES_ONLY_WIN32_API
1937 static char codepage
[64];
1938 sprintf(codepage
, "windows-%d", GetACP());
1941 #elif U_POSIX_LOCALE
1942 static char codesetName
[100];
1943 const char *localeName
= NULL
;
1944 const char *name
= NULL
;
1946 localeName
= uprv_getPOSIXIDForDefaultCodepage();
1947 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1948 #if U_HAVE_NL_LANGINFO_CODESET
1949 /* When available, check nl_langinfo first because it usually gives more
1950 useful names. It depends on LC_CTYPE.
1951 nl_langinfo may use the same buffer as setlocale. */
1953 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
1954 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1956 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1959 if (uprv_strcmp(localeName
, "en_US_POSIX") != 0) {
1960 codeset
= remapPlatformDependentCodepage(localeName
, codeset
);
1964 codeset
= remapPlatformDependentCodepage(NULL
, codeset
);
1967 if (codeset
!= NULL
) {
1968 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
1969 codesetName
[sizeof(codesetName
)-1] = 0;
1975 /* Use setlocale in a nice way, and then check some environment variables.
1976 Maybe the application used setlocale already.
1978 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1979 name
= getCodepageFromPOSIXID(localeName
, codesetName
, sizeof(codesetName
));
1981 /* if we can find the codeset name from setlocale, return that. */
1985 if (*codesetName
== 0)
1987 /* Everything failed. Return US ASCII (ISO 646). */
1988 (void)uprv_strcpy(codesetName
, "US-ASCII");
1997 U_CAPI
const char* U_EXPORT2
1998 uprv_getDefaultCodepage()
2000 static char const *name
= NULL
;
2003 name
= int_getDefaultCodepage();
2008 #endif /* !U_CHARSET_IS_UTF8 */
2011 /* end of platform-specific implementation -------------- */
2013 /* version handling --------------------------------------------------------- */
2015 U_CAPI
void U_EXPORT2
2016 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
2020 if(versionArray
==NULL
) {
2024 if(versionString
!=NULL
) {
2026 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
2027 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
2030 versionString
=end
+1;
2034 while(part
<U_MAX_VERSION_LENGTH
) {
2035 versionArray
[part
++]=0;
2039 U_CAPI
void U_EXPORT2
2040 u_versionFromUString(UVersionInfo versionArray
, const UChar
*versionString
) {
2041 if(versionArray
!=NULL
&& versionString
!=NULL
) {
2042 char versionChars
[U_MAX_VERSION_STRING_LENGTH
+1];
2043 int32_t len
= u_strlen(versionString
);
2044 if(len
>U_MAX_VERSION_STRING_LENGTH
) {
2045 len
= U_MAX_VERSION_STRING_LENGTH
;
2047 u_UCharsToChars(versionString
, versionChars
, len
);
2048 versionChars
[len
]=0;
2049 u_versionFromString(versionArray
, versionChars
);
2053 U_CAPI
void U_EXPORT2
2054 u_versionToString(const UVersionInfo versionArray
, char *versionString
) {
2055 uint16_t count
, part
;
2058 if(versionString
==NULL
) {
2062 if(versionArray
==NULL
) {
2067 /* count how many fields need to be written */
2068 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
2075 /* write the first part */
2076 /* write the decimal field value */
2077 field
=versionArray
[0];
2079 *versionString
++=(char)('0'+field
/100);
2083 *versionString
++=(char)('0'+field
/10);
2086 *versionString
++=(char)('0'+field
);
2088 /* write the following parts */
2089 for(part
=1; part
<count
; ++part
) {
2090 /* write a dot first */
2091 *versionString
++=U_VERSION_DELIMITER
;
2093 /* write the decimal field value */
2094 field
=versionArray
[part
];
2096 *versionString
++=(char)('0'+field
/100);
2100 *versionString
++=(char)('0'+field
/10);
2103 *versionString
++=(char)('0'+field
);
2110 U_CAPI
void U_EXPORT2
2111 u_getVersion(UVersionInfo versionArray
) {
2112 u_versionFromString(versionArray
, U_ICU_VERSION
);
2116 * icucfg.h dependent code
2121 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2133 U_INTERNAL
void * U_EXPORT2
2134 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2136 if(U_FAILURE(*status
)) return ret
;
2137 ret
= dlopen(libName
, RTLD_NOW
|RTLD_GLOBAL
);
2139 #ifdef U_TRACE_DYLOAD
2140 printf("dlerror on dlopen(%s): %s\n", libName
, dlerror());
2142 *status
= U_MISSING_RESOURCE_ERROR
;
2147 U_INTERNAL
void U_EXPORT2
2148 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2149 if(U_FAILURE(*status
)) return;
2153 U_INTERNAL UVoidFunction
* U_EXPORT2
2154 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2160 if(U_FAILURE(*status
)) return uret
.fp
;
2161 uret
.vp
= dlsym(lib
, sym
);
2162 if(uret
.vp
== NULL
) {
2163 #ifdef U_TRACE_DYLOAD
2164 printf("dlerror on dlsym(%p,%s): %s\n", lib
,sym
, dlerror());
2166 *status
= U_MISSING_RESOURCE_ERROR
;
2173 /* null (nonexistent) implementation. */
2175 U_INTERNAL
void * U_EXPORT2
2176 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2177 if(U_FAILURE(*status
)) return NULL
;
2178 *status
= U_UNSUPPORTED_ERROR
;
2182 U_INTERNAL
void U_EXPORT2
2183 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2184 if(U_FAILURE(*status
)) return;
2185 *status
= U_UNSUPPORTED_ERROR
;
2190 U_INTERNAL UVoidFunction
* U_EXPORT2
2191 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2192 if(U_SUCCESS(*status
)) {
2193 *status
= U_UNSUPPORTED_ERROR
;
2195 return (UVoidFunction
*)NULL
;
2202 #elif U_PLATFORM_USES_ONLY_WIN32_API
2204 U_INTERNAL
void * U_EXPORT2
2205 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2208 if(U_FAILURE(*status
)) return NULL
;
2210 lib
= LoadLibraryA(libName
);
2213 *status
= U_MISSING_RESOURCE_ERROR
;
2219 U_INTERNAL
void U_EXPORT2
2220 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2221 HMODULE handle
= (HMODULE
)lib
;
2222 if(U_FAILURE(*status
)) return;
2224 FreeLibrary(handle
);
2230 U_INTERNAL UVoidFunction
* U_EXPORT2
2231 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2232 HMODULE handle
= (HMODULE
)lib
;
2233 UVoidFunction
* addr
= NULL
;
2235 if(U_FAILURE(*status
) || lib
==NULL
) return NULL
;
2237 addr
= (UVoidFunction
*)GetProcAddress(handle
, sym
);
2240 DWORD lastError
= GetLastError();
2241 if(lastError
== ERROR_PROC_NOT_FOUND
) {
2242 *status
= U_MISSING_RESOURCE_ERROR
;
2244 *status
= U_UNSUPPORTED_ERROR
; /* other unknown error. */
2254 /* No dynamic loading set. */
2256 U_INTERNAL
void * U_EXPORT2
2257 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2258 if(U_FAILURE(*status
)) return NULL
;
2259 *status
= U_UNSUPPORTED_ERROR
;
2263 U_INTERNAL
void U_EXPORT2
2264 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2265 if(U_FAILURE(*status
)) return;
2266 *status
= U_UNSUPPORTED_ERROR
;
2271 U_INTERNAL UVoidFunction
* U_EXPORT2
2272 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2273 if(U_SUCCESS(*status
)) {
2274 *status
= U_UNSUPPORTED_ERROR
;
2276 return (UVoidFunction
*)NULL
;
2279 #endif /* U_ENABLE_DYLOAD */
2282 * Hey, Emacs, please set the following:
2285 * indent-tabs-mode: nil