1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1997-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
11 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
13 * Date Name Description
14 * 04/14/97 aliu Creation.
15 * 04/24/97 aliu Added getDefaultDataDirectory() and
16 * getDefaultLocaleID().
17 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
18 * for assumed case. Non-UNIX platforms must be
19 * special-cased. Rewrote numeric methods dealing
20 * with NaN and Infinity to be platform independent
21 * over all IEEE 754 platforms.
22 * 05/13/97 aliu Restored sign of timezone
23 * (semantics are hours West of GMT)
24 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
26 * 07/22/98 stephen Added remainder, max, min, trunc
27 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28 * 08/24/98 stephen Added longBitsFromDouble
29 * 09/08/98 stephen Minor changes for Mac Port
30 * 03/02/99 stephen Removed openFile(). Added AS400 support.
32 * 04/15/99 stephen Converted to C.
33 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34 * 08/04/99 jeffrey R. Added OS/2 changes
35 * 11/15/99 helena Integrated S/390 IEEE support.
36 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38 * 01/03/08 Steven L. Fake Time Support
39 ******************************************************************************
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
55 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
75 /* Include standard headers. */
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
95 #ifndef WIN32_LEAN_AND_MEAN
96 # define WIN32_LEAN_AND_MEAN
103 # include <windows.h>
104 # include "unicode\uloc.h"
105 #if U_PLATFORM_HAS_WINUWP_API == 0
107 #else // U_PLATFORM_HAS_WINUWP_API
108 typedef PVOID LPMSG
; // TODO: figure out how to get rid of this typedef
109 #include <Windows.Globalization.h>
110 #include <windows.system.userprofile.h>
111 #include <wrl\wrappers\corewrappers.h>
112 #include <wrl\client.h>
114 using namespace ABI::Windows::Foundation
;
115 using namespace Microsoft::WRL
;
116 using namespace Microsoft::WRL::Wrappers
;
118 #elif U_PLATFORM == U_PF_OS400
120 # include <qusec.h> /* error code structure */
121 # include <qusrjobi.h>
122 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
123 # include <mih/testptr.h> /* For uprv_maximumPtr */
124 #elif U_PLATFORM == U_PF_OS390
125 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
126 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
129 # if U_PLATFORM == U_PF_SOLARIS
134 #elif U_PLATFORM == U_PF_QNX
135 # include <sys/neutrino.h>
139 * Only include langinfo.h if we have a way to get the codeset. If we later
140 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
144 #if U_HAVE_NL_LANGINFO_CODESET
145 #include <langinfo.h>
149 * Simple things (presence of functions, etc) should just go in configure.in and be added to
150 * icucfg.h via autoheader.
152 #if U_PLATFORM_IMPLEMENTS_POSIX
153 # if U_PLATFORM == U_PF_OS400
154 # define HAVE_DLFCN_H 0
155 # define HAVE_DLOPEN 0
157 # ifndef HAVE_DLFCN_H
158 # define HAVE_DLFCN_H 1
161 # define HAVE_DLOPEN 1
164 # ifndef HAVE_GETTIMEOFDAY
165 # define HAVE_GETTIMEOFDAY 1
168 # define HAVE_DLFCN_H 0
169 # define HAVE_DLOPEN 0
170 # define HAVE_GETTIMEOFDAY 0
175 /* Define the extension for data files, again... */
176 #define DATA_TYPE "dat"
178 /* Leave this copyright notice here! */
179 static const char copyright
[] = U_COPYRIGHT_STRING
;
181 /* floating point implementations ------------------------------------------- */
183 /* We return QNAN rather than SNAN*/
184 #define SIGN 0x80000000U
186 /* Make it easy to define certain types of constants */
188 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
190 } BitPatternConversion
;
191 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
192 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
194 /*---------------------------------------------------------------------------
196 Our general strategy is to assume we're on a POSIX platform. Platforms which
197 are non-POSIX must declare themselves so. The default POSIX implementation
198 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
200 ---------------------------------------------------------------------------*/
202 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
203 # undef U_POSIX_LOCALE
205 # define U_POSIX_LOCALE 1
209 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
210 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
214 u_topNBytesOfDouble(double* d
, int n
)
219 return (char*)(d
+ 1) - n
;
224 u_bottomNBytesOfDouble(double* d
, int n
)
227 return (char*)(d
+ 1) - n
;
232 #endif /* !IEEE_754 */
236 u_signBit(double d
) {
239 hiByte
= *(uint8_t *)&d
;
241 hiByte
= *(((uint8_t *)&d
) + sizeof(double) - 1);
243 return (hiByte
& 0x80) != 0;
249 #if defined (U_DEBUG_FAKETIME)
250 /* Override the clock to test things without having to move the system clock.
251 * Assumes POSIX gettimeofday() will function
253 UDate fakeClock_t0
= 0; /** Time to start the clock from **/
254 UDate fakeClock_dt
= 0; /** Offset (fake time - real time) **/
255 UBool fakeClock_set
= FALSE
; /** True if fake clock has spun up **/
/* Mutex locked and unlocked by getUTCtime_fake() around its fake-clock setup.
 * NOTE(review): the initializer macro is spelled "U_MUTEX_INTIALIZER" here rather
 * than "U_MUTEX_INITIALIZER". Its definition is not visible in this file; confirm
 * against umutex.h which spelling is actually defined. This declaration sits under
 * U_DEBUG_FAKETIME, so a misspelling would go unnoticed in builds that do not
 * define it. */
256 static UMutex fakeClockMutex
= U_MUTEX_INTIALIZER
;
258 static UDate
getUTCtime_real() {
259 struct timeval posixTime
;
260 gettimeofday(&posixTime
, NULL
);
261 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
264 static UDate
getUTCtime_fake() {
265 umtx_lock(&fakeClockMutex
);
267 UDate real
= getUTCtime_real();
268 const char *fake_start
= getenv("U_FAKETIME_START");
269 if((fake_start
!=NULL
) && (fake_start
[0]!=0)) {
270 sscanf(fake_start
,"%lf",&fakeClock_t0
);
271 fakeClock_dt
= fakeClock_t0
- real
;
272 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
273 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
274 fakeClock_t0
, fake_start
, fakeClock_dt
, real
);
277 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
278 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
280 fakeClock_set
= TRUE
;
282 umtx_unlock(&fakeClockMutex
);
284 return getUTCtime_real() + fakeClock_dt
;
288 #if U_PLATFORM_USES_ONLY_WIN32_API
292 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
294 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
295 #define EPOCH_BIAS INT64_C(116444736000000000)
296 #define HECTONANOSECOND_PER_MILLISECOND 10000
300 /*---------------------------------------------------------------------------
301 Universal Implementations
302 These are designed to work on all platforms. Try these, and if they
303 don't work on your platform, then special case your platform with new
305 ---------------------------------------------------------------------------*/
307 U_CAPI UDate U_EXPORT2
310 #if defined(U_DEBUG_FAKETIME)
311 return getUTCtime_fake(); /* Hook for overriding the clock */
313 return uprv_getRawUTCtime();
317 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
318 U_CAPI UDate U_EXPORT2
321 #if U_PLATFORM_USES_ONLY_WIN32_API
323 FileTimeConversion winTime
;
324 GetSystemTimeAsFileTime(&winTime
.fileTime
);
325 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
328 #if HAVE_GETTIMEOFDAY
329 struct timeval posixTime
;
330 gettimeofday(&posixTime
, NULL
);
331 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
335 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
341 /*-----------------------------------------------------------------------------
343 These methods detect and return NaN and infinity values for doubles
344 conforming to IEEE 754. Platforms which support this standard include X86,
345 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
346 If this doesn't work on your platform, you have non-IEEE floating-point, and
347 will need to code your own versions. A naive implementation is to return 0.0
348 for getNaN and getInfinity, and false for isNaN and isInfinite.
349 ---------------------------------------------------------------------------*/
351 U_CAPI UBool U_EXPORT2
352 uprv_isNaN(double number
)
355 BitPatternConversion convertedNumber
;
356 convertedNumber
.d64
= number
;
357 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
358 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
360 #elif U_PLATFORM == U_PF_OS390
361 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
363 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
366 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
367 (lowBits
== 0x00000000L
);
370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
371 /* you'll need to replace this default implementation with what's correct*/
372 /* for your platform.*/
373 return number
!= number
;
377 U_CAPI UBool U_EXPORT2
378 uprv_isInfinite(double number
)
381 BitPatternConversion convertedNumber
;
382 convertedNumber
.d64
= number
;
383 /* Infinity is exactly 0x7FF0000000000000U. */
384 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
385 #elif U_PLATFORM == U_PF_OS390
386 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
388 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
391 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
394 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
395 /* value, you'll need to replace this default implementation with what's*/
396 /* correct for your platform.*/
397 return number
== (2.0 * number
);
401 U_CAPI UBool U_EXPORT2
402 uprv_isPositiveInfinity(double number
)
404 #if IEEE_754 || U_PLATFORM == U_PF_OS390
405 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
407 return uprv_isInfinite(number
);
411 U_CAPI UBool U_EXPORT2
412 uprv_isNegativeInfinity(double number
)
414 #if IEEE_754 || U_PLATFORM == U_PF_OS390
415 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
418 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
420 return((highBits
& SIGN
) && uprv_isInfinite(number
));
425 U_CAPI
double U_EXPORT2
428 #if IEEE_754 || U_PLATFORM == U_PF_OS390
431 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
432 /* you'll need to replace this default implementation with what's correct*/
433 /* for your platform.*/
438 U_CAPI
double U_EXPORT2
441 #if IEEE_754 || U_PLATFORM == U_PF_OS390
444 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
445 /* value, you'll need to replace this default implementation with what's*/
446 /* correct for your platform.*/
451 U_CAPI
double U_EXPORT2
457 U_CAPI
double U_EXPORT2
463 U_CAPI
double U_EXPORT2
466 return uprv_floor(x
+ 0.5);
469 U_CAPI
double U_EXPORT2
475 U_CAPI
double U_EXPORT2
476 uprv_modf(double x
, double* y
)
481 U_CAPI
double U_EXPORT2
482 uprv_fmod(double x
, double y
)
487 U_CAPI
double U_EXPORT2
488 uprv_pow(double x
, double y
)
490 /* This is declared as "double pow(double x, double y)" */
494 U_CAPI
double U_EXPORT2
495 uprv_pow10(int32_t x
)
497 return pow(10.0, (double)x
);
500 U_CAPI
double U_EXPORT2
501 uprv_fmax(double x
, double y
)
504 /* first handle NaN*/
505 if(uprv_isNaN(x
) || uprv_isNaN(y
))
506 return uprv_getNaN();
508 /* check for -0 and 0*/
509 if(x
== 0.0 && y
== 0.0 && u_signBit(x
))
514 /* this should work for all flt point w/o NaN and Inf special cases */
515 return (x
> y
? x
: y
);
518 U_CAPI
double U_EXPORT2
519 uprv_fmin(double x
, double y
)
522 /* first handle NaN*/
523 if(uprv_isNaN(x
) || uprv_isNaN(y
))
524 return uprv_getNaN();
526 /* check for -0 and 0*/
527 if(x
== 0.0 && y
== 0.0 && u_signBit(y
))
532 /* this should work for all flt point w/o NaN and Inf special cases */
533 return (x
> y
? y
: x
);
537 * Truncates the given double.
538 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
539 * This is different than calling floor() or ceil():
540 * floor(3.3) = 3, floor(-3.3) = -4
541 * ceil(3.3) = 4, ceil(-3.3) = -3
543 U_CAPI
double U_EXPORT2
547 /* handle error cases*/
549 return uprv_getNaN();
550 if(uprv_isInfinite(d
))
551 return uprv_getInfinity();
553 if(u_signBit(d
)) /* Signbit() picks up -0.0; d<0 does not. */
559 return d
>= 0 ? floor(d
) : ceil(d
);
565 * Return the largest positive number that can be represented by an integer
566 * type of arbitrary bit length.
568 U_CAPI
double U_EXPORT2
569 uprv_maxMantissa(void)
571 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
574 U_CAPI
double U_EXPORT2
580 U_CAPI
void * U_EXPORT2
581 uprv_maximumPtr(void * base
)
583 #if U_PLATFORM == U_PF_OS400
585 * With the provided function we should never be out of range of a given segment
586 * (a traditional/typical segment that is). Our segments have 5 bytes for the
587 * id and 3 bytes for the offset. The key is that the casting takes care of
588 * only retrieving the offset portion minus x1000. Hence, the smallest offset
589 * seen in a program is x001000 and when casted to an int would be 0.
590 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
592 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
593 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
594 * This function determines the activation based on the pointer that is passed in and
595 * calculates the appropriate maximum available size for
596 * each pointer type (TERASPACE and non-TERASPACE)
598 * Unlike other operating systems, the pointer model isn't determined at
599 * compile time on i5/OS.
601 if ((base
!= NULL
) && (_TESTPTR(base
, _C_TERASPACE_CHECK
))) {
602 /* if it is a TERASPACE pointer the max is 2GB - 4k */
603 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0x7fffefff)));
605 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
606 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0xffefff)));
609 return U_MAX_PTR(base
);
613 /*---------------------------------------------------------------------------
614 Platform-specific Implementations
615 Try these, and if they don't work on your platform, then special case your
616 platform with new implementations.
617 ---------------------------------------------------------------------------*/
619 /* Generic time zone layer -------------------------------------------------- */
621 /* Time zone utilities */
622 U_CAPI
void U_EXPORT2
628 /* no initialization*/
632 U_CAPI
int32_t U_EXPORT2
643 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
644 #if U_PLATFORM != U_PF_IPHONE
645 UBool dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
647 t1
= mktime(&tmrec
); /* local time in seconds*/
648 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
649 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
652 #if U_PLATFORM != U_PF_IPHONE
653 /* imitate NT behaviour, which returns same timezone offset to GMT for
655 This does not work on all platforms. For instance, on glibc on Linux
656 and on Mac OS 10.5, tdiff calculated above remains the same
657 regardless of whether DST is in effect or not. iOS is another
658 platform where this does not work. Linux + glibc and Mac OS 10.5
659 have U_TIMEZONE defined so that this code is not reached.
668 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
669 some platforms need to have it declared here. */
671 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
672 /* RS6000 and others reject char **tzname. */
673 extern U_IMPORT
char *U_TZNAME
[];
676 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
677 /* These platforms are likely to use Olson timezone IDs. */
678 #define CHECK_LOCALTIME_LINK 1
679 #if U_PLATFORM_IS_DARWIN_BASED
681 #define TZZONEINFO (TZDIR "/")
682 #elif U_PLATFORM == U_PF_SOLARIS
683 #define TZDEFAULT "/etc/localtime"
684 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
685 #define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
686 #define TZ_ENV_CHECK "localtime"
688 #define TZDEFAULT "/etc/localtime"
689 #define TZZONEINFO "/usr/share/zoneinfo/"
692 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
693 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
694 symlinked to /etc/localtime, which makes searchForTZFile return
695 'localtime' when it's the first match. */
696 #define TZFILE_SKIP2 "localtime"
697 #define SEARCH_TZFILE
698 #include <dirent.h> /* Needed to search through system timezone files */
700 static char gTimeZoneBuffer
[PATH_MAX
];
701 static char *gTimeZoneBufferPtr
= NULL
;
704 #if !U_PLATFORM_USES_ONLY_WIN32_API
/* True when ch lies outside the ASCII digit range '0'..'9'.
 * The argument is parenthesized so that an expression argument (for example a
 * conditional expression) is compared as a whole. ch is still evaluated twice,
 * so callers must not pass an expression with side effects. */
#define isNonDigit(ch) ((ch) < '0' || '9' < (ch))
706 static UBool
isValidOlsonID(const char *id
) {
709 /* Determine if this is something like Iceland (Olson ID)
710 or AST4ADT (non-Olson ID) */
711 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
715 /* If we went through the whole string, then it might be okay.
716 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
717 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
718 The rest of the time it could be an Olson ID. George */
719 return (UBool
)(id
[idx
] == 0
720 || uprv_strcmp(id
, "PST8PDT") == 0
721 || uprv_strcmp(id
, "MST7MDT") == 0
722 || uprv_strcmp(id
, "CST6CDT") == 0
723 || uprv_strcmp(id
, "EST5EDT") == 0);
726 /* On some Unix-like OS, 'posix' subdirectory in
727 /usr/share/zoneinfo replicates the top-level contents. 'right'
728 subdirectory has the same set of files, but individual files
729 are different from those in the top-level directory or 'posix'
730 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
732 When the first match for /etc/localtime is in either of them
733 (usually in posix because 'right' has different file contents),
734 or TZ environment variable points to one of them, createTimeZone
735 fails because, say, 'posix/America/New_York' is not an Olson
736 timezone id ('America/New_York' is). So, we have to skip
737 'posix/' and 'right/' at the beginning. */
738 static void skipZoneIDPrefix(const char** id
) {
739 if (uprv_strncmp(*id
, "posix/", 6) == 0
740 || uprv_strncmp(*id
, "right/", 6) == 0)
747 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
/* Converts an offset given in hours to seconds, with the result cast to int32_t.
 * Both the argument and the whole expansion are parenthesized so that the macro
 * behaves as a single value when given an expression or used inside one. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset) * 3600))
750 typedef struct OffsetZoneMapping
{
751 int32_t offsetSeconds
;
752 int32_t daylightType
; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
758 enum { U_DAYLIGHT_NONE
=0,U_DAYLIGHT_JUNE
=1,U_DAYLIGHT_DECEMBER
=2 };
761 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
762 and maps it to an Olson ID.
763 Before adding anything to this list, take a look at
764 icu/source/tools/tzcode/tz.alias
765 Sometimes no daylight savings (0) is important to define due to aliases.
766 This list can be tested with icu/source/test/compat/tzone.pl
767 More values could be added to daylightType to increase precision.
769 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS
[] = {
770 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
771 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
772 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
773 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
774 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
775 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
776 {-36000, 2, "EST", "EST", "Australia/Sydney"},
777 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
778 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
779 {-34200, 2, "CST", "CST", "Australia/South"},
780 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
781 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
782 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
783 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
784 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
785 {-28800, 2, "WST", "WST", "Australia/West"},
786 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
787 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
788 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
789 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
790 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
791 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
792 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
793 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
794 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
795 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
796 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
797 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
798 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
799 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
800 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
801 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
802 {0, 1, "GMT", "IST", "Europe/Dublin"},
803 {0, 1, "GMT", "BST", "Europe/London"},
804 {0, 0, "WET", "WEST", "Africa/Casablanca"},
805 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
806 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
807 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
808 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
809 {10800, 2, "UYT", "UYST", "America/Montevideo"},
810 {10800, 1, "WGT", "WGST", "America/Godthab"},
811 {10800, 2, "BRT", "BRST", "Brazil/East"},
812 {12600, 1, "NST", "NDT", "America/St_Johns"},
813 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
814 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
815 {14400, 2, "CLT", "CLST", "Chile/Continental"},
816 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
817 {14400, 2, "PYT", "PYST", "America/Asuncion"},
818 {18000, 1, "CST", "CDT", "America/Havana"},
819 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
820 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
821 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
822 {21600, 0, "CST", "CDT", "America/Guatemala"},
823 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
824 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
825 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
826 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
827 {32400, 1, "AKST", "AKDT", "US/Alaska"},
828 {36000, 1, "HAST", "HADT", "US/Aleutian"}
831 /*#define DEBUG_TZNAME*/
833 static const char* remapShortTimeZone(const char *stdID
, const char *dstID
, int32_t daylightType
, int32_t offset
)
837 fprintf(stderr
, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID
, dstID
, daylightType
, offset
);
839 for (idx
= 0; idx
< UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS
); idx
++)
841 if (offset
== OFFSET_ZONE_MAPPINGS
[idx
].offsetSeconds
842 && daylightType
== OFFSET_ZONE_MAPPINGS
[idx
].daylightType
843 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].stdID
, stdID
) == 0
844 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].dstID
, dstID
) == 0)
846 return OFFSET_ZONE_MAPPINGS
[idx
].olsonID
;
854 #define MAX_READ_SIZE 512
856 typedef struct DefaultTZInfo
{
857 char* defaultTZBuffer
;
858 int64_t defaultTZFileSize
;
859 FILE* defaultTZFilePtr
;
860 UBool defaultTZstatus
;
861 int32_t defaultTZPosition
;
865 * This method compares the two files given to see if they are a match.
866 * It is currently used to compare two TZ files.
868 static UBool
compareBinaryFiles(const char* defaultTZFileName
, const char* TZFileName
, DefaultTZInfo
* tzInfo
) {
871 int64_t sizeFileLeft
;
872 int32_t sizeFileRead
;
873 int32_t sizeFileToRead
;
874 char bufferFile
[MAX_READ_SIZE
];
877 if (tzInfo
->defaultTZFilePtr
== NULL
) {
878 tzInfo
->defaultTZFilePtr
= fopen(defaultTZFileName
, "r");
880 file
= fopen(TZFileName
, "r");
882 tzInfo
->defaultTZPosition
= 0; /* reset position to begin search */
884 if (file
!= NULL
&& tzInfo
->defaultTZFilePtr
!= NULL
) {
885 /* First check that the file size are equal. */
886 if (tzInfo
->defaultTZFileSize
== 0) {
887 fseek(tzInfo
->defaultTZFilePtr
, 0, SEEK_END
);
888 tzInfo
->defaultTZFileSize
= ftell(tzInfo
->defaultTZFilePtr
);
890 fseek(file
, 0, SEEK_END
);
891 sizeFile
= ftell(file
);
892 sizeFileLeft
= sizeFile
;
894 if (sizeFile
!= tzInfo
->defaultTZFileSize
) {
897 /* Store the data from the files in separate buffers and
898 * compare each byte to determine equality.
900 if (tzInfo
->defaultTZBuffer
== NULL
) {
901 rewind(tzInfo
->defaultTZFilePtr
);
902 tzInfo
->defaultTZBuffer
= (char*)uprv_malloc(sizeof(char) * tzInfo
->defaultTZFileSize
);
903 sizeFileRead
= fread(tzInfo
->defaultTZBuffer
, 1, tzInfo
->defaultTZFileSize
, tzInfo
->defaultTZFilePtr
);
906 while(sizeFileLeft
> 0) {
907 uprv_memset(bufferFile
, 0, MAX_READ_SIZE
);
908 sizeFileToRead
= sizeFileLeft
< MAX_READ_SIZE
? sizeFileLeft
: MAX_READ_SIZE
;
910 sizeFileRead
= fread(bufferFile
, 1, sizeFileToRead
, file
);
911 if (memcmp(tzInfo
->defaultTZBuffer
+ tzInfo
->defaultTZPosition
, bufferFile
, sizeFileRead
) != 0) {
915 sizeFileLeft
-= sizeFileRead
;
916 tzInfo
->defaultTZPosition
+= sizeFileRead
;
931 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
934 static UBool U_CALLCONV
putil_cleanup(void);
935 static CharString
*gSearchTZFileResult
= NULL
;
938 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
939 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
941 static char* searchForTZFile(const char* path
, DefaultTZInfo
* tzInfo
) {
942 DIR* dirp
= opendir(path
);
944 struct dirent
* dirEntry
= NULL
;
951 if (gSearchTZFileResult
== NULL
) {
952 gSearchTZFileResult
= new CharString
;
953 if (gSearchTZFileResult
== NULL
) {
956 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
959 /* Save the current path */
960 UErrorCode status
= U_ZERO_ERROR
;
961 CharString
curpath(path
, -1, status
);
962 if (U_FAILURE(status
)) {
966 /* Check each entry in the directory. */
967 while((dirEntry
= readdir(dirp
)) != NULL
) {
968 const char* dirName
= dirEntry
->d_name
;
969 if (uprv_strcmp(dirName
, SKIP1
) != 0 && uprv_strcmp(dirName
, SKIP2
) != 0) {
970 /* Create a newpath with the new entry to test each entry in the directory. */
971 CharString
newpath(curpath
, status
);
972 newpath
.append(dirName
, -1, status
);
973 if (U_FAILURE(status
)) {
977 if ((subDirp
= opendir(newpath
.data())) != NULL
) {
978 /* If this new path is a directory, make a recursive call with the newpath. */
980 newpath
.append('/', status
);
981 if (U_FAILURE(status
)) {
984 result
= searchForTZFile(newpath
.data(), tzInfo
);
986 Have to get out here. Otherwise, we'd keep looking
987 and return the first match in the top-level directory
988 if there's a match in the top-level. If not, this function
989 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
990 It worked without this in most cases because we have a fallback of calling
991 localtime_r to figure out the default timezone.
995 } else if (uprv_strcmp(TZFILE_SKIP
, dirName
) != 0 && uprv_strcmp(TZFILE_SKIP2
, dirName
) != 0) {
996 if(compareBinaryFiles(TZDEFAULT
, newpath
.data(), tzInfo
)) {
997 int32_t amountToSkip
= sizeof(TZZONEINFO
) - 1;
998 if (amountToSkip
> newpath
.length()) {
999 amountToSkip
= newpath
.length();
1001 const char* zoneid
= newpath
.data() + amountToSkip
;
1002 skipZoneIDPrefix(&zoneid
);
1003 gSearchTZFileResult
->clear();
1004 gSearchTZFileResult
->append(zoneid
, -1, status
);
1005 if (U_FAILURE(status
)) {
1008 result
= gSearchTZFileResult
->data();
1009 /* Get out after the first one found. */
1020 U_CAPI
void U_EXPORT2
1021 uprv_tzname_clear_cache()
1023 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1024 gTimeZoneBufferPtr
= NULL
;
1028 // With the Universal Windows Platform we can just ask Windows for the name
1029 #if U_PLATFORM_HAS_WINUWP_API
1030 U_CAPI
const char* U_EXPORT2
1031 uprv_getWindowsTimeZone()
1033 // Get default Windows timezone.
1034 ComPtr
<IInspectable
> calendar
;
1035 HRESULT hr
= RoActivateInstance(
1036 HStringReference(RuntimeClass_Windows_Globalization_Calendar
).Get(),
1040 ComPtr
<ABI::Windows::Globalization::ITimeZoneOnCalendar
> timezone
;
1041 hr
= calendar
.As(&timezone
);
1044 HString timezoneString
;
1045 hr
= timezone
->GetTimeZone(timezoneString
.GetAddressOf());
1048 int32_t length
= wcslen(timezoneString
.GetRawBuffer(NULL
));
1049 char* asciiId
= (char*)uprv_calloc(length
+ 1, sizeof(char));
1050 if (asciiId
!= nullptr)
1052 u_UCharsToChars((UChar
*)timezoneString
.GetRawBuffer(NULL
), asciiId
, length
);
1064 U_CAPI
const char* U_EXPORT2
1067 const char *tzid
= NULL
;
1068 #if U_PLATFORM_USES_ONLY_WIN32_API
1069 #if U_PLATFORM_HAS_WINUWP_API > 0
1070 tzid
= uprv_getWindowsTimeZone();
1072 tzid
= uprv_detectWindowsTimeZone();
1080 // The return value is free'd in timezone.cpp on Windows because
1081 // the other code path returns a pointer to a heap location.
1082 // If we don't have a name already, then tzname wouldn't be any
1083 // better, so just fall back.
1084 return uprv_strdup("Etc/UTC");
1089 /*#if U_PLATFORM_IS_DARWIN_BASED
1092 tzid = getenv("TZFILE");
1098 /* This code can be temporarily disabled to test tzname resolution later on. */
1099 #ifndef DEBUG_TZNAME
1100 tzid
= getenv("TZ");
1101 if (tzid
!= NULL
&& isValidOlsonID(tzid
)
1102 #if U_PLATFORM == U_PF_SOLARIS
1103 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1104 && uprv_strcmp(tzid
, TZ_ENV_CHECK
) != 0
1107 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1108 if (tzid
[0] == ':') {
1111 /* This might be a good Olson ID. */
1112 skipZoneIDPrefix(&tzid
);
1115 /* else U_TZNAME will give a better result. */
1118 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1119 /* Caller must handle threading issues */
1120 if (gTimeZoneBufferPtr
== NULL
) {
1122 This is a trick to look at the name of the link to get the Olson ID
1123 because the tzfile contents is underspecified.
1124 This isn't guaranteed to work because it may not be a symlink.
1126 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
)-1);
1128 int32_t tzZoneInfoLen
= uprv_strlen(TZZONEINFO
);
1129 gTimeZoneBuffer
[ret
] = 0;
1130 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO
, tzZoneInfoLen
) == 0
1131 && isValidOlsonID(gTimeZoneBuffer
+ tzZoneInfoLen
))
1133 return (gTimeZoneBufferPtr
= gTimeZoneBuffer
+ tzZoneInfoLen
);
1135 #if U_PLATFORM == U_PF_SOLARIS
1138 tzZoneInfoLen
= uprv_strlen(TZZONEINFO2
);
1139 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO2
, tzZoneInfoLen
) == 0
1140 && isValidOlsonID(gTimeZoneBuffer
+ tzZoneInfoLen
))
1142 return (gTimeZoneBufferPtr
= gTimeZoneBuffer
+ tzZoneInfoLen
);
1147 #if defined(SEARCH_TZFILE)
1148 DefaultTZInfo
* tzInfo
= (DefaultTZInfo
*)uprv_malloc(sizeof(DefaultTZInfo
));
1149 if (tzInfo
!= NULL
) {
1150 tzInfo
->defaultTZBuffer
= NULL
;
1151 tzInfo
->defaultTZFileSize
= 0;
1152 tzInfo
->defaultTZFilePtr
= NULL
;
1153 tzInfo
->defaultTZstatus
= FALSE
;
1154 tzInfo
->defaultTZPosition
= 0;
1156 gTimeZoneBufferPtr
= searchForTZFile(TZZONEINFO
, tzInfo
);
1158 /* Free previously allocated memory */
1159 if (tzInfo
->defaultTZBuffer
!= NULL
) {
1160 uprv_free(tzInfo
->defaultTZBuffer
);
1162 if (tzInfo
->defaultTZFilePtr
!= NULL
) {
1163 fclose(tzInfo
->defaultTZFilePtr
);
1168 if (gTimeZoneBufferPtr
!= NULL
&& isValidOlsonID(gTimeZoneBufferPtr
)) {
1169 return gTimeZoneBufferPtr
;
1175 return gTimeZoneBufferPtr
;
1181 #if U_PLATFORM_USES_ONLY_WIN32_API
1182 /* The return value is free'd in timezone.cpp on Windows because
1183 * the other code path returns a pointer to a heap location. */
1184 return uprv_strdup(U_TZNAME
[n
]);
1187 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1188 So we remap the abbreviation to an olson ID.
1190 Since Windows exposes a little more timezone information,
1191 we normally don't use this code on Windows because
1192 uprv_detectWindowsTimeZone should have already given the correct answer.
1195 struct tm juneSol
, decemberSol
;
1197 static const time_t juneSolstice
=1182478260; /*2007-06-21 18:11 UT*/
1198 static const time_t decemberSolstice
=1198332540; /*2007-12-22 06:09 UT*/
1200 /* This probing will tell us when daylight savings occurs. */
1201 localtime_r(&juneSolstice
, &juneSol
);
1202 localtime_r(&decemberSolstice
, &decemberSol
);
1203 if(decemberSol
.tm_isdst
> 0) {
1204 daylightType
= U_DAYLIGHT_DECEMBER
;
1205 } else if(juneSol
.tm_isdst
> 0) {
1206 daylightType
= U_DAYLIGHT_JUNE
;
1208 daylightType
= U_DAYLIGHT_NONE
;
1210 tzid
= remapShortTimeZone(U_TZNAME
[0], U_TZNAME
[1], daylightType
, uprv_timezone());
1222 /* Get and set the ICU data directory --------------------------------------- */
1224 static icu::UInitOnce gDataDirInitOnce
= U_INITONCE_INITIALIZER
;
1225 static char *gDataDirectory
= NULL
;
1227 UInitOnce gTimeZoneFilesInitOnce
= U_INITONCE_INITIALIZER
;
1228 static CharString
*gTimeZoneFilesDirectory
= NULL
;
1230 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1231 static char *gCorrectedPOSIXLocale
= NULL
; /* Sometimes heap allocated */
1232 static bool gCorrectedPOSIXLocaleHeapAllocated
= false;
1235 static UBool U_CALLCONV
putil_cleanup(void)
1237 if (gDataDirectory
&& *gDataDirectory
) {
1238 uprv_free(gDataDirectory
);
1240 gDataDirectory
= NULL
;
1241 gDataDirInitOnce
.reset();
1243 delete gTimeZoneFilesDirectory
;
1244 gTimeZoneFilesDirectory
= NULL
;
1245 gTimeZoneFilesInitOnce
.reset();
1247 #ifdef SEARCH_TZFILE
1248 delete gSearchTZFileResult
;
1249 gSearchTZFileResult
= NULL
;
1252 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1253 if (gCorrectedPOSIXLocale
&& gCorrectedPOSIXLocaleHeapAllocated
) {
1254 uprv_free(gCorrectedPOSIXLocale
);
1255 gCorrectedPOSIXLocale
= NULL
;
1256 gCorrectedPOSIXLocaleHeapAllocated
= false;
1263 * Set the data directory.
1264 * Make a copy of the passed string, and set the global data dir to point to it.
1266 U_CAPI
void U_EXPORT2
1267 u_setDataDirectory(const char *directory
) {
1271 if(directory
==NULL
|| *directory
==0) {
1272 /* A small optimization to prevent the malloc and copy when the
1273 shared library is used, and this is a way to make sure that NULL
1276 newDataDir
= (char *)"";
1279 length
=(int32_t)uprv_strlen(directory
);
1280 newDataDir
= (char *)uprv_malloc(length
+ 2);
1281 /* Exit out if newDataDir could not be created. */
1282 if (newDataDir
== NULL
) {
1285 uprv_strcpy(newDataDir
, directory
);
1287 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1290 while(p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) {
1291 *p
= U_FILE_SEP_CHAR
;
1297 if (gDataDirectory
&& *gDataDirectory
) {
1298 uprv_free(gDataDirectory
);
1300 gDataDirectory
= newDataDir
;
1301 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1304 U_CAPI UBool U_EXPORT2
1305 uprv_pathIsAbsolute(const char *path
)
1307 if(!path
|| !*path
) {
1311 if(*path
== U_FILE_SEP_CHAR
) {
1315 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1316 if(*path
== U_FILE_ALT_SEP_CHAR
) {
1321 #if U_PLATFORM_USES_ONLY_WIN32_API
1322 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
1323 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
1332 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1333 until some client wrapper makefiles are updated */
1334 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1335 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1336 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1340 static void U_CALLCONV
dataDirectoryInitFn() {
1341 /* If we already have the directory, then return immediately. Will happen if user called
1342 * u_setDataDirectory().
1344 if (gDataDirectory
) {
1348 const char *path
= NULL
;
1349 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1350 char datadir_path_buffer
[PATH_MAX
];
1354 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1355 override ICU's data with the ICU_DATA environment variable. This prevents
1356 problems where multiple custom copies of ICU's specific version of data
1357 are installed on a system. Either the application must define the data
1358 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1359 ICU, set the data with udata_setCommonData or trust that all of the
1360 required data is contained in ICU's data library that contains
1361 the entry point defined by U_ICUDATA_ENTRY_POINT.
1363 There may also be some platforms where environment variables
1366 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1367 /* First try to get the environment variable */
1368 # if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1369 path
=getenv("ICU_DATA");
1373 /* ICU_DATA_DIR may be set as a compile option.
1374 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1375 * and is used only when data is built in archive mode eliminating the need
1376 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1377 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1378 * set their own path.
1380 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1381 if(path
==NULL
|| *path
==0) {
1382 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1383 const char *prefix
= getenv(ICU_DATA_DIR_PREFIX_ENV_VAR
);
1385 # ifdef ICU_DATA_DIR
1388 path
=U_ICU_DATA_DEFAULT_DIR
;
1390 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1391 if (prefix
!= NULL
) {
1392 snprintf(datadir_path_buffer
, PATH_MAX
, "%s%s", prefix
, path
);
1393 path
=datadir_path_buffer
;
1399 #if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
1400 // Use data from the %windir%\globalization\icu directory
1401 // This is only available if ICU is built as a system component
1402 char datadir_path_buffer
[MAX_PATH
];
1403 UINT length
= GetWindowsDirectoryA(datadir_path_buffer
, UPRV_LENGTHOF(datadir_path_buffer
));
1404 if (length
> 0 && length
< (UPRV_LENGTHOF(datadir_path_buffer
) - sizeof(ICU_DATA_DIR_WINDOWS
) - 1))
1406 if (datadir_path_buffer
[length
- 1] != '\\')
1408 datadir_path_buffer
[length
++] = '\\';
1409 datadir_path_buffer
[length
] = '\0';
1412 if ((length
+ 1 + sizeof(ICU_DATA_DIR_WINDOWS
)) < UPRV_LENGTHOF(datadir_path_buffer
))
1414 uprv_strcat(datadir_path_buffer
, ICU_DATA_DIR_WINDOWS
);
1415 path
= datadir_path_buffer
;
1421 /* It looks really bad, set it to something. */
1422 #if U_PLATFORM_HAS_WIN32_API
1423 // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
1430 u_setDataDirectory(path
);
1434 U_CAPI
const char * U_EXPORT2
1435 u_getDataDirectory(void) {
1436 umtx_initOnce(gDataDirInitOnce
, &dataDirectoryInitFn
);
1437 return gDataDirectory
;
1440 static void setTimeZoneFilesDir(const char *path
, UErrorCode
&status
) {
1441 if (U_FAILURE(status
)) {
1444 gTimeZoneFilesDirectory
->clear();
1445 gTimeZoneFilesDirectory
->append(path
, status
);
1446 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1447 char *p
= gTimeZoneFilesDirectory
->data();
1448 while (p
= uprv_strchr(p
, U_FILE_ALT_SEP_CHAR
)) {
1449 *p
= U_FILE_SEP_CHAR
;
1454 #if U_PLATFORM_IMPLEMENTS_POSIX
1455 #include <sys/stat.h>
1456 #if defined(U_TIMEZONE_FILES_DIR)
1457 const char tzdirbuf
[] = U_TIMEZONE_FILES_DIR
;
1458 enum { kTzfilenamebufLen
= UPRV_LENGTHOF(tzdirbuf
) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1462 #define TO_STRING(x) TO_STRING_2(x)
1463 #define TO_STRING_2(x) #x
1465 static void U_CALLCONV
TimeZoneDataDirInitFn(UErrorCode
&status
) {
1466 U_ASSERT(gTimeZoneFilesDirectory
== NULL
);
1467 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1468 gTimeZoneFilesDirectory
= new CharString();
1469 if (gTimeZoneFilesDirectory
== NULL
) {
1470 status
= U_MEMORY_ALLOCATION_ERROR
;
1473 UBool usingUTzFilesDir
= FALSE
;
1474 #if U_PLATFORM_HAS_WINUWP_API == 0
1475 const char *dir
= getenv("ICU_TIMEZONE_FILES_DIR");
1477 // TODO: UWP does not support alternate timezone data directories at this time
1478 const char *dir
= "";
1479 #endif // U_PLATFORM_HAS_WINUWP_API
1480 #if defined(U_TIMEZONE_FILES_DIR)
1482 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1483 // Not sure why the above was done for this path only;
1484 // it preserves unwanted quotes.
1486 usingUTzFilesDir
= TRUE
;
1489 #if U_PLATFORM_IMPLEMENTS_POSIX
1492 if (stat(dir
, &buf
) != 0) {
1495 #if defined(U_TIMEZONE_FILES_DIR)
1496 else if (usingUTzFilesDir
) {
1497 char tzfilenamebuf
[kTzfilenamebufLen
];
1498 uprv_strcpy(tzfilenamebuf
, tzdirbuf
);
1499 uprv_strcat(tzfilenamebuf
, U_FILE_SEP_STRING
);
1500 #if defined(U_TIMEZONE_PACKAGE)
1501 uprv_strcat(tzfilenamebuf
, U_TIMEZONE_PACKAGE
);
1502 uprv_strcat(tzfilenamebuf
, ".dat");
1504 uprv_strcat(tzfilenamebuf
, "zoneinfo64.res");
1506 if (stat(tzfilenamebuf
, &buf
) != 0) {
1510 #endif /* defined(U_TIMEZONE_FILES_DIR) */
1512 #endif /* U_PLATFORM_IMPLEMENTS_POSIX */
1516 setTimeZoneFilesDir(dir
, status
);
1520 U_CAPI
const char * U_EXPORT2
1521 u_getTimeZoneFilesDirectory(UErrorCode
*status
) {
1522 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1523 return U_SUCCESS(*status
) ? gTimeZoneFilesDirectory
->data() : "";
1526 U_CAPI
void U_EXPORT2
1527 u_setTimeZoneFilesDirectory(const char *path
, UErrorCode
*status
) {
1528 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1529 setTimeZoneFilesDir(path
, *status
);
1531 // Note: this function does some extra churn, first setting based on the
1532 // environment, then immediately replacing with the value passed in.
1533 // The logic is simpler that way, and performance shouldn't be an issue.
#if U_POSIX_LOCALE
/* True when a locale string is missing (NULL) or empty. POSIX treats an
 * environment variable that is set to the empty string as if it were unset.
 */
static UBool putil_isUnsetOrEmpty(const char *value)
{
    return value == NULL || value[0] == '\0';
}

/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * The current setlocale() value is used unless it is missing, "C" or
 * "POSIX"; in that case the environment is consulted in the order LC_ALL,
 * the category's own variable, LANG. Empty variables are skipped on every
 * platform. If nothing useful is found the result is "en_US_POSIX".
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable */
            posixID = getenv("LC_ALL");
            /* See IEEE Std 1003.1-2008: an empty variable counts as unset,
             * so keep looking instead of returning "".
             */
            if (putil_isUnsetOrEmpty(posixID)) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (putil_isUnsetOrEmpty(posixID)) {
                    posixID = getenv("LANG");
                }
            }
        }
    }
    if (putil_isUnsetOrEmpty(posixID)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}

/* Return just the POSIX id for the default locale, whatever happens to be in
 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
 * The value is looked up once and then remembered.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* posixID = NULL;
    if (posixID == 0) {
        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    }
    return posixID;
}

#if !U_CHARSET_IS_UTF8
/* Return just the POSIX id for the default codepage, whatever happens to be in
 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
 * The value is looked up once and then remembered.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* posixID = NULL;
    if (posixID == 0) {
        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    }
    return posixID;
}
#endif
#endif
1625 /* NOTE: The caller should handle thread safety */
1626 U_CAPI
const char* U_EXPORT2
1627 uprv_getDefaultLocaleID()
1631 Note that: (a '!' means the ID is improper somehow)
1632 LC_ALL ----> default_loc codepage
1633 --------------------------------------------------------
1638 ab_CD.EF@GH ab_CD_GH EF
1640 Some 'improper' ways to do the same as above:
1641 ! ab_CD@GH.EF ab_CD_GH EF
1642 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1643 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1648 The variant cannot have dots in it.
1649 The 'rightmost' variant (@xxx) wins.
1650 The leftmost codepage (.xxx) wins.
1652 char *correctedPOSIXLocale
= 0;
1653 const char* posixID
= uprv_getPOSIXIDForDefaultLocale();
1658 /* Format: (no spaces)
1659 ll [ _CC ] [ . MM ] [ @ VV]
1661 l = lang, C = ctry, M = charmap, V = variant
1664 if (gCorrectedPOSIXLocale
!= NULL
) {
1665 return gCorrectedPOSIXLocale
;
1668 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1669 /* assume new locale can't be larger than old one? */
1670 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1671 /* Exit on memory allocation error. */
1672 if (correctedPOSIXLocale
== NULL
) {
1675 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1676 correctedPOSIXLocale
[p
-posixID
] = 0;
1678 /* do not copy after the @ */
1679 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1680 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1684 /* Note that we scan the *uncorrected* ID. */
1685 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1686 if (correctedPOSIXLocale
== NULL
) {
1687 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1688 /* Exit on memory allocation error. */
1689 if (correctedPOSIXLocale
== NULL
) {
1692 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1693 correctedPOSIXLocale
[p
-posixID
] = 0;
1697 /* Take care of any special cases here.. */
1698 if (!uprv_strcmp(p
, "nynorsk")) {
1700 /* Don't worry about no__NY. In practice, it won't appear. */
1703 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1704 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1707 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1710 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1711 /* How big will the resulting string be? */
1712 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1713 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1714 correctedPOSIXLocale
[len
] = 0;
1717 /* Anything following the @ sign */
1718 uprv_strcat(correctedPOSIXLocale
, p
);
1721 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1722 * How about 'russian' -> 'ru'?
1723 * Many of the other locales using ISO codes will be handled by the
1724 * canonicalization functions in uloc_getDefault.
1728 /* Was a correction made? */
1729 if (correctedPOSIXLocale
!= NULL
) {
1730 posixID
= correctedPOSIXLocale
;
1733 /* copy it, just in case the original pointer goes away. See j2395 */
1734 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1735 /* Exit on memory allocation error. */
1736 if (correctedPOSIXLocale
== NULL
) {
1739 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1742 if (gCorrectedPOSIXLocale
== NULL
) {
1743 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1744 gCorrectedPOSIXLocaleHeapAllocated
= true;
1745 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1746 correctedPOSIXLocale
= NULL
;
1749 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1750 uprv_free(correctedPOSIXLocale
);
1755 #elif U_PLATFORM_USES_ONLY_WIN32_API
1756 #define POSIX_LOCALE_CAPACITY 64
1757 UErrorCode status
= U_ZERO_ERROR
;
1758 char *correctedPOSIXLocale
= 0;
1760 // If we have already figured this out just use the cached value
1761 if (gCorrectedPOSIXLocale
!= NULL
) {
1762 return gCorrectedPOSIXLocale
;
1765 // No cached value, need to determine the current value
1766 static WCHAR windowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1767 #if U_PLATFORM_HAS_WINUWP_API == 0
1768 // If not a Universal Windows App, we'll need user default language.
1769 // Vista and above should use Locale Names instead of LCIDs
1770 int length
= GetUserDefaultLocaleName(windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1772 // In a UWP app, we want the top language that the application and user agreed upon
1773 ComPtr
<ABI::Windows::Foundation::Collections::IVectorView
<HSTRING
>> languageList
;
1775 ComPtr
<ABI::Windows::Globalization::IApplicationLanguagesStatics
> applicationLanguagesStatics
;
1776 HRESULT hr
= GetActivationFactory(
1777 HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages
).Get(),
1778 &applicationLanguagesStatics
);
1781 hr
= applicationLanguagesStatics
->get_Languages(&languageList
);
1786 // If there is no application context, then use the top language from the user language profile
1787 ComPtr
<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics
> globalizationPreferencesStatics
;
1788 hr
= GetActivationFactory(
1789 HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences
).Get(),
1790 &globalizationPreferencesStatics
);
1793 hr
= globalizationPreferencesStatics
->get_Languages(&languageList
);
1797 // We have a list of languages, ICU knows one, so use the top one for our locale
1798 HString topLanguage
;
1801 hr
= languageList
->GetAt(0, topLanguage
.GetAddressOf());
1806 // Unexpected, use en-US by default
1807 if (gCorrectedPOSIXLocale
== NULL
) {
1808 gCorrectedPOSIXLocale
= "en_US";
1811 return gCorrectedPOSIXLocale
;
1814 // ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
1815 int length
= ResolveLocaleName(topLanguage
.GetRawBuffer(NULL
), windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1817 // Now we should have a Windows locale name that needs converted to the POSIX style,
1820 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1821 char modifiedWindowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1824 for (i
= 0; i
< UPRV_LENGTHOF(modifiedWindowsLocale
); i
++)
1826 if (windowsLocale
[i
] == '_')
1828 modifiedWindowsLocale
[i
] = '-';
1832 modifiedWindowsLocale
[i
] = static_cast<char>(windowsLocale
[i
]);
1835 if (modifiedWindowsLocale
[i
] == '\0')
1841 if (i
>= UPRV_LENGTHOF(modifiedWindowsLocale
))
1843 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1844 // locale when tags are dropped
1845 modifiedWindowsLocale
[UPRV_LENGTHOF(modifiedWindowsLocale
) - 1] = '\0';
1848 // Now normalize the resulting name
1849 if (correctedPOSIXLocale
)
1851 int32_t posixLen
= uloc_canonicalize(modifiedWindowsLocale
, correctedPOSIXLocale
, POSIX_LOCALE_CAPACITY
, &status
);
1852 if (U_SUCCESS(status
))
1854 *(correctedPOSIXLocale
+ posixLen
) = 0;
1855 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1856 gCorrectedPOSIXLocaleHeapAllocated
= true;
1857 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1861 uprv_free(correctedPOSIXLocale
);
1866 // If unable to find a locale we can agree upon, use en-US by default
1867 if (gCorrectedPOSIXLocale
== NULL
) {
1868 gCorrectedPOSIXLocale
= "en_US";
1870 return gCorrectedPOSIXLocale
;
1872 #elif U_PLATFORM == U_PF_OS400
1873 /* locales are process scoped and are by definition thread safe */
1874 static char correctedLocale
[64];
1875 const char *localeID
= getenv("LC_ALL");
1878 if (localeID
== NULL
)
1879 localeID
= getenv("LANG");
1880 if (localeID
== NULL
)
1881 localeID
= setlocale(LC_ALL
, NULL
);
1882 /* Make sure we have something... */
1883 if (localeID
== NULL
)
1884 return "en_US_POSIX";
1886 /* Extract the locale name from the path. */
1887 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1889 /* Increment p to start of locale name. */
1894 /* Copy to work location. */
1895 uprv_strcpy(correctedLocale
, localeID
);
1897 /* Strip off the '.locale' extension. */
1898 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1902 /* Upper case the locale name. */
1903 T_CString_toUpperCase(correctedLocale
);
1905 /* See if we are using the POSIX locale. Any of the
1906 * following are equivalent and use the same QLGPGCMA
1908 * QLGPGCMA2 means UCS2
1909 * QLGPGCMA_4 means UTF-32
1910 * QLGPGCMA_8 means UTF-8
1912 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1913 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1914 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1916 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1922 /* Lower case the lang portion. */
1923 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1925 *p
= uprv_tolower(*p
);
1928 /* Adjust for Euro. After '_E' add 'URO'. */
1929 LocaleLen
= uprv_strlen(correctedLocale
);
1930 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1931 correctedLocale
[LocaleLen
- 1] == 'E')
1933 uprv_strcat(correctedLocale
, "URO");
1936 /* If using Lotus-based locale then convert to
1937 * equivalent non Lotus.
1939 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1940 correctedLocale
[LocaleLen
- 1] == 'L')
1942 correctedLocale
[LocaleLen
- 2] = 0;
1945 /* There are separate simplified and traditional
1946 * locales called zh_HK_S and zh_HK_T.
1948 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1950 uprv_strcpy(correctedLocale
, "zh_HK");
1953 /* A special zh_CN_GBK locale...
1955 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1957 uprv_strcpy(correctedLocale
, "zh_CN");
1962 return correctedLocale
;
1967 #if !U_CHARSET_IS_UTF8
#if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale: POSIX locale the name came from; NULL and "" both mean "none".
name:   charset name reported by the platform.
Returns the name ICU should use, or NULL when name is NULL or ends up empty.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* Make sure that an empty locale is handled the same way as no locale. */
    if (locale != NULL && *locale == 0) {
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }

#if U_PLATFORM == U_PF_AIX
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif U_PLATFORM == U_PF_SOLARIS
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif U_PLATFORM_IS_DARWIN_BASED
    if (locale == NULL && *name == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
    else if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        name = "UTF-8";
    }
#elif U_PLATFORM == U_PF_BSD
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    }
#elif U_PLATFORM == U_PF_HPUX
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif U_PLATFORM == U_PF_LINUX
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        name = "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif

    /* return NULL when "" is passed in */
    return (*name == 0) ? NULL : name;
}

/*
 * Extracts the charset part of a POSIX locale ID ("ll_CC.charset@variant").
 *
 * The text after the first '.' is copied into buffer, cut at any '@', and
 * passed through remapPlatformDependentCodepage() together with the locale
 * part in front of the '.'.
 *
 * Returns the remapped charset name, or NULL when localeName is NULL or has
 * no '.'; the result may point into buffer.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *name = NULL;
    char *variant = NULL;

    if (localeName == NULL) {
        return NULL;
    }
    name = uprv_strchr(localeName, '.');
    if (name == NULL) {
        return NULL;
    }

    /* Locale part: everything in front of the '.', truncated to fit. */
    size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Charset part: everything after the '.', up to an optional '@'. */
    name = uprv_strncpy(buffer, name+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */
    variant = const_cast<char *>(uprv_strchr(name, '@'));
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, name);
}
#endif
2121 int_getDefaultCodepage()
2123 #if U_PLATFORM == U_PF_OS400
2124 uint32_t ccsid
= 37; /* Default to ibm-37 */
2125 static char codepage
[64];
2126 Qwc_JOBI0400_t jobinfo
;
2127 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
2129 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
2132 if (error
.Bytes_Available
== 0) {
2133 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
2134 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
2136 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
2137 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
2139 /* else use the default */
2141 sprintf(codepage
,"ibm-%d", ccsid
);
2144 #elif U_PLATFORM == U_PF_OS390
2145 static char codepage
[64];
2147 strncpy(codepage
, nl_langinfo(CODESET
),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING
));
2148 strcat(codepage
,UCNV_SWAP_LFNL_OPTION_STRING
);
2149 codepage
[63] = 0; /* NULL terminate */
2153 #elif U_PLATFORM_USES_ONLY_WIN32_API
2154 static char codepage
[64];
2155 DWORD codepageNumber
= 0;
2157 #if U_PLATFORM_HAS_WINUWP_API > 0
2158 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2159 // have folks use Unicode than a "system" code page, however this is the same
2160 // codepage as the system default locale codepage. (FWIW, the system locale is
2161 // ONLY used for codepage, it should never be used for anything else)
2162 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT
, LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER
,
2163 (LPWSTR
)&codepageNumber
, sizeof(codepageNumber
) / sizeof(WCHAR
));
2165 // Win32 apps can call GetACP
2166 codepageNumber
= GetACP();
2168 // Special case for UTF-8
2169 if (codepageNumber
== 65001)
2173 // Windows codepages can look like windows-1252, so format the found number
2174 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2175 // are between 3 and 19999
2176 if (codepageNumber
> 0 && codepageNumber
< 20000)
2178 sprintf(codepage
, "windows-%ld", codepageNumber
);
2181 // If the codepage number call failed then return UTF-8
2184 #elif U_POSIX_LOCALE
2185 static char codesetName
[100];
2186 const char *localeName
= NULL
;
2187 const char *name
= NULL
;
2189 localeName
= uprv_getPOSIXIDForDefaultCodepage();
2190 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2191 /* On Solaris nl_langinfo returns C locale values unless setlocale
2192 * was called earlier.
2194 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2195 /* When available, check nl_langinfo first because it usually gives more
2196 useful names. It depends on LC_CTYPE.
2197 nl_langinfo may use the same buffer as setlocale. */
2199 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
2200 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2202 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2205 if (uprv_strcmp(localeName
, "en_US_POSIX") != 0) {
2206 codeset
= remapPlatformDependentCodepage(localeName
, codeset
);
2210 codeset
= remapPlatformDependentCodepage(NULL
, codeset
);
2213 if (codeset
!= NULL
) {
2214 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
2215 codesetName
[sizeof(codesetName
)-1] = 0;
2221 /* Use setlocale in a nice way, and then check some environment variables.
2222 Maybe the application used setlocale already.
2224 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2225 name
= getCodepageFromPOSIXID(localeName
, codesetName
, sizeof(codesetName
));
2227 /* if we can find the codeset name from setlocale, return that. */
2231 if (*codesetName
== 0)
2233 /* Everything failed. Return US ASCII (ISO 646). */
2234 (void)uprv_strcpy(codesetName
, "US-ASCII");
/*
 * Public accessor for the name of the default codepage.
 *
 * The name is obtained from int_getDefaultCodepage() and stored in the
 * function-local static pointer `name`, which is initialised to NULL.
 *
 * NOTE(review): the statements around the assignment (any guard on `name`,
 * any locking, and the return statement) are not visible in this excerpt.
 * Presumably the lookup only runs while `name` is still NULL and `name` is
 * what gets returned -- confirm against the complete function.
 */
2243 U_CAPI
const char* U_EXPORT2
2244 uprv_getDefaultCodepage()
/* Static storage: the pointer keeps its value across calls. */
2246 static char const *name
= NULL
;
/* Delegate to the platform-specific lookup defined earlier in this file. */
2249 name
= int_getDefaultCodepage();
2254 #endif /* !U_CHARSET_IS_UTF8 */
2257 /* end of platform-specific implementation -------------- */
2259 /* version handling --------------------------------------------------------- */
/*
 * Parses a textual version (decimal fields separated by U_VERSION_DELIMITER)
 * into versionArray.
 *
 * versionArray  - destination; checked against NULL first.
 * versionString - source text; when it is NULL no field is parsed and only
 *                 the trailing zero-fill below applies.
 *
 * Each field is converted with uprv_strtoul() in base 10 and narrowed to
 * uint8_t.  After every field three conditions are tested, in this order:
 * no characters were consumed (end == versionString), the array is now full
 * (++part == U_MAX_VERSION_LENGTH), or the character at `end` is not
 * U_VERSION_DELIMITER.  Every slot from `part` up to U_MAX_VERSION_LENGTH is
 * then set to 0.
 *
 * NOTE(review): the declarations of `end` and `part`, the loop construct
 * around the field parsing, and the bodies of the NULL check and of the
 * three-way test are not visible in this excerpt.  Presumably the NULL check
 * returns and the three-way test leaves the loop -- confirm.
 */
2261 U_CAPI
void U_EXPORT2
2262 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
2266 if(versionArray
==NULL
) {
2270 if(versionString
!=NULL
) {
/* Convert one decimal field; the value is truncated to 8 bits by the cast. */
2272 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
/* Note that `part` is incremented as a side effect of the second operand. */
2273 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
/* Continue one character past `end`, i.e. after the delimiter tested above. */
2276 versionString
=end
+1;
/* Zero every field that was not filled in by the parsing above. */
2280 while(part
<U_MAX_VERSION_LENGTH
) {
2281 versionArray
[part
++]=0;
2285 U_CAPI
void U_EXPORT2
2286 u_versionFromUString(UVersionInfo versionArray
, const UChar
*versionString
) {
2287 if(versionArray
!=NULL
&& versionString
!=NULL
) {
2288 char versionChars
[U_MAX_VERSION_STRING_LENGTH
+1];
2289 int32_t len
= u_strlen(versionString
);
2290 if(len
>U_MAX_VERSION_STRING_LENGTH
) {
2291 len
= U_MAX_VERSION_STRING_LENGTH
;
2293 u_UCharsToChars(versionString
, versionChars
, len
);
2294 versionChars
[len
]=0;
2295 u_versionFromString(versionArray
, versionChars
);
/*
 * Writes the version stored in versionArray as text into versionString.
 *
 * versionArray  - source fields; checked against NULL.
 * versionString - destination buffer; checked against NULL.  Characters are
 *                 stored through it with post-increment.
 *
 * `count` starts at 4 and is reduced while the last counted field is 0, so
 * trailing zero fields are not written by the loop that follows.  Field 0 is
 * written first; every further field below `count` is preceded by
 * U_VERSION_DELIMITER.  Each field is written as up to three decimal digits.
 *
 * NOTE(review): several statements are not visible in this excerpt: the
 * declaration of `field`, the bodies of the two NULL checks, any adjustment
 * of `count` after the counting loop, the guards that decide whether the
 * hundreds and tens digits are written, the reductions of `field` between
 * digit writes, and the final terminator.  Confirm against the complete
 * function before relying on the details.
 */
2299 U_CAPI
void U_EXPORT2
2300 u_versionToString(const UVersionInfo versionArray
, char *versionString
) {
2301 uint16_t count
, part
;
2304 if(versionString
==NULL
) {
2308 if(versionArray
==NULL
) {
2313 /* count how many fields need to be written */
/* The loop body carries no visible work; the loop exists to compute `count`. */
2314 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
2321 /* write the first part */
2322 /* write the decimal field value */
2323 field
=versionArray
[0];
/* '0' + field/100: the hundreds digit. */
2325 *versionString
++=(char)('0'+field
/100);
/* '0' + field/10: the tens digit, assuming `field` was reduced below 100 in lines not shown. */
2329 *versionString
++=(char)('0'+field
/10);
/* '0' + field: the units digit, assuming `field` was reduced below 10 in lines not shown. */
2332 *versionString
++=(char)('0'+field
);
2334 /* write the following parts */
2335 for(part
=1; part
<count
; ++part
) {
2336 /* write a dot first */
2337 *versionString
++=U_VERSION_DELIMITER
;
2339 /* write the decimal field value */
2340 field
=versionArray
[part
];
/* Same three digit writes as for field 0 above. */
2342 *versionString
++=(char)('0'+field
/100);
2346 *versionString
++=(char)('0'+field
/10);
2349 *versionString
++=(char)('0'+field
);
2356 U_CAPI
void U_EXPORT2
2357 u_getVersion(UVersionInfo versionArray
) {
2358 (void)copyright
; // Suppress unused variable warning from clang.
2359 u_versionFromString(versionArray
, U_ICU_VERSION
);
2363 * icucfg.h dependent code
2368 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
/*
 * dlopen()-based implementation: loads the shared library named libName.
 *
 * libName - passed unchanged to dlopen().
 * status  - in/out error code.  If it already indicates failure on entry the
 *           function returns `ret` without calling dlopen().  On the error
 *           path it is set to U_MISSING_RESOURCE_ERROR.
 *
 * The library is opened with RTLD_NOW|RTLD_GLOBAL.  When U_TRACE_DYLOAD is
 * defined, the dlerror() text is printed on the error path.
 *
 * NOTE(review): the declaration and initial value of `ret`, the condition
 * guarding the error path, the matching #endif and the final return are not
 * visible in this excerpt.  Presumably the error path is taken when dlopen()
 * yields NULL and `ret` is returned -- confirm.
 */
2380 U_INTERNAL
void * U_EXPORT2
2381 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2383 if(U_FAILURE(*status
)) return ret
;
2384 ret
= dlopen(libName
, RTLD_NOW
|RTLD_GLOBAL
);
2386 #ifdef U_TRACE_DYLOAD
2387 printf("dlerror on dlopen(%s): %s\n", libName
, dlerror());
2389 *status
= U_MISSING_RESOURCE_ERROR
;
/*
 * dlopen()-based implementation: releases a library handle.
 *
 * lib    - handle to release.
 * status - in/out error code.  If it already indicates failure on entry the
 *          function returns immediately.
 *
 * NOTE(review): the statement that actually releases `lib` is not visible in
 * this excerpt; presumably a dlclose(lib) call -- confirm, and check whether
 * its result is reported through `status`.
 */
2394 U_INTERNAL
void U_EXPORT2
2395 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2396 if(U_FAILURE(*status
)) return;
/*
 * dlopen()-based implementation: looks up the symbol `sym` in library `lib`.
 *
 * lib    - library handle, passed unchanged to dlsym().
 * sym    - symbol name, passed unchanged to dlsym().
 * status - in/out error code.  If it already indicates failure on entry,
 *          uret.fp is returned without calling dlsym().  It is set to
 *          U_MISSING_RESOURCE_ERROR when dlsym() yields NULL.
 *
 * The dlsym() result is stored through uret.vp, while the early return reads
 * uret.fp.  When U_TRACE_DYLOAD is defined, the dlerror() text is printed on
 * the failure path.
 *
 * NOTE(review): the declaration and initialisation of `uret`, the matching
 * #endif and the final return are not visible in this excerpt.  Presumably
 * `uret` is a union of a data pointer and a UVoidFunction pointer, and
 * uret.fp is returned -- confirm.
 */
2400 U_INTERNAL UVoidFunction
* U_EXPORT2
2401 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2407 if(U_FAILURE(*status
)) return uret
.fp
;
2408 uret
.vp
= dlsym(lib
, sym
);
2409 if(uret
.vp
== NULL
) {
2410 #ifdef U_TRACE_DYLOAD
2411 printf("dlerror on dlsym(%p,%s): %s\n", lib
,sym
, dlerror());
2413 *status
= U_MISSING_RESOURCE_ERROR
;
2420 /* null (nonexistent) implementation. */
/*
 * Placeholder uprv_dl_open() for the "null (nonexistent) implementation"
 * branch: no library is loaded.
 *
 * libName - not used by any visible statement.
 * status  - in/out error code.  If it already indicates failure on entry,
 *           NULL is returned and it is left unchanged; otherwise it is set
 *           to U_UNSUPPORTED_ERROR.
 *
 * NOTE(review): the final return is not visible in this excerpt; presumably
 * NULL -- confirm.
 */
2422 U_INTERNAL
void * U_EXPORT2
2423 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2424 if(U_FAILURE(*status
)) return NULL
;
2425 *status
= U_UNSUPPORTED_ERROR
;
/*
 * Placeholder uprv_dl_close() for the "null (nonexistent) implementation"
 * branch: nothing is released.
 *
 * lib    - not used by any visible statement.
 * status - in/out error code.  If it already indicates failure on entry the
 *          function returns with it unchanged; otherwise it is set to
 *          U_UNSUPPORTED_ERROR.
 *
 * NOTE(review): the end of the function is not visible in this excerpt.
 */
2429 U_INTERNAL
void U_EXPORT2
2430 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2431 if(U_FAILURE(*status
)) return;
2432 *status
= U_UNSUPPORTED_ERROR
;
2437 U_INTERNAL UVoidFunction
* U_EXPORT2
2438 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2439 if(U_SUCCESS(*status
)) {
2440 *status
= U_UNSUPPORTED_ERROR
;
2442 return (UVoidFunction
*)NULL
;
2449 #elif U_PLATFORM_USES_ONLY_WIN32_API
/*
 * Win32 implementation: loads the library named libName with LoadLibraryA().
 *
 * libName - passed unchanged to LoadLibraryA().
 * status  - in/out error code.  If it already indicates failure on entry,
 *           NULL is returned without calling LoadLibraryA().  On the error
 *           path it is set to U_MISSING_RESOURCE_ERROR.
 *
 * NOTE(review): the declaration of `lib`, the condition guarding the error
 * path and the final return are not visible in this excerpt.  Presumably the
 * error path is taken when LoadLibraryA() yields NULL and `lib` is returned
 * as a void pointer -- confirm.
 */
2451 U_INTERNAL
void * U_EXPORT2
2452 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2455 if(U_FAILURE(*status
)) return NULL
;
2457 lib
= LoadLibraryA(libName
);
2460 *status
= U_MISSING_RESOURCE_ERROR
;
/*
 * Win32 implementation: releases a library handle with FreeLibrary().
 *
 * lib    - handle to release; cast to HMODULE before use.
 * status - in/out error code.  If it already indicates failure on entry the
 *          function returns without calling FreeLibrary().
 *
 * The result of FreeLibrary() is not examined by any visible statement.
 *
 * NOTE(review): the end of the function is not visible in this excerpt.
 */
2466 U_INTERNAL
void U_EXPORT2
2467 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2468 HMODULE handle
= (HMODULE
)lib
;
2469 if(U_FAILURE(*status
)) return;
2471 FreeLibrary(handle
);
/*
 * Win32 implementation: looks up the symbol `sym` in library `lib` with
 * GetProcAddress().
 *
 * lib    - library handle; cast to HMODULE before use.
 * sym    - symbol name, passed unchanged to GetProcAddress().
 * status - in/out error code.
 *
 * Returns NULL without any lookup when `status` already indicates failure or
 * when `lib` is NULL; in the latter case `status` is left unchanged.
 *
 * On the error path GetLastError() is consulted: ERROR_PROC_NOT_FOUND is
 * reported as U_MISSING_RESOURCE_ERROR, and the other visible assignment
 * reports U_UNSUPPORTED_ERROR.
 *
 * NOTE(review): the condition that guards the GetLastError() block, the
 * `else` joining the two status assignments, and the final return are not
 * visible in this excerpt.  Presumably the block runs when `addr` is NULL and
 * `addr` is returned -- confirm.
 */
2477 U_INTERNAL UVoidFunction
* U_EXPORT2
2478 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2479 HMODULE handle
= (HMODULE
)lib
;
2480 UVoidFunction
* addr
= NULL
;
2482 if(U_FAILURE(*status
) || lib
==NULL
) return NULL
;
2484 addr
= (UVoidFunction
*)GetProcAddress(handle
, sym
);
/* Read the Win32 error code right after the lookup, before any other visible call. */
2487 DWORD lastError
= GetLastError();
2488 if(lastError
== ERROR_PROC_NOT_FOUND
) {
2489 *status
= U_MISSING_RESOURCE_ERROR
;
2491 *status
= U_UNSUPPORTED_ERROR
; /* other unknown error. */
2501 /* No dynamic loading set. */
/*
 * Placeholder uprv_dl_open() for the "No dynamic loading set" branch: no
 * library is loaded.
 *
 * libName - not used by any visible statement.
 * status  - in/out error code.  If it already indicates failure on entry,
 *           NULL is returned and it is left unchanged; otherwise it is set
 *           to U_UNSUPPORTED_ERROR.
 *
 * NOTE(review): any statement before the status guard and the final return
 * are not visible in this excerpt; presumably NULL is returned -- confirm.
 */
2503 U_INTERNAL
void * U_EXPORT2
2504 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2506 if(U_FAILURE(*status
)) return NULL
;
2507 *status
= U_UNSUPPORTED_ERROR
;
/*
 * Placeholder uprv_dl_close() for the "No dynamic loading set" branch:
 * nothing is released.
 *
 * lib    - not used by any visible statement.
 * status - in/out error code.  If it already indicates failure on entry the
 *          function returns with it unchanged; otherwise it is set to
 *          U_UNSUPPORTED_ERROR.
 *
 * NOTE(review): any statement before the status guard and the end of the
 * function are not visible in this excerpt.
 */
2511 U_INTERNAL
void U_EXPORT2
2512 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2514 if(U_FAILURE(*status
)) return;
2515 *status
= U_UNSUPPORTED_ERROR
;
/*
 * Placeholder uprv_dlsym_func() for the "No dynamic loading set" branch: no
 * symbol is ever looked up.
 *
 * lib    - not used by any visible statement.
 * sym    - not used by any visible statement.
 * status - in/out error code.  It is set to U_UNSUPPORTED_ERROR if it
 *          indicated success on entry; otherwise it is left unchanged.
 *
 * The visible return yields a NULL function pointer.
 *
 * NOTE(review): statements between the signature and the status check are
 * not visible in this excerpt -- confirm they have no effect on the result.
 */
2520 U_INTERNAL UVoidFunction
* U_EXPORT2
2521 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2524 if(U_SUCCESS(*status
)) {
2525 *status
= U_UNSUPPORTED_ERROR
;
2527 return (UVoidFunction
*)NULL
;
2530 #endif /* U_ENABLE_DYLOAD */
2533 * Hey, Emacs, please set the following:
2536 * indent-tabs-mode: nil