1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1997-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
11 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
13 * Date Name Description
14 * 04/14/97 aliu Creation.
15 * 04/24/97 aliu Added getDefaultDataDirectory() and
16 * getDefaultLocaleID().
17 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
18 * for assumed case. Non-UNIX platforms must be
19 * special-cased. Rewrote numeric methods dealing
20 * with NaN and Infinity to be platform independent
21 * over all IEEE 754 platforms.
22 * 05/13/97 aliu Restored sign of timezone
23 * (semantics are hours West of GMT)
24 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
26 * 07/22/98 stephen Added remainder, max, min, trunc
27 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28 * 08/24/98 stephen Added longBitsFromDouble
29 * 09/08/98 stephen Minor changes for Mac Port
30 * 03/02/99 stephen Removed openFile(). Added AS400 support.
32 * 04/15/99 stephen Converted to C.
33 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34 * 08/04/99 jeffrey R. Added OS/2 changes
35 * 11/15/99 helena Integrated S/390 IEEE support.
36 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38 * 01/03/08 Steven L. Fake Time Support
39 ******************************************************************************
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
55 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
75 /* Include standard headers. */
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
95 #ifndef WIN32_LEAN_AND_MEAN
96 # define WIN32_LEAN_AND_MEAN
103 # include <windows.h>
104 # include "unicode/uloc.h"
105 #if U_PLATFORM_HAS_WINUWP_API == 0
107 #else // U_PLATFORM_HAS_WINUWP_API
108 typedef PVOID LPMSG
; // TODO: figure out how to get rid of this typedef
109 #include <Windows.Globalization.h>
110 #include <windows.system.userprofile.h>
111 #include <wrl/wrappers/corewrappers.h>
112 #include <wrl/client.h>
114 using namespace ABI::Windows::Foundation
;
115 using namespace Microsoft::WRL
;
116 using namespace Microsoft::WRL::Wrappers
;
118 #elif U_PLATFORM == U_PF_OS400
120 # include <qusec.h> /* error code structure */
121 # include <qusrjobi.h>
122 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
123 # include <mih/testptr.h> /* For uprv_maximumPtr */
124 #elif U_PLATFORM == U_PF_OS390
125 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
126 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
129 # if U_PLATFORM == U_PF_SOLARIS
134 #elif U_PLATFORM == U_PF_QNX
135 # include <sys/neutrino.h>
139 * Only include langinfo.h if we have a way to get the codeset. If we later
140 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
144 #if U_HAVE_NL_LANGINFO_CODESET
145 #include <langinfo.h>
149 * Simple things (presence of functions, etc) should just go in configure.in and be added to
150 * icucfg.h via autoheader.
152 #if U_PLATFORM_IMPLEMENTS_POSIX
153 # if U_PLATFORM == U_PF_OS400
154 # define HAVE_DLFCN_H 0
155 # define HAVE_DLOPEN 0
157 # ifndef HAVE_DLFCN_H
158 # define HAVE_DLFCN_H 1
161 # define HAVE_DLOPEN 1
164 # ifndef HAVE_GETTIMEOFDAY
165 # define HAVE_GETTIMEOFDAY 1
168 # define HAVE_DLFCN_H 0
169 # define HAVE_DLOPEN 0
170 # define HAVE_GETTIMEOFDAY 0
175 /* Define the extension for data files, again... */
176 #define DATA_TYPE "dat"
178 /* Leave this copyright notice here! */
179 static const char copyright
[] = U_COPYRIGHT_STRING
;
181 /* floating point implementations ------------------------------------------- */
183 /* We return QNAN rather than SNAN*/
184 #define SIGN 0x80000000U
186 /* Make it easy to define certain types of constants */
188 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
190 } BitPatternConversion
;
191 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
192 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
194 /*---------------------------------------------------------------------------
196 Our general strategy is to assume we're on a POSIX platform. Platforms which
197 are non-POSIX must declare themselves so. The default POSIX implementation
198 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
200 ---------------------------------------------------------------------------*/
202 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
203 # undef U_POSIX_LOCALE
205 # define U_POSIX_LOCALE 1
209 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
210 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
214 u_topNBytesOfDouble(double* d
, int n
)
219 return (char*)(d
+ 1) - n
;
224 u_bottomNBytesOfDouble(double* d
, int n
)
227 return (char*)(d
+ 1) - n
;
232 #endif /* !IEEE_754 */
236 u_signBit(double d
) {
239 hiByte
= *(uint8_t *)&d
;
241 hiByte
= *(((uint8_t *)&d
) + sizeof(double) - 1);
243 return (hiByte
& 0x80) != 0;
249 #if defined (U_DEBUG_FAKETIME)
250 /* Override the clock to test things without having to move the system clock.
251 * Assumes POSIX gettimeofday() will function
253 UDate fakeClock_t0
= 0; /** Time to start the clock from **/
254 UDate fakeClock_dt
= 0; /** Offset (fake time - real time) **/
255 UBool fakeClock_set
= FALSE
; /** True if fake clock has spun up **/
256 static UMutex fakeClockMutex
= U_MUTEX_INTIALIZER
;
258 static UDate
getUTCtime_real() {
259 struct timeval posixTime
;
260 gettimeofday(&posixTime
, NULL
);
261 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
264 static UDate
getUTCtime_fake() {
265 umtx_lock(&fakeClockMutex
);
267 UDate real
= getUTCtime_real();
268 const char *fake_start
= getenv("U_FAKETIME_START");
269 if((fake_start
!=NULL
) && (fake_start
[0]!=0)) {
270 sscanf(fake_start
,"%lf",&fakeClock_t0
);
271 fakeClock_dt
= fakeClock_t0
- real
;
272 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
273 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
274 fakeClock_t0
, fake_start
, fakeClock_dt
, real
);
277 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
278 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
280 fakeClock_set
= TRUE
;
282 umtx_unlock(&fakeClockMutex
);
284 return getUTCtime_real() + fakeClock_dt
;
288 #if U_PLATFORM_USES_ONLY_WIN32_API
292 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
294 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
295 #define EPOCH_BIAS INT64_C(116444736000000000)
296 #define HECTONANOSECOND_PER_MILLISECOND 10000
300 /*---------------------------------------------------------------------------
301 Universal Implementations
302 These are designed to work on all platforms. Try these, and if they
303 don't work on your platform, then special case your platform with new
305 ---------------------------------------------------------------------------*/
307 U_CAPI UDate U_EXPORT2
310 #if defined(U_DEBUG_FAKETIME)
311 return getUTCtime_fake(); /* Hook for overriding the clock */
313 return uprv_getRawUTCtime();
317 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
318 U_CAPI UDate U_EXPORT2
321 #if U_PLATFORM_USES_ONLY_WIN32_API
323 FileTimeConversion winTime
;
324 GetSystemTimeAsFileTime(&winTime
.fileTime
);
325 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
328 #if HAVE_GETTIMEOFDAY
329 struct timeval posixTime
;
330 gettimeofday(&posixTime
, NULL
);
331 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
335 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
341 /*-----------------------------------------------------------------------------
343 These methods detect and return NaN and infinity values for doubles
344 conforming to IEEE 754. Platforms which support this standard include X86,
345 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
346 If this doesn't work on your platform, you have non-IEEE floating-point, and
347 will need to code your own versions. A naive implementation is to return 0.0
348 for getNaN and getInfinity, and false for isNaN and isInfinite.
349 ---------------------------------------------------------------------------*/
351 U_CAPI UBool U_EXPORT2
352 uprv_isNaN(double number
)
355 BitPatternConversion convertedNumber
;
356 convertedNumber
.d64
= number
;
357 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
358 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
360 #elif U_PLATFORM == U_PF_OS390
361 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
363 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
366 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
367 (lowBits
== 0x00000000L
);
370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
371 /* you'll need to replace this default implementation with what's correct*/
372 /* for your platform.*/
373 return number
!= number
;
377 U_CAPI UBool U_EXPORT2
378 uprv_isInfinite(double number
)
381 BitPatternConversion convertedNumber
;
382 convertedNumber
.d64
= number
;
383 /* Infinity is exactly 0x7FF0000000000000U. */
384 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
385 #elif U_PLATFORM == U_PF_OS390
386 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
388 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
391 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
394 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
395 /* value, you'll need to replace this default implementation with what's*/
396 /* correct for your platform.*/
397 return number
== (2.0 * number
);
401 U_CAPI UBool U_EXPORT2
402 uprv_isPositiveInfinity(double number
)
404 #if IEEE_754 || U_PLATFORM == U_PF_OS390
405 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
407 return uprv_isInfinite(number
);
411 U_CAPI UBool U_EXPORT2
412 uprv_isNegativeInfinity(double number
)
414 #if IEEE_754 || U_PLATFORM == U_PF_OS390
415 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
418 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
420 return((highBits
& SIGN
) && uprv_isInfinite(number
));
425 U_CAPI
double U_EXPORT2
428 #if IEEE_754 || U_PLATFORM == U_PF_OS390
431 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
432 /* you'll need to replace this default implementation with what's correct*/
433 /* for your platform.*/
438 U_CAPI
double U_EXPORT2
441 #if IEEE_754 || U_PLATFORM == U_PF_OS390
444 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
445 /* value, you'll need to replace this default implementation with what's*/
446 /* correct for your platform.*/
451 U_CAPI
double U_EXPORT2
457 U_CAPI
double U_EXPORT2
463 U_CAPI
double U_EXPORT2
466 return uprv_floor(x
+ 0.5);
469 U_CAPI
double U_EXPORT2
475 U_CAPI
double U_EXPORT2
476 uprv_modf(double x
, double* y
)
481 U_CAPI
double U_EXPORT2
482 uprv_fmod(double x
, double y
)
487 U_CAPI
double U_EXPORT2
488 uprv_pow(double x
, double y
)
490 /* This is declared as "double pow(double x, double y)" */
494 U_CAPI
double U_EXPORT2
495 uprv_pow10(int32_t x
)
497 return pow(10.0, (double)x
);
500 U_CAPI
double U_EXPORT2
501 uprv_fmax(double x
, double y
)
504 /* first handle NaN*/
505 if(uprv_isNaN(x
) || uprv_isNaN(y
))
506 return uprv_getNaN();
508 /* check for -0 and 0*/
509 if(x
== 0.0 && y
== 0.0 && u_signBit(x
))
514 /* this should work for all flt point w/o NaN and Inf special cases */
515 return (x
> y
? x
: y
);
518 U_CAPI
double U_EXPORT2
519 uprv_fmin(double x
, double y
)
522 /* first handle NaN*/
523 if(uprv_isNaN(x
) || uprv_isNaN(y
))
524 return uprv_getNaN();
526 /* check for -0 and 0*/
527 if(x
== 0.0 && y
== 0.0 && u_signBit(y
))
532 /* this should work for all flt point w/o NaN and Inf special cases */
533 return (x
> y
? y
: x
);
536 U_CAPI UBool U_EXPORT2
537 uprv_add32_overflow(int32_t a
, int32_t b
, int32_t* res
) {
538 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
539 // This function could be optimized by calling one of those primitives.
540 auto a64
= static_cast<int64_t>(a
);
541 auto b64
= static_cast<int64_t>(b
);
542 int64_t res64
= a64
+ b64
;
543 *res
= static_cast<int32_t>(res64
);
544 return res64
!= *res
;
547 U_CAPI UBool U_EXPORT2
548 uprv_mul32_overflow(int32_t a
, int32_t b
, int32_t* res
) {
549 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
550 // This function could be optimized by calling one of those primitives.
551 auto a64
= static_cast<int64_t>(a
);
552 auto b64
= static_cast<int64_t>(b
);
553 int64_t res64
= a64
* b64
;
554 *res
= static_cast<int32_t>(res64
);
555 return res64
!= *res
;
559 * Truncates the given double.
560 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
561 * This is different than calling floor() or ceil():
562 * floor(3.3) = 3, floor(-3.3) = -4
563 * ceil(3.3) = 4, ceil(-3.3) = -3
565 U_CAPI
double U_EXPORT2
569 /* handle error cases*/
571 return uprv_getNaN();
572 if(uprv_isInfinite(d
))
573 return uprv_getInfinity();
575 if(u_signBit(d
)) /* Signbit() picks up -0.0; d<0 does not. */
581 return d
>= 0 ? floor(d
) : ceil(d
);
587 * Return the largest positive number that can be represented by an integer
588 * type of arbitrary bit length.
590 U_CAPI
double U_EXPORT2
591 uprv_maxMantissa(void)
593 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
596 U_CAPI
double U_EXPORT2
602 U_CAPI
void * U_EXPORT2
603 uprv_maximumPtr(void * base
)
605 #if U_PLATFORM == U_PF_OS400
607 * With the provided function we should never be out of range of a given segment
608 * (a traditional/typical segment that is). Our segments have 5 bytes for the
609 * id and 3 bytes for the offset. The key is that the casting takes care of
610 * only retrieving the offset portion minus x1000. Hence, the smallest offset
611 * seen in a program is x001000 and when casted to an int would be 0.
612 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
614 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
615 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
616 * This function determines the activation based on the pointer that is passed in and
617 * calculates the appropriate maximum available size for
618 * each pointer type (TERASPACE and non-TERASPACE)
620 * Unlike other operating systems, the pointer model isn't determined at
621 * compile time on i5/OS.
623 if ((base
!= NULL
) && (_TESTPTR(base
, _C_TERASPACE_CHECK
))) {
624 /* if it is a TERASPACE pointer the max is 2GB - 4k */
625 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0x7fffefff)));
627 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
628 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0xffefff)));
631 return U_MAX_PTR(base
);
635 /*---------------------------------------------------------------------------
636 Platform-specific Implementations
637 Try these, and if they don't work on your platform, then special case your
638 platform with new implementations.
639 ---------------------------------------------------------------------------*/
641 /* Generic time zone layer -------------------------------------------------- */
643 /* Time zone utilities */
644 U_CAPI
void U_EXPORT2
650 /* no initialization*/
654 U_CAPI
int32_t U_EXPORT2
665 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
666 #if U_PLATFORM != U_PF_IPHONE
667 UBool dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
669 t1
= mktime(&tmrec
); /* local time in seconds*/
670 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
671 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
674 #if U_PLATFORM != U_PF_IPHONE
675 /* imitate NT behaviour, which returns same timezone offset to GMT for
677 This does not work on all platforms. For instance, on glibc on Linux
678 and on Mac OS 10.5, tdiff calculated above remains the same
679 regardless of whether DST is in effect or not. iOS is another
680 platform where this does not work. Linux + glibc and Mac OS 10.5
681 have U_TIMEZONE defined so that this code is not reached.
690 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
691 some platforms need to have it declared here. */
693 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
694 /* RS6000 and others reject char **tzname. */
695 extern U_IMPORT
char *U_TZNAME
[];
698 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
699 /* These platforms are likely to use Olson timezone IDs. */
700 /* common targets of the symbolic link at TZDEFAULT are:
701 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
702 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
703 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
704 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
705 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
706 * To avoid checking lots of paths, just check that the target path
707 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
710 #define CHECK_LOCALTIME_LINK 1
711 #if U_PLATFORM_IS_DARWIN_BASED
713 #define TZZONEINFO (TZDIR "/")
714 #elif U_PLATFORM == U_PF_SOLARIS
715 #define TZDEFAULT "/etc/localtime"
716 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
717 #define TZ_ENV_CHECK "localtime"
719 #define TZDEFAULT "/etc/localtime"
720 #define TZZONEINFO "/usr/share/zoneinfo/"
722 #define TZZONEINFOTAIL "/zoneinfo/"
724 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
725 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
726 symlinked to /etc/localtime, which makes searchForTZFile return
727 'localtime' when it's the first match. */
728 #define TZFILE_SKIP2 "localtime"
729 #define SEARCH_TZFILE
730 #include <dirent.h> /* Needed to search through system timezone files */
732 static char gTimeZoneBuffer
[PATH_MAX
];
733 static char *gTimeZoneBufferPtr
= NULL
;
736 #if !U_PLATFORM_USES_ONLY_WIN32_API
737 #define isNonDigit(ch) (ch < '0' || '9' < ch)
738 static UBool
isValidOlsonID(const char *id
) {
741 /* Determine if this is something like Iceland (Olson ID)
742 or AST4ADT (non-Olson ID) */
743 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
747 /* If we went through the whole string, then it might be okay.
748 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
749 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
750 The rest of the time it could be an Olson ID. George */
751 return (UBool
)(id
[idx
] == 0
752 || uprv_strcmp(id
, "PST8PDT") == 0
753 || uprv_strcmp(id
, "MST7MDT") == 0
754 || uprv_strcmp(id
, "CST6CDT") == 0
755 || uprv_strcmp(id
, "EST5EDT") == 0);
758 /* On some Unix-like OS, 'posix' subdirectory in
759 /usr/share/zoneinfo replicates the top-level contents. 'right'
760 subdirectory has the same set of files, but individual files
761 are different from those in the top-level directory or 'posix'
762 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
764 When the first match for /etc/localtime is in either of them
765 (usually in posix because 'right' has different file contents),
766 or TZ environment variable points to one of them, createTimeZone
767 fails because, say, 'posix/America/New_York' is not an Olson
768 timezone id ('America/New_York' is). So, we have to skip
769 'posix/' and 'right/' at the beginning. */
770 static void skipZoneIDPrefix(const char** id
) {
771 if (uprv_strncmp(*id
, "posix/", 6) == 0
772 || uprv_strncmp(*id
, "right/", 6) == 0)
779 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
781 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
782 typedef struct OffsetZoneMapping
{
783 int32_t offsetSeconds
;
784 int32_t daylightType
; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
790 enum { U_DAYLIGHT_NONE
=0,U_DAYLIGHT_JUNE
=1,U_DAYLIGHT_DECEMBER
=2 };
793 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
794 and maps it to an Olson ID.
795 Before adding anything to this list, take a look at
796 icu/source/tools/tzcode/tz.alias
797 Sometimes no daylight savings (0) is important to define due to aliases.
798 This list can be tested with icu/source/test/compat/tzone.pl
799 More values could be added to daylightType to increase precision.
801 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS
[] = {
802 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
803 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
804 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
805 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
806 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
807 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
808 {-36000, 2, "EST", "EST", "Australia/Sydney"},
809 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
810 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
811 {-34200, 2, "CST", "CST", "Australia/South"},
812 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
813 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
814 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
815 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
816 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
817 {-28800, 2, "WST", "WST", "Australia/West"},
818 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
819 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
820 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
821 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
822 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
823 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
824 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
825 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
826 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
827 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
828 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
829 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
830 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
831 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
832 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
833 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
834 {0, 1, "GMT", "IST", "Europe/Dublin"},
835 {0, 1, "GMT", "BST", "Europe/London"},
836 {0, 0, "WET", "WEST", "Africa/Casablanca"},
837 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
838 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
839 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
840 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
841 {10800, 2, "UYT", "UYST", "America/Montevideo"},
842 {10800, 1, "WGT", "WGST", "America/Godthab"},
843 {10800, 2, "BRT", "BRST", "Brazil/East"},
844 {12600, 1, "NST", "NDT", "America/St_Johns"},
845 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
846 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
847 {14400, 2, "CLT", "CLST", "Chile/Continental"},
848 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
849 {14400, 2, "PYT", "PYST", "America/Asuncion"},
850 {18000, 1, "CST", "CDT", "America/Havana"},
851 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
852 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
853 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
854 {21600, 0, "CST", "CDT", "America/Guatemala"},
855 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
856 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
857 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
858 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
859 {32400, 1, "AKST", "AKDT", "US/Alaska"},
860 {36000, 1, "HAST", "HADT", "US/Aleutian"}
863 /*#define DEBUG_TZNAME*/
865 static const char* remapShortTimeZone(const char *stdID
, const char *dstID
, int32_t daylightType
, int32_t offset
)
869 fprintf(stderr
, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID
, dstID
, daylightType
, offset
);
871 for (idx
= 0; idx
< UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS
); idx
++)
873 if (offset
== OFFSET_ZONE_MAPPINGS
[idx
].offsetSeconds
874 && daylightType
== OFFSET_ZONE_MAPPINGS
[idx
].daylightType
875 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].stdID
, stdID
) == 0
876 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].dstID
, dstID
) == 0)
878 return OFFSET_ZONE_MAPPINGS
[idx
].olsonID
;
886 #define MAX_READ_SIZE 512
888 typedef struct DefaultTZInfo
{
889 char* defaultTZBuffer
;
890 int64_t defaultTZFileSize
;
891 FILE* defaultTZFilePtr
;
892 UBool defaultTZstatus
;
893 int32_t defaultTZPosition
;
897 * This method compares the two files given to see if they are a match.
898 * It is currently used to compare two TZ files.
900 static UBool
compareBinaryFiles(const char* defaultTZFileName
, const char* TZFileName
, DefaultTZInfo
* tzInfo
) {
903 int64_t sizeFileLeft
;
904 int32_t sizeFileRead
;
905 int32_t sizeFileToRead
;
906 char bufferFile
[MAX_READ_SIZE
];
909 if (tzInfo
->defaultTZFilePtr
== NULL
) {
910 tzInfo
->defaultTZFilePtr
= fopen(defaultTZFileName
, "r");
912 file
= fopen(TZFileName
, "r");
914 tzInfo
->defaultTZPosition
= 0; /* reset position to begin search */
916 if (file
!= NULL
&& tzInfo
->defaultTZFilePtr
!= NULL
) {
917 /* First check that the file size are equal. */
918 if (tzInfo
->defaultTZFileSize
== 0) {
919 fseek(tzInfo
->defaultTZFilePtr
, 0, SEEK_END
);
920 tzInfo
->defaultTZFileSize
= ftell(tzInfo
->defaultTZFilePtr
);
922 fseek(file
, 0, SEEK_END
);
923 sizeFile
= ftell(file
);
924 sizeFileLeft
= sizeFile
;
926 if (sizeFile
!= tzInfo
->defaultTZFileSize
) {
929 /* Store the data from the files in separate buffers and
930 * compare each byte to determine equality.
932 if (tzInfo
->defaultTZBuffer
== NULL
) {
933 rewind(tzInfo
->defaultTZFilePtr
);
934 tzInfo
->defaultTZBuffer
= (char*)uprv_malloc(sizeof(char) * tzInfo
->defaultTZFileSize
);
935 sizeFileRead
= fread(tzInfo
->defaultTZBuffer
, 1, tzInfo
->defaultTZFileSize
, tzInfo
->defaultTZFilePtr
);
938 while(sizeFileLeft
> 0) {
939 uprv_memset(bufferFile
, 0, MAX_READ_SIZE
);
940 sizeFileToRead
= sizeFileLeft
< MAX_READ_SIZE
? sizeFileLeft
: MAX_READ_SIZE
;
942 sizeFileRead
= fread(bufferFile
, 1, sizeFileToRead
, file
);
943 if (memcmp(tzInfo
->defaultTZBuffer
+ tzInfo
->defaultTZPosition
, bufferFile
, sizeFileRead
) != 0) {
947 sizeFileLeft
-= sizeFileRead
;
948 tzInfo
->defaultTZPosition
+= sizeFileRead
;
963 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
966 static UBool U_CALLCONV
putil_cleanup(void);
967 static CharString
*gSearchTZFileResult
= NULL
;
970 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
971 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
973 static char* searchForTZFile(const char* path
, DefaultTZInfo
* tzInfo
) {
975 struct dirent
* dirEntry
= NULL
;
977 UErrorCode status
= U_ZERO_ERROR
;
979 /* Save the current path */
980 CharString
curpath(path
, -1, status
);
981 if (U_FAILURE(status
)) {
982 goto cleanupAndReturn
;
985 dirp
= opendir(path
);
987 goto cleanupAndReturn
;
990 if (gSearchTZFileResult
== NULL
) {
991 gSearchTZFileResult
= new CharString
;
992 if (gSearchTZFileResult
== NULL
) {
993 goto cleanupAndReturn
;
995 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
998 /* Check each entry in the directory. */
999 while((dirEntry
= readdir(dirp
)) != NULL
) {
1000 const char* dirName
= dirEntry
->d_name
;
1001 if (uprv_strcmp(dirName
, SKIP1
) != 0 && uprv_strcmp(dirName
, SKIP2
) != 0) {
1002 /* Create a newpath with the new entry to test each entry in the directory. */
1003 CharString
newpath(curpath
, status
);
1004 newpath
.append(dirName
, -1, status
);
1005 if (U_FAILURE(status
)) {
1009 DIR* subDirp
= NULL
;
1010 if ((subDirp
= opendir(newpath
.data())) != NULL
) {
1011 /* If this new path is a directory, make a recursive call with the newpath. */
1013 newpath
.append('/', status
);
1014 if (U_FAILURE(status
)) {
1017 result
= searchForTZFile(newpath
.data(), tzInfo
);
1019 Have to get out here. Otherwise, we'd keep looking
1020 and return the first match in the top-level directory
1021 if there's a match in the top-level. If not, this function
1022 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1023 It worked without this in most cases because we have a fallback of calling
1024 localtime_r to figure out the default timezone.
1028 } else if (uprv_strcmp(TZFILE_SKIP
, dirName
) != 0 && uprv_strcmp(TZFILE_SKIP2
, dirName
) != 0) {
1029 if(compareBinaryFiles(TZDEFAULT
, newpath
.data(), tzInfo
)) {
1030 int32_t amountToSkip
= sizeof(TZZONEINFO
) - 1;
1031 if (amountToSkip
> newpath
.length()) {
1032 amountToSkip
= newpath
.length();
1034 const char* zoneid
= newpath
.data() + amountToSkip
;
1035 skipZoneIDPrefix(&zoneid
);
1036 gSearchTZFileResult
->clear();
1037 gSearchTZFileResult
->append(zoneid
, -1, status
);
1038 if (U_FAILURE(status
)) {
1041 result
= gSearchTZFileResult
->data();
1042 /* Get out after the first one found. */
1057 U_CAPI
void U_EXPORT2
1058 uprv_tzname_clear_cache()
1060 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1061 gTimeZoneBufferPtr
= NULL
;
1065 // With the Universal Windows Platform we can just ask Windows for the name
1066 #if U_PLATFORM_HAS_WINUWP_API
1067 U_CAPI
const char* U_EXPORT2
1068 uprv_getWindowsTimeZone()
1070 // Get default Windows timezone.
1071 ComPtr
<IInspectable
> calendar
;
1072 HRESULT hr
= RoActivateInstance(
1073 HStringReference(RuntimeClass_Windows_Globalization_Calendar
).Get(),
1077 ComPtr
<ABI::Windows::Globalization::ITimeZoneOnCalendar
> timezone
;
1078 hr
= calendar
.As(&timezone
);
1081 HString timezoneString
;
1082 hr
= timezone
->GetTimeZone(timezoneString
.GetAddressOf());
1085 int32_t length
= static_cast<int32_t>(wcslen(timezoneString
.GetRawBuffer(NULL
)));
1086 char* asciiId
= (char*)uprv_calloc(length
+ 1, sizeof(char));
1087 if (asciiId
!= nullptr)
1089 u_UCharsToChars((UChar
*)timezoneString
.GetRawBuffer(NULL
), asciiId
, length
);
/*
 * uprv_tzname: determines the default time zone ID.  Steps, in source order:
 *  - Windows-only builds ask uprv_getWindowsTimeZone() (UWP) or
 *    uprv_detectWindowsTimeZone(); the fallback there is a uprv_strdup'ed
 *    "Etc/UTC", because the Windows caller frees the returned pointer.
 *  - Elsewhere the TZ environment variable is used when isValidOlsonID()
 *    accepts it (on Solaris it must also differ from TZ_ENV_CHECK); a leading
 *    ':' is handled and skipZoneIDPrefix() is applied.
 *  - With CHECK_LOCALTIME_LINK, readlink(TZDEFAULT) fills gTimeZoneBuffer and
 *    the text following TZZONEINFOTAIL is cached in gTimeZoneBufferPtr when it
 *    is a valid Olson ID; with SEARCH_TZFILE the result of
 *    searchForTZFile(TZZONEINFO, ...) is cached instead.
 *  - Finally U_TZNAME is consulted: Windows returns a copy of U_TZNAME[n];
 *    other platforms probe tm_isdst at the 2007 June and December solstices
 *    and pass the abbreviations to remapShortTimeZone().
 * The parameter n selects the U_TZNAME entry.
 * NOTE(review): several control-flow lines (else / endif / return, the check
 * on readlink's result) are not visible in this listing; confirm the exact
 * nesting before relying on this summary.
 */
1101 U_CAPI
const char* U_EXPORT2
1104 (void)n
; // Avoid unreferenced parameter warning.
1105 const char *tzid
= NULL
;
1106 #if U_PLATFORM_USES_ONLY_WIN32_API
1107 #if U_PLATFORM_HAS_WINUWP_API > 0
1108 tzid
= uprv_getWindowsTimeZone();
1110 tzid
= uprv_detectWindowsTimeZone();
1118 // The return value is free'd in timezone.cpp on Windows because
1119 // the other code path returns a pointer to a heap location.
1120 // If we don't have a name already, then tzname wouldn't be any
1121 // better, so just fall back.
1122 return uprv_strdup("Etc/UTC");
1127 /*#if U_PLATFORM_IS_DARWIN_BASED
1130 tzid = getenv("TZFILE");
1136 /* This code can be temporarily disabled to test tzname resolution later on. */
1137 #ifndef DEBUG_TZNAME
1138 tzid
= getenv("TZ");
1139 if (tzid
!= NULL
&& isValidOlsonID(tzid
)
1140 #if U_PLATFORM == U_PF_SOLARIS
1141 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1142 && uprv_strcmp(tzid
, TZ_ENV_CHECK
) != 0
1145 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1146 if (tzid
[0] == ':') {
1149 /* This might be a good Olson ID. */
1150 skipZoneIDPrefix(&tzid
);
1153 /* else U_TZNAME will give a better result. */
1156 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1157 /* Caller must handle threading issues */
1158 if (gTimeZoneBufferPtr
== NULL
) {
1160 This is a trick to look at the name of the link to get the Olson ID
1161 because the tzfile contents is underspecified.
1162 This isn't guaranteed to work because it may not be a symlink.
1164 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
)-1);
1166 int32_t tzZoneInfoTailLen
= uprv_strlen(TZZONEINFOTAIL
);
1167 gTimeZoneBuffer
[ret
] = 0;
1168 char * tzZoneInfoTailPtr
= uprv_strstr(gTimeZoneBuffer
, TZZONEINFOTAIL
);
1170 if (tzZoneInfoTailPtr
!= NULL
1171 && isValidOlsonID(tzZoneInfoTailPtr
+ tzZoneInfoTailLen
))
1173 return (gTimeZoneBufferPtr
= tzZoneInfoTailPtr
+ tzZoneInfoTailLen
);
1176 #if defined(SEARCH_TZFILE)
1177 DefaultTZInfo
* tzInfo
= (DefaultTZInfo
*)uprv_malloc(sizeof(DefaultTZInfo
));
1178 if (tzInfo
!= NULL
) {
1179 tzInfo
->defaultTZBuffer
= NULL
;
1180 tzInfo
->defaultTZFileSize
= 0;
1181 tzInfo
->defaultTZFilePtr
= NULL
;
1182 tzInfo
->defaultTZstatus
= FALSE
;
1183 tzInfo
->defaultTZPosition
= 0;
1185 gTimeZoneBufferPtr
= searchForTZFile(TZZONEINFO
, tzInfo
);
1187 /* Free previously allocated memory */
1188 if (tzInfo
->defaultTZBuffer
!= NULL
) {
1189 uprv_free(tzInfo
->defaultTZBuffer
);
1191 if (tzInfo
->defaultTZFilePtr
!= NULL
) {
1192 fclose(tzInfo
->defaultTZFilePtr
);
1197 if (gTimeZoneBufferPtr
!= NULL
&& isValidOlsonID(gTimeZoneBufferPtr
)) {
1198 return gTimeZoneBufferPtr
;
1204 return gTimeZoneBufferPtr
;
1210 #if U_PLATFORM_USES_ONLY_WIN32_API
1211 /* The return value is free'd in timezone.cpp on Windows because
1212 * the other code path returns a pointer to a heap location. */
1213 return uprv_strdup(U_TZNAME
[n
]);
1216 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1217 So we remap the abbreviation to an olson ID.
1219 Since Windows exposes a little more timezone information,
1220 we normally don't use this code on Windows because
1221 uprv_detectWindowsTimeZone should have already given the correct answer.
1224 struct tm juneSol
, decemberSol
;
1226 static const time_t juneSolstice
=1182478260; /*2007-06-21 18:11 UT*/
1227 static const time_t decemberSolstice
=1198332540; /*2007-12-22 06:09 UT*/
1229 /* This probing will tell us when daylight savings occurs. */
1230 localtime_r(&juneSolstice
, &juneSol
);
1231 localtime_r(&decemberSolstice
, &decemberSol
);
1232 if(decemberSol
.tm_isdst
> 0) {
1233 daylightType
= U_DAYLIGHT_DECEMBER
;
1234 } else if(juneSol
.tm_isdst
> 0) {
1235 daylightType
= U_DAYLIGHT_JUNE
;
1237 daylightType
= U_DAYLIGHT_NONE
;
1239 tzid
= remapShortTimeZone(U_TZNAME
[0], U_TZNAME
[1], daylightType
, uprv_timezone());
1251 /* Get and set the ICU data directory --------------------------------------- */
1253 static icu::UInitOnce gDataDirInitOnce
= U_INITONCE_INITIALIZER
;
1254 static char *gDataDirectory
= NULL
;
1256 UInitOnce gTimeZoneFilesInitOnce
= U_INITONCE_INITIALIZER
;
1257 static CharString
*gTimeZoneFilesDirectory
= NULL
;
1259 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1260 static const char *gCorrectedPOSIXLocale
= NULL
; /* Sometimes heap allocated */
1261 static bool gCorrectedPOSIXLocaleHeapAllocated
= false;
1264 static UBool U_CALLCONV
putil_cleanup(void)
1266 if (gDataDirectory
&& *gDataDirectory
) {
1267 uprv_free(gDataDirectory
);
1269 gDataDirectory
= NULL
;
1270 gDataDirInitOnce
.reset();
1272 delete gTimeZoneFilesDirectory
;
1273 gTimeZoneFilesDirectory
= NULL
;
1274 gTimeZoneFilesInitOnce
.reset();
1276 #ifdef SEARCH_TZFILE
1277 delete gSearchTZFileResult
;
1278 gSearchTZFileResult
= NULL
;
1281 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1282 if (gCorrectedPOSIXLocale
&& gCorrectedPOSIXLocaleHeapAllocated
) {
1283 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale
));
1284 gCorrectedPOSIXLocale
= NULL
;
1285 gCorrectedPOSIXLocaleHeapAllocated
= false;
1292 * Set the data directory.
1293 * Make a copy of the passed string, and set the global data dir to point to it.
1295 U_CAPI
void U_EXPORT2
1296 u_setDataDirectory(const char *directory
) {
1300 if(directory
==NULL
|| *directory
==0) {
1301 /* A small optimization to prevent the malloc and copy when the
1302 shared library is used, and this is a way to make sure that NULL
1305 newDataDir
= (char *)"";
1308 length
=(int32_t)uprv_strlen(directory
);
1309 newDataDir
= (char *)uprv_malloc(length
+ 2);
1310 /* Exit out if newDataDir could not be created. */
1311 if (newDataDir
== NULL
) {
1314 uprv_strcpy(newDataDir
, directory
);
1316 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1319 while((p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) != NULL
) {
1320 *p
= U_FILE_SEP_CHAR
;
1326 if (gDataDirectory
&& *gDataDirectory
) {
1327 uprv_free(gDataDirectory
);
1329 gDataDirectory
= newDataDir
;
1330 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1333 U_CAPI UBool U_EXPORT2
1334 uprv_pathIsAbsolute(const char *path
)
1336 if(!path
|| !*path
) {
1340 if(*path
== U_FILE_SEP_CHAR
) {
1344 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1345 if(*path
== U_FILE_ALT_SEP_CHAR
) {
1350 #if U_PLATFORM_USES_ONLY_WIN32_API
1351 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
1352 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
1361 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1362 until some client wrapper makefiles are updated */
1363 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1364 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1365 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
/*
 * dataDirectoryInitFn: one-time initializer (run through gDataDirInitOnce by
 * u_getDataDirectory) that chooses the ICU data directory and installs it
 * with u_setDataDirectory().  Candidates, in source order:
 *  - the ICU_DATA environment variable, unless ICU_NO_USER_DATA_OVERRIDE or
 *    UCONFIG_NO_FILE_IO is in effect, and not on Windows UWP;
 *  - the compile-time ICU_DATA_DIR / U_ICU_DATA_DEFAULT_DIR, optionally
 *    prefixed with the value of the environment variable named by
 *    ICU_DATA_DIR_PREFIX_ENV_VAR (formatted into a PATH_MAX buffer);
 *  - with ICU_DATA_DIR_WINDOWS on UWP builds, the Windows directory followed
 *    by that suffix, after checking that it fits into a MAX_PATH buffer.
 * NOTE(review): the early return taken when gDataDirectory is already set and
 * the final fallback value are described by the comments but their statements
 * are not visible in this listing -- confirm.
 */
1369 static void U_CALLCONV
dataDirectoryInitFn() {
1370 /* If we already have the directory, then return immediately. Will happen if user called
1371 * u_setDataDirectory().
1373 if (gDataDirectory
) {
1377 const char *path
= NULL
;
1378 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1379 char datadir_path_buffer
[PATH_MAX
];
1383 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1384 override ICU's data with the ICU_DATA environment variable. This prevents
1385 problems where multiple custom copies of ICU's specific version of data
1386 are installed on a system. Either the application must define the data
1387 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1388 ICU, set the data with udata_setCommonData or trust that all of the
1389 required data is contained in ICU's data library that contains
1390 the entry point defined by U_ICUDATA_ENTRY_POINT.
1392 There may also be some platforms where environment variables
1395 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1396 /* First try to get the environment variable */
1397 # if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1398 path
=getenv("ICU_DATA");
1402 /* ICU_DATA_DIR may be set as a compile option.
1403 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1404 * and is used only when data is built in archive mode eliminating the need
1405 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1406 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1407 * set their own path.
1409 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1410 if(path
==NULL
|| *path
==0) {
1411 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1412 const char *prefix
= getenv(ICU_DATA_DIR_PREFIX_ENV_VAR
);
1414 # ifdef ICU_DATA_DIR
1417 path
=U_ICU_DATA_DEFAULT_DIR
;
1419 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1420 if (prefix
!= NULL
) {
1421 snprintf(datadir_path_buffer
, PATH_MAX
, "%s%s", prefix
, path
);
1422 path
=datadir_path_buffer
;
1428 #if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
1429 // Use data from the %windir%\globalization\icu directory
1430 // This is only available if ICU is built as a system component
1431 char datadir_path_buffer
[MAX_PATH
];
1432 UINT length
= GetWindowsDirectoryA(datadir_path_buffer
, UPRV_LENGTHOF(datadir_path_buffer
));
1433 if (length
> 0 && length
< (UPRV_LENGTHOF(datadir_path_buffer
) - sizeof(ICU_DATA_DIR_WINDOWS
) - 1))
1435 if (datadir_path_buffer
[length
- 1] != '\\')
1437 datadir_path_buffer
[length
++] = '\\';
1438 datadir_path_buffer
[length
] = '\0';
1441 if ((length
+ 1 + sizeof(ICU_DATA_DIR_WINDOWS
)) < UPRV_LENGTHOF(datadir_path_buffer
))
1443 uprv_strcat(datadir_path_buffer
, ICU_DATA_DIR_WINDOWS
);
1444 path
= datadir_path_buffer
;
1450 /* It looks really bad, set it to something. */
1451 #if U_PLATFORM_HAS_WIN32_API
1452 // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
1459 u_setDataDirectory(path
);
1463 U_CAPI
const char * U_EXPORT2
1464 u_getDataDirectory(void) {
1465 umtx_initOnce(gDataDirInitOnce
, &dataDirectoryInitFn
);
1466 return gDataDirectory
;
1469 static void setTimeZoneFilesDir(const char *path
, UErrorCode
&status
) {
1470 if (U_FAILURE(status
)) {
1473 gTimeZoneFilesDirectory
->clear();
1474 gTimeZoneFilesDirectory
->append(path
, status
);
1475 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1476 char *p
= gTimeZoneFilesDirectory
->data();
1477 while ((p
= uprv_strchr(p
, U_FILE_ALT_SEP_CHAR
)) != NULL
) {
1478 *p
= U_FILE_SEP_CHAR
;
1483 #if U_PLATFORM_IMPLEMENTS_POSIX
1484 #include <sys/stat.h>
1485 #if defined(U_TIMEZONE_FILES_DIR)
1486 const char tzdirbuf
[] = U_TIMEZONE_FILES_DIR
;
1487 enum { kTzfilenamebufLen
= UPRV_LENGTHOF(tzdirbuf
) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1491 #define TO_STRING(x) TO_STRING_2(x)
1492 #define TO_STRING_2(x) #x
/*
 * TimeZoneDataDirInitFn: one-time initializer for gTimeZoneFilesDirectory.
 * It registers putil_cleanup, allocates the CharString (reporting
 * U_MEMORY_ALLOCATION_ERROR through status when that fails), and reads the
 * ICU_TIMEZONE_FILES_DIR environment variable (UWP builds use "" instead).
 * On POSIX platforms the candidate directory is checked with stat(); when the
 * compile-time U_TIMEZONE_FILES_DIR is in use, the expected data file inside
 * it (U_TIMEZONE_PACKAGE ".dat" or "zoneinfo64.res") is checked as well.
 * The chosen directory is handed to setTimeZoneFilesDir().
 * NOTE(review): the assignments made when dir is unset or a stat() call fails
 * are not visible in this listing -- confirm the fallback behaviour.
 */
1494 static void U_CALLCONV
TimeZoneDataDirInitFn(UErrorCode
&status
) {
1495 U_ASSERT(gTimeZoneFilesDirectory
== NULL
);
1496 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1497 gTimeZoneFilesDirectory
= new CharString();
1498 if (gTimeZoneFilesDirectory
== NULL
) {
1499 status
= U_MEMORY_ALLOCATION_ERROR
;
1502 UBool usingUTzFilesDir
= FALSE
;
1503 #if U_PLATFORM_HAS_WINUWP_API == 0
1504 const char *dir
= getenv("ICU_TIMEZONE_FILES_DIR");
1506 // TODO: UWP does not support alternate timezone data directories at this time
1507 const char *dir
= "";
1508 #endif // U_PLATFORM_HAS_WINUWP_API
1509 #if defined(U_TIMEZONE_FILES_DIR)
1511 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1512 // Not sure why the above was done for this path only;
1513 // it preserves unwanted quotes.
1515 usingUTzFilesDir
= TRUE
;
1518 #if U_PLATFORM_IMPLEMENTS_POSIX
1521 if (stat(dir
, &buf
) != 0) {
1524 #if defined(U_TIMEZONE_FILES_DIR)
1525 else if (usingUTzFilesDir
) {
1526 char tzfilenamebuf
[kTzfilenamebufLen
];
1527 uprv_strcpy(tzfilenamebuf
, tzdirbuf
);
1528 uprv_strcat(tzfilenamebuf
, U_FILE_SEP_STRING
);
1529 #if defined(U_TIMEZONE_PACKAGE)
1530 uprv_strcat(tzfilenamebuf
, U_TIMEZONE_PACKAGE
);
1531 uprv_strcat(tzfilenamebuf
, ".dat");
1533 uprv_strcat(tzfilenamebuf
, "zoneinfo64.res");
1535 if (stat(tzfilenamebuf
, &buf
) != 0) {
1539 #endif /* defined(U_TIMEZONE_FILES_DIR) */
1541 #endif /* U_PLATFORM_IMPLEMENTS_POSIX */
1545 setTimeZoneFilesDir(dir
, status
);
1549 U_CAPI
const char * U_EXPORT2
1550 u_getTimeZoneFilesDirectory(UErrorCode
*status
) {
1551 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1552 return U_SUCCESS(*status
) ? gTimeZoneFilesDirectory
->data() : "";
1555 U_CAPI
void U_EXPORT2
1556 u_setTimeZoneFilesDirectory(const char *path
, UErrorCode
*status
) {
1557 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1558 setTimeZoneFilesDir(path
, *status
);
1560 // Note: this function does some extra churn, first setting based on the
1561 // environment, then immediately replacing with the value passed in.
1562 // The logic is simpler that way, and performance shouldn't be an issue.
1567 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1568 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1569 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1571 static const char *uprv_getPOSIXIDForCategory(int category
)
1573 const char* posixID
= NULL
;
1574 if (category
== LC_MESSAGES
|| category
== LC_CTYPE
) {
1576 * On Solaris two different calls to setlocale can result in
1577 * different values. Only get this value once.
1579 * We must check this first because an application can set this.
1581 * LC_ALL can't be used because it's platform dependent. The LANG
1582 * environment variable seems to affect LC_CTYPE variable by default.
1583 * Here is what setlocale(LC_ALL, NULL) can return.
1584 * HPUX can return 'C C C C C C C'
1585 * Solaris can return /en_US/C/C/C/C/C on the second try.
1586 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1588 * The default codepage detection also needs to use LC_CTYPE.
1590 * Do not call setlocale(LC_*, "")! Using an empty string instead
1591 * of NULL, will modify the libc behavior.
1593 posixID
= setlocale(category
, NULL
);
1595 || (uprv_strcmp("C", posixID
) == 0)
1596 || (uprv_strcmp("POSIX", posixID
) == 0))
1598 /* Maybe we got some garbage. Try something more reasonable */
1599 posixID
= getenv("LC_ALL");
1600 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1601 * This is needed to properly handle empty env. variables
1603 #if U_PLATFORM == U_PF_SOLARIS
1604 if ((posixID
== 0) || (posixID
[0] == '\0')) {
1605 posixID
= getenv(category
== LC_MESSAGES
? "LC_MESSAGES" : "LC_CTYPE");
1606 if ((posixID
== 0) || (posixID
[0] == '\0')) {
1609 posixID
= getenv(category
== LC_MESSAGES
? "LC_MESSAGES" : "LC_CTYPE");
1612 posixID
= getenv("LANG");
1618 || (uprv_strcmp("C", posixID
) == 0)
1619 || (uprv_strcmp("POSIX", posixID
) == 0))
1621 /* Nothing worked. Give it a nice POSIX default value. */
1622 posixID
= "en_US_POSIX";
1627 /* Return just the POSIX id for the default locale, whatever happens to be in
1628 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1630 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1632 static const char* posixID
= NULL
;
1634 posixID
= uprv_getPOSIXIDForCategory(LC_MESSAGES
);
1639 #if !U_CHARSET_IS_UTF8
1640 /* Return just the POSIX id for the default codepage, whatever happens to be in
1641 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1643 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1645 static const char* posixID
= NULL
;
1647 posixID
= uprv_getPOSIXIDForCategory(LC_CTYPE
);
/*
 * uprv_getDefaultLocaleID: returns the platform default locale as an ICU-style
 * locale ID.  Three platform variants are visible:
 *  - First variant: starts from uprv_getPOSIXIDForDefaultLocale().  The
 *    codepage part (".xxx") is cut off, and a variant ("@xxx") is appended
 *    after "_" (or "__" when there is no country); "nynorsk" is special-cased.
 *    The result is copied to the heap and cached in gCorrectedPOSIXLocale.
 *  - Windows-only variant: takes GetUserDefaultLocaleName() or, on UWP, the
 *    top entry of the application/user language list passed through
 *    ResolveLocaleName(); narrows it to char, canonicalizes it with
 *    uloc_canonicalize() and caches it.  "en_US" is the fallback.
 *  - OS/400 variant: reads LC_ALL, LANG, then setlocale(LC_ALL, NULL), and
 *    edits the name in a static 64-byte buffer (upper-casing, POSIX/QLGPGCMA
 *    detection, Euro, Lotus, zh_HK and zh_CN_GBK special cases).
 * NOTE(review): in the OS/400 variant the environment value is copied with
 * uprv_strcpy() into the 64-byte correctedLocale, and LocaleLen - 2 is used as
 * an index without a visible length check; overly long or very short values
 * look unsafe -- confirm and bound them.
 * NOTE(review): many control-flow lines are missing from this listing; the
 * summary above follows the visible statements and comments only.
 */
1654 /* NOTE: The caller should handle thread safety */
1655 U_CAPI
const char* U_EXPORT2
1656 uprv_getDefaultLocaleID()
1660 Note that: (a '!' means the ID is improper somehow)
1661 LC_ALL ----> default_loc codepage
1662 --------------------------------------------------------
1667 ab_CD.EF@GH ab_CD_GH EF
1669 Some 'improper' ways to do the same as above:
1670 ! ab_CD@GH.EF ab_CD_GH EF
1671 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1672 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1677 The variant cannot have dots in it.
1678 The 'rightmost' variant (@xxx) wins.
1679 The leftmost codepage (.xxx) wins.
1681 char *correctedPOSIXLocale
= 0;
1682 const char* posixID
= uprv_getPOSIXIDForDefaultLocale();
1687 /* Format: (no spaces)
1688 ll [ _CC ] [ . MM ] [ @ VV]
1690 l = lang, C = ctry, M = charmap, V = variant
1693 if (gCorrectedPOSIXLocale
!= NULL
) {
1694 return gCorrectedPOSIXLocale
;
1697 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1698 /* assume new locale can't be larger than old one? */
1699 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1700 /* Exit on memory allocation error. */
1701 if (correctedPOSIXLocale
== NULL
) {
1704 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1705 correctedPOSIXLocale
[p
-posixID
] = 0;
1707 /* do not copy after the @ */
1708 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1709 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1713 /* Note that we scan the *uncorrected* ID. */
1714 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1715 if (correctedPOSIXLocale
== NULL
) {
1716 /* new locale can be 1 char longer than old one if @ -> __ */
1717 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+2));
1718 /* Exit on memory allocation error. */
1719 if (correctedPOSIXLocale
== NULL
) {
1722 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1723 correctedPOSIXLocale
[p
-posixID
] = 0;
1727 /* Take care of any special cases here.. */
1728 if (!uprv_strcmp(p
, "nynorsk")) {
1730 /* Don't worry about no__NY. In practice, it won't appear. */
1733 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1734 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1737 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1740 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1741 /* How big will the resulting string be? */
1742 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1743 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1744 correctedPOSIXLocale
[len
] = 0;
1747 /* Anything following the @ sign */
1748 uprv_strcat(correctedPOSIXLocale
, p
);
1751 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1752 * How about 'russian' -> 'ru'?
1753 * Many of the other locales using ISO codes will be handled by the
1754 * canonicalization functions in uloc_getDefault.
1758 /* Was a correction made? */
1759 if (correctedPOSIXLocale
!= NULL
) {
1760 posixID
= correctedPOSIXLocale
;
1763 /* copy it, just in case the original pointer goes away. See j2395 */
1764 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1765 /* Exit on memory allocation error. */
1766 if (correctedPOSIXLocale
== NULL
) {
1769 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1772 if (gCorrectedPOSIXLocale
== NULL
) {
1773 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1774 gCorrectedPOSIXLocaleHeapAllocated
= true;
1775 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1776 correctedPOSIXLocale
= NULL
;
1779 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1780 uprv_free(correctedPOSIXLocale
);
1785 #elif U_PLATFORM_USES_ONLY_WIN32_API
1786 #define POSIX_LOCALE_CAPACITY 64
1787 UErrorCode status
= U_ZERO_ERROR
;
1788 char *correctedPOSIXLocale
= 0;
1790 // If we have already figured this out just use the cached value
1791 if (gCorrectedPOSIXLocale
!= NULL
) {
1792 return gCorrectedPOSIXLocale
;
1795 // No cached value, need to determine the current value
1796 static WCHAR windowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1797 #if U_PLATFORM_HAS_WINUWP_API == 0
1798 // If not a Universal Windows App, we'll need user default language.
1799 // Vista and above should use Locale Names instead of LCIDs
1800 int length
= GetUserDefaultLocaleName(windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1802 // In a UWP app, we want the top language that the application and user agreed upon
1803 ComPtr
<ABI::Windows::Foundation::Collections::IVectorView
<HSTRING
>> languageList
;
1805 ComPtr
<ABI::Windows::Globalization::IApplicationLanguagesStatics
> applicationLanguagesStatics
;
1806 HRESULT hr
= GetActivationFactory(
1807 HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages
).Get(),
1808 &applicationLanguagesStatics
);
1811 hr
= applicationLanguagesStatics
->get_Languages(&languageList
);
1816 // If there is no application context, then use the top language from the user language profile
1817 ComPtr
<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics
> globalizationPreferencesStatics
;
1818 hr
= GetActivationFactory(
1819 HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences
).Get(),
1820 &globalizationPreferencesStatics
);
1823 hr
= globalizationPreferencesStatics
->get_Languages(&languageList
);
1827 // We have a list of languages, ICU knows one, so use the top one for our locale
1828 HString topLanguage
;
1831 hr
= languageList
->GetAt(0, topLanguage
.GetAddressOf());
1836 // Unexpected, use en-US by default
1837 if (gCorrectedPOSIXLocale
== NULL
) {
1838 gCorrectedPOSIXLocale
= "en_US";
1841 return gCorrectedPOSIXLocale
;
1844 // ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
1845 int length
= ResolveLocaleName(topLanguage
.GetRawBuffer(NULL
), windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1847 // Now we should have a Windows locale name that needs converted to the POSIX style,
1850 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1851 char modifiedWindowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1854 for (i
= 0; i
< UPRV_LENGTHOF(modifiedWindowsLocale
); i
++)
1856 if (windowsLocale
[i
] == '_')
1858 modifiedWindowsLocale
[i
] = '-';
1862 modifiedWindowsLocale
[i
] = static_cast<char>(windowsLocale
[i
]);
1865 if (modifiedWindowsLocale
[i
] == '\0')
1871 if (i
>= UPRV_LENGTHOF(modifiedWindowsLocale
))
1873 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1874 // locale when tags are dropped
1875 modifiedWindowsLocale
[UPRV_LENGTHOF(modifiedWindowsLocale
) - 1] = '\0';
1878 // Now normalize the resulting name
1879 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY
+ 1));
1880 /* TODO: Should we just exit on memory allocation failure? */
1881 if (correctedPOSIXLocale
)
1883 int32_t posixLen
= uloc_canonicalize(modifiedWindowsLocale
, correctedPOSIXLocale
, POSIX_LOCALE_CAPACITY
, &status
);
1884 if (U_SUCCESS(status
))
1886 *(correctedPOSIXLocale
+ posixLen
) = 0;
1887 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1888 gCorrectedPOSIXLocaleHeapAllocated
= true;
1889 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1893 uprv_free(correctedPOSIXLocale
);
1898 // If unable to find a locale we can agree upon, use en-US by default
1899 if (gCorrectedPOSIXLocale
== NULL
) {
1900 gCorrectedPOSIXLocale
= "en_US";
1902 return gCorrectedPOSIXLocale
;
1904 #elif U_PLATFORM == U_PF_OS400
1905 /* locales are process scoped and are by definition thread safe */
1906 static char correctedLocale
[64];
1907 const char *localeID
= getenv("LC_ALL");
1910 if (localeID
== NULL
)
1911 localeID
= getenv("LANG");
1912 if (localeID
== NULL
)
1913 localeID
= setlocale(LC_ALL
, NULL
);
1914 /* Make sure we have something... */
1915 if (localeID
== NULL
)
1916 return "en_US_POSIX";
1918 /* Extract the locale name from the path. */
1919 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1921 /* Increment p to start of locale name. */
1926 /* Copy to work location. */
1927 uprv_strcpy(correctedLocale
, localeID
);
1929 /* Strip off the '.locale' extension. */
1930 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1934 /* Upper case the locale name. */
1935 T_CString_toUpperCase(correctedLocale
);
1937 /* See if we are using the POSIX locale. Any of the
1938 * following are equivalent and use the same QLGPGCMA
1940 * QLGPGCMA2 means UCS2
1941 * QLGPGCMA_4 means UTF-32
1942 * QLGPGCMA_8 means UTF-8
1944 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1945 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1946 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1948 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1954 /* Lower case the lang portion. */
1955 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1957 *p
= uprv_tolower(*p
);
1960 /* Adjust for Euro. After '_E' add 'URO'. */
1961 LocaleLen
= uprv_strlen(correctedLocale
);
1962 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1963 correctedLocale
[LocaleLen
- 1] == 'E')
1965 uprv_strcat(correctedLocale
, "URO");
1968 /* If using Lotus-based locale then convert to
1969 * equivalent non Lotus.
1971 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1972 correctedLocale
[LocaleLen
- 1] == 'L')
1974 correctedLocale
[LocaleLen
- 2] = 0;
1977 /* There are separate simplified and traditional
1978 * locales called zh_HK_S and zh_HK_T.
1980 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1982 uprv_strcpy(correctedLocale
, "zh_HK");
1985 /* A special zh_CN_GBK locale...
1987 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1989 uprv_strcpy(correctedLocale
, "zh_CN");
1994 return correctedLocale
;
/*
 * remapPlatformDependentCodepage: given a locale ID (may be NULL) and a
 * platform-reported charset name, picks the charset name ICU should use.
 * Only compiled when U_CHARSET_IS_UTF8 is off.  There is one #if/#elif
 * section per platform (AIX, Solaris, Darwin-based, BSD, HP-UX, Linux), each
 * comparing name -- and sometimes locale -- against known ambiguous aliases.
 * NOTE(review): most replacement names and the return statements are not
 * visible in this listing; the trailing comment indicates NULL is returned
 * for an empty name -- confirm.
 */
1999 #if !U_CHARSET_IS_UTF8
2002 Due to various platform differences, one platform may specify a charset,
2003 when they really mean a different charset. Remap the names so that they are
2004 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
2005 here. Before adding anything to this function, please consider adding unique
2006 names to the ICU alias table in the data directory.
2009 remapPlatformDependentCodepage(const char *locale
, const char *name
) {
2010 if (locale
!= NULL
&& *locale
== 0) {
2011 /* Make sure that an empty locale is handled the same way. */
2017 #if U_PLATFORM == U_PF_AIX
2018 if (uprv_strcmp(name
, "IBM-943") == 0) {
2019 /* Use the ASCII compatible ibm-943 */
2022 else if (uprv_strcmp(name
, "IBM-1252") == 0) {
2023 /* Use the windows-1252 that contains the Euro */
2026 #elif U_PLATFORM == U_PF_SOLARIS
2027 if (locale
!= NULL
&& uprv_strcmp(name
, "EUC") == 0) {
2028 /* Solaris underspecifies the "EUC" name. */
2029 if (uprv_strcmp(locale
, "zh_CN") == 0) {
2032 else if (uprv_strcmp(locale
, "zh_TW") == 0) {
2035 else if (uprv_strcmp(locale
, "ko_KR") == 0) {
2039 else if (uprv_strcmp(name
, "eucJP") == 0) {
2041 ibm-954 is the best match.
2042 ibm-33722 is the default for eucJP (similar to Windows).
2046 else if (uprv_strcmp(name
, "646") == 0) {
2048 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2049 * ISO-8859-1 instead of US-ASCII(646).
2051 name
= "ISO-8859-1";
2053 #elif U_PLATFORM_IS_DARWIN_BASED
2054 if (locale
== NULL
&& *name
== 0) {
2056 No locale was specified, and an empty name was passed in.
2057 This usually indicates that nl_langinfo didn't return valid information.
2058 Mac OS X uses UTF-8 by default (especially the locale data and console).
2062 else if (uprv_strcmp(name
, "CP949") == 0) {
2063 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2066 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 && uprv_strcmp(name
, "US-ASCII") == 0) {
2068 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2072 #elif U_PLATFORM == U_PF_BSD
2073 if (uprv_strcmp(name
, "CP949") == 0) {
2074 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2077 #elif U_PLATFORM == U_PF_HPUX
2078 if (locale
!= NULL
&& uprv_strcmp(locale
, "zh_HK") == 0 && uprv_strcmp(name
, "big5") == 0) {
2079 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2080 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2083 else if (uprv_strcmp(name
, "eucJP") == 0) {
2085 ibm-1350 is the best match, but unavailable.
2086 ibm-954 is mostly a superset of ibm-1350.
2087 ibm-33722 is the default for eucJP (similar to Windows).
2091 #elif U_PLATFORM == U_PF_LINUX
2092 if (locale
!= NULL
&& uprv_strcmp(name
, "euc") == 0) {
2093 /* Linux underspecifies the "EUC" name. */
2094 if (uprv_strcmp(locale
, "korean") == 0) {
2097 else if (uprv_strcmp(locale
, "japanese") == 0) {
2098 /* See comment below about eucJP */
2102 else if (uprv_strcmp(name
, "eucjp") == 0) {
2104 ibm-1350 is the best match, but unavailable.
2105 ibm-954 is mostly a superset of ibm-1350.
2106 ibm-33722 is the default for eucJP (similar to Windows).
2110 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 &&
2111 (uprv_strcmp(name
, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name
, "US-ASCII") == 0)) {
2113 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2118 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2119 * it by falling back to 'US-ASCII' when NULL is returned from this
2120 * function. So, we don't have to worry about it here.
2123 /* return NULL when "" is passed in */
2131 getCodepageFromPOSIXID(const char *localeName
, char * buffer
, int32_t buffCapacity
)
2133 char localeBuf
[100];
2134 const char *name
= NULL
;
2135 char *variant
= NULL
;
2137 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
2138 size_t localeCapacity
= uprv_min(sizeof(localeBuf
), (name
-localeName
)+1);
2139 uprv_strncpy(localeBuf
, localeName
, localeCapacity
);
2140 localeBuf
[localeCapacity
-1] = 0; /* ensure NULL termination */
2141 name
= uprv_strncpy(buffer
, name
+1, buffCapacity
);
2142 buffer
[buffCapacity
-1] = 0; /* ensure NULL termination */
2143 if ((variant
= const_cast<char *>(uprv_strchr(name
, '@'))) != NULL
) {
2146 name
= remapPlatformDependentCodepage(localeBuf
, name
);
/*
 * int_getDefaultCodepage: computes the platform default codepage name.
 *  - OS/400: formats "ibm-<ccsid>" into a static buffer, taking the CCSID
 *    from the job information returned by QUSRJOBI (default 37).
 *  - OS/390: copies nl_langinfo(CODESET) into a static buffer and appends
 *    UCNV_SWAP_LFNL_OPTION_STRING.
 *  - Windows-only: obtains the ANSI codepage number (GetLocaleInfoEx on UWP,
 *    GetACP otherwise), special-cases 65001 (UTF-8), and formats
 *    "windows-<n>" for numbers in 1..19999.
 *  - POSIX: prefers nl_langinfo() passed through
 *    remapPlatformDependentCodepage(), then the codepage part of the POSIX
 *    locale ID via getCodepageFromPOSIXID(), then "US-ASCII".
 * NOTE(review): on OS/390, strncpy() does not terminate codepage when the
 * source fills the limit, yet strcat() follows before the explicit
 * terminator is written -- confirm this cannot overrun the 64-byte buffer.
 * NOTE(review): "windows-%ld" is used with a DWORD (unsigned) argument.
 * NOTE(review): return statements are not visible in this listing.
 */
2153 int_getDefaultCodepage()
2155 #if U_PLATFORM == U_PF_OS400
2156 uint32_t ccsid
= 37; /* Default to ibm-37 */
2157 static char codepage
[64];
2158 Qwc_JOBI0400_t jobinfo
;
2159 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
2161 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
2164 if (error
.Bytes_Available
== 0) {
2165 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
2166 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
2168 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
2169 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
2171 /* else use the default */
2173 sprintf(codepage
,"ibm-%d", ccsid
);
2176 #elif U_PLATFORM == U_PF_OS390
2177 static char codepage
[64];
2179 strncpy(codepage
, nl_langinfo(CODESET
),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING
));
2180 strcat(codepage
,UCNV_SWAP_LFNL_OPTION_STRING
);
2181 codepage
[63] = 0; /* NULL terminate */
2185 #elif U_PLATFORM_USES_ONLY_WIN32_API
2186 static char codepage
[64];
2187 DWORD codepageNumber
= 0;
2189 #if U_PLATFORM_HAS_WINUWP_API > 0
2190 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2191 // have folks use Unicode than a "system" code page, however this is the same
2192 // codepage as the system default locale codepage. (FWIW, the system locale is
2193 // ONLY used for codepage, it should never be used for anything else)
2194 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT
, LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER
,
2195 (LPWSTR
)&codepageNumber
, sizeof(codepageNumber
) / sizeof(WCHAR
));
2197 // Win32 apps can call GetACP
2198 codepageNumber
= GetACP();
2200 // Special case for UTF-8
2201 if (codepageNumber
== 65001)
2205 // Windows codepages can look like windows-1252, so format the found number
2206 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2207 // are between 3 and 19999
2208 if (codepageNumber
> 0 && codepageNumber
< 20000)
2210 sprintf(codepage
, "windows-%ld", codepageNumber
);
2213 // If the codepage number call failed then return UTF-8
2216 #elif U_POSIX_LOCALE
2217 static char codesetName
[100];
2218 const char *localeName
= NULL
;
2219 const char *name
= NULL
;
2221 localeName
= uprv_getPOSIXIDForDefaultCodepage();
2222 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2223 /* On Solaris nl_langinfo returns C locale values unless setlocale
2224 * was called earlier.
2226 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2227 /* When available, check nl_langinfo first because it usually gives more
2228 useful names. It depends on LC_CTYPE.
2229 nl_langinfo may use the same buffer as setlocale. */
2231 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
2232 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2234 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2237 if (uprv_strcmp(localeName
, "en_US_POSIX") != 0) {
2238 codeset
= remapPlatformDependentCodepage(localeName
, codeset
);
2242 codeset
= remapPlatformDependentCodepage(NULL
, codeset
);
2245 if (codeset
!= NULL
) {
2246 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
2247 codesetName
[sizeof(codesetName
)-1] = 0;
2253 /* Use setlocale in a nice way, and then check some environment variables.
2254 Maybe the application used setlocale already.
2256 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2257 name
= getCodepageFromPOSIXID(localeName
, codesetName
, sizeof(codesetName
));
2259 /* if we can find the codeset name from setlocale, return that. */
2263 if (*codesetName
== 0)
2265 /* Everything failed. Return US ASCII (ISO 646). */
2266 (void)uprv_strcpy(codesetName
, "US-ASCII");
/*
 * uprv_getDefaultCodepage: returns the default codepage name, keeping the
 * value obtained from int_getDefaultCodepage() in a function-local static.
 * NOTE(review): the lines around the assignment (the condition under which
 * the value is recomputed, any locking, and the return) are not visible in
 * this listing -- confirm before relying on caching or thread-safety.
 */
2275 U_CAPI
const char* U_EXPORT2
2276 uprv_getDefaultCodepage()
2278 static char const *name
= NULL
;
2281 name
= int_getDefaultCodepage();
2286 #endif /* !U_CHARSET_IS_UTF8 */
2289 /* end of platform-specific implementation -------------- */
2291 /* version handling --------------------------------------------------------- */
2293 U_CAPI
void U_EXPORT2
2294 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
2298 if(versionArray
==NULL
) {
2302 if(versionString
!=NULL
) {
2304 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
2305 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
2308 versionString
=end
+1;
2312 while(part
<U_MAX_VERSION_LENGTH
) {
2313 versionArray
[part
++]=0;
2317 U_CAPI
void U_EXPORT2
2318 u_versionFromUString(UVersionInfo versionArray
, const UChar
*versionString
) {
2319 if(versionArray
!=NULL
&& versionString
!=NULL
) {
2320 char versionChars
[U_MAX_VERSION_STRING_LENGTH
+1];
2321 int32_t len
= u_strlen(versionString
);
2322 if(len
>U_MAX_VERSION_STRING_LENGTH
) {
2323 len
= U_MAX_VERSION_STRING_LENGTH
;
2325 u_UCharsToChars(versionString
, versionChars
, len
);
2326 versionChars
[len
]=0;
2327 u_versionFromString(versionArray
, versionChars
);
/*
 * Write versionArray as dotted-decimal text into versionString.
 *
 * Visible behavior: trailing zero fields are counted off, the first field is
 * written as decimal digits, and each following field (up to the computed
 * count) is written preceded by U_VERSION_DELIMITER.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt -- the bodies of the two NULL checks, whatever follows the counting
 * loop, the guards/reductions around the hundreds and tens digits, and the
 * termination of the output. Confirm those against the full source before
 * relying on this description.
 */
2331 U_CAPI
void U_EXPORT2
2332 u_versionToString(const UVersionInfo versionArray
, char *versionString
) {
2333 uint16_t count
, part
;
/* NULL output buffer: handled specially (body of this branch not visible here). */
2336 if(versionString
==NULL
) {
/* NULL input array: handled specially (body of this branch not visible here). */
2340 if(versionArray
==NULL
) {
2345 /* count how many fields need to be written */
/* Walks back from the 4th field while the last counted field is 0, so count
 * ends up as the index of the last non-zero field plus one (0 if all are 0).
 * NOTE(review): the literal 4 presumably mirrors U_MAX_VERSION_LENGTH -- confirm. */
2346 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
2353 /* write the first part */
2354 /* write the decimal field value */
2355 field
=versionArray
[0];
/* Hundreds digit. NOTE(review): presumably emitted only when field>=100, with
 * field reduced afterwards; the guarding lines are not visible here. */
2357 *versionString
++=(char)('0'+field
/100);
/* Tens digit (same caveat as above). */
2361 *versionString
++=(char)('0'+field
/10);
/* Ones digit. */
2364 *versionString
++=(char)('0'+field
);
2366 /* write the following parts */
2367 for(part
=1; part
<count
; ++part
) {
2368 /* write a dot first */
2369 *versionString
++=U_VERSION_DELIMITER
;
2371 /* write the decimal field value */
2372 field
=versionArray
[part
];
/* Hundreds, tens and ones digits, as for the first field (same caveat). */
2374 *versionString
++=(char)('0'+field
/100);
2378 *versionString
++=(char)('0'+field
/10);
2381 *versionString
++=(char)('0'+field
);
2388 U_CAPI
void U_EXPORT2
2389 u_getVersion(UVersionInfo versionArray
) {
2390 (void)copyright
; // Suppress unused variable warning from clang.
2391 u_versionFromString(versionArray
, U_ICU_VERSION
);
2395 * icucfg.h dependent code
2398 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2407 #endif /* HAVE_DLFCN_H */
2409 U_INTERNAL
void * U_EXPORT2
2410 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2412 if(U_FAILURE(*status
)) return ret
;
2413 ret
= dlopen(libName
, RTLD_NOW
|RTLD_GLOBAL
);
2415 #ifdef U_TRACE_DYLOAD
2416 printf("dlerror on dlopen(%s): %s\n", libName
, dlerror());
2418 *status
= U_MISSING_RESOURCE_ERROR
;
/*
 * dlopen()-based implementation: release the library handle lib.
 *
 * If *status already indicates failure the function returns immediately
 * without touching lib.
 *
 * NOTE(review): the statement that actually releases lib is not visible in
 * this excerpt -- presumably dlclose(lib); confirm against the full source.
 */
2423 U_INTERNAL
void U_EXPORT2
2424 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2425 if(U_FAILURE(*status
)) return;
2429 U_INTERNAL UVoidFunction
* U_EXPORT2
2430 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2436 if(U_FAILURE(*status
)) return uret
.fp
;
2437 uret
.vp
= dlsym(lib
, sym
);
2438 if(uret
.vp
== NULL
) {
2439 #ifdef U_TRACE_DYLOAD
2440 printf("dlerror on dlsym(%p,%s): %s\n", lib
,sym
, dlerror());
2442 *status
= U_MISSING_RESOURCE_ERROR
;
2447 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2449 /* Windows API implementation. */
2450 // Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2452 U_INTERNAL
void * U_EXPORT2
2453 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2456 if(U_FAILURE(*status
)) return NULL
;
2458 lib
= LoadLibraryA(libName
);
2461 *status
= U_MISSING_RESOURCE_ERROR
;
2467 U_INTERNAL
void U_EXPORT2
2468 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2469 HMODULE handle
= (HMODULE
)lib
;
2470 if(U_FAILURE(*status
)) return;
2472 FreeLibrary(handle
);
2477 U_INTERNAL UVoidFunction
* U_EXPORT2
2478 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2479 HMODULE handle
= (HMODULE
)lib
;
2480 UVoidFunction
* addr
= NULL
;
2482 if(U_FAILURE(*status
) || lib
==NULL
) return NULL
;
2484 addr
= (UVoidFunction
*)GetProcAddress(handle
, sym
);
2487 DWORD lastError
= GetLastError();
2488 if(lastError
== ERROR_PROC_NOT_FOUND
) {
2489 *status
= U_MISSING_RESOURCE_ERROR
;
2491 *status
= U_UNSUPPORTED_ERROR
; /* other unknown error. */
2500 /* No dynamic loading, null (nonexistent) implementation. */
2502 U_INTERNAL
void * U_EXPORT2
2503 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2505 if(U_FAILURE(*status
)) return NULL
;
2506 *status
= U_UNSUPPORTED_ERROR
;
2510 U_INTERNAL
void U_EXPORT2
2511 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2513 if(U_FAILURE(*status
)) return;
2514 *status
= U_UNSUPPORTED_ERROR
;
2518 U_INTERNAL UVoidFunction
* U_EXPORT2
2519 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2522 if(U_SUCCESS(*status
)) {
2523 *status
= U_UNSUPPORTED_ERROR
;
2525 return (UVoidFunction
*)NULL
;
2531 * Hey, Emacs, please set the following:
2534 * indent-tabs-mode: nil