1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1997-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
11 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
13 * Date Name Description
14 * 04/14/97 aliu Creation.
15 * 04/24/97 aliu Added getDefaultDataDirectory() and
16 * getDefaultLocaleID().
17 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
18 * for assumed case. Non-UNIX platforms must be
19 * special-cased. Rewrote numeric methods dealing
20 * with NaN and Infinity to be platform independent
21 * over all IEEE 754 platforms.
22 * 05/13/97 aliu Restored sign of timezone
23 * (semantics are hours West of GMT)
24 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
26 * 07/22/98 stephen Added remainder, max, min, trunc
27 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28 * 08/24/98 stephen Added longBitsFromDouble
29 * 09/08/98 stephen Minor changes for Mac Port
30 * 03/02/99 stephen Removed openFile(). Added AS400 support.
32 * 04/15/99 stephen Converted to C.
33 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34 * 08/04/99 jeffrey R. Added OS/2 changes
35 * 11/15/99 helena Integrated S/390 IEEE support.
36 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38 * 01/03/08 Steven L. Fake Time Support
39 ******************************************************************************
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
75 /* Include standard headers. */
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
95 #ifndef WIN32_LEAN_AND_MEAN
96 # define WIN32_LEAN_AND_MEAN
103 # include <windows.h>
104 # include "unicode/uloc.h"
105 #if U_PLATFORM_HAS_WINUWP_API == 0
107 #else // U_PLATFORM_HAS_WINUWP_API
108 typedef PVOID LPMSG
; // TODO: figure out how to get rid of this typedef
109 #include <Windows.Globalization.h>
110 #include <windows.system.userprofile.h>
111 #include <wrl/wrappers/corewrappers.h>
112 #include <wrl/client.h>
114 using namespace ABI::Windows::Foundation
;
115 using namespace Microsoft::WRL
;
116 using namespace Microsoft::WRL::Wrappers
;
118 #elif U_PLATFORM == U_PF_OS400
120 # include <qusec.h> /* error code structure */
121 # include <qusrjobi.h>
122 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
123 # include <mih/testptr.h> /* For uprv_maximumPtr */
124 #elif U_PLATFORM == U_PF_OS390
125 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
126 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
129 # if U_PLATFORM == U_PF_SOLARIS
134 #elif U_PLATFORM == U_PF_QNX
135 # include <sys/neutrino.h>
139 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
144 #if U_HAVE_NL_LANGINFO_CODESET
145 #include <langinfo.h>
149 * Simple things (presence of functions, etc) should just go in configure.in and be added to
150 * icucfg.h via autoheader.
152 #if U_PLATFORM_IMPLEMENTS_POSIX
153 # if U_PLATFORM == U_PF_OS400
154 # define HAVE_DLFCN_H 0
155 # define HAVE_DLOPEN 0
157 # ifndef HAVE_DLFCN_H
158 # define HAVE_DLFCN_H 1
161 # define HAVE_DLOPEN 1
164 # ifndef HAVE_GETTIMEOFDAY
165 # define HAVE_GETTIMEOFDAY 1
168 # define HAVE_DLFCN_H 0
169 # define HAVE_DLOPEN 0
170 # define HAVE_GETTIMEOFDAY 0
175 /* Define the extension for data files, again... */
176 #define DATA_TYPE "dat"
178 /* Leave this copyright notice here! */
179 static const char copyright
[] = U_COPYRIGHT_STRING
;
181 /* floating point implementations ------------------------------------------- */
183 /* We return QNAN rather than SNAN*/
184 #define SIGN 0x80000000U
186 /* Make it easy to define certain types of constants */
188 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
190 } BitPatternConversion
;
191 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
192 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
194 /*---------------------------------------------------------------------------
196 Our general strategy is to assume we're on a POSIX platform. Platforms which
197 are non-POSIX must declare themselves so. The default POSIX implementation
198 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
200 ---------------------------------------------------------------------------*/
202 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
203 # undef U_POSIX_LOCALE
205 # define U_POSIX_LOCALE 1
209 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
210 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
214 u_topNBytesOfDouble(double* d
, int n
)
219 return (char*)(d
+ 1) - n
;
224 u_bottomNBytesOfDouble(double* d
, int n
)
227 return (char*)(d
+ 1) - n
;
232 #endif /* !IEEE_754 */
236 u_signBit(double d
) {
239 hiByte
= *(uint8_t *)&d
;
241 hiByte
= *(((uint8_t *)&d
) + sizeof(double) - 1);
243 return (hiByte
& 0x80) != 0;
249 #if defined (U_DEBUG_FAKETIME)
250 /* Override the clock to test things without having to move the system clock.
251 * Assumes POSIX gettimeofday() will function
253 UDate fakeClock_t0
= 0; /** Time to start the clock from **/
254 UDate fakeClock_dt
= 0; /** Offset (fake time - real time) **/
255 UBool fakeClock_set
= FALSE
; /** True if fake clock has spun up **/
256 static UMutex fakeClockMutex
= U_MUTEX_INTIALIZER
;
258 static UDate
getUTCtime_real() {
259 struct timeval posixTime
;
260 gettimeofday(&posixTime
, NULL
);
261 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
264 static UDate
getUTCtime_fake() {
265 umtx_lock(&fakeClockMutex
);
267 UDate real
= getUTCtime_real();
268 const char *fake_start
= getenv("U_FAKETIME_START");
269 if((fake_start
!=NULL
) && (fake_start
[0]!=0)) {
270 sscanf(fake_start
,"%lf",&fakeClock_t0
);
271 fakeClock_dt
= fakeClock_t0
- real
;
272 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
273 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
274 fakeClock_t0
, fake_start
, fakeClock_dt
, real
);
277 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
278 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
280 fakeClock_set
= TRUE
;
282 umtx_unlock(&fakeClockMutex
);
284 return getUTCtime_real() + fakeClock_dt
;
288 #if U_PLATFORM_USES_ONLY_WIN32_API
292 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
294 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
295 #define EPOCH_BIAS INT64_C(116444736000000000)
296 #define HECTONANOSECOND_PER_MILLISECOND 10000
300 /*---------------------------------------------------------------------------
301 Universal Implementations
302 These are designed to work on all platforms. Try these, and if they
303 don't work on your platform, then special case your platform with new
305 ---------------------------------------------------------------------------*/
307 U_CAPI UDate U_EXPORT2
310 #if defined(U_DEBUG_FAKETIME)
311 return getUTCtime_fake(); /* Hook for overriding the clock */
313 return uprv_getRawUTCtime();
317 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
318 U_CAPI UDate U_EXPORT2
321 #if U_PLATFORM_USES_ONLY_WIN32_API
323 FileTimeConversion winTime
;
324 GetSystemTimeAsFileTime(&winTime
.fileTime
);
325 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
328 #if HAVE_GETTIMEOFDAY
329 struct timeval posixTime
;
330 gettimeofday(&posixTime
, NULL
);
331 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
335 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
341 /*-----------------------------------------------------------------------------
343 These methods detect and return NaN and infinity values for doubles
344 conforming to IEEE 754. Platforms which support this standard include X86,
345 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
346 If this doesn't work on your platform, you have non-IEEE floating-point, and
347 will need to code your own versions. A naive implementation is to return 0.0
348 for getNaN and getInfinity, and false for isNaN and isInfinite.
349 ---------------------------------------------------------------------------*/
351 U_CAPI UBool U_EXPORT2
352 uprv_isNaN(double number
)
355 BitPatternConversion convertedNumber
;
356 convertedNumber
.d64
= number
;
357 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
358 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
360 #elif U_PLATFORM == U_PF_OS390
361 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
363 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
366 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
367 (lowBits
== 0x00000000L
);
370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
371 /* you'll need to replace this default implementation with what's correct*/
372 /* for your platform.*/
373 return number
!= number
;
377 U_CAPI UBool U_EXPORT2
378 uprv_isInfinite(double number
)
381 BitPatternConversion convertedNumber
;
382 convertedNumber
.d64
= number
;
383 /* Infinity is exactly 0x7FF0000000000000U. */
384 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
385 #elif U_PLATFORM == U_PF_OS390
386 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
388 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
391 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
394 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
395 /* value, you'll need to replace this default implementation with what's*/
396 /* correct for your platform.*/
397 return number
== (2.0 * number
);
401 U_CAPI UBool U_EXPORT2
402 uprv_isPositiveInfinity(double number
)
404 #if IEEE_754 || U_PLATFORM == U_PF_OS390
405 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
407 return uprv_isInfinite(number
);
411 U_CAPI UBool U_EXPORT2
412 uprv_isNegativeInfinity(double number
)
414 #if IEEE_754 || U_PLATFORM == U_PF_OS390
415 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
418 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
420 return((highBits
& SIGN
) && uprv_isInfinite(number
));
425 U_CAPI
double U_EXPORT2
428 #if IEEE_754 || U_PLATFORM == U_PF_OS390
431 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
432 /* you'll need to replace this default implementation with what's correct*/
433 /* for your platform.*/
438 U_CAPI
double U_EXPORT2
441 #if IEEE_754 || U_PLATFORM == U_PF_OS390
444 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
445 /* value, you'll need to replace this default implementation with what's*/
446 /* correct for your platform.*/
451 U_CAPI
double U_EXPORT2
457 U_CAPI
double U_EXPORT2
463 U_CAPI
double U_EXPORT2
466 return uprv_floor(x
+ 0.5);
469 U_CAPI
double U_EXPORT2
475 U_CAPI
double U_EXPORT2
476 uprv_modf(double x
, double* y
)
481 U_CAPI
double U_EXPORT2
482 uprv_fmod(double x
, double y
)
487 U_CAPI
double U_EXPORT2
488 uprv_pow(double x
, double y
)
490 /* This is declared as "double pow(double x, double y)" */
494 U_CAPI
double U_EXPORT2
495 uprv_pow10(int32_t x
)
497 return pow(10.0, (double)x
);
500 U_CAPI
double U_EXPORT2
501 uprv_fmax(double x
, double y
)
504 /* first handle NaN*/
505 if(uprv_isNaN(x
) || uprv_isNaN(y
))
506 return uprv_getNaN();
508 /* check for -0 and 0*/
509 if(x
== 0.0 && y
== 0.0 && u_signBit(x
))
514 /* this should work for all flt point w/o NaN and Inf special cases */
515 return (x
> y
? x
: y
);
518 U_CAPI
double U_EXPORT2
519 uprv_fmin(double x
, double y
)
522 /* first handle NaN*/
523 if(uprv_isNaN(x
) || uprv_isNaN(y
))
524 return uprv_getNaN();
526 /* check for -0 and 0*/
527 if(x
== 0.0 && y
== 0.0 && u_signBit(y
))
532 /* this should work for all flt point w/o NaN and Inf special cases */
533 return (x
> y
? y
: x
);
536 U_CAPI UBool U_EXPORT2
537 uprv_add32_overflow(int32_t a
, int32_t b
, int32_t* res
) {
538 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
539 // This function could be optimized by calling one of those primitives.
540 auto a64
= static_cast<int64_t>(a
);
541 auto b64
= static_cast<int64_t>(b
);
542 int64_t res64
= a64
+ b64
;
543 *res
= static_cast<int32_t>(res64
);
544 return res64
!= *res
;
547 U_CAPI UBool U_EXPORT2
548 uprv_mul32_overflow(int32_t a
, int32_t b
, int32_t* res
) {
549 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
550 // This function could be optimized by calling one of those primitives.
551 auto a64
= static_cast<int64_t>(a
);
552 auto b64
= static_cast<int64_t>(b
);
553 int64_t res64
= a64
* b64
;
554 *res
= static_cast<int32_t>(res64
);
555 return res64
!= *res
;
559 * Truncates the given double.
560 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
561 * This is different than calling floor() or ceil():
562 * floor(3.3) = 3, floor(-3.3) = -4
563 * ceil(3.3) = 4, ceil(-3.3) = -3
565 U_CAPI
double U_EXPORT2
569 /* handle error cases*/
571 return uprv_getNaN();
572 if(uprv_isInfinite(d
))
573 return uprv_getInfinity();
575 if(u_signBit(d
)) /* Signbit() picks up -0.0; d<0 does not. */
581 return d
>= 0 ? floor(d
) : ceil(d
);
587 * Return the largest positive number that can be represented by an integer
588 * type of arbitrary bit length.
590 U_CAPI
double U_EXPORT2
591 uprv_maxMantissa(void)
593 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
596 U_CAPI
double U_EXPORT2
602 U_CAPI
void * U_EXPORT2
603 uprv_maximumPtr(void * base
)
605 #if U_PLATFORM == U_PF_OS400
607 * With the provided function we should never be out of range of a given segment
608 * (a traditional/typical segment that is). Our segments have 5 bytes for the
609 * id and 3 bytes for the offset. The key is that the casting takes care of
610 * only retrieving the offset portion minus x1000. Hence, the smallest offset
611 * seen in a program is x001000 and when casted to an int would be 0.
612 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
614 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
615 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
616 * This function determines the activation based on the pointer that is passed in and
617 * calculates the appropriate maximum available size for
618 * each pointer type (TERASPACE and non-TERASPACE)
620 * Unlike other operating systems, the pointer model isn't determined at
621 * compile time on i5/OS.
623 if ((base
!= NULL
) && (_TESTPTR(base
, _C_TERASPACE_CHECK
))) {
624 /* if it is a TERASPACE pointer the max is 2GB - 4k */
625 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0x7fffefff)));
627 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
628 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0xffefff)));
631 return U_MAX_PTR(base
);
635 /*---------------------------------------------------------------------------
636 Platform-specific Implementations
637 Try these, and if they don't work on your platform, then special case your
638 platform with new implementations.
639 ---------------------------------------------------------------------------*/
641 /* Generic time zone layer -------------------------------------------------- */
643 /* Time zone utilities */
644 U_CAPI
void U_EXPORT2
650 /* no initialization*/
654 U_CAPI
int32_t U_EXPORT2
665 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
666 #if U_PLATFORM != U_PF_IPHONE
667 UBool dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
669 t1
= mktime(&tmrec
); /* local time in seconds*/
670 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
671 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
674 #if U_PLATFORM != U_PF_IPHONE
675 /* imitate NT behaviour, which returns same timezone offset to GMT for
677 This does not work on all platforms. For instance, on glibc on Linux
678 and on Mac OS 10.5, tdiff calculated above remains the same
679 regardless of whether DST is in effect or not. iOS is another
680 platform where this does not work. Linux + glibc and Mac OS 10.5
681 have U_TIMEZONE defined so that this code is not reached.
690 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
691 some platforms need to have it declared here. */
693 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
694 /* RS6000 and others reject char **tzname. */
695 extern U_IMPORT
char *U_TZNAME
[];
698 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
699 /* These platforms are likely to use Olson timezone IDs. */
700 /* common targets of the symbolic link at TZDEFAULT are:
701 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
702 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
703 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
704 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
705 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
706 * To avoid checking lots of paths, just check that the target path
707 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
710 #define CHECK_LOCALTIME_LINK 1
711 #if U_PLATFORM_IS_DARWIN_BASED
713 #define TZZONEINFO (TZDIR "/")
714 #elif U_PLATFORM == U_PF_SOLARIS
715 #define TZDEFAULT "/etc/localtime"
716 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
717 #define TZ_ENV_CHECK "localtime"
719 #define TZDEFAULT "/etc/localtime"
720 #define TZZONEINFO "/usr/share/zoneinfo/"
722 #define TZZONEINFOTAIL "/zoneinfo/"
724 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
725 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
726 symlinked to /etc/localtime, which makes searchForTZFile return
727 'localtime' when it's the first match. */
728 #define TZFILE_SKIP2 "localtime"
729 #define SEARCH_TZFILE
730 #include <dirent.h> /* Needed to search through system timezone files */
732 static char gTimeZoneBuffer
[PATH_MAX
];
733 static char *gTimeZoneBufferPtr
= NULL
;
736 #if !U_PLATFORM_USES_ONLY_WIN32_API
/* True when ch lies outside '0'..'9'. The argument is parenthesized so that
 * expression arguments expand with the intended precedence; note that ch is
 * still evaluated twice. */
#define isNonDigit(ch) ((ch) < '0' || '9' < (ch))
738 static UBool
isValidOlsonID(const char *id
) {
741 /* Determine if this is something like Iceland (Olson ID)
742 or AST4ADT (non-Olson ID) */
743 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
747 /* If we went through the whole string, then it might be okay.
748 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
749 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
750 The rest of the time it could be an Olson ID. George */
751 return (UBool
)(id
[idx
] == 0
752 || uprv_strcmp(id
, "PST8PDT") == 0
753 || uprv_strcmp(id
, "MST7MDT") == 0
754 || uprv_strcmp(id
, "CST6CDT") == 0
755 || uprv_strcmp(id
, "EST5EDT") == 0);
758 /* On some Unix-like OS, 'posix' subdirectory in
759 /usr/share/zoneinfo replicates the top-level contents. 'right'
760 subdirectory has the same set of files, but individual files
761 are different from those in the top-level directory or 'posix'
762 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
764 When the first match for /etc/localtime is in either of them
765 (usually in posix because 'right' has different file contents),
766 or TZ environment variable points to one of them, createTimeZone
767 fails because, say, 'posix/America/New_York' is not an Olson
768 timezone id ('America/New_York' is). So, we have to skip
769 'posix/' and 'right/' at the beginning. */
770 static void skipZoneIDPrefix(const char** id
) {
771 if (uprv_strncmp(*id
, "posix/", 6) == 0
772 || uprv_strncmp(*id
, "right/", 6) == 0)
779 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
/* Converts an offset in hours to seconds, truncated to int32_t. The argument
 * and the whole expansion are parenthesized so the macro is safe with
 * expression arguments and inside larger expressions. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))
782 typedef struct OffsetZoneMapping
{
783 int32_t offsetSeconds
;
784 int32_t daylightType
; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
790 enum { U_DAYLIGHT_NONE
=0,U_DAYLIGHT_JUNE
=1,U_DAYLIGHT_DECEMBER
=2 };
793 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
794 and maps it to an Olson ID.
795 Before adding anything to this list, take a look at
796 icu/source/tools/tzcode/tz.alias
797 Sometimes no daylight savings (0) is important to define due to aliases.
798 This list can be tested with icu/source/test/compat/tzone.pl
799 More values could be added to daylightType to increase precision.
801 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS
[] = {
802 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
803 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
804 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
805 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
806 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
807 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
808 {-36000, 2, "EST", "EST", "Australia/Sydney"},
809 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
810 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
811 {-34200, 2, "CST", "CST", "Australia/South"},
812 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
813 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
814 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
815 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
816 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
817 {-28800, 2, "WST", "WST", "Australia/West"},
818 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
819 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
820 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
821 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
822 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
823 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
824 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
825 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
826 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
827 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
828 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
829 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
830 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
831 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
832 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
833 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
834 {0, 1, "GMT", "IST", "Europe/Dublin"},
835 {0, 1, "GMT", "BST", "Europe/London"},
836 {0, 0, "WET", "WEST", "Africa/Casablanca"},
837 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
838 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
839 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
840 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
841 {10800, 2, "UYT", "UYST", "America/Montevideo"},
842 {10800, 1, "WGT", "WGST", "America/Godthab"},
843 {10800, 2, "BRT", "BRST", "Brazil/East"},
844 {12600, 1, "NST", "NDT", "America/St_Johns"},
845 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
846 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
847 {14400, 2, "CLT", "CLST", "Chile/Continental"},
848 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
849 {14400, 2, "PYT", "PYST", "America/Asuncion"},
850 {18000, 1, "CST", "CDT", "America/Havana"},
851 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
852 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
853 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
854 {21600, 0, "CST", "CDT", "America/Guatemala"},
855 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
856 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
857 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
858 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
859 {32400, 1, "AKST", "AKDT", "US/Alaska"},
860 {36000, 1, "HAST", "HADT", "US/Aleutian"}
863 /*#define DEBUG_TZNAME*/
865 static const char* remapShortTimeZone(const char *stdID
, const char *dstID
, int32_t daylightType
, int32_t offset
)
869 fprintf(stderr
, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID
, dstID
, daylightType
, offset
);
871 for (idx
= 0; idx
< UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS
); idx
++)
873 if (offset
== OFFSET_ZONE_MAPPINGS
[idx
].offsetSeconds
874 && daylightType
== OFFSET_ZONE_MAPPINGS
[idx
].daylightType
875 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].stdID
, stdID
) == 0
876 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].dstID
, dstID
) == 0)
878 return OFFSET_ZONE_MAPPINGS
[idx
].olsonID
;
886 #define MAX_READ_SIZE 512
888 typedef struct DefaultTZInfo
{
889 char* defaultTZBuffer
;
890 int64_t defaultTZFileSize
;
891 FILE* defaultTZFilePtr
;
892 UBool defaultTZstatus
;
893 int32_t defaultTZPosition
;
897 * This method compares the two files given to see if they are a match.
 * It is currently used to compare two TZ files.
900 static UBool
compareBinaryFiles(const char* defaultTZFileName
, const char* TZFileName
, DefaultTZInfo
* tzInfo
) {
903 int64_t sizeFileLeft
;
904 int32_t sizeFileRead
;
905 int32_t sizeFileToRead
;
906 char bufferFile
[MAX_READ_SIZE
];
909 if (tzInfo
->defaultTZFilePtr
== NULL
) {
910 tzInfo
->defaultTZFilePtr
= fopen(defaultTZFileName
, "r");
912 file
= fopen(TZFileName
, "r");
914 tzInfo
->defaultTZPosition
= 0; /* reset position to begin search */
916 if (file
!= NULL
&& tzInfo
->defaultTZFilePtr
!= NULL
) {
917 /* First check that the file size are equal. */
918 if (tzInfo
->defaultTZFileSize
== 0) {
919 fseek(tzInfo
->defaultTZFilePtr
, 0, SEEK_END
);
920 tzInfo
->defaultTZFileSize
= ftell(tzInfo
->defaultTZFilePtr
);
922 fseek(file
, 0, SEEK_END
);
923 sizeFile
= ftell(file
);
924 sizeFileLeft
= sizeFile
;
926 if (sizeFile
!= tzInfo
->defaultTZFileSize
) {
/* Store the data from the files in separate buffers and
930 * compare each byte to determine equality.
932 if (tzInfo
->defaultTZBuffer
== NULL
) {
933 rewind(tzInfo
->defaultTZFilePtr
);
934 tzInfo
->defaultTZBuffer
= (char*)uprv_malloc(sizeof(char) * tzInfo
->defaultTZFileSize
);
935 sizeFileRead
= fread(tzInfo
->defaultTZBuffer
, 1, tzInfo
->defaultTZFileSize
, tzInfo
->defaultTZFilePtr
);
938 while(sizeFileLeft
> 0) {
939 uprv_memset(bufferFile
, 0, MAX_READ_SIZE
);
940 sizeFileToRead
= sizeFileLeft
< MAX_READ_SIZE
? sizeFileLeft
: MAX_READ_SIZE
;
942 sizeFileRead
= fread(bufferFile
, 1, sizeFileToRead
, file
);
943 if (memcmp(tzInfo
->defaultTZBuffer
+ tzInfo
->defaultTZPosition
, bufferFile
, sizeFileRead
) != 0) {
947 sizeFileLeft
-= sizeFileRead
;
948 tzInfo
->defaultTZPosition
+= sizeFileRead
;
963 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
966 static UBool U_CALLCONV
putil_cleanup(void);
967 static CharString
*gSearchTZFileResult
= NULL
;
970 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
971 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
973 static char* searchForTZFile(const char* path
, DefaultTZInfo
* tzInfo
) {
975 struct dirent
* dirEntry
= NULL
;
977 UErrorCode status
= U_ZERO_ERROR
;
979 /* Save the current path */
980 CharString
curpath(path
, -1, status
);
981 if (U_FAILURE(status
)) {
982 goto cleanupAndReturn
;
985 dirp
= opendir(path
);
987 goto cleanupAndReturn
;
990 if (gSearchTZFileResult
== NULL
) {
991 gSearchTZFileResult
= new CharString
;
992 if (gSearchTZFileResult
== NULL
) {
993 goto cleanupAndReturn
;
995 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
998 /* Check each entry in the directory. */
999 while((dirEntry
= readdir(dirp
)) != NULL
) {
1000 const char* dirName
= dirEntry
->d_name
;
1001 if (uprv_strcmp(dirName
, SKIP1
) != 0 && uprv_strcmp(dirName
, SKIP2
) != 0) {
1002 /* Create a newpath with the new entry to test each entry in the directory. */
1003 CharString
newpath(curpath
, status
);
1004 newpath
.append(dirName
, -1, status
);
1005 if (U_FAILURE(status
)) {
1009 DIR* subDirp
= NULL
;
1010 if ((subDirp
= opendir(newpath
.data())) != NULL
) {
1011 /* If this new path is a directory, make a recursive call with the newpath. */
1013 newpath
.append('/', status
);
1014 if (U_FAILURE(status
)) {
1017 result
= searchForTZFile(newpath
.data(), tzInfo
);
1019 Have to get out here. Otherwise, we'd keep looking
1020 and return the first match in the top-level directory
1021 if there's a match in the top-level. If not, this function
1022 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1023 It worked without this in most cases because we have a fallback of calling
1024 localtime_r to figure out the default timezone.
1028 } else if (uprv_strcmp(TZFILE_SKIP
, dirName
) != 0 && uprv_strcmp(TZFILE_SKIP2
, dirName
) != 0) {
1029 if(compareBinaryFiles(TZDEFAULT
, newpath
.data(), tzInfo
)) {
1030 int32_t amountToSkip
= sizeof(TZZONEINFO
) - 1;
1031 if (amountToSkip
> newpath
.length()) {
1032 amountToSkip
= newpath
.length();
1034 const char* zoneid
= newpath
.data() + amountToSkip
;
1035 skipZoneIDPrefix(&zoneid
);
1036 gSearchTZFileResult
->clear();
1037 gSearchTZFileResult
->append(zoneid
, -1, status
);
1038 if (U_FAILURE(status
)) {
1041 result
= gSearchTZFileResult
->data();
1042 /* Get out after the first one found. */
1057 U_CAPI
void U_EXPORT2
1058 uprv_tzname_clear_cache()
1060 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1061 gTimeZoneBufferPtr
= NULL
;
1065 // With the Universal Windows Platform we can just ask Windows for the name
1066 #if U_PLATFORM_HAS_WINUWP_API
1067 U_CAPI
const char* U_EXPORT2
1068 uprv_getWindowsTimeZone()
1070 // Get default Windows timezone.
1071 ComPtr
<IInspectable
> calendar
;
1072 HRESULT hr
= RoActivateInstance(
1073 HStringReference(RuntimeClass_Windows_Globalization_Calendar
).Get(),
1077 ComPtr
<ABI::Windows::Globalization::ITimeZoneOnCalendar
> timezone
;
1078 hr
= calendar
.As(&timezone
);
1081 HString timezoneString
;
1082 hr
= timezone
->GetTimeZone(timezoneString
.GetAddressOf());
1085 int32_t length
= static_cast<int32_t>(wcslen(timezoneString
.GetRawBuffer(NULL
)));
1086 char* asciiId
= (char*)uprv_calloc(length
+ 1, sizeof(char));
1087 if (asciiId
!= nullptr)
1089 u_UCharsToChars((UChar
*)timezoneString
.GetRawBuffer(NULL
), asciiId
, length
);
// Resolves the default time zone ID for the process.
// NOTE(review): the line carrying the function name is not visible in this text; the
// (void)n statement and the U_TZNAME[n] uses suggest the signature is uprv_tzname(int n) -- confirm.
// Lookup steps visible below:
//   - Win32-only builds: uprv_getWindowsTimeZone() (UWP) or uprv_detectWindowsTimeZone();
//     a uprv_strdup("Etc/UTC") return is present as a fallback.
//   - Other builds: the TZ environment variable when isValidOlsonID() accepts it; then the
//     target of the TZDEFAULT symlink, whose result is cached in gTimeZoneBufferPtr; then,
//     under SEARCH_TZFILE, searchForTZFile(TZZONEINFO, ...).
//   - U_TZNAME: returned as a heap copy on Win32-only builds, otherwise remapped through
//     remapShortTimeZone() using daylight-saving probes at the 2007 solstices.
1101 U_CAPI
const char* U_EXPORT2
1104 (void)n
; // Avoid unreferenced parameter warning.
1105 const char *tzid
= NULL
;
1106 #if U_PLATFORM_USES_ONLY_WIN32_API
1107 #if U_PLATFORM_HAS_WINUWP_API > 0
1108 tzid
= uprv_getWindowsTimeZone();
1110 tzid
= uprv_detectWindowsTimeZone();
1118 // The return value is free'd in timezone.cpp on Windows because
1119 // the other code path returns a pointer to a heap location.
1120 // If we don't have a name already, then tzname wouldn't be any
1121 // better, so just fall back.
1122 return uprv_strdup("Etc/UTC");
1127 /*#if U_PLATFORM_IS_DARWIN_BASED
1130 tzid = getenv("TZFILE");
1136 /* This code can be temporarily disabled to test tzname resolution later on. */
1137 #ifndef DEBUG_TZNAME
1138 tzid
= getenv("TZ");
1139 if (tzid
!= NULL
&& isValidOlsonID(tzid
)
1140 #if U_PLATFORM == U_PF_SOLARIS
1141 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1142 && uprv_strcmp(tzid
, TZ_ENV_CHECK
) != 0
1145 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1146 if (tzid
[0] == ':') {
1149 /* This might be a good Olson ID. */
1150 skipZoneIDPrefix(&tzid
);
1153 /* else U_TZNAME will give a better result. */
1156 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1157 /* Caller must handle threading issues */
1158 if (gTimeZoneBufferPtr
== NULL
) {
1160 This is a trick to look at the name of the link to get the Olson ID
1161 because the tzfile contents is underspecified.
1162 This isn't guaranteed to work because it may not be a symlink.
// readlink() does not write a terminator; one byte of gTimeZoneBuffer is held back
// so that the terminator stored at index ret below always fits.
1164 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
)-1);
1166 int32_t tzZoneInfoTailLen
= uprv_strlen(TZZONEINFOTAIL
);
1167 gTimeZoneBuffer
[ret
] = 0;
1168 char * tzZoneInfoTailPtr
= uprv_strstr(gTimeZoneBuffer
, TZZONEINFOTAIL
);
// The ID is whatever follows the TZZONEINFOTAIL marker inside the link target.
1170 if (tzZoneInfoTailPtr
!= NULL
1171 && isValidOlsonID(tzZoneInfoTailPtr
+ tzZoneInfoTailLen
))
1173 return (gTimeZoneBufferPtr
= tzZoneInfoTailPtr
+ tzZoneInfoTailLen
);
1176 #if defined(SEARCH_TZFILE)
// Scratch state handed to searchForTZFile(); every field is cleared before the search.
1177 DefaultTZInfo
* tzInfo
= (DefaultTZInfo
*)uprv_malloc(sizeof(DefaultTZInfo
));
1178 if (tzInfo
!= NULL
) {
1179 tzInfo
->defaultTZBuffer
= NULL
;
1180 tzInfo
->defaultTZFileSize
= 0;
1181 tzInfo
->defaultTZFilePtr
= NULL
;
1182 tzInfo
->defaultTZstatus
= FALSE
;
1183 tzInfo
->defaultTZPosition
= 0;
1185 gTimeZoneBufferPtr
= searchForTZFile(TZZONEINFO
, tzInfo
);
1187 /* Free previously allocated memory */
1188 if (tzInfo
->defaultTZBuffer
!= NULL
) {
1189 uprv_free(tzInfo
->defaultTZBuffer
);
1191 if (tzInfo
->defaultTZFilePtr
!= NULL
) {
1192 fclose(tzInfo
->defaultTZFilePtr
);
1197 if (gTimeZoneBufferPtr
!= NULL
&& isValidOlsonID(gTimeZoneBufferPtr
)) {
1198 return gTimeZoneBufferPtr
;
1204 return gTimeZoneBufferPtr
;
1210 #if U_PLATFORM_USES_ONLY_WIN32_API
1211 /* The return value is free'd in timezone.cpp on Windows because
1212 * the other code path returns a pointer to a heap location. */
1213 return uprv_strdup(U_TZNAME
[n
]);
1216 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1217 So we remap the abbreviation to an olson ID.
1219 Since Windows exposes a little more timezone information,
1220 we normally don't use this code on Windows because
1221 uprv_detectWindowsTimeZone should have already given the correct answer.
1224 struct tm juneSol
, decemberSol
;
1226 static const time_t juneSolstice
=1182478260; /*2007-06-21 18:11 UT*/
1227 static const time_t decemberSolstice
=1198332540; /*2007-12-22 06:09 UT*/
1229 /* This probing will tell us when daylight savings occurs. */
1230 localtime_r(&juneSolstice
, &juneSol
);
1231 localtime_r(&decemberSolstice
, &decemberSol
);
// December is tested first, so a zone reporting DST at both solstices is
// classified as U_DAYLIGHT_DECEMBER.
1232 if(decemberSol
.tm_isdst
> 0) {
1233 daylightType
= U_DAYLIGHT_DECEMBER
;
1234 } else if(juneSol
.tm_isdst
> 0) {
1235 daylightType
= U_DAYLIGHT_JUNE
;
1237 daylightType
= U_DAYLIGHT_NONE
;
1239 tzid
= remapShortTimeZone(U_TZNAME
[0], U_TZNAME
[1], daylightType
, uprv_timezone());
1251 /* Get and set the ICU data directory --------------------------------------- */
1253 static icu::UInitOnce gDataDirInitOnce
= U_INITONCE_INITIALIZER
;
1254 static char *gDataDirectory
= NULL
;
1256 UInitOnce gTimeZoneFilesInitOnce
= U_INITONCE_INITIALIZER
;
1257 static CharString
*gTimeZoneFilesDirectory
= NULL
;
1259 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1260 static const char *gCorrectedPOSIXLocale
= NULL
; /* Sometimes heap allocated */
1261 static bool gCorrectedPOSIXLocaleHeapAllocated
= false;
1264 static UBool U_CALLCONV
putil_cleanup(void)
1266 if (gDataDirectory
&& *gDataDirectory
) {
1267 uprv_free(gDataDirectory
);
1269 gDataDirectory
= NULL
;
1270 gDataDirInitOnce
.reset();
1272 delete gTimeZoneFilesDirectory
;
1273 gTimeZoneFilesDirectory
= NULL
;
1274 gTimeZoneFilesInitOnce
.reset();
1276 #ifdef SEARCH_TZFILE
1277 delete gSearchTZFileResult
;
1278 gSearchTZFileResult
= NULL
;
1281 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1282 if (gCorrectedPOSIXLocale
&& gCorrectedPOSIXLocaleHeapAllocated
) {
1283 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale
));
1284 gCorrectedPOSIXLocale
= NULL
;
1285 gCorrectedPOSIXLocaleHeapAllocated
= false;
1292 * Set the data directory.
1293 * Make a copy of the passed string, and set the global data dir to point to it.
1295 U_CAPI
void U_EXPORT2
1296 u_setDataDirectory(const char *directory
) {
1300 if(directory
==NULL
|| *directory
==0) {
1301 /* A small optimization to prevent the malloc and copy when the
1302 shared library is used, and this is a way to make sure that NULL
1305 newDataDir
= (char *)"";
1308 length
=(int32_t)uprv_strlen(directory
);
1309 newDataDir
= (char *)uprv_malloc(length
+ 2);
1310 /* Exit out if newDataDir could not be created. */
1311 if (newDataDir
== NULL
) {
1314 uprv_strcpy(newDataDir
, directory
);
1316 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1319 while((p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) != NULL
) {
1320 *p
= U_FILE_SEP_CHAR
;
1326 if (gDataDirectory
&& *gDataDirectory
) {
1327 uprv_free(gDataDirectory
);
1329 gDataDirectory
= newDataDir
;
1330 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1333 U_CAPI UBool U_EXPORT2
1334 uprv_pathIsAbsolute(const char *path
)
1336 if(!path
|| !*path
) {
1340 if(*path
== U_FILE_SEP_CHAR
) {
1344 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1345 if(*path
== U_FILE_ALT_SEP_CHAR
) {
1350 #if U_PLATFORM_USES_ONLY_WIN32_API
1351 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
1352 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
1361 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1362 until some client wrapper makefiles are updated */
1363 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1364 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1365 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
// One-time initializer for the data directory; it is the callback passed to
// umtx_initOnce() by u_getDataDirectory().
// Sources consulted, in the order they appear below:
//   - the ICU_DATA environment variable (skipped under ICU_NO_USER_DATA_OVERRIDE,
//     UCONFIG_NO_FILE_IO, or on Windows UWP);
//   - the compile-time ICU_DATA_DIR / U_ICU_DATA_DEFAULT_DIR value, optionally prefixed with
//     the value of the environment variable named by ICU_DATA_DIR_PREFIX_ENV_VAR;
//   - on UWP builds with ICU_DATA_DIR_WINDOWS, the Windows directory plus that suffix.
// The chosen path is handed to u_setDataDirectory().
// NOTE(review): several assignments and the final fallback values are not visible in this
// text; check the pristine file before relying on this summary.
1369 static void U_CALLCONV
dataDirectoryInitFn() {
1370 /* If we already have the directory, then return immediately. Will happen if user called
1371 * u_setDataDirectory().
1373 if (gDataDirectory
) {
1377 const char *path
= NULL
;
1378 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1379 char datadir_path_buffer
[PATH_MAX
];
1383 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1384 override ICU's data with the ICU_DATA environment variable. This prevents
1385 problems where multiple custom copies of ICU's specific version of data
1386 are installed on a system. Either the application must define the data
1387 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1388 ICU, set the data with udata_setCommonData or trust that all of the
1389 required data is contained in ICU's data library that contains
1390 the entry point defined by U_ICUDATA_ENTRY_POINT.
1392 There may also be some platforms where environment variables
1395 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1396 /* First try to get the environment variable */
1397 # if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1398 path
=getenv("ICU_DATA");
1402 /* ICU_DATA_DIR may be set as a compile option.
1403 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1404 * and is used only when data is built in archive mode eliminating the need
1405 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1406 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1407 * set their own path.
1409 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1410 if(path
==NULL
|| *path
==0) {
1411 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1412 const char *prefix
= getenv(ICU_DATA_DIR_PREFIX_ENV_VAR
);
1414 # ifdef ICU_DATA_DIR
1417 path
=U_ICU_DATA_DEFAULT_DIR
;
1419 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1420 if (prefix
!= NULL
) {
// snprintf limits the write to PATH_MAX bytes, so an over-long prefix plus path is
// truncated rather than overrunning datadir_path_buffer.
1421 snprintf(datadir_path_buffer
, PATH_MAX
, "%s%s", prefix
, path
);
1422 path
=datadir_path_buffer
;
1428 #if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
1429 // Use data from the %windir%\globalization\icu directory
1430 // This is only available if ICU is built as a system component
1431 char datadir_path_buffer
[MAX_PATH
];
1432 UINT length
= GetWindowsDirectoryA(datadir_path_buffer
, UPRV_LENGTHOF(datadir_path_buffer
));
// Continue only when the call succeeded and the Windows directory leaves room for
// the ICU_DATA_DIR_WINDOWS suffix.
1433 if (length
> 0 && length
< (UPRV_LENGTHOF(datadir_path_buffer
) - sizeof(ICU_DATA_DIR_WINDOWS
) - 1))
// Make sure the directory ends with a backslash before the suffix is appended.
1435 if (datadir_path_buffer
[length
- 1] != '\\')
1437 datadir_path_buffer
[length
++] = '\\';
1438 datadir_path_buffer
[length
] = '\0';
1441 if ((length
+ 1 + sizeof(ICU_DATA_DIR_WINDOWS
)) < UPRV_LENGTHOF(datadir_path_buffer
))
1443 uprv_strcat(datadir_path_buffer
, ICU_DATA_DIR_WINDOWS
);
1444 path
= datadir_path_buffer
;
1450 /* It looks really bad, set it to something. */
1451 #if U_PLATFORM_HAS_WIN32_API
1452 // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
1459 u_setDataDirectory(path
);
1463 U_CAPI
const char * U_EXPORT2
1464 u_getDataDirectory(void) {
1465 umtx_initOnce(gDataDirInitOnce
, &dataDirectoryInitFn
);
1466 return gDataDirectory
;
1469 static void setTimeZoneFilesDir(const char *path
, UErrorCode
&status
) {
1470 if (U_FAILURE(status
)) {
1473 gTimeZoneFilesDirectory
->clear();
1474 gTimeZoneFilesDirectory
->append(path
, status
);
1475 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1476 char *p
= gTimeZoneFilesDirectory
->data();
1477 while ((p
= uprv_strchr(p
, U_FILE_ALT_SEP_CHAR
)) != NULL
) {
1478 *p
= U_FILE_SEP_CHAR
;
1483 #if U_PLATFORM_IMPLEMENTS_POSIX
1484 #include <sys/stat.h>
1485 #if defined(U_TIMEZONE_FILES_DIR)
1486 const char tzdirbuf
[] = U_TIMEZONE_FILES_DIR
;
1487 enum { kTzfilenamebufLen
= UPRV_LENGTHOF(tzdirbuf
) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1491 #define TO_STRING(x) TO_STRING_2(x)
1492 #define TO_STRING_2(x) #x
// One-time initializer for gTimeZoneFilesDirectory; it is the callback passed to
// umtx_initOnce() by u_getTimeZoneFilesDirectory() and u_setTimeZoneFilesDirectory().
// Visible steps: register putil_cleanup, allocate the CharString (reporting
// U_MEMORY_ALLOCATION_ERROR on failure), read ICU_TIMEZONE_FILES_DIR from the environment
// (an empty string on UWP), on POSIX check the candidate with stat(), and when
// usingUTzFilesDir is set additionally stat() the expected data file inside tzdirbuf.
// The result is stored through setTimeZoneFilesDir().
// NOTE(review): the assignments made when a check fails are not visible in this text.
1494 static void U_CALLCONV
TimeZoneDataDirInitFn(UErrorCode
&status
) {
1495 U_ASSERT(gTimeZoneFilesDirectory
== NULL
);
1496 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1497 gTimeZoneFilesDirectory
= new CharString();
1498 if (gTimeZoneFilesDirectory
== NULL
) {
1499 status
= U_MEMORY_ALLOCATION_ERROR
;
1502 UBool usingUTzFilesDir
= FALSE
;
1503 #if U_PLATFORM_HAS_WINUWP_API == 0
1504 const char *dir
= getenv("ICU_TIMEZONE_FILES_DIR");
1506 // TODO: UWP does not support alternate timezone data directories at this time
1507 const char *dir
= "";
1508 #endif // U_PLATFORM_HAS_WINUWP_API
1509 #if defined(U_TIMEZONE_FILES_DIR)
1511 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1512 // Not sure why the above was done for this path only;
1513 // it preserves unwanted quotes.
1515 usingUTzFilesDir
= TRUE
;
1518 #if U_PLATFORM_IMPLEMENTS_POSIX
1521 if (stat(dir
, &buf
) != 0) {
1524 #if defined(U_TIMEZONE_FILES_DIR)
1525 else if (usingUTzFilesDir
) {
// Build "<tzdirbuf><sep><data file>" in a buffer sized by kTzfilenamebufLen, which is
// the length of tzdirbuf plus 24 bytes of slack for the separator and file name.
1526 char tzfilenamebuf
[kTzfilenamebufLen
];
1527 uprv_strcpy(tzfilenamebuf
, tzdirbuf
);
1528 uprv_strcat(tzfilenamebuf
, U_FILE_SEP_STRING
);
1529 #if defined(U_TIMEZONE_PACKAGE)
// NOTE(review): the 24-byte slack assumes U_TIMEZONE_PACKAGE is short; a long package
// name would overrun tzfilenamebuf -- confirm at the definition site.
1530 uprv_strcat(tzfilenamebuf
, U_TIMEZONE_PACKAGE
);
1531 uprv_strcat(tzfilenamebuf
, ".dat");
1533 uprv_strcat(tzfilenamebuf
, "zoneinfo64.res");
1535 if (stat(tzfilenamebuf
, &buf
) != 0) {
1539 #endif /* defined(U_TIMEZONE_FILES_DIR) */
1541 #endif /* U_PLATFORM_IMPLEMENTS_POSIX */
1545 setTimeZoneFilesDir(dir
, status
);
1549 U_CAPI
const char * U_EXPORT2
1550 u_getTimeZoneFilesDirectory(UErrorCode
*status
) {
1551 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1552 return U_SUCCESS(*status
) ? gTimeZoneFilesDirectory
->data() : "";
1555 U_CAPI
void U_EXPORT2
1556 u_setTimeZoneFilesDirectory(const char *path
, UErrorCode
*status
) {
1557 umtx_initOnce(gTimeZoneFilesInitOnce
, &TimeZoneDataDirInitFn
, *status
);
1558 setTimeZoneFilesDir(path
, *status
);
1560 // Note: this function does some extra churn, first setting based on the
1561 // environment, then immediately replacing with the value passed in.
1562 // The logic is simpler that way, and performance shouldn't be an issue.
1567 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1568 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1569 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1571 static const char *uprv_getPOSIXIDForCategory(int category
)
1573 const char* posixID
= NULL
;
1574 if (category
== LC_MESSAGES
|| category
== LC_CTYPE
) {
1576 * On Solaris two different calls to setlocale can result in
1577 * different values. Only get this value once.
1579 * We must check this first because an application can set this.
1581 * LC_ALL can't be used because it's platform dependent. The LANG
1582 * environment variable seems to affect LC_CTYPE variable by default.
1583 * Here is what setlocale(LC_ALL, NULL) can return.
1584 * HPUX can return 'C C C C C C C'
1585 * Solaris can return /en_US/C/C/C/C/C on the second try.
1586 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1588 * The default codepage detection also needs to use LC_CTYPE.
1590 * Do not call setlocale(LC_*, "")! Using an empty string instead
1591 * of NULL, will modify the libc behavior.
1593 posixID
= setlocale(category
, NULL
);
1595 || (uprv_strcmp("C", posixID
) == 0)
1596 || (uprv_strcmp("POSIX", posixID
) == 0))
1598 /* Maybe we got some garbage. Try something more reasonable */
1599 posixID
= getenv("LC_ALL");
1600 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1601 * This is needed to properly handle empty env. variables
1603 #if U_PLATFORM == U_PF_SOLARIS
1604 if ((posixID
== 0) || (posixID
[0] == '\0')) {
1605 posixID
= getenv(category
== LC_MESSAGES
? "LC_MESSAGES" : "LC_CTYPE");
1606 if ((posixID
== 0) || (posixID
[0] == '\0')) {
1609 posixID
= getenv(category
== LC_MESSAGES
? "LC_MESSAGES" : "LC_CTYPE");
1612 posixID
= getenv("LANG");
1618 || (uprv_strcmp("C", posixID
) == 0)
1619 || (uprv_strcmp("POSIX", posixID
) == 0))
1621 /* Nothing worked. Give it a nice POSIX default value. */
1622 posixID
= "en_US_POSIX";
1627 /* Return just the POSIX id for the default locale, whatever happens to be in
1628 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1630 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1632 static const char* posixID
= NULL
;
1634 posixID
= uprv_getPOSIXIDForCategory(LC_MESSAGES
);
1639 #if !U_CHARSET_IS_UTF8
1640 /* Return just the POSIX id for the default codepage, whatever happens to be in
1641 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1643 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1645 static const char* posixID
= NULL
;
1647 posixID
= uprv_getPOSIXIDForCategory(LC_CTYPE
);
1654 /* NOTE: The caller should handle thread safety */
1655 U_CAPI
const char* U_EXPORT2
1656 uprv_getDefaultLocaleID()
1660 Note that: (a '!' means the ID is improper somehow)
1661 LC_ALL ----> default_loc codepage
1662 --------------------------------------------------------
1667 ab_CD.EF@GH ab_CD_GH EF
1669 Some 'improper' ways to do the same as above:
1670 ! ab_CD@GH.EF ab_CD_GH EF
1671 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1672 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1677 The variant cannot have dots in it.
1678 The 'rightmost' variant (@xxx) wins.
1679 The leftmost codepage (.xxx) wins.
1681 char *correctedPOSIXLocale
= 0;
1682 const char* posixID
= uprv_getPOSIXIDForDefaultLocale();
1687 /* Format: (no spaces)
1688 ll [ _CC ] [ . MM ] [ @ VV]
1690 l = lang, C = ctry, M = charmap, V = variant
1693 if (gCorrectedPOSIXLocale
!= NULL
) {
1694 return gCorrectedPOSIXLocale
;
1697 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1698 /* assume new locale can't be larger than old one? */
1699 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1700 /* Exit on memory allocation error. */
1701 if (correctedPOSIXLocale
== NULL
) {
1704 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1705 correctedPOSIXLocale
[p
-posixID
] = 0;
1707 /* do not copy after the @ */
1708 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1709 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1713 /* Note that we scan the *uncorrected* ID. */
1714 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1715 if (correctedPOSIXLocale
== NULL
) {
1716 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(uprv_strlen(posixID
)+1));
1717 /* Exit on memory allocation error. */
1718 if (correctedPOSIXLocale
== NULL
) {
1721 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1722 correctedPOSIXLocale
[p
-posixID
] = 0;
1726 /* Take care of any special cases here.. */
1727 if (!uprv_strcmp(p
, "nynorsk")) {
1729 /* Don't worry about no__NY. In practice, it won't appear. */
1732 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1733 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1736 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1739 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1740 /* How big will the resulting string be? */
1741 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1742 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1743 correctedPOSIXLocale
[len
] = 0;
1746 /* Anything following the @ sign */
1747 uprv_strcat(correctedPOSIXLocale
, p
);
1750 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1751 * How about 'russian' -> 'ru'?
1752 * Many of the other locales using ISO codes will be handled by the
1753 * canonicalization functions in uloc_getDefault.
1757 /* Was a correction made? */
1758 if (correctedPOSIXLocale
!= NULL
) {
1759 posixID
= correctedPOSIXLocale
;
1762 /* copy it, just in case the original pointer goes away. See j2395 */
1763 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1764 /* Exit on memory allocation error. */
1765 if (correctedPOSIXLocale
== NULL
) {
1768 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1771 if (gCorrectedPOSIXLocale
== NULL
) {
1772 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1773 gCorrectedPOSIXLocaleHeapAllocated
= true;
1774 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1775 correctedPOSIXLocale
= NULL
;
1778 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1779 uprv_free(correctedPOSIXLocale
);
1784 #elif U_PLATFORM_USES_ONLY_WIN32_API
1785 #define POSIX_LOCALE_CAPACITY 64
1786 UErrorCode status
= U_ZERO_ERROR
;
1787 char *correctedPOSIXLocale
= 0;
1789 // If we have already figured this out just use the cached value
1790 if (gCorrectedPOSIXLocale
!= NULL
) {
1791 return gCorrectedPOSIXLocale
;
1794 // No cached value, need to determine the current value
1795 static WCHAR windowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1796 #if U_PLATFORM_HAS_WINUWP_API == 0
1797 // If not a Universal Windows App, we'll need user default language.
1798 // Vista and above should use Locale Names instead of LCIDs
1799 int length
= GetUserDefaultLocaleName(windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1801 // In a UWP app, we want the top language that the application and user agreed upon
1802 ComPtr
<ABI::Windows::Foundation::Collections::IVectorView
<HSTRING
>> languageList
;
1804 ComPtr
<ABI::Windows::Globalization::IApplicationLanguagesStatics
> applicationLanguagesStatics
;
1805 HRESULT hr
= GetActivationFactory(
1806 HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages
).Get(),
1807 &applicationLanguagesStatics
);
1810 hr
= applicationLanguagesStatics
->get_Languages(&languageList
);
1815 // If there is no application context, then use the top language from the user language profile
1816 ComPtr
<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics
> globalizationPreferencesStatics
;
1817 hr
= GetActivationFactory(
1818 HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences
).Get(),
1819 &globalizationPreferencesStatics
);
1822 hr
= globalizationPreferencesStatics
->get_Languages(&languageList
);
1826 // We have a list of languages, ICU knows one, so use the top one for our locale
1827 HString topLanguage
;
1830 hr
= languageList
->GetAt(0, topLanguage
.GetAddressOf());
1835 // Unexpected, use en-US by default
1836 if (gCorrectedPOSIXLocale
== NULL
) {
1837 gCorrectedPOSIXLocale
= "en_US";
1840 return gCorrectedPOSIXLocale
;
1843 // ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
1844 int length
= ResolveLocaleName(topLanguage
.GetRawBuffer(NULL
), windowsLocale
, UPRV_LENGTHOF(windowsLocale
));
1846 // Now we should have a Windows locale name that needs converted to the POSIX style,
1849 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1850 char modifiedWindowsLocale
[LOCALE_NAME_MAX_LENGTH
];
1853 for (i
= 0; i
< UPRV_LENGTHOF(modifiedWindowsLocale
); i
++)
1855 if (windowsLocale
[i
] == '_')
1857 modifiedWindowsLocale
[i
] = '-';
1861 modifiedWindowsLocale
[i
] = static_cast<char>(windowsLocale
[i
]);
1864 if (modifiedWindowsLocale
[i
] == '\0')
1870 if (i
>= UPRV_LENGTHOF(modifiedWindowsLocale
))
1872 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1873 // locale when tags are dropped
1874 modifiedWindowsLocale
[UPRV_LENGTHOF(modifiedWindowsLocale
) - 1] = '\0';
1877 // Now normalize the resulting name
1878 correctedPOSIXLocale
= static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY
+ 1));
1879 /* TODO: Should we just exit on memory allocation failure? */
1880 if (correctedPOSIXLocale
)
1882 int32_t posixLen
= uloc_canonicalize(modifiedWindowsLocale
, correctedPOSIXLocale
, POSIX_LOCALE_CAPACITY
, &status
);
1883 if (U_SUCCESS(status
))
1885 *(correctedPOSIXLocale
+ posixLen
) = 0;
1886 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1887 gCorrectedPOSIXLocaleHeapAllocated
= true;
1888 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1892 uprv_free(correctedPOSIXLocale
);
1897 // If unable to find a locale we can agree upon, use en-US by default
1898 if (gCorrectedPOSIXLocale
== NULL
) {
1899 gCorrectedPOSIXLocale
= "en_US";
1901 return gCorrectedPOSIXLocale
;
1903 #elif U_PLATFORM == U_PF_OS400
1904 /* locales are process scoped and are by definition thread safe */
1905 static char correctedLocale
[64];
1906 const char *localeID
= getenv("LC_ALL");
1909 if (localeID
== NULL
)
1910 localeID
= getenv("LANG");
1911 if (localeID
== NULL
)
1912 localeID
= setlocale(LC_ALL
, NULL
);
1913 /* Make sure we have something... */
1914 if (localeID
== NULL
)
1915 return "en_US_POSIX";
1917 /* Extract the locale name from the path. */
1918 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1920 /* Increment p to start of locale name. */
1925 /* Copy to work location. */
1926 uprv_strcpy(correctedLocale
, localeID
);
1928 /* Strip off the '.locale' extension. */
1929 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1933 /* Upper case the locale name. */
1934 T_CString_toUpperCase(correctedLocale
);
1936 /* See if we are using the POSIX locale. Any of the
1937 * following are equivalent and use the same QLGPGCMA
1939 * QLGPGCMA2 means UCS2
1940 * QLGPGCMA_4 means UTF-32
1941 * QLGPGCMA_8 means UTF-8
1943 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1944 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1945 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1947 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1953 /* Lower case the lang portion. */
1954 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1956 *p
= uprv_tolower(*p
);
1959 /* Adjust for Euro. After '_E' add 'URO'. */
1960 LocaleLen
= uprv_strlen(correctedLocale
);
1961 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1962 correctedLocale
[LocaleLen
- 1] == 'E')
1964 uprv_strcat(correctedLocale
, "URO");
1967 /* If using Lotus-based locale then convert to
1968 * equivalent non Lotus.
1970 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1971 correctedLocale
[LocaleLen
- 1] == 'L')
1973 correctedLocale
[LocaleLen
- 2] = 0;
1976 /* There are separate simplified and traditional
1977 * locales called zh_HK_S and zh_HK_T.
1979 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1981 uprv_strcpy(correctedLocale
, "zh_HK");
1984 /* A special zh_CN_GBK locale...
1986 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1988 uprv_strcpy(correctedLocale
, "zh_CN");
1993 return correctedLocale
;
1998 #if !U_CHARSET_IS_UTF8
2001 Due to various platform differences, one platform may specify a charset,
2002 when they really mean a different charset. Remap the names so that they are
2003 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
2004 here. Before adding anything to this function, please consider adding unique
2005 names to the ICU alias table in the data directory.
// remapPlatformDependentCodepage(locale, name): per-platform table of charset-name fixes.
//   locale - locale part of the POSIX ID, may be NULL; an empty string gets special
//            handling at the top of the function.
//   name   - charset name reported by the platform.
// Each platform branch compares name (and sometimes locale) against known strings.
// NOTE(review): apart from the Solaris "646" case, the replacement names assigned in the
// branches and the return statements are not visible in this text; take them from the
// pristine file.
2008 remapPlatformDependentCodepage(const char *locale
, const char *name
) {
2009 if (locale
!= NULL
&& *locale
== 0) {
2010 /* Make sure that an empty locale is handled the same way. */
2016 #if U_PLATFORM == U_PF_AIX
2017 if (uprv_strcmp(name
, "IBM-943") == 0) {
2018 /* Use the ASCII compatible ibm-943 */
2021 else if (uprv_strcmp(name
, "IBM-1252") == 0) {
2022 /* Use the windows-1252 that contains the Euro */
2025 #elif U_PLATFORM == U_PF_SOLARIS
2026 if (locale
!= NULL
&& uprv_strcmp(name
, "EUC") == 0) {
2027 /* Solaris underspecifies the "EUC" name. */
2028 if (uprv_strcmp(locale
, "zh_CN") == 0) {
2031 else if (uprv_strcmp(locale
, "zh_TW") == 0) {
2034 else if (uprv_strcmp(locale
, "ko_KR") == 0) {
2038 else if (uprv_strcmp(name
, "eucJP") == 0) {
2040 ibm-954 is the best match.
2041 ibm-33722 is the default for eucJP (similar to Windows).
2045 else if (uprv_strcmp(name
, "646") == 0) {
2047 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2048 * ISO-8859-1 instead of US-ASCII(646).
2050 name
= "ISO-8859-1";
2052 #elif U_PLATFORM_IS_DARWIN_BASED
2053 if (locale
== NULL
&& *name
== 0) {
2055 No locale was specified, and an empty name was passed in.
2056 This usually indicates that nl_langinfo didn't return valid information.
2057 Mac OS X uses UTF-8 by default (especially the locale data and console).
2061 else if (uprv_strcmp(name
, "CP949") == 0) {
2062 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2065 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 && uprv_strcmp(name
, "US-ASCII") == 0) {
2067 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2071 #elif U_PLATFORM == U_PF_BSD
2072 if (uprv_strcmp(name
, "CP949") == 0) {
2073 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2076 #elif U_PLATFORM == U_PF_HPUX
2077 if (locale
!= NULL
&& uprv_strcmp(locale
, "zh_HK") == 0 && uprv_strcmp(name
, "big5") == 0) {
2078 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2079 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2082 else if (uprv_strcmp(name
, "eucJP") == 0) {
2084 ibm-1350 is the best match, but unavailable.
2085 ibm-954 is mostly a superset of ibm-1350.
2086 ibm-33722 is the default for eucJP (similar to Windows).
2090 #elif U_PLATFORM == U_PF_LINUX
2091 if (locale
!= NULL
&& uprv_strcmp(name
, "euc") == 0) {
2092 /* Linux underspecifies the "EUC" name. */
2093 if (uprv_strcmp(locale
, "korean") == 0) {
2096 else if (uprv_strcmp(locale
, "japanese") == 0) {
2097 /* See comment below about eucJP */
2101 else if (uprv_strcmp(name
, "eucjp") == 0) {
2103 ibm-1350 is the best match, but unavailable.
2104 ibm-954 is mostly a superset of ibm-1350.
2105 ibm-33722 is the default for eucJP (similar to Windows).
2109 else if (locale
!= NULL
&& uprv_strcmp(locale
, "en_US_POSIX") != 0 &&
2110 (uprv_strcmp(name
, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name
, "US-ASCII") == 0)) {
2112 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2117 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2118 * it by falling back to 'US-ASCII' when NULL is returned from this
2119 * function. So, we don't have to worry about it here.
2122 /* return NULL when "" is passed in */
2130 getCodepageFromPOSIXID(const char *localeName
, char * buffer
, int32_t buffCapacity
)
2132 char localeBuf
[100];
2133 const char *name
= NULL
;
2134 char *variant
= NULL
;
2136 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
2137 size_t localeCapacity
= uprv_min(sizeof(localeBuf
), (name
-localeName
)+1);
2138 uprv_strncpy(localeBuf
, localeName
, localeCapacity
);
2139 localeBuf
[localeCapacity
-1] = 0; /* ensure NULL termination */
2140 name
= uprv_strncpy(buffer
, name
+1, buffCapacity
);
2141 buffer
[buffCapacity
-1] = 0; /* ensure NULL termination */
2142 if ((variant
= const_cast<char *>(uprv_strchr(name
, '@'))) != NULL
) {
2145 name
= remapPlatformDependentCodepage(localeBuf
, name
);
// int_getDefaultCodepage(): works out the platform's default charset name.
// One branch per platform family:
//   - OS/400: queries job info through QUSRJOBI and formats the CCSID as "ibm-<n>",
//     starting from a default of 37.
//   - OS/390: takes nl_langinfo(CODESET) and appends UCNV_SWAP_LFNL_OPTION_STRING.
//   - Win32-only: obtains the ANSI code page (GetACP, or GetLocaleInfoEx on UWP) and
//     formats it as "windows-<n>" when it lies strictly between 0 and 20000.
//   - POSIX: tries nl_langinfo(), then the charmap of the POSIX locale ID, and stores
//     "US-ASCII" when nothing was found.
// Every branch builds the name in a function-local static buffer.
// NOTE(review): the return statements are not visible in this text; presumably each
// branch returns its static buffer -- confirm.
2152 int_getDefaultCodepage()
2154 #if U_PLATFORM == U_PF_OS400
2155 uint32_t ccsid
= 37; /* Default to ibm-37 */
2156 static char codepage
[64];
2157 Qwc_JOBI0400_t jobinfo
;
2158 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
2160 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
2163 if (error
.Bytes_Available
== 0) {
2164 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
2165 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
2167 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
2168 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
2170 /* else use the default */
// NOTE(review): ccsid is uint32_t but is printed with %d.
2172 sprintf(codepage
,"ibm-%d", ccsid
);
2175 #elif U_PLATFORM == U_PF_OS390
2176 static char codepage
[64];
// The copy limit leaves room in the 64-byte buffer for the option string appended next.
2178 strncpy(codepage
, nl_langinfo(CODESET
),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING
));
2179 strcat(codepage
,UCNV_SWAP_LFNL_OPTION_STRING
);
2180 codepage
[63] = 0; /* NULL terminate */
2184 #elif U_PLATFORM_USES_ONLY_WIN32_API
2185 static char codepage
[64];
2186 DWORD codepageNumber
= 0;
2188 #if U_PLATFORM_HAS_WINUWP_API > 0
2189 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2190 // have folks use Unicode than a "system" code page, however this is the same
2191 // codepage as the system default locale codepage. (FWIW, the system locale is
2192 // ONLY used for codepage, it should never be used for anything else)
2193 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT
, LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER
,
2194 (LPWSTR
)&codepageNumber
, sizeof(codepageNumber
) / sizeof(WCHAR
));
2196 // Win32 apps can call GetACP
2197 codepageNumber
= GetACP();
2199 // Special case for UTF-8
2200 if (codepageNumber
== 65001)
2204 // Windows codepages can look like windows-1252, so format the found number
2205 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2206 // are between 3 and 19999
2207 if (codepageNumber
> 0 && codepageNumber
< 20000)
// NOTE(review): codepageNumber is an unsigned DWORD but is printed with %ld; the range
// check above keeps the value small.
2209 sprintf(codepage
, "windows-%ld", codepageNumber
);
2212 // If the codepage number call failed then return UTF-8
2215 #elif U_POSIX_LOCALE
2216 static char codesetName
[100];
2217 const char *localeName
= NULL
;
2218 const char *name
= NULL
;
2220 localeName
= uprv_getPOSIXIDForDefaultCodepage();
2221 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2222 /* On Solaris nl_langinfo returns C locale values unless setlocale
2223 * was called earlier.
2225 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2226 /* When available, check nl_langinfo first because it usually gives more
2227 useful names. It depends on LC_CTYPE.
2228 nl_langinfo may use the same buffer as setlocale. */
2230 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
2231 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2233 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2236 if (uprv_strcmp(localeName
, "en_US_POSIX") != 0) {
2237 codeset
= remapPlatformDependentCodepage(localeName
, codeset
);
2241 codeset
= remapPlatformDependentCodepage(NULL
, codeset
);
2244 if (codeset
!= NULL
) {
2245 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
2246 codesetName
[sizeof(codesetName
)-1] = 0;
2252 /* Use setlocale in a nice way, and then check some environment variables.
2253 Maybe the application used setlocale already.
2255 uprv_memset(codesetName
, 0, sizeof(codesetName
));
2256 name
= getCodepageFromPOSIXID(localeName
, codesetName
, sizeof(codesetName
));
2258 /* if we can find the codeset name from setlocale, return that. */
2262 if (*codesetName
== 0)
2264 /* Everything failed. Return US ASCII (ISO 646). */
2265 (void)uprv_strcpy(codesetName
, "US-ASCII");
// uprv_getDefaultCodepage(): public accessor for the default charset name.
// The name comes from int_getDefaultCodepage() and is kept in a function-local static.
// NOTE(review): the guard around the assignment and the return statement are not visible
// in this text; presumably the lookup runs only while the static is still NULL -- confirm.
2274 U_CAPI
const char* U_EXPORT2
2275 uprv_getDefaultCodepage()
2277 static char const *name
= NULL
;
2280 name
= int_getDefaultCodepage();
2285 #endif /* !U_CHARSET_IS_UTF8 */
2288 /* end of platform-specific implementation -------------- */
2290 /* version handling --------------------------------------------------------- */
/**
 * Parses a version string made of decimal fields separated by
 * U_VERSION_DELIMITER and stores the fields in versionArray.
 *
 * Each field is read with uprv_strtoul() in base 10 and narrowed to uint8_t
 * by a cast. Every entry of versionArray from the current field index up to
 * U_MAX_VERSION_LENGTH that was not filled by parsing is set to 0.
 *
 * @param versionArray  receives the parsed fields; checked against NULL
 *                      before anything else (presumably the function then
 *                      returns without doing anything - confirm).
 * @param versionString text to parse; when NULL the parsing step is skipped.
 */
2292 U_CAPI
void U_EXPORT2
2293 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
2297 if(versionArray
==NULL
) {
2301 if(versionString
!=NULL
) {
/* Convert the next decimal field; `end` is set to the first character after it. */
2303 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
/*
 * Stop-condition for parsing: no digits were consumed, the array is full
 * after counting this field, or the field is not followed by the delimiter.
 */
2304 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
/* Continue with the character after the delimiter. */
2307 versionString
=end
+1;
/* Zero-fill the fields that were not parsed. */
2311 while(part
<U_MAX_VERSION_LENGTH
) {
2312 versionArray
[part
++]=0;
2316 U_CAPI
void U_EXPORT2
2317 u_versionFromUString(UVersionInfo versionArray
, const UChar
*versionString
) {
2318 if(versionArray
!=NULL
&& versionString
!=NULL
) {
2319 char versionChars
[U_MAX_VERSION_STRING_LENGTH
+1];
2320 int32_t len
= u_strlen(versionString
);
2321 if(len
>U_MAX_VERSION_STRING_LENGTH
) {
2322 len
= U_MAX_VERSION_STRING_LENGTH
;
2324 u_UCharsToChars(versionString
, versionChars
, len
);
2325 versionChars
[len
]=0;
2326 u_versionFromString(versionArray
, versionChars
);
/**
 * Writes versionArray into versionString as decimal fields separated by
 * U_VERSION_DELIMITER.
 *
 * Trailing zero fields are not counted for output: the counting loop starts
 * at 4 fields and steps back while the last counted field is 0. The first
 * field is written on its own, every further field is preceded by the
 * delimiter.
 *
 * The output pointer is advanced without any bound check, so the caller's
 * buffer must be large enough for the whole result.
 *
 * NOTE(review): the three digit writes per field (hundreds, tens, ones) are
 * presumably guarded so that no leading zeros are written, with `field`
 * reduced between them - confirm.
 *
 * @param versionArray  version fields to format; checked against NULL.
 * @param versionString destination buffer; checked against NULL first.
 */
2330 U_CAPI
void U_EXPORT2
2331 u_versionToString(const UVersionInfo versionArray
, char *versionString
) {
2332 uint16_t count
, part
;
2335 if(versionString
==NULL
) {
2339 if(versionArray
==NULL
) {
2344 /* count how many fields need to be written */
2345 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
2352 /* write the first part */
2353 /* write the decimal field value */
2354 field
=versionArray
[0];
/* hundreds digit */
2356 *versionString
++=(char)('0'+field
/100);
/* tens digit */
2360 *versionString
++=(char)('0'+field
/10);
/* ones digit */
2363 *versionString
++=(char)('0'+field
);
2365 /* write the following parts */
2366 for(part
=1; part
<count
; ++part
) {
2367 /* write a dot first */
2368 *versionString
++=U_VERSION_DELIMITER
;
2370 /* write the decimal field value */
2371 field
=versionArray
[part
];
/* hundreds digit */
2373 *versionString
++=(char)('0'+field
/100);
/* tens digit */
2377 *versionString
++=(char)('0'+field
/10);
/* ones digit */
2380 *versionString
++=(char)('0'+field
);
2387 U_CAPI
void U_EXPORT2
2388 u_getVersion(UVersionInfo versionArray
) {
2389 (void)copyright
; // Suppress unused variable warning from clang.
2390 u_versionFromString(versionArray
, U_ICU_VERSION
);
2394 * icucfg.h dependent code
2397 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2406 #endif /* HAVE_DLFCN_H */
/**
 * dlopen()-based implementation: opens the shared library libName.
 *
 * The library is opened with RTLD_NOW|RTLD_GLOBAL. Nothing is attempted when
 * *status already holds a failure code; in that case `ret` is returned as is.
 *
 * @param libName name of the library passed to dlopen().
 * @param status  in/out error code; set to U_MISSING_RESOURCE_ERROR on the
 *                error path (presumably when dlopen() returns NULL - confirm).
 * @return the library handle as a void pointer.
 */
2408 U_INTERNAL
void * U_EXPORT2
2409 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2411 if(U_FAILURE(*status
)) return ret
;
2412 ret
= dlopen(libName
, RTLD_NOW
|RTLD_GLOBAL
);
/* Optional diagnostics: print the dlerror() text when tracing is compiled in. */
2414 #ifdef U_TRACE_DYLOAD
2415 printf("dlerror on dlopen(%s): %s\n", libName
, dlerror());
2417 *status
= U_MISSING_RESOURCE_ERROR
;
/**
 * dlopen()-based implementation: closes a library handle.
 *
 * Returns immediately when *status already holds a failure code.
 * NOTE(review): presumably `lib` is then passed to dlclose() - confirm.
 *
 * @param lib    library handle to close.
 * @param status in/out error code; only read in the check below.
 */
2422 U_INTERNAL
void U_EXPORT2
2423 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2424 if(U_FAILURE(*status
)) return;
/**
 * dlopen()-based implementation: looks up the symbol `sym` in `lib` with
 * dlsym().
 *
 * The dlsym() result is stored through the `vp` member of `uret`, while the
 * early return hands back its `fp` member.
 * NOTE(review): `uret` presumably is a union used to convert the object
 * pointer from dlsym() into a function pointer - confirm.
 *
 * @param lib    library handle to search.
 * @param sym    name of the symbol.
 * @param status in/out error code; set to U_MISSING_RESOURCE_ERROR when
 *               dlsym() returns NULL. Nothing is looked up when it already
 *               holds a failure code.
 * @return the symbol address as a UVoidFunction pointer.
 */
2428 U_INTERNAL UVoidFunction
* U_EXPORT2
2429 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2435 if(U_FAILURE(*status
)) return uret
.fp
;
2436 uret
.vp
= dlsym(lib
, sym
);
2437 if(uret
.vp
== NULL
) {
/* Optional diagnostics: print the dlerror() text when tracing is compiled in. */
2438 #ifdef U_TRACE_DYLOAD
2439 printf("dlerror on dlsym(%p,%s): %s\n", lib
,sym
, dlerror());
2441 *status
= U_MISSING_RESOURCE_ERROR
;
2446 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2448 /* Windows API implementation. */
2449 // Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
/**
 * Windows API implementation: loads the library libName with LoadLibraryA().
 *
 * Returns NULL without loading anything when *status already holds a failure
 * code.
 *
 * @param libName name of the library passed to LoadLibraryA().
 * @param status  in/out error code; set to U_MISSING_RESOURCE_ERROR on the
 *                error path (presumably when LoadLibraryA() returns NULL -
 *                confirm).
 * @return the module handle as a void pointer.
 */
2451 U_INTERNAL
void * U_EXPORT2
2452 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2455 if(U_FAILURE(*status
)) return NULL
;
2457 lib
= LoadLibraryA(libName
);
2460 *status
= U_MISSING_RESOURCE_ERROR
;
/**
 * Windows API implementation: releases a library handle with FreeLibrary().
 *
 * Returns without releasing anything when *status already holds a failure
 * code. The result of FreeLibrary() is not inspected.
 *
 * @param lib    library handle; cast to HMODULE.
 * @param status in/out error code; only read in the check below.
 */
2466 U_INTERNAL
void U_EXPORT2
2467 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2468 HMODULE handle
= (HMODULE
)lib
;
2469 if(U_FAILURE(*status
)) return;
2471 FreeLibrary(handle
);
/**
 * Windows API implementation: looks up the symbol `sym` in `lib` with
 * GetProcAddress().
 *
 * Returns NULL without a lookup when *status already holds a failure code or
 * when `lib` is NULL.
 *
 * On the error path GetLastError() decides the status code:
 * ERROR_PROC_NOT_FOUND maps to U_MISSING_RESOURCE_ERROR, anything else to
 * U_UNSUPPORTED_ERROR.
 * NOTE(review): the error path presumably is taken only when
 * GetProcAddress() returned NULL - confirm.
 *
 * @param lib    library handle; cast to HMODULE.
 * @param sym    name of the symbol.
 * @param status in/out error code.
 * @return the symbol address as a UVoidFunction pointer.
 */
2476 U_INTERNAL UVoidFunction
* U_EXPORT2
2477 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2478 HMODULE handle
= (HMODULE
)lib
;
2479 UVoidFunction
* addr
= NULL
;
2481 if(U_FAILURE(*status
) || lib
==NULL
) return NULL
;
2483 addr
= (UVoidFunction
*)GetProcAddress(handle
, sym
);
/* Find out why the lookup failed. */
2486 DWORD lastError
= GetLastError();
2487 if(lastError
== ERROR_PROC_NOT_FOUND
) {
2488 *status
= U_MISSING_RESOURCE_ERROR
;
2490 *status
= U_UNSUPPORTED_ERROR
; /* other unknown error. */
2499 /* No dynamic loading, null (nonexistent) implementation. */
/**
 * Null implementation for builds without dynamic loading: never opens
 * anything.
 *
 * @param libName library name; not used by the visible statements.
 * @param status  in/out error code; left alone when it already holds a
 *                failure code, otherwise set to U_UNSUPPORTED_ERROR.
 * @return NULL when *status already holds a failure code (presumably NULL
 *         in every case - confirm).
 */
2501 U_INTERNAL
void * U_EXPORT2
2502 uprv_dl_open(const char *libName
, UErrorCode
*status
) {
2504 if(U_FAILURE(*status
)) return NULL
;
2505 *status
= U_UNSUPPORTED_ERROR
;
/**
 * Null implementation for builds without dynamic loading: closes nothing.
 *
 * @param lib    library handle; not used by the visible statements.
 * @param status in/out error code; left alone when it already holds a
 *               failure code, otherwise set to U_UNSUPPORTED_ERROR.
 */
2509 U_INTERNAL
void U_EXPORT2
2510 uprv_dl_close(void *lib
, UErrorCode
*status
) {
2512 if(U_FAILURE(*status
)) return;
2513 *status
= U_UNSUPPORTED_ERROR
;
/**
 * Null implementation for builds without dynamic loading: never finds a
 * symbol.
 *
 * @param lib    library handle; not used by the visible statements.
 * @param sym    symbol name; not used by the visible statements.
 * @param status in/out error code; set to U_UNSUPPORTED_ERROR when it holds
 *               a success code on entry, otherwise left unchanged.
 * @return always a NULL UVoidFunction pointer.
 */
2517 U_INTERNAL UVoidFunction
* U_EXPORT2
2518 uprv_dlsym_func(void *lib
, const char* sym
, UErrorCode
*status
) {
2521 if(U_SUCCESS(*status
)) {
2522 *status
= U_UNSUPPORTED_ERROR
;
2524 return (UVoidFunction
*)NULL
;
2530 * Hey, Emacs, please set the following:
2533 * indent-tabs-mode: nil