]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/putil.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
5*
2ca993e8 6* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13* Date Name Description
14* 04/14/97 aliu Creation.
15* 04/24/97 aliu Added getDefaultDataDirectory() and
16* getDefaultLocaleID().
17* 04/28/97 aliu Rewritten to assume Unix and apply general methods
18* for assumed case. Non-UNIX platforms must be
19* special-cased. Rewrote numeric methods dealing
20* with NaN and Infinity to be platform independent
21* over all IEEE 754 platforms.
22* 05/13/97 aliu Restored sign of timezone
23* (semantics are hours West of GMT)
24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25* nextDouble..
26* 07/22/98 stephen Added remainder, max, min, trunc
27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28* 08/24/98 stephen Added longBitsFromDouble
29* 09/08/98 stephen Minor changes for Mac Port
30* 03/02/99 stephen Removed openFile(). Added AS400 support.
31* Fixed EBCDIC tables
32* 04/15/99 stephen Converted to C.
33* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34* 08/04/99 jeffrey R. Added OS/2 changes
35* 11/15/99 helena Integrated S/390 IEEE support.
36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
46f4442e 38* 01/03/08 Steven L. Fake Time Support
b75a7d8f
A
39******************************************************************************
40*/
41
4388f060
A
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
b75a7d8f 45
f3c0d7a5
A
46// First, the platform type. Need this for U_PLATFORM.
47#include "unicode/platform.h"
48
49#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50/* tzset isn't defined in strict ANSI on MinGW. */
51#undef __STRICT_ANSI__
52#endif
53
54/*
55 * Cygwin with GCC requires time.h to be included after strict ANSI mode has been disabled above.
56 */
57#include <time.h>
58
59#if !U_PLATFORM_USES_ONLY_WIN32_API
60#include <sys/time.h>
61#endif
62
63/* include the rest of the ICU headers */
b75a7d8f 64#include "unicode/putil.h"
374ca955
A
65#include "unicode/ustring.h"
66#include "putilimp.h"
67#include "uassert.h"
b75a7d8f
A
68#include "umutex.h"
69#include "cmemory.h"
70#include "cstring.h"
71#include "locmap.h"
72#include "ucln_cmn.h"
b331163b 73#include "charstr.h"
73c04bcf
A
74
75/* Include standard headers. */
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <math.h>
80#include <locale.h>
81#include <float.h>
4388f060
A
82
83#ifndef U_COMMON_IMPLEMENTATION
84#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
85#endif
86
b75a7d8f
A
87
88/* include system headers */
4388f060
A
89#if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
f3c0d7a5 95#ifndef WIN32_LEAN_AND_MEAN
b75a7d8f 96# define WIN32_LEAN_AND_MEAN
f3c0d7a5 97#endif
374ca955 98# define VC_EXTRALEAN
b75a7d8f
A
99# define NOUSER
100# define NOSERVICE
101# define NOIME
102# define NOMCX
103# include <windows.h>
0f5d89e8 104# include "unicode/uloc.h"
73c04bcf 105# include "wintz.h"
4388f060 106#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
107# include <float.h>
108# include <qusec.h> /* error code structure */
109# include <qusrjobi.h>
110# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
46f4442e 111# include <mih/testptr.h> /* For uprv_maximumPtr */
4388f060
A
112#elif U_PLATFORM == U_PF_OS390
113# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
51004dcb 114#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
4388f060
A
115# include <limits.h>
116# include <unistd.h>
51004dcb
A
117# if U_PLATFORM == U_PF_SOLARIS
118# ifndef _XPG4_2
119# define _XPG4_2
120# endif
121# endif
4388f060
A
122#elif U_PLATFORM == U_PF_QNX
123# include <sys/neutrino.h>
374ca955
A
124#endif
125
b75a7d8f 126/*
374ca955
A
127 * Only include langinfo.h if we have a way to get the codeset. If we later
128 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
129 *
130 */
131
132#if U_HAVE_NL_LANGINFO_CODESET
133#include <langinfo.h>
b75a7d8f
A
134#endif
135
729e4ab9
A
136/**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
4388f060
A
140#if U_PLATFORM_IMPLEMENTS_POSIX
141# if U_PLATFORM == U_PF_OS400
142# define HAVE_DLFCN_H 0
143# define HAVE_DLOPEN 0
144# else
145# ifndef HAVE_DLFCN_H
146# define HAVE_DLFCN_H 1
147# endif
148# ifndef HAVE_DLOPEN
149# define HAVE_DLOPEN 1
150# endif
151# endif
152# ifndef HAVE_GETTIMEOFDAY
153# define HAVE_GETTIMEOFDAY 1
154# endif
155#else
156# define HAVE_DLFCN_H 0
157# define HAVE_DLOPEN 0
158# define HAVE_GETTIMEOFDAY 0
729e4ab9
A
159#endif
160
b331163b 161U_NAMESPACE_USE
4388f060 162
b75a7d8f
A
163/* Define the extension for data files, again... */
164#define DATA_TYPE "dat"
165
166/* Leave this copyright notice here! */
167static const char copyright[] = U_COPYRIGHT_STRING;
168
169/* floating point implementations ------------------------------------------- */
170
171/* We return QNAN rather than SNAN*/
172#define SIGN 0x80000000U
b75a7d8f 173
73c04bcf
A
174/* Make it easy to define certain types of constants */
/* Overlay of a double and the 64-bit integer holding its bit pattern. */
typedef union {
    /* The integer member has to stay first: a brace initializer sets the
       first member (C89 rule), and the constants below are given as bit patterns. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern 0x7FF8000000000000; read through d64 by uprv_getNaN(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };

/* Bit pattern 0x7FF0000000000000; read through d64 by uprv_getInfinity(). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
b75a7d8f
A
181
182/*---------------------------------------------------------------------------
183 Platform utilities
184 Our general strategy is to assume we're on a POSIX platform. Platforms which
185 are non-POSIX must declare themselves so. The default POSIX implementation
186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187 functions).
188 ---------------------------------------------------------------------------*/
189
b331163b 190#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
b75a7d8f
A
191# undef U_POSIX_LOCALE
192#else
193# define U_POSIX_LOCALE 1
194#endif
195
73c04bcf
A
196/*
197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199*/
200#if !IEEE_754
b75a7d8f
A
/*
 * Returns a pointer to the n bytes of *d that hold its high-order end:
 * the start of the object on big-endian builds, the last n bytes otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    /* The high-order bytes come first, so n is not needed here. */
    return (char*)d;
#else
    char* pastEnd = (char*)(d + 1);
    return pastEnd - n;
#endif
}
210
/*
 * Returns a pointer to the n bytes of *d that hold its low-order end:
 * the last n bytes on big-endian builds, the start of the object otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    char* pastEnd = (char*)(d + 1);
    return pastEnd - n;
#else
    /* The low-order bytes come first, so n is not needed here. */
    return (char*)d;
#endif
}
729e4ab9
A
220#endif /* !IEEE_754 */
221
222#if IEEE_754
223static UBool
224u_signBit(double d) {
225 uint8_t hiByte;
226#if U_IS_BIG_ENDIAN
227 hiByte = *(uint8_t *)&d;
228#else
229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230#endif
231 return (hiByte & 0x80) != 0;
232}
233#endif
234
235
b75a7d8f 236
46f4442e 237#if defined (U_DEBUG_FAKETIME)
729e4ab9 238/* Override the clock to test things without having to move the system clock.
46f4442e
A
239 * Assumes POSIX gettimeofday() will function
240 */
241UDate fakeClock_t0 = 0; /** Time to start the clock from **/
242UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
243UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
46f4442e
A
244
245static UDate getUTCtime_real() {
246 struct timeval posixTime;
247 gettimeofday(&posixTime, NULL);
248 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
249}
250
251static UDate getUTCtime_fake() {
340931cb
A
252 static UMutex fakeClockMutex;
253 umtx_lock(&fakeClockMutex);
46f4442e
A
254 if(!fakeClock_set) {
255 UDate real = getUTCtime_real();
256 const char *fake_start = getenv("U_FAKETIME_START");
729e4ab9 257 if((fake_start!=NULL) && (fake_start[0]!=0)) {
46f4442e 258 sscanf(fake_start,"%lf",&fakeClock_t0);
729e4ab9
A
259 fakeClock_dt = fakeClock_t0 - real;
260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262 fakeClock_t0, fake_start, fakeClock_dt, real);
263 } else {
264 fakeClock_dt = 0;
265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
46f4442e 267 }
46f4442e
A
268 fakeClock_set = TRUE;
269 }
340931cb 270 umtx_unlock(&fakeClockMutex);
729e4ab9 271
46f4442e
A
272 return getUTCtime_real() + fakeClock_dt;
273}
274#endif
275
4388f060 276#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
277typedef union {
278 int64_t int64;
279 FILETIME fileTime;
280} FileTimeConversion; /* This is like a ULARGE_INTEGER */
281
282/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
283#define EPOCH_BIAS INT64_C(116444736000000000)
284#define HECTONANOSECOND_PER_MILLISECOND 10000
285
286#endif
287
b75a7d8f
A
288/*---------------------------------------------------------------------------
289 Universal Implementations
73c04bcf
A
290 These are designed to work on all platforms. Try these, and if they
291 don't work on your platform, then special case your platform with new
b75a7d8f 292 implementations.
73c04bcf 293---------------------------------------------------------------------------*/
b75a7d8f 294
374ca955 295U_CAPI UDate U_EXPORT2
b75a7d8f
A
296uprv_getUTCtime()
297{
46f4442e
A
298#if defined(U_DEBUG_FAKETIME)
299 return getUTCtime_fake(); /* Hook for overriding the clock */
729e4ab9
A
300#else
301 return uprv_getRawUTCtime();
302#endif
303}
304
305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306U_CAPI UDate U_EXPORT2
307uprv_getRawUTCtime()
308{
b331163b 309#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
310
311 FileTimeConversion winTime;
312 GetSystemTimeAsFileTime(&winTime.fileTime);
313 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
b75a7d8f 314#else
729e4ab9 315
4388f060 316#if HAVE_GETTIMEOFDAY
73c04bcf
A
317 struct timeval posixTime;
318 gettimeofday(&posixTime, NULL);
319 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
729e4ab9 320#else
b75a7d8f
A
321 time_t epochtime;
322 time(&epochtime);
374ca955 323 return (UDate)epochtime * U_MILLIS_PER_SECOND;
b75a7d8f 324#endif
729e4ab9
A
325
326#endif
b75a7d8f
A
327}
328
329/*-----------------------------------------------------------------------------
330 IEEE 754
331 These methods detect and return NaN and infinity values for doubles
332 conforming to IEEE 754. Platforms which support this standard include X86,
333 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
334 If this doesn't work on your platform, you have non-IEEE floating-point, and
335 will need to code your own versions. A naive implementation is to return 0.0
336 for getNaN and getInfinity, and false for isNaN and isInfinite.
337 ---------------------------------------------------------------------------*/
338
339U_CAPI UBool U_EXPORT2
340uprv_isNaN(double number)
341{
342#if IEEE_754
73c04bcf
A
343 BitPatternConversion convertedNumber;
344 convertedNumber.d64 = number;
b75a7d8f 345 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
73c04bcf 346 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
b75a7d8f 347
4388f060 348#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
349 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
350 sizeof(uint32_t));
351 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
352 sizeof(uint32_t));
353
354 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
355 (lowBits == 0x00000000L);
356
357#else
358 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
359 /* you'll need to replace this default implementation with what's correct*/
360 /* for your platform.*/
361 return number != number;
362#endif
363}
364
365U_CAPI UBool U_EXPORT2
366uprv_isInfinite(double number)
367{
368#if IEEE_754
73c04bcf
A
369 BitPatternConversion convertedNumber;
370 convertedNumber.d64 = number;
371 /* Infinity is exactly 0x7FF0000000000000U. */
372 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
4388f060 373#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
374 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
375 sizeof(uint32_t));
376 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
377 sizeof(uint32_t));
378
379 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
380
381#else
382 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
383 /* value, you'll need to replace this default implementation with what's*/
384 /* correct for your platform.*/
385 return number == (2.0 * number);
386#endif
387}
388
389U_CAPI UBool U_EXPORT2
390uprv_isPositiveInfinity(double number)
391{
4388f060 392#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
393 return (UBool)(number > 0 && uprv_isInfinite(number));
394#else
395 return uprv_isInfinite(number);
396#endif
397}
398
399U_CAPI UBool U_EXPORT2
400uprv_isNegativeInfinity(double number)
401{
4388f060 402#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
403 return (UBool)(number < 0 && uprv_isInfinite(number));
404
405#else
406 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
407 sizeof(uint32_t));
408 return((highBits & SIGN) && uprv_isInfinite(number));
409
410#endif
411}
412
413U_CAPI double U_EXPORT2
414uprv_getNaN()
415{
4388f060 416#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 417 return gNan.d64;
b75a7d8f
A
418#else
419 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
420 /* you'll need to replace this default implementation with what's correct*/
421 /* for your platform.*/
422 return 0.0;
423#endif
424}
425
426U_CAPI double U_EXPORT2
427uprv_getInfinity()
428{
4388f060 429#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 430 return gInf.d64;
b75a7d8f
A
431#else
432 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
433 /* value, you'll need to replace this default implementation with what's*/
434 /* correct for your platform.*/
435 return 0.0;
436#endif
437}
438
439U_CAPI double U_EXPORT2
440uprv_floor(double x)
441{
442 return floor(x);
443}
444
445U_CAPI double U_EXPORT2
446uprv_ceil(double x)
447{
448 return ceil(x);
449}
450
451U_CAPI double U_EXPORT2
452uprv_round(double x)
453{
454 return uprv_floor(x + 0.5);
455}
456
457U_CAPI double U_EXPORT2
458uprv_fabs(double x)
459{
460 return fabs(x);
461}
462
463U_CAPI double U_EXPORT2
464uprv_modf(double x, double* y)
465{
466 return modf(x, y);
467}
468
469U_CAPI double U_EXPORT2
470uprv_fmod(double x, double y)
471{
472 return fmod(x, y);
473}
474
475U_CAPI double U_EXPORT2
476uprv_pow(double x, double y)
477{
478 /* This is declared as "double pow(double x, double y)" */
479 return pow(x, y);
480}
481
482U_CAPI double U_EXPORT2
483uprv_pow10(int32_t x)
484{
485 return pow(10.0, (double)x);
486}
487
488U_CAPI double U_EXPORT2
489uprv_fmax(double x, double y)
490{
491#if IEEE_754
b75a7d8f
A
492 /* first handle NaN*/
493 if(uprv_isNaN(x) || uprv_isNaN(y))
494 return uprv_getNaN();
495
496 /* check for -0 and 0*/
729e4ab9 497 if(x == 0.0 && y == 0.0 && u_signBit(x))
b75a7d8f
A
498 return y;
499
500#endif
501
729e4ab9 502 /* this should work for all flt point w/o NaN and Inf special cases */
b75a7d8f
A
503 return (x > y ? x : y);
504}
505
b75a7d8f
A
506U_CAPI double U_EXPORT2
507uprv_fmin(double x, double y)
508{
509#if IEEE_754
b75a7d8f
A
510 /* first handle NaN*/
511 if(uprv_isNaN(x) || uprv_isNaN(y))
512 return uprv_getNaN();
513
514 /* check for -0 and 0*/
729e4ab9 515 if(x == 0.0 && y == 0.0 && u_signBit(y))
b75a7d8f
A
516 return y;
517
518#endif
519
520 /* this should work for all flt point w/o NaN and Inf special cases */
521 return (x > y ? y : x);
522}
523
0f5d89e8
A
524U_CAPI UBool U_EXPORT2
525uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
526 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
527 // This function could be optimized by calling one of those primitives.
528 auto a64 = static_cast<int64_t>(a);
529 auto b64 = static_cast<int64_t>(b);
530 int64_t res64 = a64 + b64;
531 *res = static_cast<int32_t>(res64);
532 return res64 != *res;
533}
534
535U_CAPI UBool U_EXPORT2
536uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
537 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
538 // This function could be optimized by calling one of those primitives.
539 auto a64 = static_cast<int64_t>(a);
540 auto b64 = static_cast<int64_t>(b);
541 int64_t res64 = a64 * b64;
542 *res = static_cast<int32_t>(res64);
543 return res64 != *res;
544}
545
b75a7d8f
A
546/**
547 * Truncates the given double.
548 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
549 * This is different than calling floor() or ceil():
550 * floor(3.3) = 3, floor(-3.3) = -4
551 * ceil(3.3) = 4, ceil(-3.3) = -3
552 */
553U_CAPI double U_EXPORT2
554uprv_trunc(double d)
555{
556#if IEEE_754
b75a7d8f
A
557 /* handle error cases*/
558 if(uprv_isNaN(d))
559 return uprv_getNaN();
560 if(uprv_isInfinite(d))
561 return uprv_getInfinity();
562
729e4ab9 563 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
b75a7d8f
A
564 return ceil(d);
565 else
566 return floor(d);
567
568#else
569 return d >= 0 ? floor(d) : ceil(d);
570
571#endif
572}
573
574/**
575 * Return the largest positive number that can be represented by an integer
576 * type of arbitrary bit length.
577 */
578U_CAPI double U_EXPORT2
579uprv_maxMantissa(void)
580{
581 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
582}
583
b75a7d8f
A
584U_CAPI double U_EXPORT2
585uprv_log(double d)
586{
587 return log(d);
588}
589
46f4442e
A
590U_CAPI void * U_EXPORT2
591uprv_maximumPtr(void * base)
b75a7d8f 592{
4388f060 593#if U_PLATFORM == U_PF_OS400
46f4442e 594 /*
729e4ab9 595 * With the provided function we should never be out of range of a given segment
46f4442e
A
596 * (a traditional/typical segment that is). Our segments have 5 bytes for the
597 * id and 3 bytes for the offset. The key is that the casting takes care of
598 * only retrieving the offset portion minus x1000. Hence, the smallest offset
599 * seen in a program is x001000 and when casted to an int would be 0.
600 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
601 *
729e4ab9 602 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
46f4442e 603 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
729e4ab9
A
604 * This function determines the activation based on the pointer that is passed in and
605 * calculates the appropriate maximum available size for
46f4442e
A
606 * each pointer type (TERASPACE and non-TERASPACE)
607 *
608 * Unlike other operating systems, the pointer model isn't determined at
609 * compile time on i5/OS.
610 */
611 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
612 /* if it is a TERASPACE pointer the max is 2GB - 4k */
613 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
b75a7d8f 614 }
46f4442e
A
615 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
616 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
b75a7d8f 617
46f4442e 618#else
729e4ab9 619 return U_MAX_PTR(base);
374ca955 620#endif
46f4442e 621}
b75a7d8f
A
622
623/*---------------------------------------------------------------------------
624 Platform-specific Implementations
625 Try these, and if they don't work on your platform, then special case your
626 platform with new implementations.
627 ---------------------------------------------------------------------------*/
628
b75a7d8f
A
629/* Generic time zone layer -------------------------------------------------- */
630
631/* Time zone utilities */
632U_CAPI void U_EXPORT2
633uprv_tzset()
634{
4388f060 635#if defined(U_TZSET)
b75a7d8f
A
636 U_TZSET();
637#else
638 /* no initialization*/
639#endif
640}
641
642U_CAPI int32_t U_EXPORT2
643uprv_timezone()
644{
374ca955 645#ifdef U_TIMEZONE
b75a7d8f
A
646 return U_TIMEZONE;
647#else
648 time_t t, t1, t2;
649 struct tm tmrec;
b75a7d8f
A
650 int32_t tdiff = 0;
651
652 time(&t);
374ca955 653 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
57a6839d
A
654#if U_PLATFORM != U_PF_IPHONE
655 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
656#endif
b75a7d8f 657 t1 = mktime(&tmrec); /* local time in seconds*/
374ca955 658 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
b75a7d8f
A
659 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
660 tdiff = t2 - t1;
57a6839d
A
661
662#if U_PLATFORM != U_PF_IPHONE
b75a7d8f 663 /* imitate NT behaviour, which returns same timezone offset to GMT for
51004dcb
A
664 winter and summer.
665 This does not work on all platforms. For instance, on glibc on Linux
666 and on Mac OS 10.5, tdiff calculated above remains the same
57a6839d
A
667 regardless of whether DST is in effect or not. iOS is another
668 platform where this does not work. Linux + glibc and Mac OS 10.5
669 have U_TIMEZONE defined so that this code is not reached.
670 */
b75a7d8f
A
671 if (dst_checked)
672 tdiff += 3600;
57a6839d 673#endif
b75a7d8f
A
674 return tdiff;
675#endif
676}
677
374ca955 678/* Note that U_TZNAME does *not* have to be tzname, but if it is,
729e4ab9 679 some platforms need to have it declared here. */
b75a7d8f 680
f3c0d7a5 681#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
374ca955
A
682/* RS6000 and others reject char **tzname. */
683extern U_IMPORT char *U_TZNAME[];
684#endif
685
57a6839d 686#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
73c04bcf 687/* These platforms are likely to use Olson timezone IDs. */
0f5d89e8
A
688/* common targets of the symbolic link at TZDEFAULT are:
689 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
690 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
691 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
692 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
693 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
694 * To avoid checking lots of paths, just check that the target path
695 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
696 */
697
73c04bcf 698#define CHECK_LOCALTIME_LINK 1
4388f060 699#if U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
700#include <tzfile.h>
701#define TZZONEINFO (TZDIR "/")
51004dcb
A
702#elif U_PLATFORM == U_PF_SOLARIS
703#define TZDEFAULT "/etc/localtime"
704#define TZZONEINFO "/usr/share/lib/zoneinfo/"
705#define TZ_ENV_CHECK "localtime"
46f4442e
A
706#else
707#define TZDEFAULT "/etc/localtime"
708#define TZZONEINFO "/usr/share/zoneinfo/"
709#endif
0f5d89e8 710#define TZZONEINFOTAIL "/zoneinfo/"
729e4ab9
A
711#if U_HAVE_DIRENT_H
712#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
713/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
714 symlinked to /etc/localtime, which makes searchForTZFile return
715 'localtime' when it's the first match. */
716#define TZFILE_SKIP2 "localtime"
717#define SEARCH_TZFILE
718#include <dirent.h> /* Needed to search through system timezone files */
719#endif
73c04bcf
A
720static char gTimeZoneBuffer[PATH_MAX];
721static char *gTimeZoneBufferPtr = NULL;
722#endif
723
4388f060 724#if !U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
725#define isNonDigit(ch) (ch < '0' || '9' < ch)
726static UBool isValidOlsonID(const char *id) {
727 int32_t idx = 0;
728
729 /* Determine if this is something like Iceland (Olson ID)
730 or AST4ADT (non-Olson ID) */
731 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
732 idx++;
733 }
734
735 /* If we went through the whole string, then it might be okay.
736 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
737 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
738 The rest of the time it could be an Olson ID. George */
739 return (UBool)(id[idx] == 0
740 || uprv_strcmp(id, "PST8PDT") == 0
741 || uprv_strcmp(id, "MST7MDT") == 0
742 || uprv_strcmp(id, "CST6CDT") == 0
743 || uprv_strcmp(id, "EST5EDT") == 0);
744}
729e4ab9
A
745
746/* On some Unix-like OS, 'posix' subdirectory in
747 /usr/share/zoneinfo replicates the top-level contents. 'right'
748 subdirectory has the same set of files, but individual files
749 are different from those in the top-level directory or 'posix'
750 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
751 has files for UTC.
752 When the first match for /etc/localtime is in either of them
753 (usually in posix because 'right' has different file contents),
754 or TZ environment variable points to one of them, createTimeZone
755 fails because, say, 'posix/America/New_York' is not an Olson
756 timezone id ('America/New_York' is). So, we have to skip
757 'posix/' and 'right/' at the beginning. */
758static void skipZoneIDPrefix(const char** id) {
759 if (uprv_strncmp(*id, "posix/", 6) == 0
760 || uprv_strncmp(*id, "right/", 6) == 0)
761 {
762 *id += 6;
763 }
764}
b75a7d8f
A
765#endif
766
4388f060 767#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
46f4442e
A
768
769#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps abbreviated zone names to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* compared against the offset passed to remapShortTimeZone() */
    int32_t daylightType;   /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* abbreviated standard-time name */
    const char *dstID;      /* abbreviated daylight-time name */
    const char *olsonID;    /* Olson ID returned on a match */
} OffsetZoneMapping;
777
4388f060
A
/* Values used for OffsetZoneMapping.daylightType. */
enum {
    U_DAYLIGHT_NONE     = 0,
    U_DAYLIGHT_JUNE     = 1,
    U_DAYLIGHT_DECEMBER = 2
};
779
46f4442e
A
780/*
781This list tries to disambiguate a set of abbreviated timezone IDs and offsets
782and maps it to an Olson ID.
783Before adding anything to this list, take a look at
784icu/source/tools/tzcode/tz.alias
785Sometimes no daylight savings (0) is important to define due to aliases.
786This list can be tested with icu/source/test/compat/tzone.pl
787More values could be added to daylightType to increase precision.
788*/
789static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
790 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
791 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
792 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
793 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
794 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
795 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
796 {-36000, 2, "EST", "EST", "Australia/Sydney"},
797 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
798 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
799 {-34200, 2, "CST", "CST", "Australia/South"},
800 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
801 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
802 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
803 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
804 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
805 {-28800, 2, "WST", "WST", "Australia/West"},
806 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
807 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
808 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
809 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
810 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
811 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
812 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
813 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
814 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
815 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
816 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
817 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
818 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
819 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
820 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
821 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
822 {0, 1, "GMT", "IST", "Europe/Dublin"},
823 {0, 1, "GMT", "BST", "Europe/London"},
824 {0, 0, "WET", "WEST", "Africa/Casablanca"},
825 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
826 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
827 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
828 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
829 {10800, 2, "UYT", "UYST", "America/Montevideo"},
830 {10800, 1, "WGT", "WGST", "America/Godthab"},
831 {10800, 2, "BRT", "BRST", "Brazil/East"},
832 {12600, 1, "NST", "NDT", "America/St_Johns"},
833 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
834 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
835 {14400, 2, "CLT", "CLST", "Chile/Continental"},
836 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
837 {14400, 2, "PYT", "PYST", "America/Asuncion"},
838 {18000, 1, "CST", "CDT", "America/Havana"},
839 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
840 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
841 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
842 {21600, 0, "CST", "CDT", "America/Guatemala"},
843 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
844 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
845 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
846 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
847 {32400, 1, "AKST", "AKDT", "US/Alaska"},
848 {36000, 1, "HAST", "HADT", "US/Aleutian"}
849};
850
851/*#define DEBUG_TZNAME*/
852
853static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
854{
855 int32_t idx;
856#ifdef DEBUG_TZNAME
857 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
858#endif
b331163b 859 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
46f4442e
A
860 {
861 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
862 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
863 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
864 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
865 {
866 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
867 }
868 }
869 return NULL;
870}
871#endif
872
729e4ab9 873#ifdef SEARCH_TZFILE
729e4ab9
A
874#define MAX_READ_SIZE 512
875
876typedef struct DefaultTZInfo {
877 char* defaultTZBuffer;
878 int64_t defaultTZFileSize;
879 FILE* defaultTZFilePtr;
880 UBool defaultTZstatus;
881 int32_t defaultTZPosition;
882} DefaultTZInfo;
883
884/*
885 * This method compares the two files given to see if they are a match.
886 * It is currently use to compare two TZ files.
887 */
888static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
889 FILE* file;
890 int64_t sizeFile;
891 int64_t sizeFileLeft;
892 int32_t sizeFileRead;
893 int32_t sizeFileToRead;
894 char bufferFile[MAX_READ_SIZE];
895 UBool result = TRUE;
896
897 if (tzInfo->defaultTZFilePtr == NULL) {
898 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
899 }
900 file = fopen(TZFileName, "r");
901
902 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
903
904 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
905 /* First check that the file size are equal. */
906 if (tzInfo->defaultTZFileSize == 0) {
907 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
908 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
909 }
910 fseek(file, 0, SEEK_END);
911 sizeFile = ftell(file);
912 sizeFileLeft = sizeFile;
913
914 if (sizeFile != tzInfo->defaultTZFileSize) {
915 result = FALSE;
916 } else {
917 /* Store the data from the files in seperate buffers and
918 * compare each byte to determine equality.
919 */
920 if (tzInfo->defaultTZBuffer == NULL) {
921 rewind(tzInfo->defaultTZFilePtr);
922 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
4388f060 923 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
729e4ab9
A
924 }
925 rewind(file);
926 while(sizeFileLeft > 0) {
927 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
928 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
929
930 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
931 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
932 result = FALSE;
933 break;
934 }
935 sizeFileLeft -= sizeFileRead;
936 tzInfo->defaultTZPosition += sizeFileRead;
937 }
938 }
939 } else {
940 result = FALSE;
941 }
942
943 if (file != NULL) {
944 fclose(file);
945 }
946
947 return result;
948}
f3c0d7a5
A
949
950
729e4ab9
A
951/* dirent also lists two entries: "." and ".." that we can safely ignore. */
952#define SKIP1 "."
953#define SKIP2 ".."
f3c0d7a5
A
954static UBool U_CALLCONV putil_cleanup(void);
955static CharString *gSearchTZFileResult = NULL;
956
957/*
958 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
959 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
960 */
729e4ab9 961static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
0f5d89e8 962 DIR* dirp = NULL;
729e4ab9 963 struct dirent* dirEntry = NULL;
729e4ab9 964 char* result = NULL;
0f5d89e8
A
965 UErrorCode status = U_ZERO_ERROR;
966
967 /* Save the current path */
968 CharString curpath(path, -1, status);
969 if (U_FAILURE(status)) {
970 goto cleanupAndReturn;
971 }
972
973 dirp = opendir(path);
729e4ab9 974 if (dirp == NULL) {
0f5d89e8 975 goto cleanupAndReturn;
729e4ab9
A
976 }
977
f3c0d7a5
A
978 if (gSearchTZFileResult == NULL) {
979 gSearchTZFileResult = new CharString;
980 if (gSearchTZFileResult == NULL) {
0f5d89e8 981 goto cleanupAndReturn;
f3c0d7a5
A
982 }
983 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
984 }
985
729e4ab9
A
986 /* Check each entry in the directory. */
987 while((dirEntry = readdir(dirp)) != NULL) {
988 const char* dirName = dirEntry->d_name;
3d1f044b
A
989 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
990 && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
729e4ab9 991 /* Create a newpath with the new entry to test each entry in the directory. */
f3c0d7a5
A
992 CharString newpath(curpath, status);
993 newpath.append(dirName, -1, status);
994 if (U_FAILURE(status)) {
0f5d89e8 995 break;
f3c0d7a5 996 }
729e4ab9 997
0f5d89e8 998 DIR* subDirp = NULL;
f3c0d7a5 999 if ((subDirp = opendir(newpath.data())) != NULL) {
729e4ab9
A
1000 /* If this new path is a directory, make a recursive call with the newpath. */
1001 closedir(subDirp);
f3c0d7a5
A
1002 newpath.append('/', status);
1003 if (U_FAILURE(status)) {
0f5d89e8 1004 break;
f3c0d7a5
A
1005 }
1006 result = searchForTZFile(newpath.data(), tzInfo);
729e4ab9
A
1007 /*
1008 Have to get out here. Otherwise, we'd keep looking
1009 and return the first match in the top-level directory
1010 if there's a match in the top-level. If not, this function
1011 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1012 It worked without this in most cases because we have a fallback of calling
1013 localtime_r to figure out the default timezone.
1014 */
1015 if (result != NULL)
1016 break;
3d1f044b 1017 } else {
f3c0d7a5
A
1018 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1019 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1020 if (amountToSkip > newpath.length()) {
1021 amountToSkip = newpath.length();
1022 }
1023 const char* zoneid = newpath.data() + amountToSkip;
729e4ab9 1024 skipZoneIDPrefix(&zoneid);
f3c0d7a5
A
1025 gSearchTZFileResult->clear();
1026 gSearchTZFileResult->append(zoneid, -1, status);
1027 if (U_FAILURE(status)) {
0f5d89e8 1028 break;
f3c0d7a5
A
1029 }
1030 result = gSearchTZFileResult->data();
729e4ab9
A
1031 /* Get out after the first one found. */
1032 break;
1033 }
1034 }
1035 }
1036 }
0f5d89e8
A
1037
1038 cleanupAndReturn:
1039 if (dirp) {
1040 closedir(dirp);
1041 }
729e4ab9
A
1042 return result;
1043}
1044#endif
f3c0d7a5
A
1045
1046U_CAPI void U_EXPORT2
1047uprv_tzname_clear_cache()
1048{
1049#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1050 gTimeZoneBufferPtr = NULL;
1051#endif
1052}
1053
374ca955 1054U_CAPI const char* U_EXPORT2
b75a7d8f
A
1055uprv_tzname(int n)
1056{
0f5d89e8 1057 (void)n; // Avoid unreferenced parameter warning.
46f4442e 1058 const char *tzid = NULL;
4388f060 1059#if U_PLATFORM_USES_ONLY_WIN32_API
46f4442e 1060 tzid = uprv_detectWindowsTimeZone();
73c04bcf 1061
46f4442e
A
1062 if (tzid != NULL) {
1063 return tzid;
b75a7d8f 1064 }
f3c0d7a5
A
1065
1066#ifndef U_TZNAME
1067 // The return value is free'd in timezone.cpp on Windows because
1068 // the other code path returns a pointer to a heap location.
1069 // If we don't have a name already, then tzname wouldn't be any
1070 // better, so just fall back.
3d1f044b 1071 return uprv_strdup("");
f3c0d7a5
A
1072#endif // !U_TZNAME
1073
73c04bcf 1074#else
b75a7d8f 1075
4388f060 1076/*#if U_PLATFORM_IS_DARWIN_BASED
374ca955
A
1077 int ret;
1078
46f4442e
A
1079 tzid = getenv("TZFILE");
1080 if (tzid != NULL) {
1081 return tzid;
374ca955 1082 }
73c04bcf 1083#endif*/
374ca955 1084
46f4442e
A
1085/* This code can be temporarily disabled to test tzname resolution later on. */
1086#ifndef DEBUG_TZNAME
1087 tzid = getenv("TZ");
51004dcb
A
1088 if (tzid != NULL && isValidOlsonID(tzid)
1089#if U_PLATFORM == U_PF_SOLARIS
1090 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1091 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1092#endif
1093 ) {
2ca993e8
A
1094 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1095 if (tzid[0] == ':') {
1096 tzid++;
1097 }
73c04bcf 1098 /* This might be a good Olson ID. */
729e4ab9 1099 skipZoneIDPrefix(&tzid);
46f4442e 1100 return tzid;
374ca955 1101 }
73c04bcf 1102 /* else U_TZNAME will give a better result. */
46f4442e 1103#endif
374ca955 1104
4388f060 1105#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
73c04bcf
A
1106 /* Caller must handle threading issues */
1107 if (gTimeZoneBufferPtr == NULL) {
1108 /*
1109 This is a trick to look at the name of the link to get the Olson ID
1110 because the tzfile contents is underspecified.
1111 This isn't guaranteed to work because it may not be a symlink.
1112 */
f3c0d7a5 1113 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
374ca955 1114 if (0 < ret) {
0f5d89e8 1115 int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
73c04bcf 1116 gTimeZoneBuffer[ret] = 0;
0f5d89e8
A
1117 char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1118
1119 if (tzZoneInfoTailPtr != NULL
1120 && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
57a6839d 1121 {
0f5d89e8 1122 return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
57a6839d 1123 }
729e4ab9
A
1124 } else {
1125#if defined(SEARCH_TZFILE)
1126 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1127 if (tzInfo != NULL) {
1128 tzInfo->defaultTZBuffer = NULL;
1129 tzInfo->defaultTZFileSize = 0;
1130 tzInfo->defaultTZFilePtr = NULL;
1131 tzInfo->defaultTZstatus = FALSE;
1132 tzInfo->defaultTZPosition = 0;
1133
1134 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1135
1136 /* Free previously allocated memory */
1137 if (tzInfo->defaultTZBuffer != NULL) {
1138 uprv_free(tzInfo->defaultTZBuffer);
1139 }
1140 if (tzInfo->defaultTZFilePtr != NULL) {
1141 fclose(tzInfo->defaultTZFilePtr);
1142 }
1143 uprv_free(tzInfo);
1144 }
1145
1146 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1147 return gTimeZoneBufferPtr;
1148 }
1149#endif
374ca955 1150 }
374ca955 1151 }
73c04bcf
A
1152 else {
1153 return gTimeZoneBufferPtr;
1154 }
1155#endif
374ca955
A
1156#endif
1157
b75a7d8f 1158#ifdef U_TZNAME
4388f060 1159#if U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9
A
1160 /* The return value is free'd in timezone.cpp on Windows because
1161 * the other code path returns a pointer to a heap location. */
1162 return uprv_strdup(U_TZNAME[n]);
1163#else
73c04bcf 1164 /*
46f4442e
A
1165 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1166 So we remap the abbreviation to an olson ID.
1167
1168 Since Windows exposes a little more timezone information,
1169 we normally don't use this code on Windows because
1170 uprv_detectWindowsTimeZone should have already given the correct answer.
73c04bcf 1171 */
46f4442e
A
1172 {
1173 struct tm juneSol, decemberSol;
1174 int daylightType;
1175 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1176 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1177
1178 /* This probing will tell us when daylight savings occurs. */
1179 localtime_r(&juneSolstice, &juneSol);
1180 localtime_r(&decemberSolstice, &decemberSol);
4388f060
A
1181 if(decemberSol.tm_isdst > 0) {
1182 daylightType = U_DAYLIGHT_DECEMBER;
1183 } else if(juneSol.tm_isdst > 0) {
1184 daylightType = U_DAYLIGHT_JUNE;
1185 } else {
1186 daylightType = U_DAYLIGHT_NONE;
1187 }
46f4442e
A
1188 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1189 if (tzid != NULL) {
1190 return tzid;
1191 }
1192 }
b75a7d8f 1193 return U_TZNAME[n];
729e4ab9 1194#endif
b75a7d8f
A
1195#else
1196 return "";
1197#endif
1198}
1199
1200/* Get and set the ICU data directory --------------------------------------- */
1201
b331163b 1202static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
b75a7d8f 1203static char *gDataDirectory = NULL;
b331163b
A
1204
1205UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1206static CharString *gTimeZoneFilesDirectory = NULL;
1207
57a6839d 1208#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
0f5d89e8 1209 static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
f3c0d7a5 1210 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1211#endif
1212
374ca955 1213static UBool U_CALLCONV putil_cleanup(void)
b75a7d8f 1214{
73c04bcf 1215 if (gDataDirectory && *gDataDirectory) {
b75a7d8f 1216 uprv_free(gDataDirectory);
b75a7d8f 1217 }
73c04bcf 1218 gDataDirectory = NULL;
b331163b
A
1219 gDataDirInitOnce.reset();
1220
1221 delete gTimeZoneFilesDirectory;
1222 gTimeZoneFilesDirectory = NULL;
1223 gTimeZoneFilesInitOnce.reset();
1224
f3c0d7a5
A
1225#ifdef SEARCH_TZFILE
1226 delete gSearchTZFileResult;
1227 gSearchTZFileResult = NULL;
1228#endif
1229
57a6839d 1230#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5 1231 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
0f5d89e8 1232 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
b75a7d8f 1233 gCorrectedPOSIXLocale = NULL;
f3c0d7a5 1234 gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1235 }
1236#endif
1237 return TRUE;
1238}
1239
1240/*
1241 * Set the data directory.
1242 * Make a copy of the passed string, and set the global data dir to point to it.
b75a7d8f
A
1243 */
1244U_CAPI void U_EXPORT2
1245u_setDataDirectory(const char *directory) {
1246 char *newDataDir;
374ca955 1247 int32_t length;
b75a7d8f 1248
73c04bcf
A
1249 if(directory==NULL || *directory==0) {
1250 /* A small optimization to prevent the malloc and copy when the
1251 shared library is used, and this is a way to make sure that NULL
1252 is never returned.
1253 */
1254 newDataDir = (char *)"";
b75a7d8f 1255 }
73c04bcf
A
1256 else {
1257 length=(int32_t)uprv_strlen(directory);
1258 newDataDir = (char *)uprv_malloc(length + 2);
46f4442e
A
1259 /* Exit out if newDataDir could not be created. */
1260 if (newDataDir == NULL) {
1261 return;
1262 }
73c04bcf 1263 uprv_strcpy(newDataDir, directory);
b75a7d8f 1264
374ca955 1265#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
73c04bcf
A
1266 {
1267 char *p;
0f5d89e8 1268 while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
73c04bcf
A
1269 *p = U_FILE_SEP_CHAR;
1270 }
1271 }
374ca955 1272#endif
73c04bcf 1273 }
374ca955 1274
73c04bcf 1275 if (gDataDirectory && *gDataDirectory) {
b75a7d8f
A
1276 uprv_free(gDataDirectory);
1277 }
1278 gDataDirectory = newDataDir;
374ca955 1279 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
b75a7d8f
A
1280}
1281
374ca955 1282U_CAPI UBool U_EXPORT2
729e4ab9 1283uprv_pathIsAbsolute(const char *path)
374ca955 1284{
729e4ab9
A
1285 if(!path || !*path) {
1286 return FALSE;
374ca955
A
1287 }
1288
1289 if(*path == U_FILE_SEP_CHAR) {
1290 return TRUE;
1291 }
1292
1293#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1294 if(*path == U_FILE_ALT_SEP_CHAR) {
1295 return TRUE;
1296 }
1297#endif
1298
4388f060 1299#if U_PLATFORM_USES_ONLY_WIN32_API
374ca955
A
1300 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1301 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1302 path[1] == ':' ) {
1303 return TRUE;
1304 }
1305#endif
1306
1307 return FALSE;
1308}
1309
3d1f044b
A
1310/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1311 (needed for some Darwin ICU build environments) */
1312#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
729e4ab9
A
1313# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1314# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1315# endif
1316#endif
1317
340931cb 1318#if defined(ICU_DATA_DIR_WINDOWS)
3d1f044b
A
1319// Helper function to get the ICU Data Directory under the Windows directory location.
1320static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1321{
3d1f044b
A
1322 wchar_t windowsPath[MAX_PATH];
1323 char windowsPathUtf8[MAX_PATH];
1324
1325 UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1326 if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1327 // Convert UTF-16 to a UTF-8 string.
1328 UErrorCode status = U_ZERO_ERROR;
1329 int32_t windowsPathUtf8Len = 0;
1330 u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1331 &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1332
1333 if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1334 (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1335 // Ensure it always has a separator, so we can append the ICU data path.
1336 if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1337 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1338 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1339 }
1340 // Check if the concatenated string will fit.
1341 if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1342 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1343 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1344 return TRUE;
1345 }
1346 }
1347 }
3d1f044b
A
1348
1349 return FALSE;
1350}
1351#endif
1352
b331163b
A
1353static void U_CALLCONV dataDirectoryInitFn() {
1354 /* If we already have the directory, then return immediately. Will happen if user called
1355 * u_setDataDirectory().
1356 */
1357 if (gDataDirectory) {
1358 return;
1359 }
1360
b75a7d8f 1361 const char *path = NULL;
729e4ab9 1362#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
46f4442e
A
1363 char datadir_path_buffer[PATH_MAX];
1364#endif
b75a7d8f 1365
73c04bcf
A
1366 /*
1367 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1368 override ICU's data with the ICU_DATA environment variable. This prevents
1369 problems where multiple custom copies of ICU's specific version of data
1370 are installed on a system. Either the application must define the data
1371 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1372 ICU, set the data with udata_setCommonData or trust that all of the
1373 required data is contained in ICU's data library that contains
1374 the entry point defined by U_ICUDATA_ENTRY_POINT.
1375
1376 There may also be some platforms where environment variables
1377 are not allowed.
1378 */
1379# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1380 /* First try to get the environment variable */
340931cb 1381# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
f3c0d7a5 1382 path=getenv("ICU_DATA");
340931cb 1383# endif
73c04bcf 1384# endif
b75a7d8f 1385
729e4ab9
A
1386 /* ICU_DATA_DIR may be set as a compile option.
1387 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1388 * and is used only when data is built in archive mode eliminating the need
1389 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1390 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1391 * set their own path.
1392 */
1393#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
b75a7d8f 1394 if(path==NULL || *path==0) {
729e4ab9
A
1395# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1396 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1397# endif
1398# ifdef ICU_DATA_DIR
b75a7d8f 1399 path=ICU_DATA_DIR;
729e4ab9
A
1400# else
1401 path=U_ICU_DATA_DEFAULT_DIR;
1402# endif
1403# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1404 if (prefix != NULL) {
1405 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
46f4442e
A
1406 path=datadir_path_buffer;
1407 }
729e4ab9 1408# endif
b75a7d8f 1409 }
729e4ab9 1410#endif
b75a7d8f 1411
340931cb 1412#if defined(ICU_DATA_DIR_WINDOWS)
f3c0d7a5 1413 char datadir_path_buffer[MAX_PATH];
3d1f044b
A
1414 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1415 path = datadir_path_buffer;
f3c0d7a5
A
1416 }
1417#endif
1418
b75a7d8f
A
1419 if(path==NULL) {
1420 /* It looks really bad, set it to something. */
1421 path = "";
1422 }
1423
1424 u_setDataDirectory(path);
b331163b
A
1425 return;
1426}
1427
1428U_CAPI const char * U_EXPORT2
1429u_getDataDirectory(void) {
1430 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
b75a7d8f
A
1431 return gDataDirectory;
1432}
1433
b331163b
A
1434static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1435 if (U_FAILURE(status)) {
1436 return;
1437 }
1438 gTimeZoneFilesDirectory->clear();
1439 gTimeZoneFilesDirectory->append(path, status);
1440#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1441 char *p = gTimeZoneFilesDirectory->data();
0f5d89e8 1442 while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
b331163b
A
1443 *p = U_FILE_SEP_CHAR;
1444 }
1445#endif
1446}
b75a7d8f 1447
2ca993e8
A
1448#if U_PLATFORM_IMPLEMENTS_POSIX
1449#include <sys/stat.h>
1450#if defined(U_TIMEZONE_FILES_DIR)
1451const char tzdirbuf[] = U_TIMEZONE_FILES_DIR;
1452enum { kTzfilenamebufLen = UPRV_LENGTHOF(tzdirbuf) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1453#endif
1454#endif
1455
b331163b
A
1456#define TO_STRING(x) TO_STRING_2(x)
1457#define TO_STRING_2(x) #x
b75a7d8f 1458
b331163b
A
1459static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1460 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1461 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1462 gTimeZoneFilesDirectory = new CharString();
1463 if (gTimeZoneFilesDirectory == NULL) {
1464 status = U_MEMORY_ALLOCATION_ERROR;
1465 return;
1466 }
2ca993e8 1467 UBool usingUTzFilesDir = FALSE;
f3c0d7a5 1468 const char *dir = "";
3d1f044b 1469
340931cb
A
1470#if U_PLATFORM_HAS_WINUWP_API == 1
1471// The UWP version does not support the environment variable setting.
1472
1473# if defined(ICU_DATA_DIR_WINDOWS)
1474 // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
3d1f044b
A
1475 char datadir_path_buffer[MAX_PATH];
1476 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1477 dir = datadir_path_buffer;
1478 }
340931cb
A
1479# endif
1480
3d1f044b
A
1481#else
1482 dir = getenv("ICU_TIMEZONE_FILES_DIR");
f3c0d7a5 1483#endif // U_PLATFORM_HAS_WINUWP_API
3d1f044b 1484
b331163b
A
1485#if defined(U_TIMEZONE_FILES_DIR)
1486 if (dir == NULL) {
3d1f044b 1487 // Build time configuration setting.
2ca993e8
A
1488 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1489 // Not sure why the above was done for this path only;
1490 // it preserves unwanted quotes.
1491 dir = tzdirbuf;
1492 usingUTzFilesDir = TRUE;
b331163b
A
1493 }
1494#endif
2ca993e8
A
1495#if U_PLATFORM_IMPLEMENTS_POSIX
1496 if (dir != NULL) {
1497 struct stat buf;
1498 if (stat(dir, &buf) != 0) {
1499 dir = NULL;
1500 }
1501#if defined(U_TIMEZONE_FILES_DIR)
1502 else if (usingUTzFilesDir) {
1503 char tzfilenamebuf[kTzfilenamebufLen];
1504 uprv_strcpy(tzfilenamebuf, tzdirbuf);
1505 uprv_strcat(tzfilenamebuf, U_FILE_SEP_STRING);
1506#if defined(U_TIMEZONE_PACKAGE)
1507 uprv_strcat(tzfilenamebuf, U_TIMEZONE_PACKAGE);
1508 uprv_strcat(tzfilenamebuf, ".dat");
1509#else
1510 uprv_strcat(tzfilenamebuf, "zoneinfo64.res");
1511#endif
1512 if (stat(tzfilenamebuf, &buf) != 0) {
1513 dir = NULL;
1514 }
1515 }
1516#endif /* defined(U_TIMEZONE_FILES_DIR) */
1517 }
1518#endif /* U_PLATFORM_IMPLEMENTS_POSIX */
b331163b
A
1519 if (dir == NULL) {
1520 dir = "";
1521 }
3d1f044b 1522
b331163b
A
1523 setTimeZoneFilesDir(dir, status);
1524}
b75a7d8f
A
1525
1526
b331163b
A
1527U_CAPI const char * U_EXPORT2
1528u_getTimeZoneFilesDirectory(UErrorCode *status) {
1529 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1530 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1531}
1532
1533U_CAPI void U_EXPORT2
1534u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1535 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1536 setTimeZoneFilesDir(path, *status);
1537
1538 // Note: this function does some extra churn, first setting based on the
1539 // environment, then immediately replacing with the value passed in.
1540 // The logic is simpler that way, and performance shouldn't be an issue.
1541}
b75a7d8f 1542
b75a7d8f
A
1543
1544#if U_POSIX_LOCALE
729e4ab9
A
1545/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1546 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1547 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1548 */
1549static const char *uprv_getPOSIXIDForCategory(int category)
b75a7d8f 1550{
729e4ab9
A
1551 const char* posixID = NULL;
1552 if (category == LC_MESSAGES || category == LC_CTYPE) {
73c04bcf 1553 /*
729e4ab9 1554 * On Solaris two different calls to setlocale can result in
73c04bcf
A
1555 * different values. Only get this value once.
1556 *
1557 * We must check this first because an application can set this.
1558 *
1559 * LC_ALL can't be used because it's platform dependent. The LANG
1560 * environment variable seems to affect LC_CTYPE variable by default.
1561 * Here is what setlocale(LC_ALL, NULL) can return.
1562 * HPUX can return 'C C C C C C C'
1563 * Solaris can return /en_US/C/C/C/C/C on the second try.
1564 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1565 *
1566 * The default codepage detection also needs to use LC_CTYPE.
729e4ab9 1567 *
73c04bcf
A
1568 * Do not call setlocale(LC_*, "")! Using an empty string instead
1569 * of NULL, will modify the libc behavior.
1570 */
729e4ab9 1571 posixID = setlocale(category, NULL);
73c04bcf
A
1572 if ((posixID == 0)
1573 || (uprv_strcmp("C", posixID) == 0)
1574 || (uprv_strcmp("POSIX", posixID) == 0))
1575 {
1576 /* Maybe we got some garbage. Try something more reasonable */
1577 posixID = getenv("LC_ALL");
2ca993e8
A
1578 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1579 * This is needed to properly handle empty env. variables
1580 */
1581#if U_PLATFORM == U_PF_SOLARIS
1582 if ((posixID == 0) || (posixID[0] == '\0')) {
1583 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1584 if ((posixID == 0) || (posixID[0] == '\0')) {
1585#else
b75a7d8f 1586 if (posixID == 0) {
729e4ab9 1587 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
73c04bcf 1588 if (posixID == 0) {
2ca993e8 1589#endif
73c04bcf
A
1590 posixID = getenv("LANG");
1591 }
b75a7d8f
A
1592 }
1593 }
729e4ab9
A
1594 }
1595 if ((posixID==0)
1596 || (uprv_strcmp("C", posixID) == 0)
1597 || (uprv_strcmp("POSIX", posixID) == 0))
1598 {
1599 /* Nothing worked. Give it a nice POSIX default value. */
1600 posixID = "en_US_POSIX";
340931cb
A
1601 // Note: this test will not catch 'C.UTF-8',
1602 // that will be handled in uprv_getDefaultLocaleID().
1603 // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1604 // caller which expects to see "en_US_POSIX" in many branches.
729e4ab9
A
1605 }
1606 return posixID;
1607}
b75a7d8f 1608
729e4ab9
A
1609/* Return just the POSIX id for the default locale, whatever happens to be in
1610 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1611 */
1612static const char *uprv_getPOSIXIDForDefaultLocale(void)
1613{
1614 static const char* posixID = NULL;
1615 if (posixID == 0) {
1616 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
b75a7d8f 1617 }
729e4ab9
A
1618 return posixID;
1619}
73c04bcf 1620
51004dcb 1621#if !U_CHARSET_IS_UTF8
729e4ab9
A
1622/* Return just the POSIX id for the default codepage, whatever happens to be in
1623 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1624 */
1625static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1626{
1627 static const char* posixID = NULL;
1628 if (posixID == 0) {
1629 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1630 }
b75a7d8f
A
1631 return posixID;
1632}
1633#endif
51004dcb 1634#endif
b75a7d8f
A
1635
1636/* NOTE: The caller should handle thread safety */
1637U_CAPI const char* U_EXPORT2
1638uprv_getDefaultLocaleID()
1639{
1640#if U_POSIX_LOCALE
1641/*
1642 Note that: (a '!' means the ID is improper somehow)
1643 LC_ALL ----> default_loc codepage
1644--------------------------------------------------------
1645 ab.CD ab CD
1646 ab@CD ab__CD -
1647 ab@CD.EF ab__CD EF
1648
1649 ab_CD.EF@GH ab_CD_GH EF
1650
1651Some 'improper' ways to do the same as above:
1652 ! ab_CD@GH.EF ab_CD_GH EF
1653 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1654 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1655
1656 _CD@GH _CD_GH -
1657 _CD.EF@GH _CD_GH EF
1658
1659The variant cannot have dots in it.
1660The 'rightmost' variant (@xxx) wins.
1661The leftmost codepage (.xxx) wins.
1662*/
729e4ab9 1663 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
b75a7d8f
A
1664
1665 /* Format: (no spaces)
1666 ll [ _CC ] [ . MM ] [ @ VV]
1667
1668 l = lang, C = ctry, M = charmap, V = variant
1669 */
1670
3d1f044b 1671 if (gCorrectedPOSIXLocale != nullptr) {
729e4ab9 1672 return gCorrectedPOSIXLocale;
b75a7d8f
A
1673 }
1674
3d1f044b 1675 // Copy the ID into owned memory.
340931cb
A
1676 // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1677 char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
3d1f044b
A
1678 if (correctedPOSIXLocale == nullptr) {
1679 return nullptr;
1680 }
1681 uprv_strcpy(correctedPOSIXLocale, posixID);
b75a7d8f 1682
3d1f044b
A
1683 char *limit;
1684 if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1685 *limit = 0;
340931cb
A
1686 }
1687 if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1688 *limit = 0;
b75a7d8f
A
1689 }
1690
340931cb
A
1691 if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1692 || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1693 // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1694 // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1695 uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1696 }
1697
b75a7d8f 1698 /* Note that we scan the *uncorrected* ID. */
3d1f044b
A
1699 const char *p;
1700 if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
b75a7d8f
A
1701 p++;
1702
1703 /* Take care of any special cases here.. */
1704 if (!uprv_strcmp(p, "nynorsk")) {
1705 p = "NY";
73c04bcf 1706 /* Don't worry about no__NY. In practice, it won't appear. */
b75a7d8f
A
1707 }
1708
3d1f044b 1709 if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
cecc3f93 1710 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
b75a7d8f
A
1711 }
1712 else {
1713 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1714 }
1715
3d1f044b
A
1716 const char *q;
1717 if ((q = uprv_strchr(p, '.')) != nullptr) {
b75a7d8f 1718 /* How big will the resulting string be? */
3d1f044b 1719 int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
340931cb 1720 uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
b75a7d8f
A
1721 correctedPOSIXLocale[len] = 0;
1722 }
1723 else {
1724 /* Anything following the @ sign */
1725 uprv_strcat(correctedPOSIXLocale, p);
1726 }
1727
1728 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1729 * How about 'russian' -> 'ru'?
73c04bcf
A
1730 * Many of the other locales using ISO codes will be handled by the
1731 * canonicalization functions in uloc_getDefault.
b75a7d8f
A
1732 */
1733 }
1734
3d1f044b 1735 if (gCorrectedPOSIXLocale == nullptr) {
b75a7d8f 1736 gCorrectedPOSIXLocale = correctedPOSIXLocale;
f3c0d7a5 1737 gCorrectedPOSIXLocaleHeapAllocated = true;
374ca955 1738 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
3d1f044b 1739 correctedPOSIXLocale = nullptr;
b75a7d8f 1740 }
3d1f044b 1741 posixID = gCorrectedPOSIXLocale;
b75a7d8f 1742
3d1f044b 1743 if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
729e4ab9 1744 uprv_free(correctedPOSIXLocale);
b75a7d8f
A
1745 }
1746
1747 return posixID;
1748
4388f060 1749#elif U_PLATFORM_USES_ONLY_WIN32_API
57a6839d 1750#define POSIX_LOCALE_CAPACITY 64
b75a7d8f 1751 UErrorCode status = U_ZERO_ERROR;
3d1f044b 1752 char *correctedPOSIXLocale = nullptr;
57a6839d 1753
f3c0d7a5 1754 // If we have already figured this out just use the cached value
3d1f044b 1755 if (gCorrectedPOSIXLocale != nullptr) {
57a6839d
A
1756 return gCorrectedPOSIXLocale;
1757 }
1758
f3c0d7a5 1759 // No cached value, need to determine the current value
3d1f044b
A
1760 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1761 int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
f3c0d7a5 1762
3d1f044b
A
1763 // Now we should have a Windows locale name that needs converted to the POSIX style.
1764 if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
f3c0d7a5
A
1765 {
1766 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
3d1f044b 1767 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
f3c0d7a5
A
1768
1769 int32_t i;
1770 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1771 {
1772 if (windowsLocale[i] == '_')
1773 {
1774 modifiedWindowsLocale[i] = '-';
1775 }
1776 else
1777 {
1778 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1779 }
1780
1781 if (modifiedWindowsLocale[i] == '\0')
1782 {
1783 break;
1784 }
1785 }
1786
1787 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1788 {
1789 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1790 // locale when tags are dropped
1791 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1792 }
1793
1794 // Now normalize the resulting name
0f5d89e8
A
1795 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1796 /* TODO: Should we just exit on memory allocation failure? */
f3c0d7a5
A
1797 if (correctedPOSIXLocale)
1798 {
1799 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1800 if (U_SUCCESS(status))
1801 {
1802 *(correctedPOSIXLocale + posixLen) = 0;
1803 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1804 gCorrectedPOSIXLocaleHeapAllocated = true;
1805 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1806 }
1807 else
1808 {
1809 uprv_free(correctedPOSIXLocale);
1810 }
1811 }
1812 }
1813
1814 // If unable to find a locale we can agree upon, use en-US by default
3d1f044b 1815 if (gCorrectedPOSIXLocale == nullptr) {
f3c0d7a5 1816 gCorrectedPOSIXLocale = "en_US";
b75a7d8f 1817 }
57a6839d 1818 return gCorrectedPOSIXLocale;
b75a7d8f 1819
4388f060 1820#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
1821 /* locales are process scoped and are by definition thread safe */
1822 static char correctedLocale[64];
1823 const char *localeID = getenv("LC_ALL");
1824 char *p;
1825
1826 if (localeID == NULL)
1827 localeID = getenv("LANG");
1828 if (localeID == NULL)
1829 localeID = setlocale(LC_ALL, NULL);
1830 /* Make sure we have something... */
1831 if (localeID == NULL)
1832 return "en_US_POSIX";
1833
1834 /* Extract the locale name from the path. */
1835 if((p = uprv_strrchr(localeID, '/')) != NULL)
1836 {
1837 /* Increment p to start of locale name. */
1838 p++;
1839 localeID = p;
1840 }
1841
1842 /* Copy to work location. */
1843 uprv_strcpy(correctedLocale, localeID);
1844
1845 /* Strip off the '.locale' extension. */
1846 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1847 *p = 0;
1848 }
1849
1850 /* Upper case the locale name. */
1851 T_CString_toUpperCase(correctedLocale);
1852
1853 /* See if we are using the POSIX locale. Any of the
1854 * following are equivalent and use the same QLGPGCMA
1855 * (POSIX) locale.
73c04bcf
A
1856 * QLGPGCMA2 means UCS2
1857 * QLGPGCMA_4 means UTF-32
1858 * QLGPGCMA_8 means UTF-8
b75a7d8f
A
1859 */
1860 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1861 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
73c04bcf 1862 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
b75a7d8f
A
1863 {
1864 uprv_strcpy(correctedLocale, "en_US_POSIX");
1865 }
1866 else
1867 {
1868 int16_t LocaleLen;
1869
1870 /* Lower case the lang portion. */
1871 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1872 {
1873 *p = uprv_tolower(*p);
1874 }
1875
1876 /* Adjust for Euro. After '_E' add 'URO'. */
1877 LocaleLen = uprv_strlen(correctedLocale);
1878 if (correctedLocale[LocaleLen - 2] == '_' &&
1879 correctedLocale[LocaleLen - 1] == 'E')
1880 {
1881 uprv_strcat(correctedLocale, "URO");
1882 }
1883
1884 /* If using Lotus-based locale then convert to
1885 * equivalent non Lotus.
1886 */
1887 else if (correctedLocale[LocaleLen - 2] == '_' &&
1888 correctedLocale[LocaleLen - 1] == 'L')
1889 {
1890 correctedLocale[LocaleLen - 2] = 0;
1891 }
1892
1893 /* There are separate simplified and traditional
1894 * locales called zh_HK_S and zh_HK_T.
1895 */
1896 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1897 {
1898 uprv_strcpy(correctedLocale, "zh_HK");
1899 }
1900
1901 /* A special zh_CN_GBK locale...
1902 */
1903 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1904 {
1905 uprv_strcpy(correctedLocale, "zh_CN");
1906 }
1907
1908 }
1909
1910 return correctedLocale;
1911#endif
1912
1913}
1914
729e4ab9 1915#if !U_CHARSET_IS_UTF8
73c04bcf
A
1916#if U_POSIX_LOCALE
1917/*
1918Due to various platform differences, one platform may specify a charset,
1919when they really mean a different charset. Remap the names so that they are
46f4442e
A
1920compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1921here. Before adding anything to this function, please consider adding unique
1922names to the ICU alias table in the data directory.
73c04bcf
A
1923*/
1924static const char*
1925remapPlatformDependentCodepage(const char *locale, const char *name) {
1926 if (locale != NULL && *locale == 0) {
1927 /* Make sure that an empty locale is handled the same way. */
1928 locale = NULL;
1929 }
1930 if (name == NULL) {
1931 return NULL;
1932 }
4388f060 1933#if U_PLATFORM == U_PF_AIX
73c04bcf
A
1934 if (uprv_strcmp(name, "IBM-943") == 0) {
1935 /* Use the ASCII compatible ibm-943 */
1936 name = "Shift-JIS";
1937 }
1938 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1939 /* Use the windows-1252 that contains the Euro */
1940 name = "IBM-5348";
1941 }
4388f060 1942#elif U_PLATFORM == U_PF_SOLARIS
73c04bcf
A
1943 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1944 /* Solaris underspecifies the "EUC" name. */
1945 if (uprv_strcmp(locale, "zh_CN") == 0) {
1946 name = "EUC-CN";
1947 }
1948 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1949 name = "EUC-TW";
1950 }
1951 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1952 name = "EUC-KR";
1953 }
1954 }
46f4442e
A
1955 else if (uprv_strcmp(name, "eucJP") == 0) {
1956 /*
1957 ibm-954 is the best match.
1958 ibm-33722 is the default for eucJP (similar to Windows).
1959 */
1960 name = "eucjis";
1961 }
1962 else if (uprv_strcmp(name, "646") == 0) {
1963 /*
729e4ab9 1964 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
46f4442e
A
1965 * ISO-8859-1 instead of US-ASCII(646).
1966 */
1967 name = "ISO-8859-1";
1968 }
4388f060 1969#elif U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
1970 if (locale == NULL && *name == 0) {
1971 /*
1972 No locale was specified, and an empty name was passed in.
1973 This usually indicates that nl_langinfo didn't return valid information.
1974 Mac OS X uses UTF-8 by default (especially the locale data and console).
1975 */
1976 name = "UTF-8";
1977 }
729e4ab9
A
1978 else if (uprv_strcmp(name, "CP949") == 0) {
1979 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1980 name = "EUC-KR";
1981 }
1982 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1983 /*
1984 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1985 */
1986 name = "UTF-8";
1987 }
4388f060 1988#elif U_PLATFORM == U_PF_BSD
729e4ab9
A
1989 if (uprv_strcmp(name, "CP949") == 0) {
1990 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1991 name = "EUC-KR";
1992 }
4388f060 1993#elif U_PLATFORM == U_PF_HPUX
46f4442e
A
1994 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1995 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1996 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1997 name = "hkbig5";
1998 }
1999 else if (uprv_strcmp(name, "eucJP") == 0) {
2000 /*
2001 ibm-1350 is the best match, but unavailable.
2002 ibm-954 is mostly a superset of ibm-1350.
2003 ibm-33722 is the default for eucJP (similar to Windows).
2004 */
2005 name = "eucjis";
2006 }
4388f060 2007#elif U_PLATFORM == U_PF_LINUX
46f4442e
A
2008 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2009 /* Linux underspecifies the "EUC" name. */
2010 if (uprv_strcmp(locale, "korean") == 0) {
2011 name = "EUC-KR";
2012 }
2013 else if (uprv_strcmp(locale, "japanese") == 0) {
2014 /* See comment below about eucJP */
2015 name = "eucjis";
2016 }
2017 }
2018 else if (uprv_strcmp(name, "eucjp") == 0) {
2019 /*
2020 ibm-1350 is the best match, but unavailable.
2021 ibm-954 is mostly a superset of ibm-1350.
2022 ibm-33722 is the default for eucJP (similar to Windows).
2023 */
2024 name = "eucjis";
2025 }
729e4ab9
A
2026 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2027 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2028 /*
2029 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2030 */
2031 name = "UTF-8";
2032 }
2033 /*
2034 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2035 * it by falling back to 'US-ASCII' when NULL is returned from this
2036 * function. So, we don't have to worry about it here.
2037 */
73c04bcf
A
2038#endif
2039 /* return NULL when "" is passed in */
2040 if (*name == 0) {
2041 name = NULL;
2042 }
2043 return name;
2044}
2045
729e4ab9 2046static const char*
73c04bcf
A
2047getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2048{
2049 char localeBuf[100];
2050 const char *name = NULL;
2051 char *variant = NULL;
2052
2053 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2054 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2055 uprv_strncpy(localeBuf, localeName, localeCapacity);
2056 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2057 name = uprv_strncpy(buffer, name+1, buffCapacity);
2058 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
4388f060 2059 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
73c04bcf
A
2060 *variant = 0;
2061 }
2062 name = remapPlatformDependentCodepage(localeBuf, name);
2063 }
2064 return name;
2065}
2066#endif
374ca955 2067
729e4ab9 2068static const char*
374ca955 2069int_getDefaultCodepage()
b75a7d8f 2070{
4388f060 2071#if U_PLATFORM == U_PF_OS400
b75a7d8f
A
2072 uint32_t ccsid = 37; /* Default to ibm-37 */
2073 static char codepage[64];
2074 Qwc_JOBI0400_t jobinfo;
2075 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2076
2077 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2078 "* ", " ", &error);
2079
2080 if (error.Bytes_Available == 0) {
2081 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2082 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2083 }
2084 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2085 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2086 }
2087 /* else use the default */
2088 }
2089 sprintf(codepage,"ibm-%d", ccsid);
2090 return codepage;
2091
4388f060 2092#elif U_PLATFORM == U_PF_OS390
b75a7d8f 2093 static char codepage[64];
729e4ab9
A
2094
2095 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2096 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
46f4442e 2097 codepage[63] = 0; /* NULL terminate */
729e4ab9 2098
b75a7d8f
A
2099 return codepage;
2100
4388f060 2101#elif U_PLATFORM_USES_ONLY_WIN32_API
b75a7d8f 2102 static char codepage[64];
f3c0d7a5
A
2103 DWORD codepageNumber = 0;
2104
340931cb 2105#if U_PLATFORM_HAS_WINUWP_API == 1
f3c0d7a5
A
2106 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2107 // have folks use Unicode than a "system" code page, however this is the same
2108 // codepage as the system default locale codepage. (FWIW, the system locale is
2109 // ONLY used for codepage, it should never be used for anything else)
2110 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2111 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2112#else
2113 // Win32 apps can call GetACP
2114 codepageNumber = GetACP();
2115#endif
2116 // Special case for UTF-8
2117 if (codepageNumber == 65001)
2118 {
2119 return "UTF-8";
2120 }
2121 // Windows codepages can look like windows-1252, so format the found number
2122 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2123 // are between 3 and 19999
2124 if (codepageNumber > 0 && codepageNumber < 20000)
2125 {
2126 sprintf(codepage, "windows-%ld", codepageNumber);
2127 return codepage;
2128 }
2129 // If the codepage number call failed then return UTF-8
2130 return "UTF-8";
b75a7d8f
A
2131
2132#elif U_POSIX_LOCALE
2133 static char codesetName[100];
b75a7d8f 2134 const char *localeName = NULL;
73c04bcf 2135 const char *name = NULL;
b75a7d8f 2136
729e4ab9 2137 localeName = uprv_getPOSIXIDForDefaultCodepage();
b75a7d8f 2138 uprv_memset(codesetName, 0, sizeof(codesetName));
2ca993e8
A
2139 /* On Solaris nl_langinfo returns C locale values unless setlocale
2140 * was called earlier.
2141 */
2142#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
729e4ab9
A
2143 /* When available, check nl_langinfo first because it usually gives more
2144 useful names. It depends on LC_CTYPE.
73c04bcf 2145 nl_langinfo may use the same buffer as setlocale. */
b75a7d8f
A
2146 {
2147 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
4388f060 2148#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
729e4ab9
A
2149 /*
2150 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2151 * instead of ASCII.
2152 */
2153 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2154 codeset = remapPlatformDependentCodepage(localeName, codeset);
2155 } else
2156#endif
2157 {
2158 codeset = remapPlatformDependentCodepage(NULL, codeset);
2159 }
2160
b75a7d8f
A
2161 if (codeset != NULL) {
2162 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2163 codesetName[sizeof(codesetName)-1] = 0;
374ca955 2164 return codesetName;
b75a7d8f
A
2165 }
2166 }
2167#endif
374ca955 2168
729e4ab9
A
2169 /* Use setlocale in a nice way, and then check some environment variables.
2170 Maybe the application used setlocale already.
2171 */
2172 uprv_memset(codesetName, 0, sizeof(codesetName));
2173 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2174 if (name) {
2175 /* if we can find the codeset name from setlocale, return that. */
2176 return name;
2177 }
2178
374ca955
A
2179 if (*codesetName == 0)
2180 {
73c04bcf 2181 /* Everything failed. Return US ASCII (ISO 646). */
46f4442e 2182 (void)uprv_strcpy(codesetName, "US-ASCII");
374ca955 2183 }
b75a7d8f
A
2184 return codesetName;
2185#else
2186 return "US-ASCII";
2187#endif
2188}
2189
b75a7d8f 2190
374ca955
A
2191U_CAPI const char* U_EXPORT2
2192uprv_getDefaultCodepage()
2193{
2194 static char const *name = NULL;
2195 umtx_lock(NULL);
2196 if (name == NULL) {
2197 name = int_getDefaultCodepage();
b75a7d8f 2198 }
374ca955
A
2199 umtx_unlock(NULL);
2200 return name;
b75a7d8f 2201}
729e4ab9 2202#endif /* !U_CHARSET_IS_UTF8 */
b75a7d8f 2203
b75a7d8f 2204
374ca955
A
2205/* end of platform-specific implementation -------------- */
2206
2207/* version handling --------------------------------------------------------- */
b75a7d8f
A
2208
2209U_CAPI void U_EXPORT2
2210u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2211 char *end;
2212 uint16_t part=0;
2213
2214 if(versionArray==NULL) {
2215 return;
2216 }
2217
2218 if(versionString!=NULL) {
2219 for(;;) {
2220 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2221 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2222 break;
2223 }
2224 versionString=end+1;
2225 }
2226 }
2227
2228 while(part<U_MAX_VERSION_LENGTH) {
2229 versionArray[part++]=0;
2230 }
2231}
2232
729e4ab9
A
2233U_CAPI void U_EXPORT2
2234u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2235 if(versionArray!=NULL && versionString!=NULL) {
2236 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2237 int32_t len = u_strlen(versionString);
2238 if(len>U_MAX_VERSION_STRING_LENGTH) {
2239 len = U_MAX_VERSION_STRING_LENGTH;
2240 }
2241 u_UCharsToChars(versionString, versionChars, len);
2242 versionChars[len]=0;
2243 u_versionFromString(versionArray, versionChars);
2244 }
2245}
2246
b75a7d8f 2247U_CAPI void U_EXPORT2
4388f060 2248u_versionToString(const UVersionInfo versionArray, char *versionString) {
b75a7d8f
A
2249 uint16_t count, part;
2250 uint8_t field;
2251
2252 if(versionString==NULL) {
2253 return;
2254 }
2255
2256 if(versionArray==NULL) {
2257 versionString[0]=0;
2258 return;
2259 }
2260
2261 /* count how many fields need to be written */
2262 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2263 }
2264
2265 if(count <= 1) {
2266 count = 2;
2267 }
2268
2269 /* write the first part */
2270 /* write the decimal field value */
2271 field=versionArray[0];
2272 if(field>=100) {
2273 *versionString++=(char)('0'+field/100);
2274 field%=100;
2275 }
2276 if(field>=10) {
2277 *versionString++=(char)('0'+field/10);
2278 field%=10;
2279 }
2280 *versionString++=(char)('0'+field);
2281
2282 /* write the following parts */
2283 for(part=1; part<count; ++part) {
2284 /* write a dot first */
2285 *versionString++=U_VERSION_DELIMITER;
2286
2287 /* write the decimal field value */
2288 field=versionArray[part];
2289 if(field>=100) {
2290 *versionString++=(char)('0'+field/100);
2291 field%=100;
2292 }
2293 if(field>=10) {
2294 *versionString++=(char)('0'+field/10);
2295 field%=10;
2296 }
2297 *versionString++=(char)('0'+field);
2298 }
2299
2300 /* NUL-terminate */
2301 *versionString=0;
2302}
2303
2304U_CAPI void U_EXPORT2
2305u_getVersion(UVersionInfo versionArray) {
57a6839d 2306 (void)copyright; // Suppress unused variable warning from clang.
b75a7d8f
A
2307 u_versionFromString(versionArray, U_ICU_VERSION);
2308}
2309
729e4ab9
A
2310/**
2311 * icucfg.h dependent code
2312 */
2313
0f5d89e8 2314#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9 2315
4388f060 2316#if HAVE_DLFCN_H
729e4ab9
A
2317#ifdef __MVS__
2318#ifndef __SUSV3
2319#define __SUSV3 1
2320#endif
2321#endif
2322#include <dlfcn.h>
0f5d89e8 2323#endif /* HAVE_DLFCN_H */
729e4ab9
A
2324
2325U_INTERNAL void * U_EXPORT2
2326uprv_dl_open(const char *libName, UErrorCode *status) {
2327 void *ret = NULL;
2328 if(U_FAILURE(*status)) return ret;
2329 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2330 if(ret==NULL) {
4388f060
A
2331#ifdef U_TRACE_DYLOAD
2332 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
729e4ab9
A
2333#endif
2334 *status = U_MISSING_RESOURCE_ERROR;
2335 }
2336 return ret;
2337}
2338
2339U_INTERNAL void U_EXPORT2
2340uprv_dl_close(void *lib, UErrorCode *status) {
2341 if(U_FAILURE(*status)) return;
2342 dlclose(lib);
2343}
2344
4388f060
A
2345U_INTERNAL UVoidFunction* U_EXPORT2
2346uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2347 union {
2348 UVoidFunction *fp;
2349 void *vp;
2350 } uret;
2351 uret.fp = NULL;
2352 if(U_FAILURE(*status)) return uret.fp;
2353 uret.vp = dlsym(lib, sym);
2354 if(uret.vp == NULL) {
2355#ifdef U_TRACE_DYLOAD
2356 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2357#endif
729e4ab9
A
2358 *status = U_MISSING_RESOURCE_ERROR;
2359 }
4388f060 2360 return uret.fp;
729e4ab9
A
2361}
2362
0f5d89e8 2363#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
729e4ab9 2364
0f5d89e8
A
/* Windows API implementation. */
/* Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
729e4ab9
A
2367
2368U_INTERNAL void * U_EXPORT2
2369uprv_dl_open(const char *libName, UErrorCode *status) {
2370 HMODULE lib = NULL;
2371
2372 if(U_FAILURE(*status)) return NULL;
2373
4388f060 2374 lib = LoadLibraryA(libName);
729e4ab9
A
2375
2376 if(lib==NULL) {
2377 *status = U_MISSING_RESOURCE_ERROR;
2378 }
2379
2380 return (void*)lib;
2381}
2382
2383U_INTERNAL void U_EXPORT2
2384uprv_dl_close(void *lib, UErrorCode *status) {
2385 HMODULE handle = (HMODULE)lib;
2386 if(U_FAILURE(*status)) return;
2387
2388 FreeLibrary(handle);
2389
2390 return;
2391}
2392
4388f060
A
2393U_INTERNAL UVoidFunction* U_EXPORT2
2394uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
729e4ab9 2395 HMODULE handle = (HMODULE)lib;
4388f060 2396 UVoidFunction* addr = NULL;
729e4ab9
A
2397
2398 if(U_FAILURE(*status) || lib==NULL) return NULL;
2399
4388f060 2400 addr = (UVoidFunction*)GetProcAddress(handle, sym);
729e4ab9
A
2401
2402 if(addr==NULL) {
2403 DWORD lastError = GetLastError();
2404 if(lastError == ERROR_PROC_NOT_FOUND) {
2405 *status = U_MISSING_RESOURCE_ERROR;
2406 } else {
2407 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2408 }
2409 }
2410
2411 return addr;
2412}
2413
729e4ab9
A
2414#else
2415
0f5d89e8 2416/* No dynamic loading, null (nonexistent) implementation. */
729e4ab9
A
2417
2418U_INTERNAL void * U_EXPORT2
2419uprv_dl_open(const char *libName, UErrorCode *status) {
2ca993e8 2420 (void)libName;
729e4ab9
A
2421 if(U_FAILURE(*status)) return NULL;
2422 *status = U_UNSUPPORTED_ERROR;
2423 return NULL;
2424}
2425
2426U_INTERNAL void U_EXPORT2
2427uprv_dl_close(void *lib, UErrorCode *status) {
2ca993e8 2428 (void)lib;
729e4ab9
A
2429 if(U_FAILURE(*status)) return;
2430 *status = U_UNSUPPORTED_ERROR;
2431 return;
2432}
2433
4388f060
A
2434U_INTERNAL UVoidFunction* U_EXPORT2
2435uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2ca993e8
A
2436 (void)lib;
2437 (void)sym;
4388f060 2438 if(U_SUCCESS(*status)) {
729e4ab9 2439 *status = U_UNSUPPORTED_ERROR;
4388f060
A
2440 }
2441 return (UVoidFunction*)NULL;
729e4ab9
A
2442}
2443
0f5d89e8 2444#endif
729e4ab9 2445
b75a7d8f
A
2446/*
2447 * Hey, Emacs, please set the following:
2448 *
2449 * Local Variables:
2450 * indent-tabs-mode: nil
2451 * End:
2452 *
2453 */