]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/putil.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
5*
2ca993e8 6* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13* Date Name Description
14* 04/14/97 aliu Creation.
15* 04/24/97 aliu Added getDefaultDataDirectory() and
16* getDefaultLocaleID().
17* 04/28/97 aliu Rewritten to assume Unix and apply general methods
18* for assumed case. Non-UNIX platforms must be
19* special-cased. Rewrote numeric methods dealing
20* with NaN and Infinity to be platform independent
21* over all IEEE 754 platforms.
22* 05/13/97 aliu Restored sign of timezone
23* (semantics are hours West of GMT)
24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25* nextDouble..
26* 07/22/98 stephen Added remainder, max, min, trunc
27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28* 08/24/98 stephen Added longBitsFromDouble
29* 09/08/98 stephen Minor changes for Mac Port
30* 03/02/99 stephen Removed openFile(). Added AS400 support.
31* Fixed EBCDIC tables
32* 04/15/99 stephen Converted to C.
33* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34* 08/04/99 jeffrey R. Added OS/2 changes
35* 11/15/99 helena Integrated S/390 IEEE support.
36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
46f4442e 38* 01/03/08 Steven L. Fake Time Support
b75a7d8f
A
39******************************************************************************
40*/
41
4388f060
A
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
b75a7d8f 45
f3c0d7a5
A
46// First, the platform type. Need this for U_PLATFORM.
47#include "unicode/platform.h"
48
49#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50/* tzset isn't defined in strict ANSI on MinGW. */
51#undef __STRICT_ANSI__
52#endif
53
54/*
55 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
56 */
57#include <time.h>
58
59#if !U_PLATFORM_USES_ONLY_WIN32_API
60#include <sys/time.h>
61#endif
62
63/* include the rest of the ICU headers */
b75a7d8f 64#include "unicode/putil.h"
374ca955
A
65#include "unicode/ustring.h"
66#include "putilimp.h"
67#include "uassert.h"
b75a7d8f
A
68#include "umutex.h"
69#include "cmemory.h"
70#include "cstring.h"
71#include "locmap.h"
72#include "ucln_cmn.h"
b331163b 73#include "charstr.h"
73c04bcf
A
74
75/* Include standard headers. */
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <math.h>
80#include <locale.h>
81#include <float.h>
4388f060
A
82
83#ifndef U_COMMON_IMPLEMENTATION
84#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
85#endif
86
b75a7d8f
A
87
88/* include system headers */
4388f060
A
89#if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
f3c0d7a5 95#ifndef WIN32_LEAN_AND_MEAN
b75a7d8f 96# define WIN32_LEAN_AND_MEAN
f3c0d7a5 97#endif
374ca955 98# define VC_EXTRALEAN
b75a7d8f
A
99# define NOUSER
100# define NOSERVICE
101# define NOIME
102# define NOMCX
103# include <windows.h>
f3c0d7a5
A
104# include "unicode\uloc.h"
105#if U_PLATFORM_HAS_WINUWP_API == 0
73c04bcf 106# include "wintz.h"
f3c0d7a5
A
107#else // U_PLATFORM_HAS_WINUWP_API
108typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
109#include <Windows.Globalization.h>
110#include <windows.system.userprofile.h>
111#include <wrl\wrappers\corewrappers.h>
112#include <wrl\client.h>
113
114using namespace ABI::Windows::Foundation;
115using namespace Microsoft::WRL;
116using namespace Microsoft::WRL::Wrappers;
117#endif
4388f060 118#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
119# include <float.h>
120# include <qusec.h> /* error code structure */
121# include <qusrjobi.h>
122# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
46f4442e 123# include <mih/testptr.h> /* For uprv_maximumPtr */
4388f060
A
124#elif U_PLATFORM == U_PF_OS390
125# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
51004dcb 126#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
4388f060
A
127# include <limits.h>
128# include <unistd.h>
51004dcb
A
129# if U_PLATFORM == U_PF_SOLARIS
130# ifndef _XPG4_2
131# define _XPG4_2
132# endif
133# endif
4388f060
A
134#elif U_PLATFORM == U_PF_QNX
135# include <sys/neutrino.h>
374ca955
A
136#endif
137
b75a7d8f 138/*
374ca955
A
139 * Only include langinfo.h if we have a way to get the codeset. If we later
140 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
141 *
142 */
143
144#if U_HAVE_NL_LANGINFO_CODESET
145#include <langinfo.h>
b75a7d8f
A
146#endif
147
729e4ab9
A
148/**
149 * Simple things (presence of functions, etc) should just go in configure.in and be added to
150 * icucfg.h via autoheader.
151 */
4388f060
A
152#if U_PLATFORM_IMPLEMENTS_POSIX
153# if U_PLATFORM == U_PF_OS400
154# define HAVE_DLFCN_H 0
155# define HAVE_DLOPEN 0
156# else
157# ifndef HAVE_DLFCN_H
158# define HAVE_DLFCN_H 1
159# endif
160# ifndef HAVE_DLOPEN
161# define HAVE_DLOPEN 1
162# endif
163# endif
164# ifndef HAVE_GETTIMEOFDAY
165# define HAVE_GETTIMEOFDAY 1
166# endif
167#else
168# define HAVE_DLFCN_H 0
169# define HAVE_DLOPEN 0
170# define HAVE_GETTIMEOFDAY 0
729e4ab9
A
171#endif
172
b331163b 173U_NAMESPACE_USE
4388f060 174
b75a7d8f
A
175/* Define the extension for data files, again... */
176#define DATA_TYPE "dat"
177
178/* Leave this copyright notice here! */
179static const char copyright[] = U_COPYRIGHT_STRING;
180
181/* floating point implementations ------------------------------------------- */
182
183/* We return QNAN rather than SNAN*/
184#define SIGN 0x80000000U
b75a7d8f 185
73c04bcf
A
186/* Make it easy to define certain types of constants */
/*
 * Overlay of a 64-bit integer and a double so that a double constant can be
 * spelled out as a raw bit pattern.
 */
typedef union {
    /* Must stay the first member: brace initialization sets the first member (C89 rule). */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern used for NaN (0x7FF8000000000000). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern used for positive infinity (0x7FF0000000000000). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
b75a7d8f
A
193
194/*---------------------------------------------------------------------------
195 Platform utilities
196 Our general strategy is to assume we're on a POSIX platform. Platforms which
197 are non-POSIX must declare themselves so. The default POSIX implementation
198 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
199 functions).
200 ---------------------------------------------------------------------------*/
201
b331163b 202#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
b75a7d8f
A
203# undef U_POSIX_LOCALE
204#else
205# define U_POSIX_LOCALE 1
206#endif
207
73c04bcf
A
208/*
209 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
210 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
211*/
212#if !IEEE_754
b75a7d8f
A
/*
 * Returns a pointer to the n most-significant bytes of *d.
 * On big-endian hosts these are the leading bytes of the object; on
 * little-endian hosts they are the trailing n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if !U_IS_BIG_ENDIAN
    char *pastEnd = (char*)d + sizeof(double);
    return pastEnd - n;
#else
    return (char*)d;
#endif
}
222
/*
 * Returns a pointer to the n least-significant bytes of *d.
 * On little-endian hosts these are the leading bytes of the object; on
 * big-endian hosts they are the trailing n bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if !U_IS_BIG_ENDIAN
    return (char*)d;
#else
    char *pastEnd = (char*)d + sizeof(double);
    return pastEnd - n;
#endif
}
729e4ab9
A
232#endif /* !IEEE_754 */
233
234#if IEEE_754
235static UBool
236u_signBit(double d) {
237 uint8_t hiByte;
238#if U_IS_BIG_ENDIAN
239 hiByte = *(uint8_t *)&d;
240#else
241 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
242#endif
243 return (hiByte & 0x80) != 0;
244}
245#endif
246
247
b75a7d8f 248
46f4442e 249#if defined (U_DEBUG_FAKETIME)
729e4ab9 250/* Override the clock to test things without having to move the system clock.
46f4442e
A
251 * Assumes POSIX gettimeofday() will function
252 */
253UDate fakeClock_t0 = 0; /** Time to start the clock from **/
254UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
255UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
51004dcb 256static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
46f4442e
A
257
258static UDate getUTCtime_real() {
259 struct timeval posixTime;
260 gettimeofday(&posixTime, NULL);
261 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
262}
263
264static UDate getUTCtime_fake() {
265 umtx_lock(&fakeClockMutex);
266 if(!fakeClock_set) {
267 UDate real = getUTCtime_real();
268 const char *fake_start = getenv("U_FAKETIME_START");
729e4ab9 269 if((fake_start!=NULL) && (fake_start[0]!=0)) {
46f4442e 270 sscanf(fake_start,"%lf",&fakeClock_t0);
729e4ab9
A
271 fakeClock_dt = fakeClock_t0 - real;
272 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
273 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
274 fakeClock_t0, fake_start, fakeClock_dt, real);
275 } else {
276 fakeClock_dt = 0;
277 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
278 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
46f4442e 279 }
46f4442e
A
280 fakeClock_set = TRUE;
281 }
282 umtx_unlock(&fakeClockMutex);
729e4ab9 283
46f4442e
A
284 return getUTCtime_real() + fakeClock_dt;
285}
286#endif
287
4388f060 288#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
289typedef union {
290 int64_t int64;
291 FILETIME fileTime;
292} FileTimeConversion; /* This is like a ULARGE_INTEGER */
293
294/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
295#define EPOCH_BIAS INT64_C(116444736000000000)
296#define HECTONANOSECOND_PER_MILLISECOND 10000
297
298#endif
299
b75a7d8f
A
300/*---------------------------------------------------------------------------
301 Universal Implementations
73c04bcf
A
302 These are designed to work on all platforms. Try these, and if they
303 don't work on your platform, then special case your platform with new
b75a7d8f 304 implementations.
73c04bcf 305---------------------------------------------------------------------------*/
b75a7d8f 306
374ca955 307U_CAPI UDate U_EXPORT2
b75a7d8f
A
308uprv_getUTCtime()
309{
46f4442e
A
310#if defined(U_DEBUG_FAKETIME)
311 return getUTCtime_fake(); /* Hook for overriding the clock */
729e4ab9
A
312#else
313 return uprv_getRawUTCtime();
314#endif
315}
316
317/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
318U_CAPI UDate U_EXPORT2
319uprv_getRawUTCtime()
320{
b331163b 321#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
322
323 FileTimeConversion winTime;
324 GetSystemTimeAsFileTime(&winTime.fileTime);
325 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
b75a7d8f 326#else
729e4ab9 327
4388f060 328#if HAVE_GETTIMEOFDAY
73c04bcf
A
329 struct timeval posixTime;
330 gettimeofday(&posixTime, NULL);
331 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
729e4ab9 332#else
b75a7d8f
A
333 time_t epochtime;
334 time(&epochtime);
374ca955 335 return (UDate)epochtime * U_MILLIS_PER_SECOND;
b75a7d8f 336#endif
729e4ab9
A
337
338#endif
b75a7d8f
A
339}
340
341/*-----------------------------------------------------------------------------
342 IEEE 754
343 These methods detect and return NaN and infinity values for doubles
344 conforming to IEEE 754. Platforms which support this standard include X86,
345 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
346 If this doesn't work on your platform, you have non-IEEE floating-point, and
347 will need to code your own versions. A naive implementation is to return 0.0
348 for getNaN and getInfinity, and false for isNaN and isInfinite.
349 ---------------------------------------------------------------------------*/
350
351U_CAPI UBool U_EXPORT2
352uprv_isNaN(double number)
353{
354#if IEEE_754
73c04bcf
A
355 BitPatternConversion convertedNumber;
356 convertedNumber.d64 = number;
b75a7d8f 357 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
73c04bcf 358 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
b75a7d8f 359
4388f060 360#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
361 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
362 sizeof(uint32_t));
363 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
364 sizeof(uint32_t));
365
366 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
367 (lowBits == 0x00000000L);
368
369#else
370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
371 /* you'll need to replace this default implementation with what's correct*/
372 /* for your platform.*/
373 return number != number;
374#endif
375}
376
377U_CAPI UBool U_EXPORT2
378uprv_isInfinite(double number)
379{
380#if IEEE_754
73c04bcf
A
381 BitPatternConversion convertedNumber;
382 convertedNumber.d64 = number;
383 /* Infinity is exactly 0x7FF0000000000000U. */
384 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
4388f060 385#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
386 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
387 sizeof(uint32_t));
388 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
389 sizeof(uint32_t));
390
391 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
392
393#else
394 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
395 /* value, you'll need to replace this default implementation with what's*/
396 /* correct for your platform.*/
397 return number == (2.0 * number);
398#endif
399}
400
401U_CAPI UBool U_EXPORT2
402uprv_isPositiveInfinity(double number)
403{
4388f060 404#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
405 return (UBool)(number > 0 && uprv_isInfinite(number));
406#else
407 return uprv_isInfinite(number);
408#endif
409}
410
411U_CAPI UBool U_EXPORT2
412uprv_isNegativeInfinity(double number)
413{
4388f060 414#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
415 return (UBool)(number < 0 && uprv_isInfinite(number));
416
417#else
418 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
419 sizeof(uint32_t));
420 return((highBits & SIGN) && uprv_isInfinite(number));
421
422#endif
423}
424
425U_CAPI double U_EXPORT2
426uprv_getNaN()
427{
4388f060 428#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 429 return gNan.d64;
b75a7d8f
A
430#else
431 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
432 /* you'll need to replace this default implementation with what's correct*/
433 /* for your platform.*/
434 return 0.0;
435#endif
436}
437
438U_CAPI double U_EXPORT2
439uprv_getInfinity()
440{
4388f060 441#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 442 return gInf.d64;
b75a7d8f
A
443#else
444 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
445 /* value, you'll need to replace this default implementation with what's*/
446 /* correct for your platform.*/
447 return 0.0;
448#endif
449}
450
451U_CAPI double U_EXPORT2
452uprv_floor(double x)
453{
454 return floor(x);
455}
456
457U_CAPI double U_EXPORT2
458uprv_ceil(double x)
459{
460 return ceil(x);
461}
462
463U_CAPI double U_EXPORT2
464uprv_round(double x)
465{
466 return uprv_floor(x + 0.5);
467}
468
469U_CAPI double U_EXPORT2
470uprv_fabs(double x)
471{
472 return fabs(x);
473}
474
475U_CAPI double U_EXPORT2
476uprv_modf(double x, double* y)
477{
478 return modf(x, y);
479}
480
481U_CAPI double U_EXPORT2
482uprv_fmod(double x, double y)
483{
484 return fmod(x, y);
485}
486
487U_CAPI double U_EXPORT2
488uprv_pow(double x, double y)
489{
490 /* This is declared as "double pow(double x, double y)" */
491 return pow(x, y);
492}
493
494U_CAPI double U_EXPORT2
495uprv_pow10(int32_t x)
496{
497 return pow(10.0, (double)x);
498}
499
500U_CAPI double U_EXPORT2
501uprv_fmax(double x, double y)
502{
503#if IEEE_754
b75a7d8f
A
504 /* first handle NaN*/
505 if(uprv_isNaN(x) || uprv_isNaN(y))
506 return uprv_getNaN();
507
508 /* check for -0 and 0*/
729e4ab9 509 if(x == 0.0 && y == 0.0 && u_signBit(x))
b75a7d8f
A
510 return y;
511
512#endif
513
729e4ab9 514 /* this should work for all flt point w/o NaN and Inf special cases */
b75a7d8f
A
515 return (x > y ? x : y);
516}
517
b75a7d8f
A
518U_CAPI double U_EXPORT2
519uprv_fmin(double x, double y)
520{
521#if IEEE_754
b75a7d8f
A
522 /* first handle NaN*/
523 if(uprv_isNaN(x) || uprv_isNaN(y))
524 return uprv_getNaN();
525
526 /* check for -0 and 0*/
729e4ab9 527 if(x == 0.0 && y == 0.0 && u_signBit(y))
b75a7d8f
A
528 return y;
529
530#endif
531
532 /* this should work for all flt point w/o NaN and Inf special cases */
533 return (x > y ? y : x);
534}
535
b75a7d8f
A
536/**
537 * Truncates the given double.
538 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
539 * This is different than calling floor() or ceil():
540 * floor(3.3) = 3, floor(-3.3) = -4
541 * ceil(3.3) = 4, ceil(-3.3) = -3
542 */
543U_CAPI double U_EXPORT2
544uprv_trunc(double d)
545{
546#if IEEE_754
b75a7d8f
A
547 /* handle error cases*/
548 if(uprv_isNaN(d))
549 return uprv_getNaN();
550 if(uprv_isInfinite(d))
551 return uprv_getInfinity();
552
729e4ab9 553 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
b75a7d8f
A
554 return ceil(d);
555 else
556 return floor(d);
557
558#else
559 return d >= 0 ? floor(d) : ceil(d);
560
561#endif
562}
563
564/**
565 * Return the largest positive number that can be represented by an integer
566 * type of arbitrary bit length.
567 */
568U_CAPI double U_EXPORT2
569uprv_maxMantissa(void)
570{
571 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
572}
573
b75a7d8f
A
574U_CAPI double U_EXPORT2
575uprv_log(double d)
576{
577 return log(d);
578}
579
46f4442e
A
580U_CAPI void * U_EXPORT2
581uprv_maximumPtr(void * base)
b75a7d8f 582{
4388f060 583#if U_PLATFORM == U_PF_OS400
46f4442e 584 /*
729e4ab9 585 * With the provided function we should never be out of range of a given segment
46f4442e
A
586 * (a traditional/typical segment that is). Our segments have 5 bytes for the
587 * id and 3 bytes for the offset. The key is that the casting takes care of
588 * only retrieving the offset portion minus x1000. Hence, the smallest offset
589 * seen in a program is x001000 and when casted to an int would be 0.
590 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
591 *
729e4ab9 592 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
46f4442e 593 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
729e4ab9
A
594 * This function determines the activation based on the pointer that is passed in and
595 * calculates the appropriate maximum available size for
46f4442e
A
596 * each pointer type (TERASPACE and non-TERASPACE)
597 *
598 * Unlike other operating systems, the pointer model isn't determined at
599 * compile time on i5/OS.
600 */
601 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
602 /* if it is a TERASPACE pointer the max is 2GB - 4k */
603 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
b75a7d8f 604 }
46f4442e
A
605 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
606 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
b75a7d8f 607
46f4442e 608#else
729e4ab9 609 return U_MAX_PTR(base);
374ca955 610#endif
46f4442e 611}
b75a7d8f
A
612
613/*---------------------------------------------------------------------------
614 Platform-specific Implementations
615 Try these, and if they don't work on your platform, then special case your
616 platform with new implementations.
617 ---------------------------------------------------------------------------*/
618
b75a7d8f
A
619/* Generic time zone layer -------------------------------------------------- */
620
621/* Time zone utilities */
622U_CAPI void U_EXPORT2
623uprv_tzset()
624{
4388f060 625#if defined(U_TZSET)
b75a7d8f
A
626 U_TZSET();
627#else
628 /* no initialization*/
629#endif
630}
631
632U_CAPI int32_t U_EXPORT2
633uprv_timezone()
634{
374ca955 635#ifdef U_TIMEZONE
b75a7d8f
A
636 return U_TIMEZONE;
637#else
638 time_t t, t1, t2;
639 struct tm tmrec;
b75a7d8f
A
640 int32_t tdiff = 0;
641
642 time(&t);
374ca955 643 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
57a6839d
A
644#if U_PLATFORM != U_PF_IPHONE
645 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
646#endif
b75a7d8f 647 t1 = mktime(&tmrec); /* local time in seconds*/
374ca955 648 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
b75a7d8f
A
649 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
650 tdiff = t2 - t1;
57a6839d
A
651
652#if U_PLATFORM != U_PF_IPHONE
b75a7d8f 653 /* imitate NT behaviour, which returns same timezone offset to GMT for
51004dcb
A
654 winter and summer.
655 This does not work on all platforms. For instance, on glibc on Linux
656 and on Mac OS 10.5, tdiff calculated above remains the same
57a6839d
A
657 regardless of whether DST is in effect or not. iOS is another
658 platform where this does not work. Linux + glibc and Mac OS 10.5
659 have U_TIMEZONE defined so that this code is not reached.
660 */
b75a7d8f
A
661 if (dst_checked)
662 tdiff += 3600;
57a6839d 663#endif
b75a7d8f
A
664 return tdiff;
665#endif
666}
667
374ca955 668/* Note that U_TZNAME does *not* have to be tzname, but if it is,
729e4ab9 669 some platforms need to have it declared here. */
b75a7d8f 670
f3c0d7a5 671#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
374ca955
A
672/* RS6000 and others reject char **tzname. */
673extern U_IMPORT char *U_TZNAME[];
674#endif
675
57a6839d 676#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
73c04bcf
A
677/* These platforms are likely to use Olson timezone IDs. */
678#define CHECK_LOCALTIME_LINK 1
4388f060 679#if U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
680#include <tzfile.h>
681#define TZZONEINFO (TZDIR "/")
51004dcb
A
682#elif U_PLATFORM == U_PF_SOLARIS
683#define TZDEFAULT "/etc/localtime"
684#define TZZONEINFO "/usr/share/lib/zoneinfo/"
57a6839d 685#define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
51004dcb 686#define TZ_ENV_CHECK "localtime"
46f4442e
A
687#else
688#define TZDEFAULT "/etc/localtime"
689#define TZZONEINFO "/usr/share/zoneinfo/"
690#endif
729e4ab9
A
691#if U_HAVE_DIRENT_H
692#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
693/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
694 symlinked to /etc/localtime, which makes searchForTZFile return
695 'localtime' when it's the first match. */
696#define TZFILE_SKIP2 "localtime"
697#define SEARCH_TZFILE
698#include <dirent.h> /* Needed to search through system timezone files */
699#endif
73c04bcf
A
700static char gTimeZoneBuffer[PATH_MAX];
701static char *gTimeZoneBufferPtr = NULL;
702#endif
703
4388f060 704#if !U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
705#define isNonDigit(ch) (ch < '0' || '9' < ch)
706static UBool isValidOlsonID(const char *id) {
707 int32_t idx = 0;
708
709 /* Determine if this is something like Iceland (Olson ID)
710 or AST4ADT (non-Olson ID) */
711 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
712 idx++;
713 }
714
715 /* If we went through the whole string, then it might be okay.
716 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
717 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
718 The rest of the time it could be an Olson ID. George */
719 return (UBool)(id[idx] == 0
720 || uprv_strcmp(id, "PST8PDT") == 0
721 || uprv_strcmp(id, "MST7MDT") == 0
722 || uprv_strcmp(id, "CST6CDT") == 0
723 || uprv_strcmp(id, "EST5EDT") == 0);
724}
729e4ab9
A
725
726/* On some Unix-like OS, 'posix' subdirectory in
727 /usr/share/zoneinfo replicates the top-level contents. 'right'
728 subdirectory has the same set of files, but individual files
729 are different from those in the top-level directory or 'posix'
730 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
731 has files for UTC.
732 When the first match for /etc/localtime is in either of them
733 (usually in posix because 'right' has different file contents),
734 or TZ environment variable points to one of them, createTimeZone
735 fails because, say, 'posix/America/New_York' is not an Olson
736 timezone id ('America/New_York' is). So, we have to skip
737 'posix/' and 'right/' at the beginning. */
738static void skipZoneIDPrefix(const char** id) {
739 if (uprv_strncmp(*id, "posix/", 6) == 0
740 || uprv_strncmp(*id, "right/", 6) == 0)
741 {
742 *id += 6;
743 }
744}
b75a7d8f
A
745#endif
746
4388f060 747#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
46f4442e
A
748
/* Converts an hour count to seconds. The argument and the whole expansion are
 * parenthesized so the macro composes safely inside larger expressions. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))

/* One row of the table that maps abbreviated zone names plus offset to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset value matched against the caller's offset, in seconds */
    int32_t daylightType;   /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID reported on a match */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping::daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
759
46f4442e
A
760/*
761This list tries to disambiguate a set of abbreviated timezone IDs and offsets
762and maps it to an Olson ID.
763Before adding anything to this list, take a look at
764icu/source/tools/tzcode/tz.alias
765Sometimes no daylight savings (0) is important to define due to aliases.
766This list can be tested with icu/source/test/compat/tzone.pl
767More values could be added to daylightType to increase precision.
768*/
769static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
770 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
771 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
772 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
773 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
774 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
775 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
776 {-36000, 2, "EST", "EST", "Australia/Sydney"},
777 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
778 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
779 {-34200, 2, "CST", "CST", "Australia/South"},
780 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
781 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
782 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
783 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
784 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
785 {-28800, 2, "WST", "WST", "Australia/West"},
786 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
787 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
788 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
789 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
790 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
791 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
792 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
793 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
794 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
795 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
796 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
797 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
798 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
799 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
800 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
801 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
802 {0, 1, "GMT", "IST", "Europe/Dublin"},
803 {0, 1, "GMT", "BST", "Europe/London"},
804 {0, 0, "WET", "WEST", "Africa/Casablanca"},
805 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
806 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
807 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
808 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
809 {10800, 2, "UYT", "UYST", "America/Montevideo"},
810 {10800, 1, "WGT", "WGST", "America/Godthab"},
811 {10800, 2, "BRT", "BRST", "Brazil/East"},
812 {12600, 1, "NST", "NDT", "America/St_Johns"},
813 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
814 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
815 {14400, 2, "CLT", "CLST", "Chile/Continental"},
816 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
817 {14400, 2, "PYT", "PYST", "America/Asuncion"},
818 {18000, 1, "CST", "CDT", "America/Havana"},
819 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
820 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
821 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
822 {21600, 0, "CST", "CDT", "America/Guatemala"},
823 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
824 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
825 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
826 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
827 {32400, 1, "AKST", "AKDT", "US/Alaska"},
828 {36000, 1, "HAST", "HADT", "US/Aleutian"}
829};
830
831/*#define DEBUG_TZNAME*/
832
833static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
834{
835 int32_t idx;
836#ifdef DEBUG_TZNAME
837 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
838#endif
b331163b 839 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
46f4442e
A
840 {
841 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
842 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
843 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
844 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
845 {
846 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
847 }
848 }
849 return NULL;
850}
851#endif
852
729e4ab9 853#ifdef SEARCH_TZFILE
729e4ab9
A
854#define MAX_READ_SIZE 512
855
856typedef struct DefaultTZInfo {
857 char* defaultTZBuffer;
858 int64_t defaultTZFileSize;
859 FILE* defaultTZFilePtr;
860 UBool defaultTZstatus;
861 int32_t defaultTZPosition;
862} DefaultTZInfo;
863
864/*
865 * This method compares the two files given to see if they are a match.
866 * It is currently use to compare two TZ files.
867 */
868static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
869 FILE* file;
870 int64_t sizeFile;
871 int64_t sizeFileLeft;
872 int32_t sizeFileRead;
873 int32_t sizeFileToRead;
874 char bufferFile[MAX_READ_SIZE];
875 UBool result = TRUE;
876
877 if (tzInfo->defaultTZFilePtr == NULL) {
878 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
879 }
880 file = fopen(TZFileName, "r");
881
882 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
883
884 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
885 /* First check that the file size are equal. */
886 if (tzInfo->defaultTZFileSize == 0) {
887 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
888 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
889 }
890 fseek(file, 0, SEEK_END);
891 sizeFile = ftell(file);
892 sizeFileLeft = sizeFile;
893
894 if (sizeFile != tzInfo->defaultTZFileSize) {
895 result = FALSE;
896 } else {
897 /* Store the data from the files in seperate buffers and
898 * compare each byte to determine equality.
899 */
900 if (tzInfo->defaultTZBuffer == NULL) {
901 rewind(tzInfo->defaultTZFilePtr);
902 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
4388f060 903 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
729e4ab9
A
904 }
905 rewind(file);
906 while(sizeFileLeft > 0) {
907 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
908 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
909
910 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
911 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
912 result = FALSE;
913 break;
914 }
915 sizeFileLeft -= sizeFileRead;
916 tzInfo->defaultTZPosition += sizeFileRead;
917 }
918 }
919 } else {
920 result = FALSE;
921 }
922
923 if (file != NULL) {
924 fclose(file);
925 }
926
927 return result;
928}
f3c0d7a5
A
929
930
729e4ab9
A
931/* dirent also lists two entries: "." and ".." that we can safely ignore. */
932#define SKIP1 "."
933#define SKIP2 ".."
f3c0d7a5
A
934static UBool U_CALLCONV putil_cleanup(void);
935static CharString *gSearchTZFileResult = NULL;
936
937/*
938 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
939 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
940 */
729e4ab9 941static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
729e4ab9
A
942 DIR* dirp = opendir(path);
943 DIR* subDirp = NULL;
944 struct dirent* dirEntry = NULL;
945
946 char* result = NULL;
947 if (dirp == NULL) {
948 return result;
949 }
950
f3c0d7a5
A
951 if (gSearchTZFileResult == NULL) {
952 gSearchTZFileResult = new CharString;
953 if (gSearchTZFileResult == NULL) {
954 return NULL;
955 }
956 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
957 }
958
729e4ab9 959 /* Save the current path */
f3c0d7a5
A
960 UErrorCode status = U_ZERO_ERROR;
961 CharString curpath(path, -1, status);
962 if (U_FAILURE(status)) {
963 return NULL;
964 }
729e4ab9
A
965
966 /* Check each entry in the directory. */
967 while((dirEntry = readdir(dirp)) != NULL) {
968 const char* dirName = dirEntry->d_name;
969 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
970 /* Create a newpath with the new entry to test each entry in the directory. */
f3c0d7a5
A
971 CharString newpath(curpath, status);
972 newpath.append(dirName, -1, status);
973 if (U_FAILURE(status)) {
974 return NULL;
975 }
729e4ab9 976
f3c0d7a5 977 if ((subDirp = opendir(newpath.data())) != NULL) {
729e4ab9
A
978 /* If this new path is a directory, make a recursive call with the newpath. */
979 closedir(subDirp);
f3c0d7a5
A
980 newpath.append('/', status);
981 if (U_FAILURE(status)) {
982 return NULL;
983 }
984 result = searchForTZFile(newpath.data(), tzInfo);
729e4ab9
A
985 /*
986 Have to get out here. Otherwise, we'd keep looking
987 and return the first match in the top-level directory
988 if there's a match in the top-level. If not, this function
989 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
990 It worked without this in most cases because we have a fallback of calling
991 localtime_r to figure out the default timezone.
992 */
993 if (result != NULL)
994 break;
995 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
f3c0d7a5
A
996 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
997 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
998 if (amountToSkip > newpath.length()) {
999 amountToSkip = newpath.length();
1000 }
1001 const char* zoneid = newpath.data() + amountToSkip;
729e4ab9 1002 skipZoneIDPrefix(&zoneid);
f3c0d7a5
A
1003 gSearchTZFileResult->clear();
1004 gSearchTZFileResult->append(zoneid, -1, status);
1005 if (U_FAILURE(status)) {
1006 return NULL;
1007 }
1008 result = gSearchTZFileResult->data();
729e4ab9
A
1009 /* Get out after the first one found. */
1010 break;
1011 }
1012 }
1013 }
1014 }
1015 closedir(dirp);
1016 return result;
1017}
1018#endif
f3c0d7a5
A
1019
1020U_CAPI void U_EXPORT2
1021uprv_tzname_clear_cache()
1022{
1023#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1024 gTimeZoneBufferPtr = NULL;
1025#endif
1026}
1027
// With the Universal Windows Platform we can just ask Windows for the name
#if U_PLATFORM_HAS_WINUWP_API
// Asks the Windows.Globalization.Calendar runtime class for the current time
// zone name. Returns a newly allocated (uprv_calloc) char copy of that name,
// or nullptr if any WinRT call or the allocation fails.
U_CAPI const char* U_EXPORT2
uprv_getWindowsTimeZone()
{
    // Get default Windows timezone.
    ComPtr<IInspectable> calendar;
    HRESULT hr = RoActivateInstance(
        HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(),
        &calendar);
    if (FAILED(hr))
    {
        return nullptr;
    }

    ComPtr<ABI::Windows::Globalization::ITimeZoneOnCalendar> timezone;
    hr = calendar.As(&timezone);
    if (FAILED(hr))
    {
        return nullptr;
    }

    HString timezoneString;
    hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
    if (FAILED(hr))
    {
        return nullptr;
    }

    // One extra zero-filled slot from calloc keeps the result NUL-terminated.
    int32_t length = wcslen(timezoneString.GetRawBuffer(NULL));
    char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
    if (asciiId == nullptr)
    {
        // Failed
        return nullptr;
    }

    u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length);
    return asciiId;
}
#endif
1063
374ca955 1064U_CAPI const char* U_EXPORT2
b75a7d8f
A
1065uprv_tzname(int n)
1066{
46f4442e 1067 const char *tzid = NULL;
4388f060 1068#if U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5
A
1069#if U_PLATFORM_HAS_WINUWP_API > 0
1070 tzid = uprv_getWindowsTimeZone();
1071#else
46f4442e 1072 tzid = uprv_detectWindowsTimeZone();
f3c0d7a5 1073#endif
73c04bcf 1074
46f4442e
A
1075 if (tzid != NULL) {
1076 return tzid;
b75a7d8f 1077 }
f3c0d7a5
A
1078
1079#ifndef U_TZNAME
1080 // The return value is free'd in timezone.cpp on Windows because
1081 // the other code path returns a pointer to a heap location.
1082 // If we don't have a name already, then tzname wouldn't be any
1083 // better, so just fall back.
1084 return uprv_strdup("Etc/UTC");
1085#endif // !U_TZNAME
1086
73c04bcf 1087#else
b75a7d8f 1088
4388f060 1089/*#if U_PLATFORM_IS_DARWIN_BASED
374ca955
A
1090 int ret;
1091
46f4442e
A
1092 tzid = getenv("TZFILE");
1093 if (tzid != NULL) {
1094 return tzid;
374ca955 1095 }
73c04bcf 1096#endif*/
374ca955 1097
46f4442e
A
1098/* This code can be temporarily disabled to test tzname resolution later on. */
1099#ifndef DEBUG_TZNAME
1100 tzid = getenv("TZ");
51004dcb
A
1101 if (tzid != NULL && isValidOlsonID(tzid)
1102#if U_PLATFORM == U_PF_SOLARIS
1103 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1104 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1105#endif
1106 ) {
2ca993e8
A
1107 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1108 if (tzid[0] == ':') {
1109 tzid++;
1110 }
73c04bcf 1111 /* This might be a good Olson ID. */
729e4ab9 1112 skipZoneIDPrefix(&tzid);
46f4442e 1113 return tzid;
374ca955 1114 }
73c04bcf 1115 /* else U_TZNAME will give a better result. */
46f4442e 1116#endif
374ca955 1117
4388f060 1118#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
73c04bcf
A
1119 /* Caller must handle threading issues */
1120 if (gTimeZoneBufferPtr == NULL) {
1121 /*
1122 This is a trick to look at the name of the link to get the Olson ID
1123 because the tzfile contents is underspecified.
1124 This isn't guaranteed to work because it may not be a symlink.
1125 */
f3c0d7a5 1126 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
374ca955 1127 if (0 < ret) {
73c04bcf
A
1128 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1129 gTimeZoneBuffer[ret] = 0;
1130 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1131 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1132 {
1133 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
374ca955 1134 }
57a6839d
A
1135#if U_PLATFORM == U_PF_SOLARIS
1136 else
1137 {
1138 tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
1139 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
1140 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1141 {
1142 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1143 }
1144 }
1145#endif
729e4ab9
A
1146 } else {
1147#if defined(SEARCH_TZFILE)
1148 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1149 if (tzInfo != NULL) {
1150 tzInfo->defaultTZBuffer = NULL;
1151 tzInfo->defaultTZFileSize = 0;
1152 tzInfo->defaultTZFilePtr = NULL;
1153 tzInfo->defaultTZstatus = FALSE;
1154 tzInfo->defaultTZPosition = 0;
1155
1156 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1157
1158 /* Free previously allocated memory */
1159 if (tzInfo->defaultTZBuffer != NULL) {
1160 uprv_free(tzInfo->defaultTZBuffer);
1161 }
1162 if (tzInfo->defaultTZFilePtr != NULL) {
1163 fclose(tzInfo->defaultTZFilePtr);
1164 }
1165 uprv_free(tzInfo);
1166 }
1167
1168 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1169 return gTimeZoneBufferPtr;
1170 }
1171#endif
374ca955 1172 }
374ca955 1173 }
73c04bcf
A
1174 else {
1175 return gTimeZoneBufferPtr;
1176 }
1177#endif
374ca955
A
1178#endif
1179
b75a7d8f 1180#ifdef U_TZNAME
4388f060 1181#if U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9
A
1182 /* The return value is free'd in timezone.cpp on Windows because
1183 * the other code path returns a pointer to a heap location. */
1184 return uprv_strdup(U_TZNAME[n]);
1185#else
73c04bcf 1186 /*
46f4442e
A
1187 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1188 So we remap the abbreviation to an olson ID.
1189
1190 Since Windows exposes a little more timezone information,
1191 we normally don't use this code on Windows because
1192 uprv_detectWindowsTimeZone should have already given the correct answer.
73c04bcf 1193 */
46f4442e
A
1194 {
1195 struct tm juneSol, decemberSol;
1196 int daylightType;
1197 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1198 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1199
1200 /* This probing will tell us when daylight savings occurs. */
1201 localtime_r(&juneSolstice, &juneSol);
1202 localtime_r(&decemberSolstice, &decemberSol);
4388f060
A
1203 if(decemberSol.tm_isdst > 0) {
1204 daylightType = U_DAYLIGHT_DECEMBER;
1205 } else if(juneSol.tm_isdst > 0) {
1206 daylightType = U_DAYLIGHT_JUNE;
1207 } else {
1208 daylightType = U_DAYLIGHT_NONE;
1209 }
46f4442e
A
1210 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1211 if (tzid != NULL) {
1212 return tzid;
1213 }
1214 }
b75a7d8f 1215 return U_TZNAME[n];
729e4ab9 1216#endif
b75a7d8f
A
1217#else
1218 return "";
1219#endif
1220}
1221
1222/* Get and set the ICU data directory --------------------------------------- */
1223
b331163b 1224static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
b75a7d8f 1225static char *gDataDirectory = NULL;
b331163b
A
1226
1227UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1228static CharString *gTimeZoneFilesDirectory = NULL;
1229
57a6839d 1230#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5
A
1231 static char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
1232 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1233#endif
1234
374ca955 1235static UBool U_CALLCONV putil_cleanup(void)
b75a7d8f 1236{
73c04bcf 1237 if (gDataDirectory && *gDataDirectory) {
b75a7d8f 1238 uprv_free(gDataDirectory);
b75a7d8f 1239 }
73c04bcf 1240 gDataDirectory = NULL;
b331163b
A
1241 gDataDirInitOnce.reset();
1242
1243 delete gTimeZoneFilesDirectory;
1244 gTimeZoneFilesDirectory = NULL;
1245 gTimeZoneFilesInitOnce.reset();
1246
f3c0d7a5
A
1247#ifdef SEARCH_TZFILE
1248 delete gSearchTZFileResult;
1249 gSearchTZFileResult = NULL;
1250#endif
1251
57a6839d 1252#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5 1253 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
b75a7d8f
A
1254 uprv_free(gCorrectedPOSIXLocale);
1255 gCorrectedPOSIXLocale = NULL;
f3c0d7a5 1256 gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1257 }
1258#endif
1259 return TRUE;
1260}
1261
1262/*
1263 * Set the data directory.
1264 * Make a copy of the passed string, and set the global data dir to point to it.
b75a7d8f
A
1265 */
1266U_CAPI void U_EXPORT2
1267u_setDataDirectory(const char *directory) {
1268 char *newDataDir;
374ca955 1269 int32_t length;
b75a7d8f 1270
73c04bcf
A
1271 if(directory==NULL || *directory==0) {
1272 /* A small optimization to prevent the malloc and copy when the
1273 shared library is used, and this is a way to make sure that NULL
1274 is never returned.
1275 */
1276 newDataDir = (char *)"";
b75a7d8f 1277 }
73c04bcf
A
1278 else {
1279 length=(int32_t)uprv_strlen(directory);
1280 newDataDir = (char *)uprv_malloc(length + 2);
46f4442e
A
1281 /* Exit out if newDataDir could not be created. */
1282 if (newDataDir == NULL) {
1283 return;
1284 }
73c04bcf 1285 uprv_strcpy(newDataDir, directory);
b75a7d8f 1286
374ca955 1287#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
73c04bcf
A
1288 {
1289 char *p;
1290 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1291 *p = U_FILE_SEP_CHAR;
1292 }
1293 }
374ca955 1294#endif
73c04bcf 1295 }
374ca955 1296
73c04bcf 1297 if (gDataDirectory && *gDataDirectory) {
b75a7d8f
A
1298 uprv_free(gDataDirectory);
1299 }
1300 gDataDirectory = newDataDir;
374ca955 1301 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
b75a7d8f
A
1302}
1303
374ca955 1304U_CAPI UBool U_EXPORT2
729e4ab9 1305uprv_pathIsAbsolute(const char *path)
374ca955 1306{
729e4ab9
A
1307 if(!path || !*path) {
1308 return FALSE;
374ca955
A
1309 }
1310
1311 if(*path == U_FILE_SEP_CHAR) {
1312 return TRUE;
1313 }
1314
1315#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1316 if(*path == U_FILE_ALT_SEP_CHAR) {
1317 return TRUE;
1318 }
1319#endif
1320
4388f060 1321#if U_PLATFORM_USES_ONLY_WIN32_API
374ca955
A
1322 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1323 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1324 path[1] == ':' ) {
1325 return TRUE;
1326 }
1327#endif
1328
1329 return FALSE;
1330}
1331
729e4ab9
A
1332/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1333 until some client wrapper makefiles are updated */
4388f060 1334#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
729e4ab9
A
1335# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1336# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1337# endif
1338#endif
1339
b331163b
A
1340static void U_CALLCONV dataDirectoryInitFn() {
1341 /* If we already have the directory, then return immediately. Will happen if user called
1342 * u_setDataDirectory().
1343 */
1344 if (gDataDirectory) {
1345 return;
1346 }
1347
b75a7d8f 1348 const char *path = NULL;
729e4ab9 1349#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
46f4442e
A
1350 char datadir_path_buffer[PATH_MAX];
1351#endif
b75a7d8f 1352
73c04bcf
A
1353 /*
1354 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1355 override ICU's data with the ICU_DATA environment variable. This prevents
1356 problems where multiple custom copies of ICU's specific version of data
1357 are installed on a system. Either the application must define the data
1358 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1359 ICU, set the data with udata_setCommonData or trust that all of the
1360 required data is contained in ICU's data library that contains
1361 the entry point defined by U_ICUDATA_ENTRY_POINT.
1362
1363 There may also be some platforms where environment variables
1364 are not allowed.
1365 */
1366# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1367 /* First try to get the environment variable */
f3c0d7a5
A
1368# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1369 path=getenv("ICU_DATA");
1370# endif
73c04bcf 1371# endif
b75a7d8f 1372
729e4ab9
A
1373 /* ICU_DATA_DIR may be set as a compile option.
1374 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1375 * and is used only when data is built in archive mode eliminating the need
1376 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1377 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1378 * set their own path.
1379 */
1380#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
b75a7d8f 1381 if(path==NULL || *path==0) {
729e4ab9
A
1382# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1383 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1384# endif
1385# ifdef ICU_DATA_DIR
b75a7d8f 1386 path=ICU_DATA_DIR;
729e4ab9
A
1387# else
1388 path=U_ICU_DATA_DEFAULT_DIR;
1389# endif
1390# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1391 if (prefix != NULL) {
1392 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
46f4442e
A
1393 path=datadir_path_buffer;
1394 }
729e4ab9 1395# endif
b75a7d8f 1396 }
729e4ab9 1397#endif
b75a7d8f 1398
f3c0d7a5
A
1399#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
1400 // Use data from the %windir%\globalization\icu directory
1401 // This is only available if ICU is built as a system component
1402 char datadir_path_buffer[MAX_PATH];
1403 UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer));
1404 if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1))
1405 {
1406 if (datadir_path_buffer[length - 1] != '\\')
1407 {
1408 datadir_path_buffer[length++] = '\\';
1409 datadir_path_buffer[length] = '\0';
1410 }
1411
1412 if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer))
1413 {
1414 uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS);
1415 path = datadir_path_buffer;
1416 }
1417 }
1418#endif
1419
b75a7d8f
A
1420 if(path==NULL) {
1421 /* It looks really bad, set it to something. */
f3c0d7a5
A
1422#if U_PLATFORM_HAS_WIN32_API
1423 // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
1424 path = ".\\";
1425#else
b75a7d8f 1426 path = "";
f3c0d7a5 1427#endif
b75a7d8f
A
1428 }
1429
1430 u_setDataDirectory(path);
b331163b
A
1431 return;
1432}
1433
1434U_CAPI const char * U_EXPORT2
1435u_getDataDirectory(void) {
1436 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
b75a7d8f
A
1437 return gDataDirectory;
1438}
1439
b331163b
A
1440static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1441 if (U_FAILURE(status)) {
1442 return;
1443 }
1444 gTimeZoneFilesDirectory->clear();
1445 gTimeZoneFilesDirectory->append(path, status);
1446#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1447 char *p = gTimeZoneFilesDirectory->data();
1448 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
1449 *p = U_FILE_SEP_CHAR;
1450 }
1451#endif
1452}
b75a7d8f 1453
2ca993e8
A
1454#if U_PLATFORM_IMPLEMENTS_POSIX
1455#include <sys/stat.h>
1456#if defined(U_TIMEZONE_FILES_DIR)
1457const char tzdirbuf[] = U_TIMEZONE_FILES_DIR;
1458enum { kTzfilenamebufLen = UPRV_LENGTHOF(tzdirbuf) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1459#endif
1460#endif
1461
b331163b
A
1462#define TO_STRING(x) TO_STRING_2(x)
1463#define TO_STRING_2(x) #x
b75a7d8f 1464
b331163b
A
1465static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1466 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1467 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1468 gTimeZoneFilesDirectory = new CharString();
1469 if (gTimeZoneFilesDirectory == NULL) {
1470 status = U_MEMORY_ALLOCATION_ERROR;
1471 return;
1472 }
2ca993e8 1473 UBool usingUTzFilesDir = FALSE;
f3c0d7a5
A
1474#if U_PLATFORM_HAS_WINUWP_API == 0
1475 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1476#else
1477 // TODO: UWP does not support alternate timezone data directories at this time
1478 const char *dir = "";
1479#endif // U_PLATFORM_HAS_WINUWP_API
b331163b
A
1480#if defined(U_TIMEZONE_FILES_DIR)
1481 if (dir == NULL) {
2ca993e8
A
1482 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1483 // Not sure why the above was done for this path only;
1484 // it preserves unwanted quotes.
1485 dir = tzdirbuf;
1486 usingUTzFilesDir = TRUE;
b331163b
A
1487 }
1488#endif
2ca993e8
A
1489#if U_PLATFORM_IMPLEMENTS_POSIX
1490 if (dir != NULL) {
1491 struct stat buf;
1492 if (stat(dir, &buf) != 0) {
1493 dir = NULL;
1494 }
1495#if defined(U_TIMEZONE_FILES_DIR)
1496 else if (usingUTzFilesDir) {
1497 char tzfilenamebuf[kTzfilenamebufLen];
1498 uprv_strcpy(tzfilenamebuf, tzdirbuf);
1499 uprv_strcat(tzfilenamebuf, U_FILE_SEP_STRING);
1500#if defined(U_TIMEZONE_PACKAGE)
1501 uprv_strcat(tzfilenamebuf, U_TIMEZONE_PACKAGE);
1502 uprv_strcat(tzfilenamebuf, ".dat");
1503#else
1504 uprv_strcat(tzfilenamebuf, "zoneinfo64.res");
1505#endif
1506 if (stat(tzfilenamebuf, &buf) != 0) {
1507 dir = NULL;
1508 }
1509 }
1510#endif /* defined(U_TIMEZONE_FILES_DIR) */
1511 }
1512#endif /* U_PLATFORM_IMPLEMENTS_POSIX */
b331163b
A
1513 if (dir == NULL) {
1514 dir = "";
1515 }
1516 setTimeZoneFilesDir(dir, status);
1517}
b75a7d8f
A
1518
1519
b331163b
A
1520U_CAPI const char * U_EXPORT2
1521u_getTimeZoneFilesDirectory(UErrorCode *status) {
1522 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1523 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1524}
1525
1526U_CAPI void U_EXPORT2
1527u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1528 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1529 setTimeZoneFilesDir(path, *status);
1530
1531 // Note: this function does some extra churn, first setting based on the
1532 // environment, then immediately replacing with the value passed in.
1533 // The logic is simpler that way, and performance shouldn't be an issue.
1534}
b75a7d8f 1535
b75a7d8f
A
1536
1537#if U_POSIX_LOCALE
729e4ab9
A
1538/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1539 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1540 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1541 */
1542static const char *uprv_getPOSIXIDForCategory(int category)
b75a7d8f 1543{
729e4ab9
A
1544 const char* posixID = NULL;
1545 if (category == LC_MESSAGES || category == LC_CTYPE) {
73c04bcf 1546 /*
729e4ab9 1547 * On Solaris two different calls to setlocale can result in
73c04bcf
A
1548 * different values. Only get this value once.
1549 *
1550 * We must check this first because an application can set this.
1551 *
1552 * LC_ALL can't be used because it's platform dependent. The LANG
1553 * environment variable seems to affect LC_CTYPE variable by default.
1554 * Here is what setlocale(LC_ALL, NULL) can return.
1555 * HPUX can return 'C C C C C C C'
1556 * Solaris can return /en_US/C/C/C/C/C on the second try.
1557 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1558 *
1559 * The default codepage detection also needs to use LC_CTYPE.
729e4ab9 1560 *
73c04bcf
A
1561 * Do not call setlocale(LC_*, "")! Using an empty string instead
1562 * of NULL, will modify the libc behavior.
1563 */
729e4ab9 1564 posixID = setlocale(category, NULL);
73c04bcf
A
1565 if ((posixID == 0)
1566 || (uprv_strcmp("C", posixID) == 0)
1567 || (uprv_strcmp("POSIX", posixID) == 0))
1568 {
1569 /* Maybe we got some garbage. Try something more reasonable */
1570 posixID = getenv("LC_ALL");
2ca993e8
A
1571 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1572 * This is needed to properly handle empty env. variables
1573 */
1574#if U_PLATFORM == U_PF_SOLARIS
1575 if ((posixID == 0) || (posixID[0] == '\0')) {
1576 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1577 if ((posixID == 0) || (posixID[0] == '\0')) {
1578#else
b75a7d8f 1579 if (posixID == 0) {
729e4ab9 1580 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
73c04bcf 1581 if (posixID == 0) {
2ca993e8 1582#endif
73c04bcf
A
1583 posixID = getenv("LANG");
1584 }
b75a7d8f
A
1585 }
1586 }
729e4ab9
A
1587 }
1588 if ((posixID==0)
1589 || (uprv_strcmp("C", posixID) == 0)
1590 || (uprv_strcmp("POSIX", posixID) == 0))
1591 {
1592 /* Nothing worked. Give it a nice POSIX default value. */
1593 posixID = "en_US_POSIX";
1594 }
1595 return posixID;
1596}
b75a7d8f 1597
729e4ab9
A
1598/* Return just the POSIX id for the default locale, whatever happens to be in
1599 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1600 */
1601static const char *uprv_getPOSIXIDForDefaultLocale(void)
1602{
1603 static const char* posixID = NULL;
1604 if (posixID == 0) {
1605 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
b75a7d8f 1606 }
729e4ab9
A
1607 return posixID;
1608}
73c04bcf 1609
51004dcb 1610#if !U_CHARSET_IS_UTF8
729e4ab9
A
1611/* Return just the POSIX id for the default codepage, whatever happens to be in
1612 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1613 */
1614static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1615{
1616 static const char* posixID = NULL;
1617 if (posixID == 0) {
1618 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1619 }
b75a7d8f
A
1620 return posixID;
1621}
1622#endif
51004dcb 1623#endif
b75a7d8f
A
1624
1625/* NOTE: The caller should handle thread safety */
1626U_CAPI const char* U_EXPORT2
1627uprv_getDefaultLocaleID()
1628{
1629#if U_POSIX_LOCALE
1630/*
1631 Note that: (a '!' means the ID is improper somehow)
1632 LC_ALL ----> default_loc codepage
1633--------------------------------------------------------
1634 ab.CD ab CD
1635 ab@CD ab__CD -
1636 ab@CD.EF ab__CD EF
1637
1638 ab_CD.EF@GH ab_CD_GH EF
1639
1640Some 'improper' ways to do the same as above:
1641 ! ab_CD@GH.EF ab_CD_GH EF
1642 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1643 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1644
1645 _CD@GH _CD_GH -
1646 _CD.EF@GH _CD_GH EF
1647
1648The variant cannot have dots in it.
1649The 'rightmost' variant (@xxx) wins.
1650The leftmost codepage (.xxx) wins.
1651*/
1652 char *correctedPOSIXLocale = 0;
729e4ab9 1653 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
b75a7d8f
A
1654 const char *p;
1655 const char *q;
1656 int32_t len;
1657
1658 /* Format: (no spaces)
1659 ll [ _CC ] [ . MM ] [ @ VV]
1660
1661 l = lang, C = ctry, M = charmap, V = variant
1662 */
1663
1664 if (gCorrectedPOSIXLocale != NULL) {
729e4ab9 1665 return gCorrectedPOSIXLocale;
b75a7d8f
A
1666 }
1667
1668 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1669 /* assume new locale can't be larger than old one? */
51004dcb 1670 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
46f4442e
A
1671 /* Exit on memory allocation error. */
1672 if (correctedPOSIXLocale == NULL) {
1673 return NULL;
1674 }
b75a7d8f
A
1675 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1676 correctedPOSIXLocale[p-posixID] = 0;
1677
1678 /* do not copy after the @ */
1679 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1680 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1681 }
1682 }
1683
1684 /* Note that we scan the *uncorrected* ID. */
1685 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1686 if (correctedPOSIXLocale == NULL) {
51004dcb 1687 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
46f4442e
A
1688 /* Exit on memory allocation error. */
1689 if (correctedPOSIXLocale == NULL) {
1690 return NULL;
1691 }
b75a7d8f
A
1692 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1693 correctedPOSIXLocale[p-posixID] = 0;
1694 }
1695 p++;
1696
1697 /* Take care of any special cases here.. */
1698 if (!uprv_strcmp(p, "nynorsk")) {
1699 p = "NY";
73c04bcf 1700 /* Don't worry about no__NY. In practice, it won't appear. */
b75a7d8f
A
1701 }
1702
1703 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1704 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1705 }
1706 else {
1707 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1708 }
1709
1710 if ((q = uprv_strchr(p, '.')) != NULL) {
1711 /* How big will the resulting string be? */
374ca955 1712 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
b75a7d8f
A
1713 uprv_strncat(correctedPOSIXLocale, p, q-p);
1714 correctedPOSIXLocale[len] = 0;
1715 }
1716 else {
1717 /* Anything following the @ sign */
1718 uprv_strcat(correctedPOSIXLocale, p);
1719 }
1720
1721 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1722 * How about 'russian' -> 'ru'?
73c04bcf
A
1723 * Many of the other locales using ISO codes will be handled by the
1724 * canonicalization functions in uloc_getDefault.
b75a7d8f
A
1725 */
1726 }
1727
1728 /* Was a correction made? */
1729 if (correctedPOSIXLocale != NULL) {
1730 posixID = correctedPOSIXLocale;
1731 }
1732 else {
1733 /* copy it, just in case the original pointer goes away. See j2395 */
1734 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
46f4442e
A
1735 /* Exit on memory allocation error. */
1736 if (correctedPOSIXLocale == NULL) {
1737 return NULL;
1738 }
b75a7d8f
A
1739 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1740 }
1741
1742 if (gCorrectedPOSIXLocale == NULL) {
1743 gCorrectedPOSIXLocale = correctedPOSIXLocale;
f3c0d7a5 1744 gCorrectedPOSIXLocaleHeapAllocated = true;
374ca955 1745 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
b75a7d8f
A
1746 correctedPOSIXLocale = NULL;
1747 }
1748
1749 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
729e4ab9 1750 uprv_free(correctedPOSIXLocale);
b75a7d8f
A
1751 }
1752
1753 return posixID;
1754
4388f060 1755#elif U_PLATFORM_USES_ONLY_WIN32_API
57a6839d 1756#define POSIX_LOCALE_CAPACITY 64
b75a7d8f 1757 UErrorCode status = U_ZERO_ERROR;
57a6839d
A
1758 char *correctedPOSIXLocale = 0;
1759
f3c0d7a5 1760 // If we have already figured this out just use the cached value
57a6839d
A
1761 if (gCorrectedPOSIXLocale != NULL) {
1762 return gCorrectedPOSIXLocale;
1763 }
1764
f3c0d7a5
A
1765 // No cached value, need to determine the current value
1766 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH];
1767#if U_PLATFORM_HAS_WINUWP_API == 0
1768 // If not a Universal Windows App, we'll need user default language.
1769 // Vista and above should use Locale Names instead of LCIDs
1770 int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale));
1771#else
1772 // In a UWP app, we want the top language that the application and user agreed upon
1773 ComPtr<ABI::Windows::Foundation::Collections::IVectorView<HSTRING>> languageList;
1774
1775 ComPtr<ABI::Windows::Globalization::IApplicationLanguagesStatics> applicationLanguagesStatics;
1776 HRESULT hr = GetActivationFactory(
1777 HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(),
1778 &applicationLanguagesStatics);
1779 if (SUCCEEDED(hr))
1780 {
1781 hr = applicationLanguagesStatics->get_Languages(&languageList);
1782 }
1783
1784 if (FAILED(hr))
1785 {
1786 // If there is no application context, then use the top language from the user language profile
1787 ComPtr<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics> globalizationPreferencesStatics;
1788 hr = GetActivationFactory(
1789 HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(),
1790 &globalizationPreferencesStatics);
1791 if (SUCCEEDED(hr))
1792 {
1793 hr = globalizationPreferencesStatics->get_Languages(&languageList);
57a6839d
A
1794 }
1795 }
b75a7d8f 1796
f3c0d7a5
A
1797 // We have a list of languages, ICU knows one, so use the top one for our locale
1798 HString topLanguage;
1799 if (SUCCEEDED(hr))
1800 {
1801 hr = languageList->GetAt(0, topLanguage.GetAddressOf());
1802 }
1803
1804 if (FAILED(hr))
1805 {
1806 // Unexpected, use en-US by default
1807 if (gCorrectedPOSIXLocale == NULL) {
1808 gCorrectedPOSIXLocale = "en_US";
1809 }
1810
1811 return gCorrectedPOSIXLocale;
1812 }
1813
1814 // ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
1815 int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale));
1816#endif
1817 // Now we should have a Windows locale name that needs converted to the POSIX style,
1818 if (length > 0)
1819 {
1820 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1821 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH];
1822
1823 int32_t i;
1824 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1825 {
1826 if (windowsLocale[i] == '_')
1827 {
1828 modifiedWindowsLocale[i] = '-';
1829 }
1830 else
1831 {
1832 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1833 }
1834
1835 if (modifiedWindowsLocale[i] == '\0')
1836 {
1837 break;
1838 }
1839 }
1840
1841 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1842 {
1843 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1844 // locale when tags are dropped
1845 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1846 }
1847
1848 // Now normalize the resulting name
1849 if (correctedPOSIXLocale)
1850 {
1851 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1852 if (U_SUCCESS(status))
1853 {
1854 *(correctedPOSIXLocale + posixLen) = 0;
1855 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1856 gCorrectedPOSIXLocaleHeapAllocated = true;
1857 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1858 }
1859 else
1860 {
1861 uprv_free(correctedPOSIXLocale);
1862 }
1863 }
1864 }
1865
1866 // If unable to find a locale we can agree upon, use en-US by default
57a6839d 1867 if (gCorrectedPOSIXLocale == NULL) {
f3c0d7a5 1868 gCorrectedPOSIXLocale = "en_US";
b75a7d8f 1869 }
57a6839d 1870 return gCorrectedPOSIXLocale;
b75a7d8f 1871
4388f060 1872#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
1873 /* locales are process scoped and are by definition thread safe */
1874 static char correctedLocale[64];
1875 const char *localeID = getenv("LC_ALL");
1876 char *p;
1877
1878 if (localeID == NULL)
1879 localeID = getenv("LANG");
1880 if (localeID == NULL)
1881 localeID = setlocale(LC_ALL, NULL);
1882 /* Make sure we have something... */
1883 if (localeID == NULL)
1884 return "en_US_POSIX";
1885
1886 /* Extract the locale name from the path. */
1887 if((p = uprv_strrchr(localeID, '/')) != NULL)
1888 {
1889 /* Increment p to start of locale name. */
1890 p++;
1891 localeID = p;
1892 }
1893
1894 /* Copy to work location. */
1895 uprv_strcpy(correctedLocale, localeID);
1896
1897 /* Strip off the '.locale' extension. */
1898 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1899 *p = 0;
1900 }
1901
1902 /* Upper case the locale name. */
1903 T_CString_toUpperCase(correctedLocale);
1904
1905 /* See if we are using the POSIX locale. Any of the
1906 * following are equivalent and use the same QLGPGCMA
1907 * (POSIX) locale.
73c04bcf
A
1908 * QLGPGCMA2 means UCS2
1909 * QLGPGCMA_4 means UTF-32
1910 * QLGPGCMA_8 means UTF-8
b75a7d8f
A
1911 */
1912 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1913 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
73c04bcf 1914 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
b75a7d8f
A
1915 {
1916 uprv_strcpy(correctedLocale, "en_US_POSIX");
1917 }
1918 else
1919 {
1920 int16_t LocaleLen;
1921
1922 /* Lower case the lang portion. */
1923 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1924 {
1925 *p = uprv_tolower(*p);
1926 }
1927
1928 /* Adjust for Euro. After '_E' add 'URO'. */
1929 LocaleLen = uprv_strlen(correctedLocale);
1930 if (correctedLocale[LocaleLen - 2] == '_' &&
1931 correctedLocale[LocaleLen - 1] == 'E')
1932 {
1933 uprv_strcat(correctedLocale, "URO");
1934 }
1935
1936 /* If using Lotus-based locale then convert to
1937 * equivalent non Lotus.
1938 */
1939 else if (correctedLocale[LocaleLen - 2] == '_' &&
1940 correctedLocale[LocaleLen - 1] == 'L')
1941 {
1942 correctedLocale[LocaleLen - 2] = 0;
1943 }
1944
1945 /* There are separate simplified and traditional
1946 * locales called zh_HK_S and zh_HK_T.
1947 */
1948 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1949 {
1950 uprv_strcpy(correctedLocale, "zh_HK");
1951 }
1952
1953 /* A special zh_CN_GBK locale...
1954 */
1955 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1956 {
1957 uprv_strcpy(correctedLocale, "zh_CN");
1958 }
1959
1960 }
1961
1962 return correctedLocale;
1963#endif
1964
1965}
1966
729e4ab9 1967#if !U_CHARSET_IS_UTF8
73c04bcf
A
1968#if U_POSIX_LOCALE
1969/*
1970Due to various platform differences, one platform may specify a charset,
1971when they really mean a different charset. Remap the names so that they are
46f4442e
A
1972compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1973here. Before adding anything to this function, please consider adding unique
1974names to the ICU alias table in the data directory.
73c04bcf
A
1975*/
1976static const char*
1977remapPlatformDependentCodepage(const char *locale, const char *name) {
1978 if (locale != NULL && *locale == 0) {
1979 /* Make sure that an empty locale is handled the same way. */
1980 locale = NULL;
1981 }
1982 if (name == NULL) {
1983 return NULL;
1984 }
4388f060 1985#if U_PLATFORM == U_PF_AIX
73c04bcf
A
1986 if (uprv_strcmp(name, "IBM-943") == 0) {
1987 /* Use the ASCII compatible ibm-943 */
1988 name = "Shift-JIS";
1989 }
1990 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1991 /* Use the windows-1252 that contains the Euro */
1992 name = "IBM-5348";
1993 }
4388f060 1994#elif U_PLATFORM == U_PF_SOLARIS
73c04bcf
A
1995 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1996 /* Solaris underspecifies the "EUC" name. */
1997 if (uprv_strcmp(locale, "zh_CN") == 0) {
1998 name = "EUC-CN";
1999 }
2000 else if (uprv_strcmp(locale, "zh_TW") == 0) {
2001 name = "EUC-TW";
2002 }
2003 else if (uprv_strcmp(locale, "ko_KR") == 0) {
2004 name = "EUC-KR";
2005 }
2006 }
46f4442e
A
2007 else if (uprv_strcmp(name, "eucJP") == 0) {
2008 /*
2009 ibm-954 is the best match.
2010 ibm-33722 is the default for eucJP (similar to Windows).
2011 */
2012 name = "eucjis";
2013 }
2014 else if (uprv_strcmp(name, "646") == 0) {
2015 /*
729e4ab9 2016 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
46f4442e
A
2017 * ISO-8859-1 instead of US-ASCII(646).
2018 */
2019 name = "ISO-8859-1";
2020 }
4388f060 2021#elif U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
2022 if (locale == NULL && *name == 0) {
2023 /*
2024 No locale was specified, and an empty name was passed in.
2025 This usually indicates that nl_langinfo didn't return valid information.
2026 Mac OS X uses UTF-8 by default (especially the locale data and console).
2027 */
2028 name = "UTF-8";
2029 }
729e4ab9
A
2030 else if (uprv_strcmp(name, "CP949") == 0) {
2031 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2032 name = "EUC-KR";
2033 }
2034 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2035 /*
2036 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2037 */
2038 name = "UTF-8";
2039 }
4388f060 2040#elif U_PLATFORM == U_PF_BSD
729e4ab9
A
2041 if (uprv_strcmp(name, "CP949") == 0) {
2042 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2043 name = "EUC-KR";
2044 }
4388f060 2045#elif U_PLATFORM == U_PF_HPUX
46f4442e
A
2046 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2047 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2048 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2049 name = "hkbig5";
2050 }
2051 else if (uprv_strcmp(name, "eucJP") == 0) {
2052 /*
2053 ibm-1350 is the best match, but unavailable.
2054 ibm-954 is mostly a superset of ibm-1350.
2055 ibm-33722 is the default for eucJP (similar to Windows).
2056 */
2057 name = "eucjis";
2058 }
4388f060 2059#elif U_PLATFORM == U_PF_LINUX
46f4442e
A
2060 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2061 /* Linux underspecifies the "EUC" name. */
2062 if (uprv_strcmp(locale, "korean") == 0) {
2063 name = "EUC-KR";
2064 }
2065 else if (uprv_strcmp(locale, "japanese") == 0) {
2066 /* See comment below about eucJP */
2067 name = "eucjis";
2068 }
2069 }
2070 else if (uprv_strcmp(name, "eucjp") == 0) {
2071 /*
2072 ibm-1350 is the best match, but unavailable.
2073 ibm-954 is mostly a superset of ibm-1350.
2074 ibm-33722 is the default for eucJP (similar to Windows).
2075 */
2076 name = "eucjis";
2077 }
729e4ab9
A
2078 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2079 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2080 /*
2081 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2082 */
2083 name = "UTF-8";
2084 }
2085 /*
2086 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2087 * it by falling back to 'US-ASCII' when NULL is returned from this
2088 * function. So, we don't have to worry about it here.
2089 */
73c04bcf
A
2090#endif
2091 /* return NULL when "" is passed in */
2092 if (*name == 0) {
2093 name = NULL;
2094 }
2095 return name;
2096}
2097
729e4ab9 2098static const char*
73c04bcf
A
2099getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2100{
2101 char localeBuf[100];
2102 const char *name = NULL;
2103 char *variant = NULL;
2104
2105 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2106 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2107 uprv_strncpy(localeBuf, localeName, localeCapacity);
2108 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2109 name = uprv_strncpy(buffer, name+1, buffCapacity);
2110 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
4388f060 2111 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
73c04bcf
A
2112 *variant = 0;
2113 }
2114 name = remapPlatformDependentCodepage(localeBuf, name);
2115 }
2116 return name;
2117}
2118#endif
374ca955 2119
729e4ab9 2120static const char*
374ca955 2121int_getDefaultCodepage()
b75a7d8f 2122{
4388f060 2123#if U_PLATFORM == U_PF_OS400
b75a7d8f
A
2124 uint32_t ccsid = 37; /* Default to ibm-37 */
2125 static char codepage[64];
2126 Qwc_JOBI0400_t jobinfo;
2127 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2128
2129 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2130 "* ", " ", &error);
2131
2132 if (error.Bytes_Available == 0) {
2133 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2134 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2135 }
2136 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2137 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2138 }
2139 /* else use the default */
2140 }
2141 sprintf(codepage,"ibm-%d", ccsid);
2142 return codepage;
2143
4388f060 2144#elif U_PLATFORM == U_PF_OS390
b75a7d8f 2145 static char codepage[64];
729e4ab9
A
2146
2147 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2148 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
46f4442e 2149 codepage[63] = 0; /* NULL terminate */
729e4ab9 2150
b75a7d8f
A
2151 return codepage;
2152
4388f060 2153#elif U_PLATFORM_USES_ONLY_WIN32_API
b75a7d8f 2154 static char codepage[64];
f3c0d7a5
A
2155 DWORD codepageNumber = 0;
2156
2157#if U_PLATFORM_HAS_WINUWP_API > 0
2158 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2159 // have folks use Unicode than a "system" code page, however this is the same
2160 // codepage as the system default locale codepage. (FWIW, the system locale is
2161 // ONLY used for codepage, it should never be used for anything else)
2162 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2163 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2164#else
2165 // Win32 apps can call GetACP
2166 codepageNumber = GetACP();
2167#endif
2168 // Special case for UTF-8
2169 if (codepageNumber == 65001)
2170 {
2171 return "UTF-8";
2172 }
2173 // Windows codepages can look like windows-1252, so format the found number
2174 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2175 // are between 3 and 19999
2176 if (codepageNumber > 0 && codepageNumber < 20000)
2177 {
2178 sprintf(codepage, "windows-%ld", codepageNumber);
2179 return codepage;
2180 }
2181 // If the codepage number call failed then return UTF-8
2182 return "UTF-8";
b75a7d8f
A
2183
2184#elif U_POSIX_LOCALE
2185 static char codesetName[100];
b75a7d8f 2186 const char *localeName = NULL;
73c04bcf 2187 const char *name = NULL;
b75a7d8f 2188
729e4ab9 2189 localeName = uprv_getPOSIXIDForDefaultCodepage();
b75a7d8f 2190 uprv_memset(codesetName, 0, sizeof(codesetName));
2ca993e8
A
2191 /* On Solaris nl_langinfo returns C locale values unless setlocale
2192 * was called earlier.
2193 */
2194#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
729e4ab9
A
2195 /* When available, check nl_langinfo first because it usually gives more
2196 useful names. It depends on LC_CTYPE.
73c04bcf 2197 nl_langinfo may use the same buffer as setlocale. */
b75a7d8f
A
2198 {
2199 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
4388f060 2200#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
729e4ab9
A
2201 /*
2202 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2203 * instead of ASCII.
2204 */
2205 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2206 codeset = remapPlatformDependentCodepage(localeName, codeset);
2207 } else
2208#endif
2209 {
2210 codeset = remapPlatformDependentCodepage(NULL, codeset);
2211 }
2212
b75a7d8f
A
2213 if (codeset != NULL) {
2214 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2215 codesetName[sizeof(codesetName)-1] = 0;
374ca955 2216 return codesetName;
b75a7d8f
A
2217 }
2218 }
2219#endif
374ca955 2220
729e4ab9
A
2221 /* Use setlocale in a nice way, and then check some environment variables.
2222 Maybe the application used setlocale already.
2223 */
2224 uprv_memset(codesetName, 0, sizeof(codesetName));
2225 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2226 if (name) {
2227 /* if we can find the codeset name from setlocale, return that. */
2228 return name;
2229 }
2230
374ca955
A
2231 if (*codesetName == 0)
2232 {
73c04bcf 2233 /* Everything failed. Return US ASCII (ISO 646). */
46f4442e 2234 (void)uprv_strcpy(codesetName, "US-ASCII");
374ca955 2235 }
b75a7d8f
A
2236 return codesetName;
2237#else
2238 return "US-ASCII";
2239#endif
2240}
2241
b75a7d8f 2242
374ca955
A
2243U_CAPI const char* U_EXPORT2
2244uprv_getDefaultCodepage()
2245{
2246 static char const *name = NULL;
2247 umtx_lock(NULL);
2248 if (name == NULL) {
2249 name = int_getDefaultCodepage();
b75a7d8f 2250 }
374ca955
A
2251 umtx_unlock(NULL);
2252 return name;
b75a7d8f 2253}
729e4ab9 2254#endif /* !U_CHARSET_IS_UTF8 */
b75a7d8f 2255
b75a7d8f 2256
374ca955
A
2257/* end of platform-specific implementation -------------- */
2258
2259/* version handling --------------------------------------------------------- */
b75a7d8f
A
2260
2261U_CAPI void U_EXPORT2
2262u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2263 char *end;
2264 uint16_t part=0;
2265
2266 if(versionArray==NULL) {
2267 return;
2268 }
2269
2270 if(versionString!=NULL) {
2271 for(;;) {
2272 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2273 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2274 break;
2275 }
2276 versionString=end+1;
2277 }
2278 }
2279
2280 while(part<U_MAX_VERSION_LENGTH) {
2281 versionArray[part++]=0;
2282 }
2283}
2284
729e4ab9
A
2285U_CAPI void U_EXPORT2
2286u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2287 if(versionArray!=NULL && versionString!=NULL) {
2288 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2289 int32_t len = u_strlen(versionString);
2290 if(len>U_MAX_VERSION_STRING_LENGTH) {
2291 len = U_MAX_VERSION_STRING_LENGTH;
2292 }
2293 u_UCharsToChars(versionString, versionChars, len);
2294 versionChars[len]=0;
2295 u_versionFromString(versionArray, versionChars);
2296 }
2297}
2298
b75a7d8f 2299U_CAPI void U_EXPORT2
4388f060 2300u_versionToString(const UVersionInfo versionArray, char *versionString) {
b75a7d8f
A
2301 uint16_t count, part;
2302 uint8_t field;
2303
2304 if(versionString==NULL) {
2305 return;
2306 }
2307
2308 if(versionArray==NULL) {
2309 versionString[0]=0;
2310 return;
2311 }
2312
2313 /* count how many fields need to be written */
2314 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2315 }
2316
2317 if(count <= 1) {
2318 count = 2;
2319 }
2320
2321 /* write the first part */
2322 /* write the decimal field value */
2323 field=versionArray[0];
2324 if(field>=100) {
2325 *versionString++=(char)('0'+field/100);
2326 field%=100;
2327 }
2328 if(field>=10) {
2329 *versionString++=(char)('0'+field/10);
2330 field%=10;
2331 }
2332 *versionString++=(char)('0'+field);
2333
2334 /* write the following parts */
2335 for(part=1; part<count; ++part) {
2336 /* write a dot first */
2337 *versionString++=U_VERSION_DELIMITER;
2338
2339 /* write the decimal field value */
2340 field=versionArray[part];
2341 if(field>=100) {
2342 *versionString++=(char)('0'+field/100);
2343 field%=100;
2344 }
2345 if(field>=10) {
2346 *versionString++=(char)('0'+field/10);
2347 field%=10;
2348 }
2349 *versionString++=(char)('0'+field);
2350 }
2351
2352 /* NUL-terminate */
2353 *versionString=0;
2354}
2355
2356U_CAPI void U_EXPORT2
2357u_getVersion(UVersionInfo versionArray) {
57a6839d 2358 (void)copyright; // Suppress unused variable warning from clang.
b75a7d8f
A
2359 u_versionFromString(versionArray, U_ICU_VERSION);
2360}
2361
729e4ab9
A
2362/**
2363 * icucfg.h dependent code
2364 */
2365
2366#if U_ENABLE_DYLOAD
2367
51004dcb 2368#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9 2369
4388f060 2370#if HAVE_DLFCN_H
729e4ab9 2371
729e4ab9
A
2372#ifdef __MVS__
2373#ifndef __SUSV3
2374#define __SUSV3 1
2375#endif
2376#endif
2377#include <dlfcn.h>
2378#endif
2379
2380U_INTERNAL void * U_EXPORT2
2381uprv_dl_open(const char *libName, UErrorCode *status) {
2382 void *ret = NULL;
2383 if(U_FAILURE(*status)) return ret;
2384 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2385 if(ret==NULL) {
4388f060
A
2386#ifdef U_TRACE_DYLOAD
2387 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
729e4ab9
A
2388#endif
2389 *status = U_MISSING_RESOURCE_ERROR;
2390 }
2391 return ret;
2392}
2393
2394U_INTERNAL void U_EXPORT2
2395uprv_dl_close(void *lib, UErrorCode *status) {
2396 if(U_FAILURE(*status)) return;
2397 dlclose(lib);
2398}
2399
4388f060
A
2400U_INTERNAL UVoidFunction* U_EXPORT2
2401uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2402 union {
2403 UVoidFunction *fp;
2404 void *vp;
2405 } uret;
2406 uret.fp = NULL;
2407 if(U_FAILURE(*status)) return uret.fp;
2408 uret.vp = dlsym(lib, sym);
2409 if(uret.vp == NULL) {
2410#ifdef U_TRACE_DYLOAD
2411 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2412#endif
729e4ab9
A
2413 *status = U_MISSING_RESOURCE_ERROR;
2414 }
4388f060 2415 return uret.fp;
729e4ab9
A
2416}
2417
2418#else
2419
2420/* null (nonexistent) implementation. */
2421
2422U_INTERNAL void * U_EXPORT2
2423uprv_dl_open(const char *libName, UErrorCode *status) {
2424 if(U_FAILURE(*status)) return NULL;
2425 *status = U_UNSUPPORTED_ERROR;
2426 return NULL;
2427}
2428
2429U_INTERNAL void U_EXPORT2
2430uprv_dl_close(void *lib, UErrorCode *status) {
2431 if(U_FAILURE(*status)) return;
2432 *status = U_UNSUPPORTED_ERROR;
2433 return;
2434}
2435
2436
4388f060
A
2437U_INTERNAL UVoidFunction* U_EXPORT2
2438uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2439 if(U_SUCCESS(*status)) {
2440 *status = U_UNSUPPORTED_ERROR;
2441 }
2442 return (UVoidFunction*)NULL;
729e4ab9
A
2443}
2444
2445
2446
2447#endif
2448
51004dcb 2449#elif U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9
A
2450
2451U_INTERNAL void * U_EXPORT2
2452uprv_dl_open(const char *libName, UErrorCode *status) {
2453 HMODULE lib = NULL;
2454
2455 if(U_FAILURE(*status)) return NULL;
2456
4388f060 2457 lib = LoadLibraryA(libName);
729e4ab9
A
2458
2459 if(lib==NULL) {
2460 *status = U_MISSING_RESOURCE_ERROR;
2461 }
2462
2463 return (void*)lib;
2464}
2465
2466U_INTERNAL void U_EXPORT2
2467uprv_dl_close(void *lib, UErrorCode *status) {
2468 HMODULE handle = (HMODULE)lib;
2469 if(U_FAILURE(*status)) return;
2470
2471 FreeLibrary(handle);
2472
2473 return;
2474}
2475
2476
4388f060
A
2477U_INTERNAL UVoidFunction* U_EXPORT2
2478uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
729e4ab9 2479 HMODULE handle = (HMODULE)lib;
4388f060 2480 UVoidFunction* addr = NULL;
729e4ab9
A
2481
2482 if(U_FAILURE(*status) || lib==NULL) return NULL;
2483
4388f060 2484 addr = (UVoidFunction*)GetProcAddress(handle, sym);
729e4ab9
A
2485
2486 if(addr==NULL) {
2487 DWORD lastError = GetLastError();
2488 if(lastError == ERROR_PROC_NOT_FOUND) {
2489 *status = U_MISSING_RESOURCE_ERROR;
2490 } else {
2491 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2492 }
2493 }
2494
2495 return addr;
2496}
2497
2498
2499#else
2500
2501/* No dynamic loading set. */
2502
2503U_INTERNAL void * U_EXPORT2
2504uprv_dl_open(const char *libName, UErrorCode *status) {
2ca993e8 2505 (void)libName;
729e4ab9
A
2506 if(U_FAILURE(*status)) return NULL;
2507 *status = U_UNSUPPORTED_ERROR;
2508 return NULL;
2509}
2510
2511U_INTERNAL void U_EXPORT2
2512uprv_dl_close(void *lib, UErrorCode *status) {
2ca993e8 2513 (void)lib;
729e4ab9
A
2514 if(U_FAILURE(*status)) return;
2515 *status = U_UNSUPPORTED_ERROR;
2516 return;
2517}
2518
2519
4388f060
A
2520U_INTERNAL UVoidFunction* U_EXPORT2
2521uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2ca993e8
A
2522 (void)lib;
2523 (void)sym;
4388f060 2524 if(U_SUCCESS(*status)) {
729e4ab9 2525 *status = U_UNSUPPORTED_ERROR;
4388f060
A
2526 }
2527 return (UVoidFunction*)NULL;
729e4ab9
A
2528}
2529
729e4ab9
A
2530#endif /* U_ENABLE_DYLOAD */
2531
b75a7d8f
A
2532/*
2533 * Hey, Emacs, please set the following:
2534 *
2535 * Local Variables:
2536 * indent-tabs-mode: nil
2537 * End:
2538 *
2539 */