]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/putil.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
5*
2ca993e8 6* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13* Date Name Description
14* 04/14/97 aliu Creation.
15* 04/24/97 aliu Added getDefaultDataDirectory() and
16* getDefaultLocaleID().
17* 04/28/97 aliu Rewritten to assume Unix and apply general methods
18* for assumed case. Non-UNIX platforms must be
19* special-cased. Rewrote numeric methods dealing
20* with NaN and Infinity to be platform independent
21* over all IEEE 754 platforms.
22* 05/13/97 aliu Restored sign of timezone
23* (semantics are hours West of GMT)
24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25* nextDouble..
26* 07/22/98 stephen Added remainder, max, min, trunc
27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28* 08/24/98 stephen Added longBitsFromDouble
29* 09/08/98 stephen Minor changes for Mac Port
30* 03/02/99 stephen Removed openFile(). Added AS400 support.
31* Fixed EBCDIC tables
32* 04/15/99 stephen Converted to C.
33* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34* 08/04/99 jeffrey R. Added OS/2 changes
35* 11/15/99 helena Integrated S/390 IEEE support.
36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
46f4442e 38* 01/03/08 Steven L. Fake Time Support
b75a7d8f
A
39******************************************************************************
40*/
41
4388f060
A
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
b75a7d8f 45
f3c0d7a5
A
46// First, the platform type. Need this for U_PLATFORM.
47#include "unicode/platform.h"
48
49#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50/* tzset isn't defined in strict ANSI on MinGW. */
51#undef __STRICT_ANSI__
52#endif
53
54/*
55 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
56 */
57#include <time.h>
58
59#if !U_PLATFORM_USES_ONLY_WIN32_API
60#include <sys/time.h>
61#endif
62
63/* include the rest of the ICU headers */
b75a7d8f 64#include "unicode/putil.h"
374ca955
A
65#include "unicode/ustring.h"
66#include "putilimp.h"
67#include "uassert.h"
b75a7d8f
A
68#include "umutex.h"
69#include "cmemory.h"
70#include "cstring.h"
71#include "locmap.h"
72#include "ucln_cmn.h"
b331163b 73#include "charstr.h"
73c04bcf
A
74
75/* Include standard headers. */
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <math.h>
80#include <locale.h>
81#include <float.h>
4388f060
A
82
83#ifndef U_COMMON_IMPLEMENTATION
84#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
85#endif
86
b75a7d8f
A
87
88/* include system headers */
4388f060
A
89#if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
f3c0d7a5 95#ifndef WIN32_LEAN_AND_MEAN
b75a7d8f 96# define WIN32_LEAN_AND_MEAN
f3c0d7a5 97#endif
374ca955 98# define VC_EXTRALEAN
b75a7d8f
A
99# define NOUSER
100# define NOSERVICE
101# define NOIME
102# define NOMCX
103# include <windows.h>
0f5d89e8 104# include "unicode/uloc.h"
f3c0d7a5 105#if U_PLATFORM_HAS_WINUWP_API == 0
73c04bcf 106# include "wintz.h"
f3c0d7a5
A
107#else // U_PLATFORM_HAS_WINUWP_API
108typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
109#include <Windows.Globalization.h>
110#include <windows.system.userprofile.h>
0f5d89e8
A
111#include <wrl/wrappers/corewrappers.h>
112#include <wrl/client.h>
f3c0d7a5
A
113
114using namespace ABI::Windows::Foundation;
115using namespace Microsoft::WRL;
116using namespace Microsoft::WRL::Wrappers;
117#endif
4388f060 118#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
119# include <float.h>
120# include <qusec.h> /* error code structure */
121# include <qusrjobi.h>
122# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
46f4442e 123# include <mih/testptr.h> /* For uprv_maximumPtr */
4388f060
A
124#elif U_PLATFORM == U_PF_OS390
125# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
51004dcb 126#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
4388f060
A
127# include <limits.h>
128# include <unistd.h>
51004dcb
A
129# if U_PLATFORM == U_PF_SOLARIS
130# ifndef _XPG4_2
131# define _XPG4_2
132# endif
133# endif
4388f060
A
134#elif U_PLATFORM == U_PF_QNX
135# include <sys/neutrino.h>
374ca955
A
136#endif
137
b75a7d8f 138/*
374ca955
A
139 * Only include langinfo.h if we have a way to get the codeset. If we later
140 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
141 *
142 */
143
144#if U_HAVE_NL_LANGINFO_CODESET
145#include <langinfo.h>
b75a7d8f
A
146#endif
147
729e4ab9
A
148/**
149 * Simple things (presence of functions, etc) should just go in configure.in and be added to
150 * icucfg.h via autoheader.
151 */
4388f060
A
152#if U_PLATFORM_IMPLEMENTS_POSIX
153# if U_PLATFORM == U_PF_OS400
154# define HAVE_DLFCN_H 0
155# define HAVE_DLOPEN 0
156# else
157# ifndef HAVE_DLFCN_H
158# define HAVE_DLFCN_H 1
159# endif
160# ifndef HAVE_DLOPEN
161# define HAVE_DLOPEN 1
162# endif
163# endif
164# ifndef HAVE_GETTIMEOFDAY
165# define HAVE_GETTIMEOFDAY 1
166# endif
167#else
168# define HAVE_DLFCN_H 0
169# define HAVE_DLOPEN 0
170# define HAVE_GETTIMEOFDAY 0
729e4ab9
A
171#endif
172
b331163b 173U_NAMESPACE_USE
4388f060 174
b75a7d8f
A
175/* Define the extension for data files, again... */
176#define DATA_TYPE "dat"
177
178/* Leave this copyright notice here! */
179static const char copyright[] = U_COPYRIGHT_STRING;
180
181/* floating point implementations ------------------------------------------- */
182
183/* We return QNAN rather than SNAN*/
184#define SIGN 0x80000000U
b75a7d8f 185
73c04bcf
A
/*
 * Helper union for spelling out double constants by their raw 64-bit
 * pattern. Brace initialization fills the first member, so the integer
 * view has to stay first for the initializers below to work (C89 rule).
 */
typedef union {
    int64_t i64;    /* raw bit pattern; keep as the first member */
    double  d64;    /* the same storage read as a double */
} BitPatternConversion;

/* Bit pattern 0x7FF8000000000000, returned by uprv_getNaN(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern 0x7FF0000000000000, returned by uprv_getInfinity(). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
b75a7d8f
A
193
194/*---------------------------------------------------------------------------
195 Platform utilities
196 Our general strategy is to assume we're on a POSIX platform. Platforms which
197 are non-POSIX must declare themselves so. The default POSIX implementation
198 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
199 functions).
200 ---------------------------------------------------------------------------*/
201
b331163b 202#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
b75a7d8f
A
203# undef U_POSIX_LOCALE
204#else
205# define U_POSIX_LOCALE 1
206#endif
207
73c04bcf
A
208/*
209 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
210 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
211*/
212#if !IEEE_754
b75a7d8f
A
/*
 * Returns a pointer to the n bytes of *d selected as the "top" ones:
 * the start of the object when U_IS_BIG_ENDIAN is set, otherwise the
 * last n bytes of the object.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
222
/*
 * Returns a pointer to the n bytes of *d selected as the "bottom" ones:
 * the last n bytes of the object when U_IS_BIG_ENDIAN is set, otherwise
 * the start of the object.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
729e4ab9
A
232#endif /* !IEEE_754 */
233
234#if IEEE_754
235static UBool
236u_signBit(double d) {
237 uint8_t hiByte;
238#if U_IS_BIG_ENDIAN
239 hiByte = *(uint8_t *)&d;
240#else
241 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
242#endif
243 return (hiByte & 0x80) != 0;
244}
245#endif
246
247
b75a7d8f 248
46f4442e 249#if defined (U_DEBUG_FAKETIME)
729e4ab9 250/* Override the clock to test things without having to move the system clock.
46f4442e
A
251 * Assumes POSIX gettimeofday() will function
252 */
253UDate fakeClock_t0 = 0; /** Time to start the clock from **/
254UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
255UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
51004dcb 256static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
46f4442e
A
257
258static UDate getUTCtime_real() {
259 struct timeval posixTime;
260 gettimeofday(&posixTime, NULL);
261 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
262}
263
264static UDate getUTCtime_fake() {
265 umtx_lock(&fakeClockMutex);
266 if(!fakeClock_set) {
267 UDate real = getUTCtime_real();
268 const char *fake_start = getenv("U_FAKETIME_START");
729e4ab9 269 if((fake_start!=NULL) && (fake_start[0]!=0)) {
46f4442e 270 sscanf(fake_start,"%lf",&fakeClock_t0);
729e4ab9
A
271 fakeClock_dt = fakeClock_t0 - real;
272 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
273 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
274 fakeClock_t0, fake_start, fakeClock_dt, real);
275 } else {
276 fakeClock_dt = 0;
277 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
278 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
46f4442e 279 }
46f4442e
A
280 fakeClock_set = TRUE;
281 }
282 umtx_unlock(&fakeClockMutex);
729e4ab9 283
46f4442e
A
284 return getUTCtime_real() + fakeClock_dt;
285}
286#endif
287
4388f060 288#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
289typedef union {
290 int64_t int64;
291 FILETIME fileTime;
292} FileTimeConversion; /* This is like a ULARGE_INTEGER */
293
294/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
295#define EPOCH_BIAS INT64_C(116444736000000000)
296#define HECTONANOSECOND_PER_MILLISECOND 10000
297
298#endif
299
b75a7d8f
A
300/*---------------------------------------------------------------------------
301 Universal Implementations
73c04bcf
A
302 These are designed to work on all platforms. Try these, and if they
303 don't work on your platform, then special case your platform with new
b75a7d8f 304 implementations.
73c04bcf 305---------------------------------------------------------------------------*/
b75a7d8f 306
374ca955 307U_CAPI UDate U_EXPORT2
b75a7d8f
A
308uprv_getUTCtime()
309{
46f4442e
A
310#if defined(U_DEBUG_FAKETIME)
311 return getUTCtime_fake(); /* Hook for overriding the clock */
729e4ab9
A
312#else
313 return uprv_getRawUTCtime();
314#endif
315}
316
317/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
318U_CAPI UDate U_EXPORT2
319uprv_getRawUTCtime()
320{
b331163b 321#if U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
322
323 FileTimeConversion winTime;
324 GetSystemTimeAsFileTime(&winTime.fileTime);
325 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
b75a7d8f 326#else
729e4ab9 327
4388f060 328#if HAVE_GETTIMEOFDAY
73c04bcf
A
329 struct timeval posixTime;
330 gettimeofday(&posixTime, NULL);
331 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
729e4ab9 332#else
b75a7d8f
A
333 time_t epochtime;
334 time(&epochtime);
374ca955 335 return (UDate)epochtime * U_MILLIS_PER_SECOND;
b75a7d8f 336#endif
729e4ab9
A
337
338#endif
b75a7d8f
A
339}
340
341/*-----------------------------------------------------------------------------
342 IEEE 754
343 These methods detect and return NaN and infinity values for doubles
344 conforming to IEEE 754. Platforms which support this standard include X86,
345 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
346 If this doesn't work on your platform, you have non-IEEE floating-point, and
347 will need to code your own versions. A naive implementation is to return 0.0
348 for getNaN and getInfinity, and false for isNaN and isInfinite.
349 ---------------------------------------------------------------------------*/
350
351U_CAPI UBool U_EXPORT2
352uprv_isNaN(double number)
353{
354#if IEEE_754
73c04bcf
A
355 BitPatternConversion convertedNumber;
356 convertedNumber.d64 = number;
b75a7d8f 357 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
73c04bcf 358 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
b75a7d8f 359
4388f060 360#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
361 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
362 sizeof(uint32_t));
363 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
364 sizeof(uint32_t));
365
366 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
367 (lowBits == 0x00000000L);
368
369#else
370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
371 /* you'll need to replace this default implementation with what's correct*/
372 /* for your platform.*/
373 return number != number;
374#endif
375}
376
377U_CAPI UBool U_EXPORT2
378uprv_isInfinite(double number)
379{
380#if IEEE_754
73c04bcf
A
381 BitPatternConversion convertedNumber;
382 convertedNumber.d64 = number;
383 /* Infinity is exactly 0x7FF0000000000000U. */
384 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
4388f060 385#elif U_PLATFORM == U_PF_OS390
b75a7d8f
A
386 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
387 sizeof(uint32_t));
388 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
389 sizeof(uint32_t));
390
391 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
392
393#else
394 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
395 /* value, you'll need to replace this default implementation with what's*/
396 /* correct for your platform.*/
397 return number == (2.0 * number);
398#endif
399}
400
401U_CAPI UBool U_EXPORT2
402uprv_isPositiveInfinity(double number)
403{
4388f060 404#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
405 return (UBool)(number > 0 && uprv_isInfinite(number));
406#else
407 return uprv_isInfinite(number);
408#endif
409}
410
411U_CAPI UBool U_EXPORT2
412uprv_isNegativeInfinity(double number)
413{
4388f060 414#if IEEE_754 || U_PLATFORM == U_PF_OS390
b75a7d8f
A
415 return (UBool)(number < 0 && uprv_isInfinite(number));
416
417#else
418 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
419 sizeof(uint32_t));
420 return((highBits & SIGN) && uprv_isInfinite(number));
421
422#endif
423}
424
425U_CAPI double U_EXPORT2
426uprv_getNaN()
427{
4388f060 428#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 429 return gNan.d64;
b75a7d8f
A
430#else
431 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
432 /* you'll need to replace this default implementation with what's correct*/
433 /* for your platform.*/
434 return 0.0;
435#endif
436}
437
438U_CAPI double U_EXPORT2
439uprv_getInfinity()
440{
4388f060 441#if IEEE_754 || U_PLATFORM == U_PF_OS390
73c04bcf 442 return gInf.d64;
b75a7d8f
A
443#else
444 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
445 /* value, you'll need to replace this default implementation with what's*/
446 /* correct for your platform.*/
447 return 0.0;
448#endif
449}
450
451U_CAPI double U_EXPORT2
452uprv_floor(double x)
453{
454 return floor(x);
455}
456
457U_CAPI double U_EXPORT2
458uprv_ceil(double x)
459{
460 return ceil(x);
461}
462
463U_CAPI double U_EXPORT2
464uprv_round(double x)
465{
466 return uprv_floor(x + 0.5);
467}
468
469U_CAPI double U_EXPORT2
470uprv_fabs(double x)
471{
472 return fabs(x);
473}
474
475U_CAPI double U_EXPORT2
476uprv_modf(double x, double* y)
477{
478 return modf(x, y);
479}
480
481U_CAPI double U_EXPORT2
482uprv_fmod(double x, double y)
483{
484 return fmod(x, y);
485}
486
487U_CAPI double U_EXPORT2
488uprv_pow(double x, double y)
489{
490 /* This is declared as "double pow(double x, double y)" */
491 return pow(x, y);
492}
493
494U_CAPI double U_EXPORT2
495uprv_pow10(int32_t x)
496{
497 return pow(10.0, (double)x);
498}
499
500U_CAPI double U_EXPORT2
501uprv_fmax(double x, double y)
502{
503#if IEEE_754
b75a7d8f
A
504 /* first handle NaN*/
505 if(uprv_isNaN(x) || uprv_isNaN(y))
506 return uprv_getNaN();
507
508 /* check for -0 and 0*/
729e4ab9 509 if(x == 0.0 && y == 0.0 && u_signBit(x))
b75a7d8f
A
510 return y;
511
512#endif
513
729e4ab9 514 /* this should work for all flt point w/o NaN and Inf special cases */
b75a7d8f
A
515 return (x > y ? x : y);
516}
517
b75a7d8f
A
518U_CAPI double U_EXPORT2
519uprv_fmin(double x, double y)
520{
521#if IEEE_754
b75a7d8f
A
522 /* first handle NaN*/
523 if(uprv_isNaN(x) || uprv_isNaN(y))
524 return uprv_getNaN();
525
526 /* check for -0 and 0*/
729e4ab9 527 if(x == 0.0 && y == 0.0 && u_signBit(y))
b75a7d8f
A
528 return y;
529
530#endif
531
532 /* this should work for all flt point w/o NaN and Inf special cases */
533 return (x > y ? y : x);
534}
535
0f5d89e8
A
536U_CAPI UBool U_EXPORT2
537uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
538 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
539 // This function could be optimized by calling one of those primitives.
540 auto a64 = static_cast<int64_t>(a);
541 auto b64 = static_cast<int64_t>(b);
542 int64_t res64 = a64 + b64;
543 *res = static_cast<int32_t>(res64);
544 return res64 != *res;
545}
546
547U_CAPI UBool U_EXPORT2
548uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
549 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
550 // This function could be optimized by calling one of those primitives.
551 auto a64 = static_cast<int64_t>(a);
552 auto b64 = static_cast<int64_t>(b);
553 int64_t res64 = a64 * b64;
554 *res = static_cast<int32_t>(res64);
555 return res64 != *res;
556}
557
b75a7d8f
A
558/**
559 * Truncates the given double.
560 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
561 * This is different than calling floor() or ceil():
562 * floor(3.3) = 3, floor(-3.3) = -4
563 * ceil(3.3) = 4, ceil(-3.3) = -3
564 */
565U_CAPI double U_EXPORT2
566uprv_trunc(double d)
567{
568#if IEEE_754
b75a7d8f
A
569 /* handle error cases*/
570 if(uprv_isNaN(d))
571 return uprv_getNaN();
572 if(uprv_isInfinite(d))
573 return uprv_getInfinity();
574
729e4ab9 575 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
b75a7d8f
A
576 return ceil(d);
577 else
578 return floor(d);
579
580#else
581 return d >= 0 ? floor(d) : ceil(d);
582
583#endif
584}
585
586/**
587 * Return the largest positive number that can be represented by an integer
588 * type of arbitrary bit length.
589 */
590U_CAPI double U_EXPORT2
591uprv_maxMantissa(void)
592{
593 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
594}
595
b75a7d8f
A
596U_CAPI double U_EXPORT2
597uprv_log(double d)
598{
599 return log(d);
600}
601
46f4442e
A
602U_CAPI void * U_EXPORT2
603uprv_maximumPtr(void * base)
b75a7d8f 604{
4388f060 605#if U_PLATFORM == U_PF_OS400
46f4442e 606 /*
729e4ab9 607 * With the provided function we should never be out of range of a given segment
46f4442e
A
608 * (a traditional/typical segment that is). Our segments have 5 bytes for the
609 * id and 3 bytes for the offset. The key is that the casting takes care of
610 * only retrieving the offset portion minus x1000. Hence, the smallest offset
611 * seen in a program is x001000 and when casted to an int would be 0.
612 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
613 *
729e4ab9 614 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
46f4442e 615 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
729e4ab9
A
616 * This function determines the activation based on the pointer that is passed in and
617 * calculates the appropriate maximum available size for
46f4442e
A
618 * each pointer type (TERASPACE and non-TERASPACE)
619 *
620 * Unlike other operating systems, the pointer model isn't determined at
621 * compile time on i5/OS.
622 */
623 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
624 /* if it is a TERASPACE pointer the max is 2GB - 4k */
625 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
b75a7d8f 626 }
46f4442e
A
627 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
628 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
b75a7d8f 629
46f4442e 630#else
729e4ab9 631 return U_MAX_PTR(base);
374ca955 632#endif
46f4442e 633}
b75a7d8f
A
634
635/*---------------------------------------------------------------------------
636 Platform-specific Implementations
637 Try these, and if they don't work on your platform, then special case your
638 platform with new implementations.
639 ---------------------------------------------------------------------------*/
640
b75a7d8f
A
641/* Generic time zone layer -------------------------------------------------- */
642
643/* Time zone utilities */
644U_CAPI void U_EXPORT2
645uprv_tzset()
646{
4388f060 647#if defined(U_TZSET)
b75a7d8f
A
648 U_TZSET();
649#else
650 /* no initialization*/
651#endif
652}
653
654U_CAPI int32_t U_EXPORT2
655uprv_timezone()
656{
374ca955 657#ifdef U_TIMEZONE
b75a7d8f
A
658 return U_TIMEZONE;
659#else
660 time_t t, t1, t2;
661 struct tm tmrec;
b75a7d8f
A
662 int32_t tdiff = 0;
663
664 time(&t);
374ca955 665 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
57a6839d
A
666#if U_PLATFORM != U_PF_IPHONE
667 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
668#endif
b75a7d8f 669 t1 = mktime(&tmrec); /* local time in seconds*/
374ca955 670 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
b75a7d8f
A
671 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
672 tdiff = t2 - t1;
57a6839d
A
673
674#if U_PLATFORM != U_PF_IPHONE
b75a7d8f 675 /* imitate NT behaviour, which returns same timezone offset to GMT for
51004dcb
A
676 winter and summer.
677 This does not work on all platforms. For instance, on glibc on Linux
678 and on Mac OS 10.5, tdiff calculated above remains the same
57a6839d
A
679 regardless of whether DST is in effect or not. iOS is another
680 platform where this does not work. Linux + glibc and Mac OS 10.5
681 have U_TIMEZONE defined so that this code is not reached.
682 */
b75a7d8f
A
683 if (dst_checked)
684 tdiff += 3600;
57a6839d 685#endif
b75a7d8f
A
686 return tdiff;
687#endif
688}
689
374ca955 690/* Note that U_TZNAME does *not* have to be tzname, but if it is,
729e4ab9 691 some platforms need to have it declared here. */
b75a7d8f 692
f3c0d7a5 693#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
374ca955
A
694/* RS6000 and others reject char **tzname. */
695extern U_IMPORT char *U_TZNAME[];
696#endif
697
57a6839d 698#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
73c04bcf 699/* These platforms are likely to use Olson timezone IDs. */
0f5d89e8
A
700/* common targets of the symbolic link at TZDEFAULT are:
701 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
702 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
703 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
704 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
705 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
706 * To avoid checking lots of paths, just check that the target path
707 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
708 */
709
73c04bcf 710#define CHECK_LOCALTIME_LINK 1
4388f060 711#if U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
712#include <tzfile.h>
713#define TZZONEINFO (TZDIR "/")
51004dcb
A
714#elif U_PLATFORM == U_PF_SOLARIS
715#define TZDEFAULT "/etc/localtime"
716#define TZZONEINFO "/usr/share/lib/zoneinfo/"
717#define TZ_ENV_CHECK "localtime"
46f4442e
A
718#else
719#define TZDEFAULT "/etc/localtime"
720#define TZZONEINFO "/usr/share/zoneinfo/"
721#endif
0f5d89e8 722#define TZZONEINFOTAIL "/zoneinfo/"
729e4ab9
A
723#if U_HAVE_DIRENT_H
724#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
725/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
726 symlinked to /etc/localtime, which makes searchForTZFile return
727 'localtime' when it's the first match. */
728#define TZFILE_SKIP2 "localtime"
729#define SEARCH_TZFILE
730#include <dirent.h> /* Needed to search through system timezone files */
731#endif
73c04bcf
A
732static char gTimeZoneBuffer[PATH_MAX];
733static char *gTimeZoneBufferPtr = NULL;
734#endif
735
4388f060 736#if !U_PLATFORM_USES_ONLY_WIN32_API
73c04bcf
A
737#define isNonDigit(ch) (ch < '0' || '9' < ch)
738static UBool isValidOlsonID(const char *id) {
739 int32_t idx = 0;
740
741 /* Determine if this is something like Iceland (Olson ID)
742 or AST4ADT (non-Olson ID) */
743 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
744 idx++;
745 }
746
747 /* If we went through the whole string, then it might be okay.
748 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
749 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
750 The rest of the time it could be an Olson ID. George */
751 return (UBool)(id[idx] == 0
752 || uprv_strcmp(id, "PST8PDT") == 0
753 || uprv_strcmp(id, "MST7MDT") == 0
754 || uprv_strcmp(id, "CST6CDT") == 0
755 || uprv_strcmp(id, "EST5EDT") == 0);
756}
729e4ab9
A
757
758/* On some Unix-like OS, 'posix' subdirectory in
759 /usr/share/zoneinfo replicates the top-level contents. 'right'
760 subdirectory has the same set of files, but individual files
761 are different from those in the top-level directory or 'posix'
762 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
763 has files for UTC.
764 When the first match for /etc/localtime is in either of them
765 (usually in posix because 'right' has different file contents),
766 or TZ environment variable points to one of them, createTimeZone
767 fails because, say, 'posix/America/New_York' is not an Olson
768 timezone id ('America/New_York' is). So, we have to skip
769 'posix/' and 'right/' at the beginning. */
770static void skipZoneIDPrefix(const char** id) {
771 if (uprv_strncmp(*id, "posix/", 6) == 0
772 || uprv_strncmp(*id, "right/", 6) == 0)
773 {
774 *id += 6;
775 }
776}
b75a7d8f
A
777#endif
778
4388f060 779#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
46f4442e
A
780
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)

/* Values stored in OffsetZoneMapping.daylightType. */
enum {
    U_DAYLIGHT_NONE     = 0,   /* no daylight saving */
    U_DAYLIGHT_JUNE     = 1,   /* daylight in June */
    U_DAYLIGHT_DECEMBER = 2    /* daylight in December */
};

/* One row of the table that maps short zone names plus offset to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset in seconds, e.g. 28800 for US/Pacific in the table */
    int32_t daylightType;   /* one of the U_DAYLIGHT_* values */
    const char *stdID;      /* short standard-time name, e.g. "PST" */
    const char *dstID;      /* short daylight-time name, e.g. "PDT" */
    const char *olsonID;    /* Olson ID returned for a match */
} OffsetZoneMapping;
791
46f4442e
A
792/*
793This list tries to disambiguate a set of abbreviated timezone IDs and offsets
794and maps it to an Olson ID.
795Before adding anything to this list, take a look at
796icu/source/tools/tzcode/tz.alias
797Sometimes no daylight savings (0) is important to define due to aliases.
798This list can be tested with icu/source/test/compat/tzone.pl
799More values could be added to daylightType to increase precision.
800*/
801static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
802 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
803 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
804 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
805 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
806 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
807 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
808 {-36000, 2, "EST", "EST", "Australia/Sydney"},
809 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
810 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
811 {-34200, 2, "CST", "CST", "Australia/South"},
812 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
813 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
814 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
815 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
816 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
817 {-28800, 2, "WST", "WST", "Australia/West"},
818 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
819 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
820 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
821 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
822 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
823 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
824 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
825 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
826 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
827 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
828 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
829 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
830 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
831 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
832 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
833 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
834 {0, 1, "GMT", "IST", "Europe/Dublin"},
835 {0, 1, "GMT", "BST", "Europe/London"},
836 {0, 0, "WET", "WEST", "Africa/Casablanca"},
837 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
838 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
839 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
840 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
841 {10800, 2, "UYT", "UYST", "America/Montevideo"},
842 {10800, 1, "WGT", "WGST", "America/Godthab"},
843 {10800, 2, "BRT", "BRST", "Brazil/East"},
844 {12600, 1, "NST", "NDT", "America/St_Johns"},
845 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
846 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
847 {14400, 2, "CLT", "CLST", "Chile/Continental"},
848 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
849 {14400, 2, "PYT", "PYST", "America/Asuncion"},
850 {18000, 1, "CST", "CDT", "America/Havana"},
851 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
852 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
853 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
854 {21600, 0, "CST", "CDT", "America/Guatemala"},
855 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
856 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
857 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
858 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
859 {32400, 1, "AKST", "AKDT", "US/Alaska"},
860 {36000, 1, "HAST", "HADT", "US/Aleutian"}
861};
862
863/*#define DEBUG_TZNAME*/
864
865static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
866{
867 int32_t idx;
868#ifdef DEBUG_TZNAME
869 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
870#endif
b331163b 871 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
46f4442e
A
872 {
873 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
874 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
875 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
876 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
877 {
878 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
879 }
880 }
881 return NULL;
882}
883#endif
884
729e4ab9 885#ifdef SEARCH_TZFILE
729e4ab9
A
886#define MAX_READ_SIZE 512
887
888typedef struct DefaultTZInfo {
889 char* defaultTZBuffer;
890 int64_t defaultTZFileSize;
891 FILE* defaultTZFilePtr;
892 UBool defaultTZstatus;
893 int32_t defaultTZPosition;
894} DefaultTZInfo;
895
896/*
897 * This method compares the two files given to see if they are a match.
898 * It is currently use to compare two TZ files.
899 */
900static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
901 FILE* file;
902 int64_t sizeFile;
903 int64_t sizeFileLeft;
904 int32_t sizeFileRead;
905 int32_t sizeFileToRead;
906 char bufferFile[MAX_READ_SIZE];
907 UBool result = TRUE;
908
909 if (tzInfo->defaultTZFilePtr == NULL) {
910 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
911 }
912 file = fopen(TZFileName, "r");
913
914 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
915
916 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
917 /* First check that the file size are equal. */
918 if (tzInfo->defaultTZFileSize == 0) {
919 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
920 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
921 }
922 fseek(file, 0, SEEK_END);
923 sizeFile = ftell(file);
924 sizeFileLeft = sizeFile;
925
926 if (sizeFile != tzInfo->defaultTZFileSize) {
927 result = FALSE;
928 } else {
929 /* Store the data from the files in seperate buffers and
930 * compare each byte to determine equality.
931 */
932 if (tzInfo->defaultTZBuffer == NULL) {
933 rewind(tzInfo->defaultTZFilePtr);
934 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
4388f060 935 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
729e4ab9
A
936 }
937 rewind(file);
938 while(sizeFileLeft > 0) {
939 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
940 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
941
942 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
943 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
944 result = FALSE;
945 break;
946 }
947 sizeFileLeft -= sizeFileRead;
948 tzInfo->defaultTZPosition += sizeFileRead;
949 }
950 }
951 } else {
952 result = FALSE;
953 }
954
955 if (file != NULL) {
956 fclose(file);
957 }
958
959 return result;
960}
f3c0d7a5
A
961
962
729e4ab9
A
963/* dirent also lists two entries: "." and ".." that we can safely ignore. */
964#define SKIP1 "."
965#define SKIP2 ".."
f3c0d7a5
A
966static UBool U_CALLCONV putil_cleanup(void);
967static CharString *gSearchTZFileResult = NULL;
968
969/*
970 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
971 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
972 */
729e4ab9 973static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
0f5d89e8 974 DIR* dirp = NULL;
729e4ab9 975 struct dirent* dirEntry = NULL;
729e4ab9 976 char* result = NULL;
0f5d89e8
A
977 UErrorCode status = U_ZERO_ERROR;
978
979 /* Save the current path */
980 CharString curpath(path, -1, status);
981 if (U_FAILURE(status)) {
982 goto cleanupAndReturn;
983 }
984
985 dirp = opendir(path);
729e4ab9 986 if (dirp == NULL) {
0f5d89e8 987 goto cleanupAndReturn;
729e4ab9
A
988 }
989
f3c0d7a5
A
990 if (gSearchTZFileResult == NULL) {
991 gSearchTZFileResult = new CharString;
992 if (gSearchTZFileResult == NULL) {
0f5d89e8 993 goto cleanupAndReturn;
f3c0d7a5
A
994 }
995 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
996 }
997
729e4ab9
A
998 /* Check each entry in the directory. */
999 while((dirEntry = readdir(dirp)) != NULL) {
1000 const char* dirName = dirEntry->d_name;
1001 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
1002 /* Create a newpath with the new entry to test each entry in the directory. */
f3c0d7a5
A
1003 CharString newpath(curpath, status);
1004 newpath.append(dirName, -1, status);
1005 if (U_FAILURE(status)) {
0f5d89e8 1006 break;
f3c0d7a5 1007 }
729e4ab9 1008
0f5d89e8 1009 DIR* subDirp = NULL;
f3c0d7a5 1010 if ((subDirp = opendir(newpath.data())) != NULL) {
729e4ab9
A
1011 /* If this new path is a directory, make a recursive call with the newpath. */
1012 closedir(subDirp);
f3c0d7a5
A
1013 newpath.append('/', status);
1014 if (U_FAILURE(status)) {
0f5d89e8 1015 break;
f3c0d7a5
A
1016 }
1017 result = searchForTZFile(newpath.data(), tzInfo);
729e4ab9
A
1018 /*
1019 Have to get out here. Otherwise, we'd keep looking
1020 and return the first match in the top-level directory
1021 if there's a match in the top-level. If not, this function
1022 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1023 It worked without this in most cases because we have a fallback of calling
1024 localtime_r to figure out the default timezone.
1025 */
1026 if (result != NULL)
1027 break;
1028 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
f3c0d7a5
A
1029 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1030 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1031 if (amountToSkip > newpath.length()) {
1032 amountToSkip = newpath.length();
1033 }
1034 const char* zoneid = newpath.data() + amountToSkip;
729e4ab9 1035 skipZoneIDPrefix(&zoneid);
f3c0d7a5
A
1036 gSearchTZFileResult->clear();
1037 gSearchTZFileResult->append(zoneid, -1, status);
1038 if (U_FAILURE(status)) {
0f5d89e8 1039 break;
f3c0d7a5
A
1040 }
1041 result = gSearchTZFileResult->data();
729e4ab9
A
1042 /* Get out after the first one found. */
1043 break;
1044 }
1045 }
1046 }
1047 }
0f5d89e8
A
1048
1049 cleanupAndReturn:
1050 if (dirp) {
1051 closedir(dirp);
1052 }
729e4ab9
A
1053 return result;
1054}
1055#endif
f3c0d7a5
A
1056
1057U_CAPI void U_EXPORT2
1058uprv_tzname_clear_cache()
1059{
1060#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1061 gTimeZoneBufferPtr = NULL;
1062#endif
1063}
1064
1065// With the Universal Windows Platform we can just ask Windows for the name
1066#if U_PLATFORM_HAS_WINUWP_API
1067U_CAPI const char* U_EXPORT2
1068uprv_getWindowsTimeZone()
1069{
1070 // Get default Windows timezone.
1071 ComPtr<IInspectable> calendar;
1072 HRESULT hr = RoActivateInstance(
1073 HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(),
1074 &calendar);
1075 if (SUCCEEDED(hr))
1076 {
1077 ComPtr<ABI::Windows::Globalization::ITimeZoneOnCalendar> timezone;
1078 hr = calendar.As(&timezone);
1079 if (SUCCEEDED(hr))
1080 {
1081 HString timezoneString;
1082 hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
1083 if (SUCCEEDED(hr))
1084 {
0f5d89e8 1085 int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
f3c0d7a5
A
1086 char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
1087 if (asciiId != nullptr)
1088 {
1089 u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length);
1090 return asciiId;
1091 }
1092 }
1093 }
1094 }
1095
1096 // Failed
1097 return nullptr;
1098}
1099#endif
1100
/*
 * Returns a time zone identifier for the host's default time zone.
 *
 * Windows-only builds: returns the ID from uprv_getWindowsTimeZone() (UWP) or
 * uprv_detectWindowsTimeZone(); if that is NULL, returns a uprv_strdup'ed
 * "Etc/UTC" when U_TZNAME is not defined, otherwise a uprv_strdup'ed U_TZNAME[n].
 *
 * Other builds, in order:
 *   1. the TZ environment variable, when isValidOlsonID() accepts it
 *      (a leading ':' and any zone ID prefix are skipped);
 *   2. with CHECK_LOCALTIME_LINK: the cached gTimeZoneBufferPtr, else the part
 *      of the TZDEFAULT symlink target after TZZONEINFOTAIL, else (only when
 *      readlink fails and SEARCH_TZFILE is defined) the result of searchForTZFile();
 *   3. with U_TZNAME: the Olson ID that remapShortTimeZone() finds for the
 *      U_TZNAME abbreviations, else U_TZNAME[n];
 *   4. the empty string.
 *
 * n is used only as an index into U_TZNAME.
 */
374ca955 1101U_CAPI const char* U_EXPORT2
b75a7d8f
A
1102uprv_tzname(int n)
1103{
0f5d89e8 1104 (void)n; // Avoid unreferenced parameter warning.
46f4442e 1105 const char *tzid = NULL;
/* Windows-only platforms: ask the OS; everything after #else is for other platforms. */
4388f060 1106#if U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5
A
1107#if U_PLATFORM_HAS_WINUWP_API > 0
1108 tzid = uprv_getWindowsTimeZone();
1109#else
46f4442e 1110 tzid = uprv_detectWindowsTimeZone();
f3c0d7a5 1111#endif
73c04bcf 1112
46f4442e
A
1113 if (tzid != NULL) {
1114 return tzid;
b75a7d8f 1115 }
f3c0d7a5
A
1116
1117#ifndef U_TZNAME
1118 // The return value is free'd in timezone.cpp on Windows because
1119 // the other code path returns a pointer to a heap location.
1120 // If we don't have a name already, then tzname wouldn't be any
1121 // better, so just fall back.
1122 return uprv_strdup("Etc/UTC");
1123#endif // !U_TZNAME
1124
73c04bcf 1125#else
b75a7d8f 1126
4388f060 1127/*#if U_PLATFORM_IS_DARWIN_BASED
374ca955
A
1128 int ret;
1129
46f4442e
A
1130 tzid = getenv("TZFILE");
1131 if (tzid != NULL) {
1132 return tzid;
374ca955 1133 }
73c04bcf 1134#endif*/
374ca955 1135
46f4442e
A
1136/* This code can be temporarily disabled to test tzname resolution later on. */
1137#ifndef DEBUG_TZNAME
1138 tzid = getenv("TZ");
51004dcb
A
1139 if (tzid != NULL && isValidOlsonID(tzid)
1140#if U_PLATFORM == U_PF_SOLARIS
1141 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1142 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1143#endif
1144 ) {
2ca993e8
A
1145 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1146 if (tzid[0] == ':') {
1147 tzid++;
1148 }
73c04bcf 1149 /* This might be a good Olson ID. */
729e4ab9 1150 skipZoneIDPrefix(&tzid);
46f4442e 1151 return tzid;
374ca955 1152 }
73c04bcf 1153 /* else U_TZNAME will give a better result. */
46f4442e 1154#endif
374ca955 1155
/* Next source: the cached ID, or the target of the TZDEFAULT symlink. */
4388f060 1156#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
73c04bcf
A
1157 /* Caller must handle threading issues */
1158 if (gTimeZoneBufferPtr == NULL) {
1159 /*
1160 This is a trick to look at the name of the link to get the Olson ID
1161 because the tzfile contents is underspecified.
1162 This isn't guaranteed to work because it may not be a symlink.
1163 */
f3c0d7a5 1164 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
374ca955 1165 if (0 < ret) {
0f5d89e8 1166 int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
73c04bcf 1167 gTimeZoneBuffer[ret] = 0;
0f5d89e8
A
1168 char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1169
1170 if (tzZoneInfoTailPtr != NULL
1171 && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
57a6839d 1172 {
0f5d89e8 1173 return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
57a6839d 1174 }
729e4ab9
A
1175 } else {
1176#if defined(SEARCH_TZFILE)
1177 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1178 if (tzInfo != NULL) {
1179 tzInfo->defaultTZBuffer = NULL;
1180 tzInfo->defaultTZFileSize = 0;
1181 tzInfo->defaultTZFilePtr = NULL;
1182 tzInfo->defaultTZstatus = FALSE;
1183 tzInfo->defaultTZPosition = 0;
1184
1185 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1186
1187 /* Free previously allocated memory */
1188 if (tzInfo->defaultTZBuffer != NULL) {
1189 uprv_free(tzInfo->defaultTZBuffer);
1190 }
1191 if (tzInfo->defaultTZFilePtr != NULL) {
1192 fclose(tzInfo->defaultTZFilePtr);
1193 }
1194 uprv_free(tzInfo);
1195 }
1196
1197 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1198 return gTimeZoneBufferPtr;
1199 }
1200#endif
374ca955 1201 }
374ca955 1202 }
73c04bcf
A
1203 else {
1204 return gTimeZoneBufferPtr;
1205 }
1206#endif
374ca955
A
1207#endif
1208
/* Last resort: the U_TZNAME abbreviations, remapped to an Olson ID where possible. */
b75a7d8f 1209#ifdef U_TZNAME
4388f060 1210#if U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9
A
1211 /* The return value is free'd in timezone.cpp on Windows because
1212 * the other code path returns a pointer to a heap location. */
1213 return uprv_strdup(U_TZNAME[n]);
1214#else
73c04bcf 1215 /*
46f4442e
A
1216 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1217 So we remap the abbreviation to an olson ID.
1218
1219 Since Windows exposes a little more timezone information,
1220 we normally don't use this code on Windows because
1221 uprv_detectWindowsTimeZone should have already given the correct answer.
73c04bcf 1222 */
46f4442e
A
1223 {
1224 struct tm juneSol, decemberSol;
1225 int daylightType;
1226 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1227 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1228
1229 /* This probing will tell us when daylight savings occurs. */
1230 localtime_r(&juneSolstice, &juneSol);
1231 localtime_r(&decemberSolstice, &decemberSol);
4388f060
A
1232 if(decemberSol.tm_isdst > 0) {
1233 daylightType = U_DAYLIGHT_DECEMBER;
1234 } else if(juneSol.tm_isdst > 0) {
1235 daylightType = U_DAYLIGHT_JUNE;
1236 } else {
1237 daylightType = U_DAYLIGHT_NONE;
1238 }
46f4442e
A
1239 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1240 if (tzid != NULL) {
1241 return tzid;
1242 }
1243 }
b75a7d8f 1244 return U_TZNAME[n];
729e4ab9 1245#endif
b75a7d8f
A
1246#else
1247 return "";
1248#endif
1249}
1250
1251/* Get and set the ICU data directory --------------------------------------- */
1252
b331163b 1253static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
b75a7d8f 1254static char *gDataDirectory = NULL;
b331163b
A
1255
1256UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1257static CharString *gTimeZoneFilesDirectory = NULL;
1258
57a6839d 1259#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
0f5d89e8 1260 static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
f3c0d7a5 1261 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1262#endif
1263
374ca955 1264static UBool U_CALLCONV putil_cleanup(void)
b75a7d8f 1265{
73c04bcf 1266 if (gDataDirectory && *gDataDirectory) {
b75a7d8f 1267 uprv_free(gDataDirectory);
b75a7d8f 1268 }
73c04bcf 1269 gDataDirectory = NULL;
b331163b
A
1270 gDataDirInitOnce.reset();
1271
1272 delete gTimeZoneFilesDirectory;
1273 gTimeZoneFilesDirectory = NULL;
1274 gTimeZoneFilesInitOnce.reset();
1275
f3c0d7a5
A
1276#ifdef SEARCH_TZFILE
1277 delete gSearchTZFileResult;
1278 gSearchTZFileResult = NULL;
1279#endif
1280
57a6839d 1281#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
f3c0d7a5 1282 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
0f5d89e8 1283 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
b75a7d8f 1284 gCorrectedPOSIXLocale = NULL;
f3c0d7a5 1285 gCorrectedPOSIXLocaleHeapAllocated = false;
b75a7d8f
A
1286 }
1287#endif
1288 return TRUE;
1289}
1290
1291/*
1292 * Set the data directory.
1293 * Make a copy of the passed string, and set the global data dir to point to it.
b75a7d8f
A
1294 */
1295U_CAPI void U_EXPORT2
1296u_setDataDirectory(const char *directory) {
1297 char *newDataDir;
374ca955 1298 int32_t length;
b75a7d8f 1299
73c04bcf
A
1300 if(directory==NULL || *directory==0) {
1301 /* A small optimization to prevent the malloc and copy when the
1302 shared library is used, and this is a way to make sure that NULL
1303 is never returned.
1304 */
1305 newDataDir = (char *)"";
b75a7d8f 1306 }
73c04bcf
A
1307 else {
1308 length=(int32_t)uprv_strlen(directory);
1309 newDataDir = (char *)uprv_malloc(length + 2);
46f4442e
A
1310 /* Exit out if newDataDir could not be created. */
1311 if (newDataDir == NULL) {
1312 return;
1313 }
73c04bcf 1314 uprv_strcpy(newDataDir, directory);
b75a7d8f 1315
374ca955 1316#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
73c04bcf
A
1317 {
1318 char *p;
0f5d89e8 1319 while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
73c04bcf
A
1320 *p = U_FILE_SEP_CHAR;
1321 }
1322 }
374ca955 1323#endif
73c04bcf 1324 }
374ca955 1325
73c04bcf 1326 if (gDataDirectory && *gDataDirectory) {
b75a7d8f
A
1327 uprv_free(gDataDirectory);
1328 }
1329 gDataDirectory = newDataDir;
374ca955 1330 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
b75a7d8f
A
1331}
1332
374ca955 1333U_CAPI UBool U_EXPORT2
729e4ab9 1334uprv_pathIsAbsolute(const char *path)
374ca955 1335{
729e4ab9
A
1336 if(!path || !*path) {
1337 return FALSE;
374ca955
A
1338 }
1339
1340 if(*path == U_FILE_SEP_CHAR) {
1341 return TRUE;
1342 }
1343
1344#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1345 if(*path == U_FILE_ALT_SEP_CHAR) {
1346 return TRUE;
1347 }
1348#endif
1349
4388f060 1350#if U_PLATFORM_USES_ONLY_WIN32_API
374ca955
A
1351 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1352 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1353 path[1] == ':' ) {
1354 return TRUE;
1355 }
1356#endif
1357
1358 return FALSE;
1359}
1360
729e4ab9
A
1361/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1362 until some client wrapper makefiles are updated */
4388f060 1363#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
729e4ab9
A
1364# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1365# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1366# endif
1367#endif
1368
b331163b
A
/*
 * Works out the ICU data directory and installs it with u_setDataDirectory().
 * Does nothing when gDataDirectory is already set.
 *
 * Sources, in the order they are consulted:
 *   1. the ICU_DATA environment variable (skipped when ICU_NO_USER_DATA_OVERRIDE
 *      is defined, UCONFIG_NO_FILE_IO is set, or on Windows UWP);
 *   2. when that is NULL or empty: ICU_DATA_DIR, else U_ICU_DATA_DEFAULT_DIR,
 *      prefixed with the value of the ICU_DATA_DIR_PREFIX_ENV_VAR environment
 *      variable when that macro is defined and the variable is set;
 *   3. when ICU_DATA_DIR_WINDOWS is defined on UWP: the Windows directory plus
 *      ICU_DATA_DIR_WINDOWS. Note that this step replaces any earlier value
 *      whenever the combined path fits the buffer;
 *   4. ".\\" (U_PLATFORM_HAS_WIN32_API) or "" when nothing produced a path.
 *
 * The local path buffers are safe to pass on because u_setDataDirectory()
 * copies the string.
 */
1369static void U_CALLCONV dataDirectoryInitFn() {
1370 /* If we already have the directory, then return immediately. Will happen if user called
1371 * u_setDataDirectory().
1372 */
1373 if (gDataDirectory) {
1374 return;
1375 }
1376
b75a7d8f 1377 const char *path = NULL;
729e4ab9 1378#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
46f4442e
A
1379 char datadir_path_buffer[PATH_MAX];
1380#endif
b75a7d8f 1381
73c04bcf
A
1382 /*
1383 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1384 override ICU's data with the ICU_DATA environment variable. This prevents
1385 problems where multiple custom copies of ICU's specific version of data
1386 are installed on a system. Either the application must define the data
1387 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1388 ICU, set the data with udata_setCommonData or trust that all of the
1389 required data is contained in ICU's data library that contains
1390 the entry point defined by U_ICUDATA_ENTRY_POINT.
1391
1392 There may also be some platforms where environment variables
1393 are not allowed.
1394 */
1395# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1396 /* First try to get the environment variable */
f3c0d7a5
A
1397# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1398 path=getenv("ICU_DATA");
1399# endif
73c04bcf 1400# endif
b75a7d8f 1401
729e4ab9
A
1402 /* ICU_DATA_DIR may be set as a compile option.
1403 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1404 * and is used only when data is built in archive mode eliminating the need
1405 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1406 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1407 * set their own path.
1408 */
1409#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
b75a7d8f 1410 if(path==NULL || *path==0) {
729e4ab9
A
1411# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1412 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1413# endif
1414# ifdef ICU_DATA_DIR
b75a7d8f 1415 path=ICU_DATA_DIR;
729e4ab9
A
1416# else
1417 path=U_ICU_DATA_DEFAULT_DIR;
1418# endif
1419# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1420 if (prefix != NULL) {
/* NOTE(review): the snprintf result is not checked, so an over-long prefix + path is silently truncated. */
1421 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
46f4442e
A
1422 path=datadir_path_buffer;
1423 }
729e4ab9 1424# endif
b75a7d8f 1425 }
729e4ab9 1426#endif
b75a7d8f 1427
f3c0d7a5
A
1428#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
1429 // Use data from the %windir%\globalization\icu directory
1430 // This is only available if ICU is built as a system component
1431 char datadir_path_buffer[MAX_PATH];
1432 UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer));
1433 if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1))
1434 {
1435 if (datadir_path_buffer[length - 1] != '\\')
1436 {
1437 datadir_path_buffer[length++] = '\\';
1438 datadir_path_buffer[length] = '\0';
1439 }
1440
1441 if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer))
1442 {
1443 uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS);
1444 path = datadir_path_buffer;
1445 }
1446 }
1447#endif
1448
b75a7d8f
A
1449 if(path==NULL) {
1450 /* It looks really bad, set it to something. */
f3c0d7a5
A
1451#if U_PLATFORM_HAS_WIN32_API
1452 // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
1453 path = ".\\";
1454#else
b75a7d8f 1455 path = "";
f3c0d7a5 1456#endif
b75a7d8f
A
1457 }
1458
1459 u_setDataDirectory(path);
b331163b
A
1460 return;
1461}
1462
1463U_CAPI const char * U_EXPORT2
1464u_getDataDirectory(void) {
1465 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
b75a7d8f
A
1466 return gDataDirectory;
1467}
1468
b331163b
A
1469static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1470 if (U_FAILURE(status)) {
1471 return;
1472 }
1473 gTimeZoneFilesDirectory->clear();
1474 gTimeZoneFilesDirectory->append(path, status);
1475#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1476 char *p = gTimeZoneFilesDirectory->data();
0f5d89e8 1477 while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
b331163b
A
1478 *p = U_FILE_SEP_CHAR;
1479 }
1480#endif
1481}
b75a7d8f 1482
2ca993e8
A
1483#if U_PLATFORM_IMPLEMENTS_POSIX
1484#include <sys/stat.h>
1485#if defined(U_TIMEZONE_FILES_DIR)
1486const char tzdirbuf[] = U_TIMEZONE_FILES_DIR;
1487enum { kTzfilenamebufLen = UPRV_LENGTHOF(tzdirbuf) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1488#endif
1489#endif
1490
b331163b
A
1491#define TO_STRING(x) TO_STRING_2(x)
1492#define TO_STRING_2(x) #x
b75a7d8f 1493
b331163b
A
/*
 * One-time initializer for gTimeZoneFilesDirectory.
 * Allocates the CharString (setting status to U_MEMORY_ALLOCATION_ERROR on
 * failure), picks a directory and stores it through setTimeZoneFilesDir().
 *
 * The directory comes from the ICU_TIMEZONE_FILES_DIR environment variable
 * ("" on Windows UWP), falling back to the compiled-in U_TIMEZONE_FILES_DIR
 * when that macro is defined and the variable is unset. On POSIX platforms
 * the choice is dropped when stat() fails on it, and the compiled-in
 * directory is also dropped when it lacks the expected time zone data file.
 * When nothing usable remains the empty string is stored.
 */
1494static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1495 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1496 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1497 gTimeZoneFilesDirectory = new CharString();
1498 if (gTimeZoneFilesDirectory == NULL) {
1499 status = U_MEMORY_ALLOCATION_ERROR;
1500 return;
1501 }
/* Records whether dir came from the compiled-in U_TIMEZONE_FILES_DIR rather than the environment. */
2ca993e8 1502 UBool usingUTzFilesDir = FALSE;
f3c0d7a5
A
1503#if U_PLATFORM_HAS_WINUWP_API == 0
1504 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1505#else
1506 // TODO: UWP does not support alternate timezone data directories at this time
1507 const char *dir = "";
1508#endif // U_PLATFORM_HAS_WINUWP_API
b331163b
A
1509#if defined(U_TIMEZONE_FILES_DIR)
1510 if (dir == NULL) {
2ca993e8
A
1511 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1512 // Not sure why the above was done for this path only;
1513 // it preserves unwanted quotes.
1514 dir = tzdirbuf;
1515 usingUTzFilesDir = TRUE;
b331163b
A
1516 }
1517#endif
2ca993e8
A
1518#if U_PLATFORM_IMPLEMENTS_POSIX
1519 if (dir != NULL) {
1520 struct stat buf;
/* Discard a directory that stat() cannot find. */
1521 if (stat(dir, &buf) != 0) {
1522 dir = NULL;
1523 }
1524#if defined(U_TIMEZONE_FILES_DIR)
1525 else if (usingUTzFilesDir) {
/* The compiled-in directory is kept only if the data file itself exists in it. */
1526 char tzfilenamebuf[kTzfilenamebufLen];
1527 uprv_strcpy(tzfilenamebuf, tzdirbuf);
1528 uprv_strcat(tzfilenamebuf, U_FILE_SEP_STRING);
1529#if defined(U_TIMEZONE_PACKAGE)
/* NOTE(review): tzfilenamebuf has room for tzdirbuf plus 24 bytes; confirm U_TIMEZONE_PACKAGE always fits. */
1530 uprv_strcat(tzfilenamebuf, U_TIMEZONE_PACKAGE);
1531 uprv_strcat(tzfilenamebuf, ".dat");
1532#else
1533 uprv_strcat(tzfilenamebuf, "zoneinfo64.res");
1534#endif
1535 if (stat(tzfilenamebuf, &buf) != 0) {
1536 dir = NULL;
1537 }
1538 }
1539#endif /* defined(U_TIMEZONE_FILES_DIR) */
1540 }
1541#endif /* U_PLATFORM_IMPLEMENTS_POSIX */
b331163b
A
1542 if (dir == NULL) {
1543 dir = "";
1544 }
1545 setTimeZoneFilesDir(dir, status);
1546}
b75a7d8f
A
1547
1548
b331163b
A
1549U_CAPI const char * U_EXPORT2
1550u_getTimeZoneFilesDirectory(UErrorCode *status) {
1551 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1552 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1553}
1554
1555U_CAPI void U_EXPORT2
1556u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1557 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1558 setTimeZoneFilesDir(path, *status);
1559
1560 // Note: this function does some extra churn, first setting based on the
1561 // environment, then immediately replacing with the value passed in.
1562 // The logic is simpler that way, and performance shouldn't be an issue.
1563}
b75a7d8f 1564
b75a7d8f
A
1565
1566#if U_POSIX_LOCALE
729e4ab9
A
1567/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1568 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1569 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1570 */
/*
 * For LC_MESSAGES and LC_CTYPE: starts from setlocale(category, NULL); when
 * that is NULL, "C" or "POSIX", tries the environment variables LC_ALL, then
 * LC_MESSAGES or LC_CTYPE (matching the category), then LANG.
 * For any other category nothing is queried.
 * Whenever the outcome is NULL, "C" or "POSIX", "en_US_POSIX" is returned.
 * The returned pointer is whatever setlocale()/getenv() handed back, or a
 * string literal; this function allocates nothing.
 */
1571static const char *uprv_getPOSIXIDForCategory(int category)
b75a7d8f 1572{
729e4ab9
A
1573 const char* posixID = NULL;
1574 if (category == LC_MESSAGES || category == LC_CTYPE) {
73c04bcf 1575 /*
729e4ab9 1576 * On Solaris two different calls to setlocale can result in
73c04bcf
A
1577 * different values. Only get this value once.
1578 *
1579 * We must check this first because an application can set this.
1580 *
1581 * LC_ALL can't be used because it's platform dependent. The LANG
1582 * environment variable seems to affect LC_CTYPE variable by default.
1583 * Here is what setlocale(LC_ALL, NULL) can return.
1584 * HPUX can return 'C C C C C C C'
1585 * Solaris can return /en_US/C/C/C/C/C on the second try.
1586 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1587 *
1588 * The default codepage detection also needs to use LC_CTYPE.
729e4ab9 1589 *
73c04bcf
A
1590 * Do not call setlocale(LC_*, "")! Using an empty string instead
1591 * of NULL, will modify the libc behavior.
1592 */
729e4ab9 1593 posixID = setlocale(category, NULL);
73c04bcf
A
1594 if ((posixID == 0)
1595 || (uprv_strcmp("C", posixID) == 0)
1596 || (uprv_strcmp("POSIX", posixID) == 0))
1597 {
1598 /* Maybe we got some garbage. Try something more reasonable */
1599 posixID = getenv("LC_ALL");
2ca993e8
A
1600 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1601 * This is needed to properly handle empty env. variables
1602 */
/* Both preprocessor arms below open the same two nested if-blocks; the
 * shared closing braces after the LANG lookup end them. Only the Solaris
 * arm also treats an empty variable as unset. */
1603#if U_PLATFORM == U_PF_SOLARIS
1604 if ((posixID == 0) || (posixID[0] == '\0')) {
1605 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1606 if ((posixID == 0) || (posixID[0] == '\0')) {
1607#else
b75a7d8f 1608 if (posixID == 0) {
729e4ab9 1609 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
73c04bcf 1610 if (posixID == 0) {
2ca993e8 1611#endif
73c04bcf
A
1612 posixID = getenv("LANG");
1613 }
b75a7d8f
A
1614 }
1615 }
729e4ab9
A
1616 }
1617 if ((posixID==0)
1618 || (uprv_strcmp("C", posixID) == 0)
1619 || (uprv_strcmp("POSIX", posixID) == 0))
1620 {
1621 /* Nothing worked. Give it a nice POSIX default value. */
1622 posixID = "en_US_POSIX";
1623 }
1624 return posixID;
1625}
b75a7d8f 1626
729e4ab9
A
1627/* Return just the POSIX id for the default locale, whatever happens to be in
1628 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1629 */
1630static const char *uprv_getPOSIXIDForDefaultLocale(void)
1631{
1632 static const char* posixID = NULL;
1633 if (posixID == 0) {
1634 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
b75a7d8f 1635 }
729e4ab9
A
1636 return posixID;
1637}
73c04bcf 1638
51004dcb 1639#if !U_CHARSET_IS_UTF8
729e4ab9
A
1640/* Return just the POSIX id for the default codepage, whatever happens to be in
1641 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1642 */
1643static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1644{
1645 static const char* posixID = NULL;
1646 if (posixID == 0) {
1647 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1648 }
b75a7d8f
A
1649 return posixID;
1650}
1651#endif
51004dcb 1652#endif
b75a7d8f
A
1653
1654/* NOTE: The caller should handle thread safety */
1655U_CAPI const char* U_EXPORT2
1656uprv_getDefaultLocaleID()
1657{
1658#if U_POSIX_LOCALE
1659/*
1660 Note that: (a '!' means the ID is improper somehow)
1661 LC_ALL ----> default_loc codepage
1662--------------------------------------------------------
1663 ab.CD ab CD
1664 ab@CD ab__CD -
1665 ab@CD.EF ab__CD EF
1666
1667 ab_CD.EF@GH ab_CD_GH EF
1668
1669Some 'improper' ways to do the same as above:
1670 ! ab_CD@GH.EF ab_CD_GH EF
1671 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1672 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1673
1674 _CD@GH _CD_GH -
1675 _CD.EF@GH _CD_GH EF
1676
1677The variant cannot have dots in it.
1678The 'rightmost' variant (@xxx) wins.
1679The leftmost codepage (.xxx) wins.
1680*/
1681 char *correctedPOSIXLocale = 0;
729e4ab9 1682 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
b75a7d8f
A
1683 const char *p;
1684 const char *q;
1685 int32_t len;
1686
1687 /* Format: (no spaces)
1688 ll [ _CC ] [ . MM ] [ @ VV]
1689
1690 l = lang, C = ctry, M = charmap, V = variant
1691 */
1692
1693 if (gCorrectedPOSIXLocale != NULL) {
729e4ab9 1694 return gCorrectedPOSIXLocale;
b75a7d8f
A
1695 }
1696
1697 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1698 /* assume new locale can't be larger than old one? */
51004dcb 1699 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
46f4442e
A
1700 /* Exit on memory allocation error. */
1701 if (correctedPOSIXLocale == NULL) {
1702 return NULL;
1703 }
b75a7d8f
A
1704 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1705 correctedPOSIXLocale[p-posixID] = 0;
1706
1707 /* do not copy after the @ */
1708 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1709 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1710 }
1711 }
1712
1713 /* Note that we scan the *uncorrected* ID. */
1714 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1715 if (correctedPOSIXLocale == NULL) {
51004dcb 1716 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
46f4442e
A
1717 /* Exit on memory allocation error. */
1718 if (correctedPOSIXLocale == NULL) {
1719 return NULL;
1720 }
b75a7d8f
A
1721 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1722 correctedPOSIXLocale[p-posixID] = 0;
1723 }
1724 p++;
1725
1726 /* Take care of any special cases here.. */
1727 if (!uprv_strcmp(p, "nynorsk")) {
1728 p = "NY";
73c04bcf 1729 /* Don't worry about no__NY. In practice, it won't appear. */
b75a7d8f
A
1730 }
1731
1732 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1733 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1734 }
1735 else {
1736 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1737 }
1738
1739 if ((q = uprv_strchr(p, '.')) != NULL) {
1740 /* How big will the resulting string be? */
374ca955 1741 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
b75a7d8f
A
1742 uprv_strncat(correctedPOSIXLocale, p, q-p);
1743 correctedPOSIXLocale[len] = 0;
1744 }
1745 else {
1746 /* Anything following the @ sign */
1747 uprv_strcat(correctedPOSIXLocale, p);
1748 }
1749
1750 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1751 * How about 'russian' -> 'ru'?
73c04bcf
A
1752 * Many of the other locales using ISO codes will be handled by the
1753 * canonicalization functions in uloc_getDefault.
b75a7d8f
A
1754 */
1755 }
1756
1757 /* Was a correction made? */
1758 if (correctedPOSIXLocale != NULL) {
1759 posixID = correctedPOSIXLocale;
1760 }
1761 else {
1762 /* copy it, just in case the original pointer goes away. See j2395 */
1763 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
46f4442e
A
1764 /* Exit on memory allocation error. */
1765 if (correctedPOSIXLocale == NULL) {
1766 return NULL;
1767 }
b75a7d8f
A
1768 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1769 }
1770
1771 if (gCorrectedPOSIXLocale == NULL) {
1772 gCorrectedPOSIXLocale = correctedPOSIXLocale;
f3c0d7a5 1773 gCorrectedPOSIXLocaleHeapAllocated = true;
374ca955 1774 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
b75a7d8f
A
1775 correctedPOSIXLocale = NULL;
1776 }
1777
1778 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
729e4ab9 1779 uprv_free(correctedPOSIXLocale);
b75a7d8f
A
1780 }
1781
1782 return posixID;
1783
4388f060 1784#elif U_PLATFORM_USES_ONLY_WIN32_API
57a6839d 1785#define POSIX_LOCALE_CAPACITY 64
b75a7d8f 1786 UErrorCode status = U_ZERO_ERROR;
57a6839d
A
1787 char *correctedPOSIXLocale = 0;
1788
f3c0d7a5 1789 // If we have already figured this out just use the cached value
57a6839d
A
1790 if (gCorrectedPOSIXLocale != NULL) {
1791 return gCorrectedPOSIXLocale;
1792 }
1793
f3c0d7a5
A
1794 // No cached value, need to determine the current value
1795 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH];
1796#if U_PLATFORM_HAS_WINUWP_API == 0
1797 // If not a Universal Windows App, we'll need user default language.
1798 // Vista and above should use Locale Names instead of LCIDs
1799 int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale));
1800#else
1801 // In a UWP app, we want the top language that the application and user agreed upon
1802 ComPtr<ABI::Windows::Foundation::Collections::IVectorView<HSTRING>> languageList;
1803
1804 ComPtr<ABI::Windows::Globalization::IApplicationLanguagesStatics> applicationLanguagesStatics;
1805 HRESULT hr = GetActivationFactory(
1806 HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(),
1807 &applicationLanguagesStatics);
1808 if (SUCCEEDED(hr))
1809 {
1810 hr = applicationLanguagesStatics->get_Languages(&languageList);
1811 }
1812
1813 if (FAILED(hr))
1814 {
1815 // If there is no application context, then use the top language from the user language profile
1816 ComPtr<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics> globalizationPreferencesStatics;
1817 hr = GetActivationFactory(
1818 HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(),
1819 &globalizationPreferencesStatics);
1820 if (SUCCEEDED(hr))
1821 {
1822 hr = globalizationPreferencesStatics->get_Languages(&languageList);
57a6839d
A
1823 }
1824 }
b75a7d8f 1825
f3c0d7a5
A
1826 // We have a list of languages, ICU knows one, so use the top one for our locale
1827 HString topLanguage;
1828 if (SUCCEEDED(hr))
1829 {
1830 hr = languageList->GetAt(0, topLanguage.GetAddressOf());
1831 }
1832
1833 if (FAILED(hr))
1834 {
1835 // Unexpected, use en-US by default
1836 if (gCorrectedPOSIXLocale == NULL) {
1837 gCorrectedPOSIXLocale = "en_US";
1838 }
1839
1840 return gCorrectedPOSIXLocale;
1841 }
1842
1843 // ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
1844 int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale));
1845#endif
1846 // Now we should have a Windows locale name that needs converted to the POSIX style,
1847 if (length > 0)
1848 {
1849 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1850 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH];
1851
1852 int32_t i;
1853 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1854 {
1855 if (windowsLocale[i] == '_')
1856 {
1857 modifiedWindowsLocale[i] = '-';
1858 }
1859 else
1860 {
1861 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1862 }
1863
1864 if (modifiedWindowsLocale[i] == '\0')
1865 {
1866 break;
1867 }
1868 }
1869
1870 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1871 {
1872 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1873 // locale when tags are dropped
1874 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1875 }
1876
1877 // Now normalize the resulting name
0f5d89e8
A
1878 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1879 /* TODO: Should we just exit on memory allocation failure? */
f3c0d7a5
A
1880 if (correctedPOSIXLocale)
1881 {
1882 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1883 if (U_SUCCESS(status))
1884 {
1885 *(correctedPOSIXLocale + posixLen) = 0;
1886 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1887 gCorrectedPOSIXLocaleHeapAllocated = true;
1888 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1889 }
1890 else
1891 {
1892 uprv_free(correctedPOSIXLocale);
1893 }
1894 }
1895 }
1896
1897 // If unable to find a locale we can agree upon, use en-US by default
57a6839d 1898 if (gCorrectedPOSIXLocale == NULL) {
f3c0d7a5 1899 gCorrectedPOSIXLocale = "en_US";
b75a7d8f 1900 }
57a6839d 1901 return gCorrectedPOSIXLocale;
b75a7d8f 1902
4388f060 1903#elif U_PLATFORM == U_PF_OS400
b75a7d8f
A
1904 /* locales are process scoped and are by definition thread safe */
1905 static char correctedLocale[64];
1906 const char *localeID = getenv("LC_ALL");
1907 char *p;
1908
1909 if (localeID == NULL)
1910 localeID = getenv("LANG");
1911 if (localeID == NULL)
1912 localeID = setlocale(LC_ALL, NULL);
1913 /* Make sure we have something... */
1914 if (localeID == NULL)
1915 return "en_US_POSIX";
1916
1917 /* Extract the locale name from the path. */
1918 if((p = uprv_strrchr(localeID, '/')) != NULL)
1919 {
1920 /* Increment p to start of locale name. */
1921 p++;
1922 localeID = p;
1923 }
1924
1925 /* Copy to work location. */
1926 uprv_strcpy(correctedLocale, localeID);
1927
1928 /* Strip off the '.locale' extension. */
1929 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1930 *p = 0;
1931 }
1932
1933 /* Upper case the locale name. */
1934 T_CString_toUpperCase(correctedLocale);
1935
1936 /* See if we are using the POSIX locale. Any of the
1937 * following are equivalent and use the same QLGPGCMA
1938 * (POSIX) locale.
73c04bcf
A
1939 * QLGPGCMA2 means UCS2
1940 * QLGPGCMA_4 means UTF-32
1941 * QLGPGCMA_8 means UTF-8
b75a7d8f
A
1942 */
1943 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1944 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
73c04bcf 1945 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
b75a7d8f
A
1946 {
1947 uprv_strcpy(correctedLocale, "en_US_POSIX");
1948 }
1949 else
1950 {
1951 int16_t LocaleLen;
1952
1953 /* Lower case the lang portion. */
1954 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1955 {
1956 *p = uprv_tolower(*p);
1957 }
1958
1959 /* Adjust for Euro. After '_E' add 'URO'. */
1960 LocaleLen = uprv_strlen(correctedLocale);
1961 if (correctedLocale[LocaleLen - 2] == '_' &&
1962 correctedLocale[LocaleLen - 1] == 'E')
1963 {
1964 uprv_strcat(correctedLocale, "URO");
1965 }
1966
1967 /* If using Lotus-based locale then convert to
1968 * equivalent non Lotus.
1969 */
1970 else if (correctedLocale[LocaleLen - 2] == '_' &&
1971 correctedLocale[LocaleLen - 1] == 'L')
1972 {
1973 correctedLocale[LocaleLen - 2] = 0;
1974 }
1975
1976 /* There are separate simplified and traditional
1977 * locales called zh_HK_S and zh_HK_T.
1978 */
1979 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1980 {
1981 uprv_strcpy(correctedLocale, "zh_HK");
1982 }
1983
1984 /* A special zh_CN_GBK locale...
1985 */
1986 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1987 {
1988 uprv_strcpy(correctedLocale, "zh_CN");
1989 }
1990
1991 }
1992
1993 return correctedLocale;
1994#endif
1995
1996}
1997
729e4ab9 1998#if !U_CHARSET_IS_UTF8
73c04bcf
A
1999#if U_POSIX_LOCALE
2000/*
2001Due to various platform differences, one platform may specify a charset,
2002when they really mean a different charset. Remap the names so that they are
46f4442e
A
2003compatible with ICU. Only conflicting/ambiguous aliases should be resolved
2004here. Before adding anything to this function, please consider adding unique
2005names to the ICU alias table in the data directory.
73c04bcf
A
2006*/
2007static const char*
2008remapPlatformDependentCodepage(const char *locale, const char *name) {
2009 if (locale != NULL && *locale == 0) {
2010 /* Make sure that an empty locale is handled the same way. */
2011 locale = NULL;
2012 }
2013 if (name == NULL) {
2014 return NULL;
2015 }
4388f060 2016#if U_PLATFORM == U_PF_AIX
73c04bcf
A
2017 if (uprv_strcmp(name, "IBM-943") == 0) {
2018 /* Use the ASCII compatible ibm-943 */
2019 name = "Shift-JIS";
2020 }
2021 else if (uprv_strcmp(name, "IBM-1252") == 0) {
2022 /* Use the windows-1252 that contains the Euro */
2023 name = "IBM-5348";
2024 }
4388f060 2025#elif U_PLATFORM == U_PF_SOLARIS
73c04bcf
A
2026 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
2027 /* Solaris underspecifies the "EUC" name. */
2028 if (uprv_strcmp(locale, "zh_CN") == 0) {
2029 name = "EUC-CN";
2030 }
2031 else if (uprv_strcmp(locale, "zh_TW") == 0) {
2032 name = "EUC-TW";
2033 }
2034 else if (uprv_strcmp(locale, "ko_KR") == 0) {
2035 name = "EUC-KR";
2036 }
2037 }
46f4442e
A
2038 else if (uprv_strcmp(name, "eucJP") == 0) {
2039 /*
2040 ibm-954 is the best match.
2041 ibm-33722 is the default for eucJP (similar to Windows).
2042 */
2043 name = "eucjis";
2044 }
2045 else if (uprv_strcmp(name, "646") == 0) {
2046 /*
729e4ab9 2047 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
46f4442e
A
2048 * ISO-8859-1 instead of US-ASCII(646).
2049 */
2050 name = "ISO-8859-1";
2051 }
4388f060 2052#elif U_PLATFORM_IS_DARWIN_BASED
73c04bcf
A
2053 if (locale == NULL && *name == 0) {
2054 /*
2055 No locale was specified, and an empty name was passed in.
2056 This usually indicates that nl_langinfo didn't return valid information.
2057 Mac OS X uses UTF-8 by default (especially the locale data and console).
2058 */
2059 name = "UTF-8";
2060 }
729e4ab9
A
2061 else if (uprv_strcmp(name, "CP949") == 0) {
2062 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2063 name = "EUC-KR";
2064 }
2065 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2066 /*
2067 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2068 */
2069 name = "UTF-8";
2070 }
4388f060 2071#elif U_PLATFORM == U_PF_BSD
729e4ab9
A
2072 if (uprv_strcmp(name, "CP949") == 0) {
2073 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2074 name = "EUC-KR";
2075 }
4388f060 2076#elif U_PLATFORM == U_PF_HPUX
46f4442e
A
2077 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2078 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2079 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2080 name = "hkbig5";
2081 }
2082 else if (uprv_strcmp(name, "eucJP") == 0) {
2083 /*
2084 ibm-1350 is the best match, but unavailable.
2085 ibm-954 is mostly a superset of ibm-1350.
2086 ibm-33722 is the default for eucJP (similar to Windows).
2087 */
2088 name = "eucjis";
2089 }
4388f060 2090#elif U_PLATFORM == U_PF_LINUX
46f4442e
A
2091 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2092 /* Linux underspecifies the "EUC" name. */
2093 if (uprv_strcmp(locale, "korean") == 0) {
2094 name = "EUC-KR";
2095 }
2096 else if (uprv_strcmp(locale, "japanese") == 0) {
2097 /* See comment below about eucJP */
2098 name = "eucjis";
2099 }
2100 }
2101 else if (uprv_strcmp(name, "eucjp") == 0) {
2102 /*
2103 ibm-1350 is the best match, but unavailable.
2104 ibm-954 is mostly a superset of ibm-1350.
2105 ibm-33722 is the default for eucJP (similar to Windows).
2106 */
2107 name = "eucjis";
2108 }
729e4ab9
A
2109 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2110 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2111 /*
2112 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2113 */
2114 name = "UTF-8";
2115 }
2116 /*
2117 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2118 * it by falling back to 'US-ASCII' when NULL is returned from this
2119 * function. So, we don't have to worry about it here.
2120 */
73c04bcf
A
2121#endif
2122 /* return NULL when "" is passed in */
2123 if (*name == 0) {
2124 name = NULL;
2125 }
2126 return name;
2127}
2128
729e4ab9 2129static const char*
73c04bcf
A
2130getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2131{
2132 char localeBuf[100];
2133 const char *name = NULL;
2134 char *variant = NULL;
2135
2136 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2137 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2138 uprv_strncpy(localeBuf, localeName, localeCapacity);
2139 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2140 name = uprv_strncpy(buffer, name+1, buffCapacity);
2141 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
4388f060 2142 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
73c04bcf
A
2143 *variant = 0;
2144 }
2145 name = remapPlatformDependentCodepage(localeBuf, name);
2146 }
2147 return name;
2148}
2149#endif
374ca955 2150
729e4ab9 2151static const char*
374ca955 2152int_getDefaultCodepage()
b75a7d8f 2153{
4388f060 2154#if U_PLATFORM == U_PF_OS400
b75a7d8f
A
2155 uint32_t ccsid = 37; /* Default to ibm-37 */
2156 static char codepage[64];
2157 Qwc_JOBI0400_t jobinfo;
2158 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2159
2160 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2161 "* ", " ", &error);
2162
2163 if (error.Bytes_Available == 0) {
2164 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2165 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2166 }
2167 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2168 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2169 }
2170 /* else use the default */
2171 }
2172 sprintf(codepage,"ibm-%d", ccsid);
2173 return codepage;
2174
4388f060 2175#elif U_PLATFORM == U_PF_OS390
b75a7d8f 2176 static char codepage[64];
729e4ab9
A
2177
2178 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2179 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
46f4442e 2180 codepage[63] = 0; /* NULL terminate */
729e4ab9 2181
b75a7d8f
A
2182 return codepage;
2183
4388f060 2184#elif U_PLATFORM_USES_ONLY_WIN32_API
b75a7d8f 2185 static char codepage[64];
f3c0d7a5
A
2186 DWORD codepageNumber = 0;
2187
2188#if U_PLATFORM_HAS_WINUWP_API > 0
2189 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2190 // have folks use Unicode than a "system" code page, however this is the same
2191 // codepage as the system default locale codepage. (FWIW, the system locale is
2192 // ONLY used for codepage, it should never be used for anything else)
2193 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2194 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2195#else
2196 // Win32 apps can call GetACP
2197 codepageNumber = GetACP();
2198#endif
2199 // Special case for UTF-8
2200 if (codepageNumber == 65001)
2201 {
2202 return "UTF-8";
2203 }
2204 // Windows codepages can look like windows-1252, so format the found number
2205 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2206 // are between 3 and 19999
2207 if (codepageNumber > 0 && codepageNumber < 20000)
2208 {
2209 sprintf(codepage, "windows-%ld", codepageNumber);
2210 return codepage;
2211 }
2212 // If the codepage number call failed then return UTF-8
2213 return "UTF-8";
b75a7d8f
A
2214
2215#elif U_POSIX_LOCALE
2216 static char codesetName[100];
b75a7d8f 2217 const char *localeName = NULL;
73c04bcf 2218 const char *name = NULL;
b75a7d8f 2219
729e4ab9 2220 localeName = uprv_getPOSIXIDForDefaultCodepage();
b75a7d8f 2221 uprv_memset(codesetName, 0, sizeof(codesetName));
2ca993e8
A
2222 /* On Solaris nl_langinfo returns C locale values unless setlocale
2223 * was called earlier.
2224 */
2225#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
729e4ab9
A
2226 /* When available, check nl_langinfo first because it usually gives more
2227 useful names. It depends on LC_CTYPE.
73c04bcf 2228 nl_langinfo may use the same buffer as setlocale. */
b75a7d8f
A
2229 {
2230 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
4388f060 2231#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
729e4ab9
A
2232 /*
2233 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2234 * instead of ASCII.
2235 */
2236 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2237 codeset = remapPlatformDependentCodepage(localeName, codeset);
2238 } else
2239#endif
2240 {
2241 codeset = remapPlatformDependentCodepage(NULL, codeset);
2242 }
2243
b75a7d8f
A
2244 if (codeset != NULL) {
2245 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2246 codesetName[sizeof(codesetName)-1] = 0;
374ca955 2247 return codesetName;
b75a7d8f
A
2248 }
2249 }
2250#endif
374ca955 2251
729e4ab9
A
2252 /* Use setlocale in a nice way, and then check some environment variables.
2253 Maybe the application used setlocale already.
2254 */
2255 uprv_memset(codesetName, 0, sizeof(codesetName));
2256 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2257 if (name) {
2258 /* if we can find the codeset name from setlocale, return that. */
2259 return name;
2260 }
2261
374ca955
A
2262 if (*codesetName == 0)
2263 {
73c04bcf 2264 /* Everything failed. Return US ASCII (ISO 646). */
46f4442e 2265 (void)uprv_strcpy(codesetName, "US-ASCII");
374ca955 2266 }
b75a7d8f
A
2267 return codesetName;
2268#else
2269 return "US-ASCII";
2270#endif
2271}
2272
b75a7d8f 2273
374ca955
A
2274U_CAPI const char* U_EXPORT2
2275uprv_getDefaultCodepage()
2276{
2277 static char const *name = NULL;
2278 umtx_lock(NULL);
2279 if (name == NULL) {
2280 name = int_getDefaultCodepage();
b75a7d8f 2281 }
374ca955
A
2282 umtx_unlock(NULL);
2283 return name;
b75a7d8f 2284}
729e4ab9 2285#endif /* !U_CHARSET_IS_UTF8 */
b75a7d8f 2286
b75a7d8f 2287
374ca955
A
2288/* end of platform-specific implementation -------------- */
2289
2290/* version handling --------------------------------------------------------- */
b75a7d8f
A
2291
2292U_CAPI void U_EXPORT2
2293u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2294 char *end;
2295 uint16_t part=0;
2296
2297 if(versionArray==NULL) {
2298 return;
2299 }
2300
2301 if(versionString!=NULL) {
2302 for(;;) {
2303 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2304 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2305 break;
2306 }
2307 versionString=end+1;
2308 }
2309 }
2310
2311 while(part<U_MAX_VERSION_LENGTH) {
2312 versionArray[part++]=0;
2313 }
2314}
2315
729e4ab9
A
2316U_CAPI void U_EXPORT2
2317u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2318 if(versionArray!=NULL && versionString!=NULL) {
2319 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2320 int32_t len = u_strlen(versionString);
2321 if(len>U_MAX_VERSION_STRING_LENGTH) {
2322 len = U_MAX_VERSION_STRING_LENGTH;
2323 }
2324 u_UCharsToChars(versionString, versionChars, len);
2325 versionChars[len]=0;
2326 u_versionFromString(versionArray, versionChars);
2327 }
2328}
2329
b75a7d8f 2330U_CAPI void U_EXPORT2
4388f060 2331u_versionToString(const UVersionInfo versionArray, char *versionString) {
b75a7d8f
A
2332 uint16_t count, part;
2333 uint8_t field;
2334
2335 if(versionString==NULL) {
2336 return;
2337 }
2338
2339 if(versionArray==NULL) {
2340 versionString[0]=0;
2341 return;
2342 }
2343
2344 /* count how many fields need to be written */
2345 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2346 }
2347
2348 if(count <= 1) {
2349 count = 2;
2350 }
2351
2352 /* write the first part */
2353 /* write the decimal field value */
2354 field=versionArray[0];
2355 if(field>=100) {
2356 *versionString++=(char)('0'+field/100);
2357 field%=100;
2358 }
2359 if(field>=10) {
2360 *versionString++=(char)('0'+field/10);
2361 field%=10;
2362 }
2363 *versionString++=(char)('0'+field);
2364
2365 /* write the following parts */
2366 for(part=1; part<count; ++part) {
2367 /* write a dot first */
2368 *versionString++=U_VERSION_DELIMITER;
2369
2370 /* write the decimal field value */
2371 field=versionArray[part];
2372 if(field>=100) {
2373 *versionString++=(char)('0'+field/100);
2374 field%=100;
2375 }
2376 if(field>=10) {
2377 *versionString++=(char)('0'+field/10);
2378 field%=10;
2379 }
2380 *versionString++=(char)('0'+field);
2381 }
2382
2383 /* NUL-terminate */
2384 *versionString=0;
2385}
2386
2387U_CAPI void U_EXPORT2
2388u_getVersion(UVersionInfo versionArray) {
57a6839d 2389 (void)copyright; // Suppress unused variable warning from clang.
b75a7d8f
A
2390 u_versionFromString(versionArray, U_ICU_VERSION);
2391}
2392
729e4ab9
A
2393/**
2394 * icucfg.h dependent code
2395 */
2396
0f5d89e8 2397#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
729e4ab9 2398
4388f060 2399#if HAVE_DLFCN_H
729e4ab9
A
2400#ifdef __MVS__
2401#ifndef __SUSV3
2402#define __SUSV3 1
2403#endif
2404#endif
2405#include <dlfcn.h>
0f5d89e8 2406#endif /* HAVE_DLFCN_H */
729e4ab9
A
2407
2408U_INTERNAL void * U_EXPORT2
2409uprv_dl_open(const char *libName, UErrorCode *status) {
2410 void *ret = NULL;
2411 if(U_FAILURE(*status)) return ret;
2412 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2413 if(ret==NULL) {
4388f060
A
2414#ifdef U_TRACE_DYLOAD
2415 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
729e4ab9
A
2416#endif
2417 *status = U_MISSING_RESOURCE_ERROR;
2418 }
2419 return ret;
2420}
2421
2422U_INTERNAL void U_EXPORT2
2423uprv_dl_close(void *lib, UErrorCode *status) {
2424 if(U_FAILURE(*status)) return;
2425 dlclose(lib);
2426}
2427
4388f060
A
2428U_INTERNAL UVoidFunction* U_EXPORT2
2429uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2430 union {
2431 UVoidFunction *fp;
2432 void *vp;
2433 } uret;
2434 uret.fp = NULL;
2435 if(U_FAILURE(*status)) return uret.fp;
2436 uret.vp = dlsym(lib, sym);
2437 if(uret.vp == NULL) {
2438#ifdef U_TRACE_DYLOAD
2439 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2440#endif
729e4ab9
A
2441 *status = U_MISSING_RESOURCE_ERROR;
2442 }
4388f060 2443 return uret.fp;
729e4ab9
A
2444}
2445
0f5d89e8 2446#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
729e4ab9 2447
0f5d89e8
A
2448/* Windows API implementation. */
2449// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
729e4ab9
A
2450
2451U_INTERNAL void * U_EXPORT2
2452uprv_dl_open(const char *libName, UErrorCode *status) {
2453 HMODULE lib = NULL;
2454
2455 if(U_FAILURE(*status)) return NULL;
2456
4388f060 2457 lib = LoadLibraryA(libName);
729e4ab9
A
2458
2459 if(lib==NULL) {
2460 *status = U_MISSING_RESOURCE_ERROR;
2461 }
2462
2463 return (void*)lib;
2464}
2465
2466U_INTERNAL void U_EXPORT2
2467uprv_dl_close(void *lib, UErrorCode *status) {
2468 HMODULE handle = (HMODULE)lib;
2469 if(U_FAILURE(*status)) return;
2470
2471 FreeLibrary(handle);
2472
2473 return;
2474}
2475
4388f060
A
2476U_INTERNAL UVoidFunction* U_EXPORT2
2477uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
729e4ab9 2478 HMODULE handle = (HMODULE)lib;
4388f060 2479 UVoidFunction* addr = NULL;
729e4ab9
A
2480
2481 if(U_FAILURE(*status) || lib==NULL) return NULL;
2482
4388f060 2483 addr = (UVoidFunction*)GetProcAddress(handle, sym);
729e4ab9
A
2484
2485 if(addr==NULL) {
2486 DWORD lastError = GetLastError();
2487 if(lastError == ERROR_PROC_NOT_FOUND) {
2488 *status = U_MISSING_RESOURCE_ERROR;
2489 } else {
2490 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2491 }
2492 }
2493
2494 return addr;
2495}
2496
729e4ab9
A
2497#else
2498
0f5d89e8 2499/* No dynamic loading, null (nonexistent) implementation. */
729e4ab9
A
2500
2501U_INTERNAL void * U_EXPORT2
2502uprv_dl_open(const char *libName, UErrorCode *status) {
2ca993e8 2503 (void)libName;
729e4ab9
A
2504 if(U_FAILURE(*status)) return NULL;
2505 *status = U_UNSUPPORTED_ERROR;
2506 return NULL;
2507}
2508
2509U_INTERNAL void U_EXPORT2
2510uprv_dl_close(void *lib, UErrorCode *status) {
2ca993e8 2511 (void)lib;
729e4ab9
A
2512 if(U_FAILURE(*status)) return;
2513 *status = U_UNSUPPORTED_ERROR;
2514 return;
2515}
2516
4388f060
A
2517U_INTERNAL UVoidFunction* U_EXPORT2
2518uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2ca993e8
A
2519 (void)lib;
2520 (void)sym;
4388f060 2521 if(U_SUCCESS(*status)) {
729e4ab9 2522 *status = U_UNSUPPORTED_ERROR;
4388f060
A
2523 }
2524 return (UVoidFunction*)NULL;
729e4ab9
A
2525}
2526
0f5d89e8 2527#endif
729e4ab9 2528
b75a7d8f
A
2529/*
2530 * Hey, Emacs, please set the following:
2531 *
2532 * Local Variables:
2533 * indent-tabs-mode: nil
2534 * End:
2535 *
2536 */