]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/putil.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
... / ...
CommitLineData
1/*
2******************************************************************************
3*
4* Copyright (C) 1997-2016, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11* Date Name Description
12* 04/14/97 aliu Creation.
13* 04/24/97 aliu Added getDefaultDataDirectory() and
14* getDefaultLocaleID().
15* 04/28/97 aliu Rewritten to assume Unix and apply general methods
16* for assumed case. Non-UNIX platforms must be
17* special-cased. Rewrote numeric methods dealing
18* with NaN and Infinity to be platform independent
19* over all IEEE 754 platforms.
20* 05/13/97 aliu Restored sign of timezone
21* (semantics are hours West of GMT)
22* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23* nextDouble..
24* 07/22/98 stephen Added remainder, max, min, trunc
25* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26* 08/24/98 stephen Added longBitsFromDouble
27* 09/08/98 stephen Minor changes for Mac Port
28* 03/02/99 stephen Removed openFile(). Added AS400 support.
29* Fixed EBCDIC tables
30* 04/15/99 stephen Converted to C.
31* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32* 08/04/99 jeffrey R. Added OS/2 changes
33* 11/15/99 helena Integrated S/390 IEEE support.
34* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36* 01/03/08 Steven L. Fake Time Support
37******************************************************************************
38*/
39
40// Defines _XOPEN_SOURCE for access to POSIX functions.
41// Must be before any other #includes.
42#include "uposixdefs.h"
43
44/* include ICU headers */
45#include "unicode/utypes.h"
46#include "unicode/putil.h"
47#include "unicode/ustring.h"
48#include "putilimp.h"
49#include "uassert.h"
50#include "umutex.h"
51#include "cmemory.h"
52#include "cstring.h"
53#include "locmap.h"
54#include "ucln_cmn.h"
55#include "charstr.h"
56
57/* Include standard headers. */
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <math.h>
62#include <locale.h>
63#include <float.h>
64
65#ifndef U_COMMON_IMPLEMENTATION
66#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
67#endif
68
69
70/* include system headers */
71#if U_PLATFORM_USES_ONLY_WIN32_API
72 /*
73 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
74 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
75 * to use native APIs as much as possible?
76 */
77# define WIN32_LEAN_AND_MEAN
78# define VC_EXTRALEAN
79# define NOUSER
80# define NOSERVICE
81# define NOIME
82# define NOMCX
83# include <windows.h>
84# include "wintz.h"
85#elif U_PLATFORM == U_PF_OS400
86# include <float.h>
87# include <qusec.h> /* error code structure */
88# include <qusrjobi.h>
89# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
90# include <mih/testptr.h> /* For uprv_maximumPtr */
91#elif U_PLATFORM == U_PF_OS390
92# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
93#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
94# include <limits.h>
95# include <unistd.h>
96# if U_PLATFORM == U_PF_SOLARIS
97# ifndef _XPG4_2
98# define _XPG4_2
99# endif
100# endif
101#elif U_PLATFORM == U_PF_QNX
102# include <sys/neutrino.h>
103#endif
104
105#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
106/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
107#undef __STRICT_ANSI__
108#endif
109
110/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
112 */
113#include <time.h>
114
115#if !U_PLATFORM_USES_ONLY_WIN32_API
116#include <sys/time.h>
117#endif
118
119/*
120 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
122 *
123 */
124
125#if U_HAVE_NL_LANGINFO_CODESET
126#include <langinfo.h>
127#endif
128
129/**
130 * Simple things (presence of functions, etc) should just go in configure.in and be added to
131 * icucfg.h via autoheader.
132 */
133#if U_PLATFORM_IMPLEMENTS_POSIX
134# if U_PLATFORM == U_PF_OS400
135# define HAVE_DLFCN_H 0
136# define HAVE_DLOPEN 0
137# else
138# ifndef HAVE_DLFCN_H
139# define HAVE_DLFCN_H 1
140# endif
141# ifndef HAVE_DLOPEN
142# define HAVE_DLOPEN 1
143# endif
144# endif
145# ifndef HAVE_GETTIMEOFDAY
146# define HAVE_GETTIMEOFDAY 1
147# endif
148#else
149# define HAVE_DLFCN_H 0
150# define HAVE_DLOPEN 0
151# define HAVE_GETTIMEOFDAY 0
152#endif
153
154U_NAMESPACE_USE
155
156/* Define the extension for data files, again... */
157#define DATA_TYPE "dat"
158
159/* Leave this copyright notice here! */
160static const char copyright[] = U_COPYRIGHT_STRING;
161
162/* floating point implementations ------------------------------------------- */
163
164/* We return QNAN rather than SNAN*/
165#define SIGN 0x80000000U
166
/*
 * Overlays a 64-bit integer on a double so that a specific bit pattern can be
 * written as an integer constant and read back as a floating-point value.
 */
typedef union {
    /* Must stay the first member: a brace initializer sets the first member
       of a union (C89 rule), which is how gNan and gInf are initialized. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern handed out by uprv_getInfinity(). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
174
175/*---------------------------------------------------------------------------
176 Platform utilities
177 Our general strategy is to assume we're on a POSIX platform. Platforms which
178 are non-POSIX must declare themselves so. The default POSIX implementation
179 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
180 functions).
181 ---------------------------------------------------------------------------*/
182
183#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
184# undef U_POSIX_LOCALE
185#else
186# define U_POSIX_LOCALE 1
187#endif
188
189/*
190 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
191 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
192*/
193#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d:
 * the first n bytes on big-endian platforms, the last n bytes otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    /* Step to one past the last byte of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#endif
}
203
/*
 * Returns a pointer to the n least significant bytes of *d:
 * the last n bytes on big-endian platforms, the first n bytes otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Step to one past the last byte of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
213#endif /* !IEEE_754 */
214
215#if IEEE_754
216static UBool
217u_signBit(double d) {
218 uint8_t hiByte;
219#if U_IS_BIG_ENDIAN
220 hiByte = *(uint8_t *)&d;
221#else
222 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
223#endif
224 return (hiByte & 0x80) != 0;
225}
226#endif
227
228
229
230#if defined (U_DEBUG_FAKETIME)
231/* Override the clock to test things without having to move the system clock.
232 * Assumes POSIX gettimeofday() will function
233 */
234UDate fakeClock_t0 = 0; /** Time to start the clock from **/
235UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
236UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
237static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
238
239static UDate getUTCtime_real() {
240 struct timeval posixTime;
241 gettimeofday(&posixTime, NULL);
242 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
243}
244
245static UDate getUTCtime_fake() {
246 umtx_lock(&fakeClockMutex);
247 if(!fakeClock_set) {
248 UDate real = getUTCtime_real();
249 const char *fake_start = getenv("U_FAKETIME_START");
250 if((fake_start!=NULL) && (fake_start[0]!=0)) {
251 sscanf(fake_start,"%lf",&fakeClock_t0);
252 fakeClock_dt = fakeClock_t0 - real;
253 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
254 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
255 fakeClock_t0, fake_start, fakeClock_dt, real);
256 } else {
257 fakeClock_dt = 0;
258 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
259 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
260 }
261 fakeClock_set = TRUE;
262 }
263 umtx_unlock(&fakeClockMutex);
264
265 return getUTCtime_real() + fakeClock_dt;
266}
267#endif
268
269#if U_PLATFORM_USES_ONLY_WIN32_API
270typedef union {
271 int64_t int64;
272 FILETIME fileTime;
273} FileTimeConversion; /* This is like a ULARGE_INTEGER */
274
275/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
276#define EPOCH_BIAS INT64_C(116444736000000000)
277#define HECTONANOSECOND_PER_MILLISECOND 10000
278
279#endif
280
281/*---------------------------------------------------------------------------
282 Universal Implementations
283 These are designed to work on all platforms. Try these, and if they
284 don't work on your platform, then special case your platform with new
285 implementations.
286---------------------------------------------------------------------------*/
287
288U_CAPI UDate U_EXPORT2
289uprv_getUTCtime()
290{
291#if defined(U_DEBUG_FAKETIME)
292 return getUTCtime_fake(); /* Hook for overriding the clock */
293#else
294 return uprv_getRawUTCtime();
295#endif
296}
297
298/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
299U_CAPI UDate U_EXPORT2
300uprv_getRawUTCtime()
301{
302#if U_PLATFORM_USES_ONLY_WIN32_API
303
304 FileTimeConversion winTime;
305 GetSystemTimeAsFileTime(&winTime.fileTime);
306 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
307#else
308
309#if HAVE_GETTIMEOFDAY
310 struct timeval posixTime;
311 gettimeofday(&posixTime, NULL);
312 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
313#else
314 time_t epochtime;
315 time(&epochtime);
316 return (UDate)epochtime * U_MILLIS_PER_SECOND;
317#endif
318
319#endif
320}
321
322/*-----------------------------------------------------------------------------
323 IEEE 754
324 These methods detect and return NaN and infinity values for doubles
325 conforming to IEEE 754. Platforms which support this standard include X86,
326 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
327 If this doesn't work on your platform, you have non-IEEE floating-point, and
328 will need to code your own versions. A naive implementation is to return 0.0
329 for getNaN and getInfinity, and false for isNaN and isInfinite.
330 ---------------------------------------------------------------------------*/
331
332U_CAPI UBool U_EXPORT2
333uprv_isNaN(double number)
334{
335#if IEEE_754
336 BitPatternConversion convertedNumber;
337 convertedNumber.d64 = number;
338 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
339 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
340
341#elif U_PLATFORM == U_PF_OS390
342 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
343 sizeof(uint32_t));
344 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
345 sizeof(uint32_t));
346
347 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
348 (lowBits == 0x00000000L);
349
350#else
351 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
352 /* you'll need to replace this default implementation with what's correct*/
353 /* for your platform.*/
354 return number != number;
355#endif
356}
357
358U_CAPI UBool U_EXPORT2
359uprv_isInfinite(double number)
360{
361#if IEEE_754
362 BitPatternConversion convertedNumber;
363 convertedNumber.d64 = number;
364 /* Infinity is exactly 0x7FF0000000000000U. */
365 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
366#elif U_PLATFORM == U_PF_OS390
367 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
368 sizeof(uint32_t));
369 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
370 sizeof(uint32_t));
371
372 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
373
374#else
375 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
376 /* value, you'll need to replace this default implementation with what's*/
377 /* correct for your platform.*/
378 return number == (2.0 * number);
379#endif
380}
381
382U_CAPI UBool U_EXPORT2
383uprv_isPositiveInfinity(double number)
384{
385#if IEEE_754 || U_PLATFORM == U_PF_OS390
386 return (UBool)(number > 0 && uprv_isInfinite(number));
387#else
388 return uprv_isInfinite(number);
389#endif
390}
391
392U_CAPI UBool U_EXPORT2
393uprv_isNegativeInfinity(double number)
394{
395#if IEEE_754 || U_PLATFORM == U_PF_OS390
396 return (UBool)(number < 0 && uprv_isInfinite(number));
397
398#else
399 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
400 sizeof(uint32_t));
401 return((highBits & SIGN) && uprv_isInfinite(number));
402
403#endif
404}
405
406U_CAPI double U_EXPORT2
407uprv_getNaN()
408{
409#if IEEE_754 || U_PLATFORM == U_PF_OS390
410 return gNan.d64;
411#else
412 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
413 /* you'll need to replace this default implementation with what's correct*/
414 /* for your platform.*/
415 return 0.0;
416#endif
417}
418
419U_CAPI double U_EXPORT2
420uprv_getInfinity()
421{
422#if IEEE_754 || U_PLATFORM == U_PF_OS390
423 return gInf.d64;
424#else
425 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
426 /* value, you'll need to replace this default implementation with what's*/
427 /* correct for your platform.*/
428 return 0.0;
429#endif
430}
431
432U_CAPI double U_EXPORT2
433uprv_floor(double x)
434{
435 return floor(x);
436}
437
438U_CAPI double U_EXPORT2
439uprv_ceil(double x)
440{
441 return ceil(x);
442}
443
444U_CAPI double U_EXPORT2
445uprv_round(double x)
446{
447 return uprv_floor(x + 0.5);
448}
449
450U_CAPI double U_EXPORT2
451uprv_fabs(double x)
452{
453 return fabs(x);
454}
455
456U_CAPI double U_EXPORT2
457uprv_modf(double x, double* y)
458{
459 return modf(x, y);
460}
461
462U_CAPI double U_EXPORT2
463uprv_fmod(double x, double y)
464{
465 return fmod(x, y);
466}
467
468U_CAPI double U_EXPORT2
469uprv_pow(double x, double y)
470{
471 /* This is declared as "double pow(double x, double y)" */
472 return pow(x, y);
473}
474
475U_CAPI double U_EXPORT2
476uprv_pow10(int32_t x)
477{
478 return pow(10.0, (double)x);
479}
480
481U_CAPI double U_EXPORT2
482uprv_fmax(double x, double y)
483{
484#if IEEE_754
485 /* first handle NaN*/
486 if(uprv_isNaN(x) || uprv_isNaN(y))
487 return uprv_getNaN();
488
489 /* check for -0 and 0*/
490 if(x == 0.0 && y == 0.0 && u_signBit(x))
491 return y;
492
493#endif
494
495 /* this should work for all flt point w/o NaN and Inf special cases */
496 return (x > y ? x : y);
497}
498
499U_CAPI double U_EXPORT2
500uprv_fmin(double x, double y)
501{
502#if IEEE_754
503 /* first handle NaN*/
504 if(uprv_isNaN(x) || uprv_isNaN(y))
505 return uprv_getNaN();
506
507 /* check for -0 and 0*/
508 if(x == 0.0 && y == 0.0 && u_signBit(y))
509 return y;
510
511#endif
512
513 /* this should work for all flt point w/o NaN and Inf special cases */
514 return (x > y ? y : x);
515}
516
517/**
518 * Truncates the given double.
519 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
520 * This is different than calling floor() or ceil():
521 * floor(3.3) = 3, floor(-3.3) = -4
522 * ceil(3.3) = 4, ceil(-3.3) = -3
523 */
524U_CAPI double U_EXPORT2
525uprv_trunc(double d)
526{
527#if IEEE_754
528 /* handle error cases*/
529 if(uprv_isNaN(d))
530 return uprv_getNaN();
531 if(uprv_isInfinite(d))
532 return uprv_getInfinity();
533
534 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
535 return ceil(d);
536 else
537 return floor(d);
538
539#else
540 return d >= 0 ? floor(d) : ceil(d);
541
542#endif
543}
544
545/**
546 * Return the largest positive number that can be represented by an integer
547 * type of arbitrary bit length.
548 */
549U_CAPI double U_EXPORT2
550uprv_maxMantissa(void)
551{
552 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
553}
554
555U_CAPI double U_EXPORT2
556uprv_log(double d)
557{
558 return log(d);
559}
560
561U_CAPI void * U_EXPORT2
562uprv_maximumPtr(void * base)
563{
564#if U_PLATFORM == U_PF_OS400
565 /*
566 * With the provided function we should never be out of range of a given segment
567 * (a traditional/typical segment that is). Our segments have 5 bytes for the
568 * id and 3 bytes for the offset. The key is that the casting takes care of
569 * only retrieving the offset portion minus x1000. Hence, the smallest offset
570 * seen in a program is x001000 and when casted to an int would be 0.
571 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
572 *
573 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
574 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
575 * This function determines the activation based on the pointer that is passed in and
576 * calculates the appropriate maximum available size for
577 * each pointer type (TERASPACE and non-TERASPACE)
578 *
579 * Unlike other operating systems, the pointer model isn't determined at
580 * compile time on i5/OS.
581 */
582 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
583 /* if it is a TERASPACE pointer the max is 2GB - 4k */
584 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
585 }
586 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
587 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
588
589#else
590 return U_MAX_PTR(base);
591#endif
592}
593
594/*---------------------------------------------------------------------------
595 Platform-specific Implementations
596 Try these, and if they don't work on your platform, then special case your
597 platform with new implementations.
598 ---------------------------------------------------------------------------*/
599
600/* Generic time zone layer -------------------------------------------------- */
601
602/* Time zone utilities */
603U_CAPI void U_EXPORT2
604uprv_tzset()
605{
606#if defined(U_TZSET)
607 U_TZSET();
608#else
609 /* no initialization*/
610#endif
611}
612
613U_CAPI int32_t U_EXPORT2
614uprv_timezone()
615{
616#ifdef U_TIMEZONE
617 return U_TIMEZONE;
618#else
619 time_t t, t1, t2;
620 struct tm tmrec;
621 int32_t tdiff = 0;
622
623 time(&t);
624 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
625#if U_PLATFORM != U_PF_IPHONE
626 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
627#endif
628 t1 = mktime(&tmrec); /* local time in seconds*/
629 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
630 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
631 tdiff = t2 - t1;
632
633#if U_PLATFORM != U_PF_IPHONE
634 /* imitate NT behaviour, which returns same timezone offset to GMT for
635 winter and summer.
636 This does not work on all platforms. For instance, on glibc on Linux
637 and on Mac OS 10.5, tdiff calculated above remains the same
638 regardless of whether DST is in effect or not. iOS is another
639 platform where this does not work. Linux + glibc and Mac OS 10.5
640 have U_TIMEZONE defined so that this code is not reached.
641 */
642 if (dst_checked)
643 tdiff += 3600;
644#endif
645 return tdiff;
646#endif
647}
648
649/* Note that U_TZNAME does *not* have to be tzname, but if it is,
650 some platforms need to have it declared here. */
651
652#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
653/* RS6000 and others reject char **tzname. */
654extern U_IMPORT char *U_TZNAME[];
655#endif
656
657#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
658/* These platforms are likely to use Olson timezone IDs. */
659#define CHECK_LOCALTIME_LINK 1
660#if U_PLATFORM_IS_DARWIN_BASED
661#include <tzfile.h>
662#define TZZONEINFO (TZDIR "/")
663#elif U_PLATFORM == U_PF_SOLARIS
664#define TZDEFAULT "/etc/localtime"
665#define TZZONEINFO "/usr/share/lib/zoneinfo/"
666#define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
667#define TZ_ENV_CHECK "localtime"
668#else
669#define TZDEFAULT "/etc/localtime"
670#define TZZONEINFO "/usr/share/zoneinfo/"
671#endif
672#if U_HAVE_DIRENT_H
673#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
674/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
675 symlinked to /etc/localtime, which makes searchForTZFile return
676 'localtime' when it's the first match. */
677#define TZFILE_SKIP2 "localtime"
678#define SEARCH_TZFILE
679#include <dirent.h> /* Needed to search through system timezone files */
680#endif
681static char gTimeZoneBuffer[PATH_MAX];
682static char *gTimeZoneBufferPtr = NULL;
683#endif
684
685#if !U_PLATFORM_USES_ONLY_WIN32_API
686#define isNonDigit(ch) (ch < '0' || '9' < ch)
687static UBool isValidOlsonID(const char *id) {
688 int32_t idx = 0;
689
690 /* Determine if this is something like Iceland (Olson ID)
691 or AST4ADT (non-Olson ID) */
692 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
693 idx++;
694 }
695
696 /* If we went through the whole string, then it might be okay.
697 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
698 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
699 The rest of the time it could be an Olson ID. George */
700 return (UBool)(id[idx] == 0
701 || uprv_strcmp(id, "PST8PDT") == 0
702 || uprv_strcmp(id, "MST7MDT") == 0
703 || uprv_strcmp(id, "CST6CDT") == 0
704 || uprv_strcmp(id, "EST5EDT") == 0);
705}
706
707/* On some Unix-like OS, 'posix' subdirectory in
708 /usr/share/zoneinfo replicates the top-level contents. 'right'
709 subdirectory has the same set of files, but individual files
710 are different from those in the top-level directory or 'posix'
711 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
712 has files for UTC.
713 When the first match for /etc/localtime is in either of them
714 (usually in posix because 'right' has different file contents),
715 or TZ environment variable points to one of them, createTimeZone
716 fails because, say, 'posix/America/New_York' is not an Olson
717 timezone id ('America/New_York' is). So, we have to skip
718 'posix/' and 'right/' at the beginning. */
719static void skipZoneIDPrefix(const char** id) {
720 if (uprv_strncmp(*id, "posix/", 6) == 0
721 || uprv_strncmp(*id, "right/", 6) == 0)
722 {
723 *id += 6;
724 }
725}
726#endif
727
728#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
729
730#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the abbreviation-to-Olson-ID table searched by remapShortTimeZone(). */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset compared against the caller's offset, in seconds */
    int32_t daylightType;   /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID returned on a match */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping.daylightType. */
enum {
    U_DAYLIGHT_NONE=0,
    U_DAYLIGHT_JUNE=1,
    U_DAYLIGHT_DECEMBER=2
};
740
741/*
742This list tries to disambiguate a set of abbreviated timezone IDs and offsets
743and maps it to an Olson ID.
744Before adding anything to this list, take a look at
745icu/source/tools/tzcode/tz.alias
746Sometimes no daylight savings (0) is important to define due to aliases.
747This list can be tested with icu/source/test/compat/tzone.pl
748More values could be added to daylightType to increase precision.
749*/
750static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
751 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
752 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
753 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
754 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
755 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
756 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
757 {-36000, 2, "EST", "EST", "Australia/Sydney"},
758 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
759 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
760 {-34200, 2, "CST", "CST", "Australia/South"},
761 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
762 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
763 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
764 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
765 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
766 {-28800, 2, "WST", "WST", "Australia/West"},
767 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
768 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
769 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
770 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
771 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
772 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
773 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
774 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
775 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
776 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
777 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
778 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
779 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
780 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
781 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
782 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
783 {0, 1, "GMT", "IST", "Europe/Dublin"},
784 {0, 1, "GMT", "BST", "Europe/London"},
785 {0, 0, "WET", "WEST", "Africa/Casablanca"},
786 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
787 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
788 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
789 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
790 {10800, 2, "UYT", "UYST", "America/Montevideo"},
791 {10800, 1, "WGT", "WGST", "America/Godthab"},
792 {10800, 2, "BRT", "BRST", "Brazil/East"},
793 {12600, 1, "NST", "NDT", "America/St_Johns"},
794 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
795 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
796 {14400, 2, "CLT", "CLST", "Chile/Continental"},
797 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
798 {14400, 2, "PYT", "PYST", "America/Asuncion"},
799 {18000, 1, "CST", "CDT", "America/Havana"},
800 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
801 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
802 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
803 {21600, 0, "CST", "CDT", "America/Guatemala"},
804 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
805 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
806 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
807 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
808 {32400, 1, "AKST", "AKDT", "US/Alaska"},
809 {36000, 1, "HAST", "HADT", "US/Aleutian"}
810};
811
812/*#define DEBUG_TZNAME*/
813
814static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
815{
816 int32_t idx;
817#ifdef DEBUG_TZNAME
818 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
819#endif
820 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
821 {
822 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
823 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
824 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
825 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
826 {
827 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
828 }
829 }
830 return NULL;
831}
832#endif
833
834#ifdef SEARCH_TZFILE
835#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
836#define MAX_READ_SIZE 512
837
838typedef struct DefaultTZInfo {
839 char* defaultTZBuffer;
840 int64_t defaultTZFileSize;
841 FILE* defaultTZFilePtr;
842 UBool defaultTZstatus;
843 int32_t defaultTZPosition;
844} DefaultTZInfo;
845
846/*
847 * This method compares the two files given to see if they are a match.
848 * It is currently use to compare two TZ files.
849 */
850static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
851 FILE* file;
852 int64_t sizeFile;
853 int64_t sizeFileLeft;
854 int32_t sizeFileRead;
855 int32_t sizeFileToRead;
856 char bufferFile[MAX_READ_SIZE];
857 UBool result = TRUE;
858
859 if (tzInfo->defaultTZFilePtr == NULL) {
860 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
861 }
862 file = fopen(TZFileName, "r");
863
864 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
865
866 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
867 /* First check that the file size are equal. */
868 if (tzInfo->defaultTZFileSize == 0) {
869 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
870 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
871 }
872 fseek(file, 0, SEEK_END);
873 sizeFile = ftell(file);
874 sizeFileLeft = sizeFile;
875
876 if (sizeFile != tzInfo->defaultTZFileSize) {
877 result = FALSE;
878 } else {
879 /* Store the data from the files in seperate buffers and
880 * compare each byte to determine equality.
881 */
882 if (tzInfo->defaultTZBuffer == NULL) {
883 rewind(tzInfo->defaultTZFilePtr);
884 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
885 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
886 }
887 rewind(file);
888 while(sizeFileLeft > 0) {
889 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
890 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
891
892 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
893 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
894 result = FALSE;
895 break;
896 }
897 sizeFileLeft -= sizeFileRead;
898 tzInfo->defaultTZPosition += sizeFileRead;
899 }
900 }
901 } else {
902 result = FALSE;
903 }
904
905 if (file != NULL) {
906 fclose(file);
907 }
908
909 return result;
910}
911/*
912 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
913 */
914/* dirent also lists two entries: "." and ".." that we can safely ignore. */
915#define SKIP1 "."
916#define SKIP2 ".."
917static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
918static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
919 char curpath[MAX_PATH_SIZE];
920 DIR* dirp = opendir(path);
921 DIR* subDirp = NULL;
922 struct dirent* dirEntry = NULL;
923
924 char* result = NULL;
925 if (dirp == NULL) {
926 return result;
927 }
928
929 /* Save the current path */
930 uprv_memset(curpath, 0, MAX_PATH_SIZE);
931 uprv_strcpy(curpath, path);
932
933 /* Check each entry in the directory. */
934 while((dirEntry = readdir(dirp)) != NULL) {
935 const char* dirName = dirEntry->d_name;
936 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
937 /* Create a newpath with the new entry to test each entry in the directory. */
938 char newpath[MAX_PATH_SIZE];
939 uprv_strcpy(newpath, curpath);
940 uprv_strcat(newpath, dirName);
941
942 if ((subDirp = opendir(newpath)) != NULL) {
943 /* If this new path is a directory, make a recursive call with the newpath. */
944 closedir(subDirp);
945 uprv_strcat(newpath, "/");
946 result = searchForTZFile(newpath, tzInfo);
947 /*
948 Have to get out here. Otherwise, we'd keep looking
949 and return the first match in the top-level directory
950 if there's a match in the top-level. If not, this function
951 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
952 It worked without this in most cases because we have a fallback of calling
953 localtime_r to figure out the default timezone.
954 */
955 if (result != NULL)
956 break;
957 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
958 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
959 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
960 skipZoneIDPrefix(&zoneid);
961 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
962 result = SEARCH_TZFILE_RESULT;
963 /* Get out after the first one found. */
964 break;
965 }
966 }
967 }
968 }
969 closedir(dirp);
970 return result;
971}
972#endif
973U_CAPI const char* U_EXPORT2
974uprv_tzname(int n)
975{
976 const char *tzid = NULL;
977#if U_PLATFORM_USES_ONLY_WIN32_API
978 tzid = uprv_detectWindowsTimeZone();
979
980 if (tzid != NULL) {
981 return tzid;
982 }
983#else
984
985/*#if U_PLATFORM_IS_DARWIN_BASED
986 int ret;
987
988 tzid = getenv("TZFILE");
989 if (tzid != NULL) {
990 return tzid;
991 }
992#endif*/
993
994/* This code can be temporarily disabled to test tzname resolution later on. */
995#ifndef DEBUG_TZNAME
996 tzid = getenv("TZ");
997 if (tzid != NULL && isValidOlsonID(tzid)
998#if U_PLATFORM == U_PF_SOLARIS
999 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1000 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1001#endif
1002 ) {
1003 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1004 if (tzid[0] == ':') {
1005 tzid++;
1006 }
1007 /* This might be a good Olson ID. */
1008 skipZoneIDPrefix(&tzid);
1009 return tzid;
1010 }
1011 /* else U_TZNAME will give a better result. */
1012#endif
1013
1014#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1015 /* Caller must handle threading issues */
1016 if (gTimeZoneBufferPtr == NULL) {
1017 /*
1018 This is a trick to look at the name of the link to get the Olson ID
1019 because the tzfile contents is underspecified.
1020 This isn't guaranteed to work because it may not be a symlink.
1021 */
1022 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1023 if (0 < ret) {
1024 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1025 gTimeZoneBuffer[ret] = 0;
1026 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1027 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1028 {
1029 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1030 }
1031#if U_PLATFORM == U_PF_SOLARIS
1032 else
1033 {
1034 tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
1035 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
1036 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1037 {
1038 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1039 }
1040 }
1041#endif
1042 } else {
1043#if defined(SEARCH_TZFILE)
1044 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1045 if (tzInfo != NULL) {
1046 tzInfo->defaultTZBuffer = NULL;
1047 tzInfo->defaultTZFileSize = 0;
1048 tzInfo->defaultTZFilePtr = NULL;
1049 tzInfo->defaultTZstatus = FALSE;
1050 tzInfo->defaultTZPosition = 0;
1051
1052 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1053
1054 /* Free previously allocated memory */
1055 if (tzInfo->defaultTZBuffer != NULL) {
1056 uprv_free(tzInfo->defaultTZBuffer);
1057 }
1058 if (tzInfo->defaultTZFilePtr != NULL) {
1059 fclose(tzInfo->defaultTZFilePtr);
1060 }
1061 uprv_free(tzInfo);
1062 }
1063
1064 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1065 return gTimeZoneBufferPtr;
1066 }
1067#endif
1068 }
1069 }
1070 else {
1071 return gTimeZoneBufferPtr;
1072 }
1073#endif
1074#endif
1075
1076#ifdef U_TZNAME
1077#if U_PLATFORM_USES_ONLY_WIN32_API
1078 /* The return value is free'd in timezone.cpp on Windows because
1079 * the other code path returns a pointer to a heap location. */
1080 return uprv_strdup(U_TZNAME[n]);
1081#else
1082 /*
1083 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1084 So we remap the abbreviation to an olson ID.
1085
1086 Since Windows exposes a little more timezone information,
1087 we normally don't use this code on Windows because
1088 uprv_detectWindowsTimeZone should have already given the correct answer.
1089 */
1090 {
1091 struct tm juneSol, decemberSol;
1092 int daylightType;
1093 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1094 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1095
1096 /* This probing will tell us when daylight savings occurs. */
1097 localtime_r(&juneSolstice, &juneSol);
1098 localtime_r(&decemberSolstice, &decemberSol);
1099 if(decemberSol.tm_isdst > 0) {
1100 daylightType = U_DAYLIGHT_DECEMBER;
1101 } else if(juneSol.tm_isdst > 0) {
1102 daylightType = U_DAYLIGHT_JUNE;
1103 } else {
1104 daylightType = U_DAYLIGHT_NONE;
1105 }
1106 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1107 if (tzid != NULL) {
1108 return tzid;
1109 }
1110 }
1111 return U_TZNAME[n];
1112#endif
1113#else
1114 return "";
1115#endif
1116}
1117
1118/* Get and set the ICU data directory --------------------------------------- */
1119
1120static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
1121static char *gDataDirectory = NULL;
1122
1123UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1124static CharString *gTimeZoneFilesDirectory = NULL;
1125
1126#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1127 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1128#endif
1129
1130static UBool U_CALLCONV putil_cleanup(void)
1131{
1132 if (gDataDirectory && *gDataDirectory) {
1133 uprv_free(gDataDirectory);
1134 }
1135 gDataDirectory = NULL;
1136 gDataDirInitOnce.reset();
1137
1138 delete gTimeZoneFilesDirectory;
1139 gTimeZoneFilesDirectory = NULL;
1140 gTimeZoneFilesInitOnce.reset();
1141
1142#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1143 if (gCorrectedPOSIXLocale) {
1144 uprv_free(gCorrectedPOSIXLocale);
1145 gCorrectedPOSIXLocale = NULL;
1146 }
1147#endif
1148 return TRUE;
1149}
1150
1151/*
1152 * Set the data directory.
1153 * Make a copy of the passed string, and set the global data dir to point to it.
1154 */
1155U_CAPI void U_EXPORT2
1156u_setDataDirectory(const char *directory) {
1157 char *newDataDir;
1158 int32_t length;
1159
1160 if(directory==NULL || *directory==0) {
1161 /* A small optimization to prevent the malloc and copy when the
1162 shared library is used, and this is a way to make sure that NULL
1163 is never returned.
1164 */
1165 newDataDir = (char *)"";
1166 }
1167 else {
1168 length=(int32_t)uprv_strlen(directory);
1169 newDataDir = (char *)uprv_malloc(length + 2);
1170 /* Exit out if newDataDir could not be created. */
1171 if (newDataDir == NULL) {
1172 return;
1173 }
1174 uprv_strcpy(newDataDir, directory);
1175
1176#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1177 {
1178 char *p;
1179 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1180 *p = U_FILE_SEP_CHAR;
1181 }
1182 }
1183#endif
1184 }
1185
1186 if (gDataDirectory && *gDataDirectory) {
1187 uprv_free(gDataDirectory);
1188 }
1189 gDataDirectory = newDataDir;
1190 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1191}
1192
1193U_CAPI UBool U_EXPORT2
1194uprv_pathIsAbsolute(const char *path)
1195{
1196 if(!path || !*path) {
1197 return FALSE;
1198 }
1199
1200 if(*path == U_FILE_SEP_CHAR) {
1201 return TRUE;
1202 }
1203
1204#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1205 if(*path == U_FILE_ALT_SEP_CHAR) {
1206 return TRUE;
1207 }
1208#endif
1209
1210#if U_PLATFORM_USES_ONLY_WIN32_API
1211 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1212 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1213 path[1] == ':' ) {
1214 return TRUE;
1215 }
1216#endif
1217
1218 return FALSE;
1219}
1220
1221/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1222 until some client wrapper makefiles are updated */
1223#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1224# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1225# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1226# endif
1227#endif
1228
1229static void U_CALLCONV dataDirectoryInitFn() {
1230 /* If we already have the directory, then return immediately. Will happen if user called
1231 * u_setDataDirectory().
1232 */
1233 if (gDataDirectory) {
1234 return;
1235 }
1236
1237 const char *path = NULL;
1238#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1239 char datadir_path_buffer[PATH_MAX];
1240#endif
1241
1242 /*
1243 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1244 override ICU's data with the ICU_DATA environment variable. This prevents
1245 problems where multiple custom copies of ICU's specific version of data
1246 are installed on a system. Either the application must define the data
1247 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1248 ICU, set the data with udata_setCommonData or trust that all of the
1249 required data is contained in ICU's data library that contains
1250 the entry point defined by U_ICUDATA_ENTRY_POINT.
1251
1252 There may also be some platforms where environment variables
1253 are not allowed.
1254 */
1255# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1256 /* First try to get the environment variable */
1257 path=getenv("ICU_DATA");
1258# endif
1259
1260 /* ICU_DATA_DIR may be set as a compile option.
1261 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1262 * and is used only when data is built in archive mode eliminating the need
1263 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1264 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1265 * set their own path.
1266 */
1267#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1268 if(path==NULL || *path==0) {
1269# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1270 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1271# endif
1272# ifdef ICU_DATA_DIR
1273 path=ICU_DATA_DIR;
1274# else
1275 path=U_ICU_DATA_DEFAULT_DIR;
1276# endif
1277# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1278 if (prefix != NULL) {
1279 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1280 path=datadir_path_buffer;
1281 }
1282# endif
1283 }
1284#endif
1285
1286 if(path==NULL) {
1287 /* It looks really bad, set it to something. */
1288 path = "";
1289 }
1290
1291 u_setDataDirectory(path);
1292 return;
1293}
1294
1295U_CAPI const char * U_EXPORT2
1296u_getDataDirectory(void) {
1297 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1298 return gDataDirectory;
1299}
1300
1301static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1302 if (U_FAILURE(status)) {
1303 return;
1304 }
1305 gTimeZoneFilesDirectory->clear();
1306 gTimeZoneFilesDirectory->append(path, status);
1307#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1308 char *p = gTimeZoneFilesDirectory->data();
1309 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
1310 *p = U_FILE_SEP_CHAR;
1311 }
1312#endif
1313}
1314
1315#if U_PLATFORM_IMPLEMENTS_POSIX
1316#include <sys/stat.h>
1317#if defined(U_TIMEZONE_FILES_DIR)
1318const char tzdirbuf[] = U_TIMEZONE_FILES_DIR;
1319enum { kTzfilenamebufLen = UPRV_LENGTHOF(tzdirbuf) + 24 }; // extra room for "/icutz44l.dat" or "/zoneinfo64.res"
1320#endif
1321#endif
1322
1323#define TO_STRING(x) TO_STRING_2(x)
1324#define TO_STRING_2(x) #x
1325
1326static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1327 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1328 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1329 gTimeZoneFilesDirectory = new CharString();
1330 if (gTimeZoneFilesDirectory == NULL) {
1331 status = U_MEMORY_ALLOCATION_ERROR;
1332 return;
1333 }
1334 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1335 UBool usingUTzFilesDir = FALSE;
1336#if defined(U_TIMEZONE_FILES_DIR)
1337 if (dir == NULL) {
1338 // dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1339 // Not sure why the above was done for this path only;
1340 // it preserves unwanted quotes.
1341 dir = tzdirbuf;
1342 usingUTzFilesDir = TRUE;
1343 }
1344#endif
1345#if U_PLATFORM_IMPLEMENTS_POSIX
1346 if (dir != NULL) {
1347 struct stat buf;
1348 if (stat(dir, &buf) != 0) {
1349 dir = NULL;
1350 }
1351#if defined(U_TIMEZONE_FILES_DIR)
1352 else if (usingUTzFilesDir) {
1353 char tzfilenamebuf[kTzfilenamebufLen];
1354 uprv_strcpy(tzfilenamebuf, tzdirbuf);
1355 uprv_strcat(tzfilenamebuf, U_FILE_SEP_STRING);
1356#if defined(U_TIMEZONE_PACKAGE)
1357 uprv_strcat(tzfilenamebuf, U_TIMEZONE_PACKAGE);
1358 uprv_strcat(tzfilenamebuf, ".dat");
1359#else
1360 uprv_strcat(tzfilenamebuf, "zoneinfo64.res");
1361#endif
1362 if (stat(tzfilenamebuf, &buf) != 0) {
1363 dir = NULL;
1364 }
1365 }
1366#endif /* defined(U_TIMEZONE_FILES_DIR) */
1367 }
1368#endif /* U_PLATFORM_IMPLEMENTS_POSIX */
1369 if (dir == NULL) {
1370 dir = "";
1371 }
1372 setTimeZoneFilesDir(dir, status);
1373}
1374
1375
1376U_CAPI const char * U_EXPORT2
1377u_getTimeZoneFilesDirectory(UErrorCode *status) {
1378 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1379 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1380}
1381
1382U_CAPI void U_EXPORT2
1383u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1384 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1385 setTimeZoneFilesDir(path, *status);
1386
1387 // Note: this function does some extra churn, first setting based on the
1388 // environment, then immediately replacing with the value passed in.
1389 // The logic is simpler that way, and performance shouldn't be an issue.
1390}
1391
1392
1393#if U_POSIX_LOCALE
1394/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1395 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1396 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1397 */
1398static const char *uprv_getPOSIXIDForCategory(int category)
1399{
1400 const char* posixID = NULL;
1401 if (category == LC_MESSAGES || category == LC_CTYPE) {
1402 /*
1403 * On Solaris two different calls to setlocale can result in
1404 * different values. Only get this value once.
1405 *
1406 * We must check this first because an application can set this.
1407 *
1408 * LC_ALL can't be used because it's platform dependent. The LANG
1409 * environment variable seems to affect LC_CTYPE variable by default.
1410 * Here is what setlocale(LC_ALL, NULL) can return.
1411 * HPUX can return 'C C C C C C C'
1412 * Solaris can return /en_US/C/C/C/C/C on the second try.
1413 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1414 *
1415 * The default codepage detection also needs to use LC_CTYPE.
1416 *
1417 * Do not call setlocale(LC_*, "")! Using an empty string instead
1418 * of NULL, will modify the libc behavior.
1419 */
1420 posixID = setlocale(category, NULL);
1421 if ((posixID == 0)
1422 || (uprv_strcmp("C", posixID) == 0)
1423 || (uprv_strcmp("POSIX", posixID) == 0))
1424 {
1425 /* Maybe we got some garbage. Try something more reasonable */
1426 posixID = getenv("LC_ALL");
1427 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1428 * This is needed to properly handle empty env. variables
1429 */
1430#if U_PLATFORM == U_PF_SOLARIS
1431 if ((posixID == 0) || (posixID[0] == '\0')) {
1432 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1433 if ((posixID == 0) || (posixID[0] == '\0')) {
1434#else
1435 if (posixID == 0) {
1436 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1437 if (posixID == 0) {
1438#endif
1439 posixID = getenv("LANG");
1440 }
1441 }
1442 }
1443 }
1444 if ((posixID==0)
1445 || (uprv_strcmp("C", posixID) == 0)
1446 || (uprv_strcmp("POSIX", posixID) == 0))
1447 {
1448 /* Nothing worked. Give it a nice POSIX default value. */
1449 posixID = "en_US_POSIX";
1450 }
1451 return posixID;
1452}
1453
1454/* Return just the POSIX id for the default locale, whatever happens to be in
1455 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1456 */
1457static const char *uprv_getPOSIXIDForDefaultLocale(void)
1458{
1459 static const char* posixID = NULL;
1460 if (posixID == 0) {
1461 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1462 }
1463 return posixID;
1464}
1465
1466#if !U_CHARSET_IS_UTF8
1467/* Return just the POSIX id for the default codepage, whatever happens to be in
1468 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1469 */
1470static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1471{
1472 static const char* posixID = NULL;
1473 if (posixID == 0) {
1474 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1475 }
1476 return posixID;
1477}
1478#endif
1479#endif
1480
1481/* NOTE: The caller should handle thread safety */
1482U_CAPI const char* U_EXPORT2
1483uprv_getDefaultLocaleID()
1484{
1485#if U_POSIX_LOCALE
1486/*
1487 Note that: (a '!' means the ID is improper somehow)
1488 LC_ALL ----> default_loc codepage
1489--------------------------------------------------------
1490 ab.CD ab CD
1491 ab@CD ab__CD -
1492 ab@CD.EF ab__CD EF
1493
1494 ab_CD.EF@GH ab_CD_GH EF
1495
1496Some 'improper' ways to do the same as above:
1497 ! ab_CD@GH.EF ab_CD_GH EF
1498 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1499 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1500
1501 _CD@GH _CD_GH -
1502 _CD.EF@GH _CD_GH EF
1503
1504The variant cannot have dots in it.
1505The 'rightmost' variant (@xxx) wins.
1506The leftmost codepage (.xxx) wins.
1507*/
1508 char *correctedPOSIXLocale = 0;
1509 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1510 const char *p;
1511 const char *q;
1512 int32_t len;
1513
1514 /* Format: (no spaces)
1515 ll [ _CC ] [ . MM ] [ @ VV]
1516
1517 l = lang, C = ctry, M = charmap, V = variant
1518 */
1519
1520 if (gCorrectedPOSIXLocale != NULL) {
1521 return gCorrectedPOSIXLocale;
1522 }
1523
1524 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1525 /* assume new locale can't be larger than old one? */
1526 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1527 /* Exit on memory allocation error. */
1528 if (correctedPOSIXLocale == NULL) {
1529 return NULL;
1530 }
1531 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1532 correctedPOSIXLocale[p-posixID] = 0;
1533
1534 /* do not copy after the @ */
1535 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1536 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1537 }
1538 }
1539
1540 /* Note that we scan the *uncorrected* ID. */
1541 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1542 if (correctedPOSIXLocale == NULL) {
1543 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1544 /* Exit on memory allocation error. */
1545 if (correctedPOSIXLocale == NULL) {
1546 return NULL;
1547 }
1548 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1549 correctedPOSIXLocale[p-posixID] = 0;
1550 }
1551 p++;
1552
1553 /* Take care of any special cases here.. */
1554 if (!uprv_strcmp(p, "nynorsk")) {
1555 p = "NY";
1556 /* Don't worry about no__NY. In practice, it won't appear. */
1557 }
1558
1559 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1560 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1561 }
1562 else {
1563 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1564 }
1565
1566 if ((q = uprv_strchr(p, '.')) != NULL) {
1567 /* How big will the resulting string be? */
1568 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1569 uprv_strncat(correctedPOSIXLocale, p, q-p);
1570 correctedPOSIXLocale[len] = 0;
1571 }
1572 else {
1573 /* Anything following the @ sign */
1574 uprv_strcat(correctedPOSIXLocale, p);
1575 }
1576
1577 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1578 * How about 'russian' -> 'ru'?
1579 * Many of the other locales using ISO codes will be handled by the
1580 * canonicalization functions in uloc_getDefault.
1581 */
1582 }
1583
1584 /* Was a correction made? */
1585 if (correctedPOSIXLocale != NULL) {
1586 posixID = correctedPOSIXLocale;
1587 }
1588 else {
1589 /* copy it, just in case the original pointer goes away. See j2395 */
1590 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1591 /* Exit on memory allocation error. */
1592 if (correctedPOSIXLocale == NULL) {
1593 return NULL;
1594 }
1595 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1596 }
1597
1598 if (gCorrectedPOSIXLocale == NULL) {
1599 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1600 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1601 correctedPOSIXLocale = NULL;
1602 }
1603
1604 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1605 uprv_free(correctedPOSIXLocale);
1606 }
1607
1608 return posixID;
1609
1610#elif U_PLATFORM_USES_ONLY_WIN32_API
1611#define POSIX_LOCALE_CAPACITY 64
1612 UErrorCode status = U_ZERO_ERROR;
1613 char *correctedPOSIXLocale = 0;
1614
1615 if (gCorrectedPOSIXLocale != NULL) {
1616 return gCorrectedPOSIXLocale;
1617 }
1618
1619 LCID id = GetThreadLocale();
1620 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1621 if (correctedPOSIXLocale) {
1622 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1623 if (U_SUCCESS(status)) {
1624 *(correctedPOSIXLocale + posixLen) = 0;
1625 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1626 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1627 } else {
1628 uprv_free(correctedPOSIXLocale);
1629 }
1630 }
1631
1632 if (gCorrectedPOSIXLocale == NULL) {
1633 return "en_US";
1634 }
1635 return gCorrectedPOSIXLocale;
1636
1637#elif U_PLATFORM == U_PF_OS400
1638 /* locales are process scoped and are by definition thread safe */
1639 static char correctedLocale[64];
1640 const char *localeID = getenv("LC_ALL");
1641 char *p;
1642
1643 if (localeID == NULL)
1644 localeID = getenv("LANG");
1645 if (localeID == NULL)
1646 localeID = setlocale(LC_ALL, NULL);
1647 /* Make sure we have something... */
1648 if (localeID == NULL)
1649 return "en_US_POSIX";
1650
1651 /* Extract the locale name from the path. */
1652 if((p = uprv_strrchr(localeID, '/')) != NULL)
1653 {
1654 /* Increment p to start of locale name. */
1655 p++;
1656 localeID = p;
1657 }
1658
1659 /* Copy to work location. */
1660 uprv_strcpy(correctedLocale, localeID);
1661
1662 /* Strip off the '.locale' extension. */
1663 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1664 *p = 0;
1665 }
1666
1667 /* Upper case the locale name. */
1668 T_CString_toUpperCase(correctedLocale);
1669
1670 /* See if we are using the POSIX locale. Any of the
1671 * following are equivalent and use the same QLGPGCMA
1672 * (POSIX) locale.
1673 * QLGPGCMA2 means UCS2
1674 * QLGPGCMA_4 means UTF-32
1675 * QLGPGCMA_8 means UTF-8
1676 */
1677 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1678 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1679 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1680 {
1681 uprv_strcpy(correctedLocale, "en_US_POSIX");
1682 }
1683 else
1684 {
1685 int16_t LocaleLen;
1686
1687 /* Lower case the lang portion. */
1688 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1689 {
1690 *p = uprv_tolower(*p);
1691 }
1692
1693 /* Adjust for Euro. After '_E' add 'URO'. */
1694 LocaleLen = uprv_strlen(correctedLocale);
1695 if (correctedLocale[LocaleLen - 2] == '_' &&
1696 correctedLocale[LocaleLen - 1] == 'E')
1697 {
1698 uprv_strcat(correctedLocale, "URO");
1699 }
1700
1701 /* If using Lotus-based locale then convert to
1702 * equivalent non Lotus.
1703 */
1704 else if (correctedLocale[LocaleLen - 2] == '_' &&
1705 correctedLocale[LocaleLen - 1] == 'L')
1706 {
1707 correctedLocale[LocaleLen - 2] = 0;
1708 }
1709
1710 /* There are separate simplified and traditional
1711 * locales called zh_HK_S and zh_HK_T.
1712 */
1713 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1714 {
1715 uprv_strcpy(correctedLocale, "zh_HK");
1716 }
1717
1718 /* A special zh_CN_GBK locale...
1719 */
1720 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1721 {
1722 uprv_strcpy(correctedLocale, "zh_CN");
1723 }
1724
1725 }
1726
1727 return correctedLocale;
1728#endif
1729
1730}
1731
1732#if !U_CHARSET_IS_UTF8
1733#if U_POSIX_LOCALE
1734/*
1735Due to various platform differences, one platform may specify a charset,
1736when they really mean a different charset. Remap the names so that they are
1737compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1738here. Before adding anything to this function, please consider adding unique
1739names to the ICU alias table in the data directory.
1740*/
1741static const char*
1742remapPlatformDependentCodepage(const char *locale, const char *name) {
1743 if (locale != NULL && *locale == 0) {
1744 /* Make sure that an empty locale is handled the same way. */
1745 locale = NULL;
1746 }
1747 if (name == NULL) {
1748 return NULL;
1749 }
1750#if U_PLATFORM == U_PF_AIX
1751 if (uprv_strcmp(name, "IBM-943") == 0) {
1752 /* Use the ASCII compatible ibm-943 */
1753 name = "Shift-JIS";
1754 }
1755 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1756 /* Use the windows-1252 that contains the Euro */
1757 name = "IBM-5348";
1758 }
1759#elif U_PLATFORM == U_PF_SOLARIS
1760 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1761 /* Solaris underspecifies the "EUC" name. */
1762 if (uprv_strcmp(locale, "zh_CN") == 0) {
1763 name = "EUC-CN";
1764 }
1765 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1766 name = "EUC-TW";
1767 }
1768 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1769 name = "EUC-KR";
1770 }
1771 }
1772 else if (uprv_strcmp(name, "eucJP") == 0) {
1773 /*
1774 ibm-954 is the best match.
1775 ibm-33722 is the default for eucJP (similar to Windows).
1776 */
1777 name = "eucjis";
1778 }
1779 else if (uprv_strcmp(name, "646") == 0) {
1780 /*
1781 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1782 * ISO-8859-1 instead of US-ASCII(646).
1783 */
1784 name = "ISO-8859-1";
1785 }
1786#elif U_PLATFORM_IS_DARWIN_BASED
1787 if (locale == NULL && *name == 0) {
1788 /*
1789 No locale was specified, and an empty name was passed in.
1790 This usually indicates that nl_langinfo didn't return valid information.
1791 Mac OS X uses UTF-8 by default (especially the locale data and console).
1792 */
1793 name = "UTF-8";
1794 }
1795 else if (uprv_strcmp(name, "CP949") == 0) {
1796 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1797 name = "EUC-KR";
1798 }
1799 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1800 /*
1801 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1802 */
1803 name = "UTF-8";
1804 }
1805#elif U_PLATFORM == U_PF_BSD
1806 if (uprv_strcmp(name, "CP949") == 0) {
1807 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1808 name = "EUC-KR";
1809 }
1810#elif U_PLATFORM == U_PF_HPUX
1811 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1812 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1813 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1814 name = "hkbig5";
1815 }
1816 else if (uprv_strcmp(name, "eucJP") == 0) {
1817 /*
1818 ibm-1350 is the best match, but unavailable.
1819 ibm-954 is mostly a superset of ibm-1350.
1820 ibm-33722 is the default for eucJP (similar to Windows).
1821 */
1822 name = "eucjis";
1823 }
1824#elif U_PLATFORM == U_PF_LINUX
1825 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1826 /* Linux underspecifies the "EUC" name. */
1827 if (uprv_strcmp(locale, "korean") == 0) {
1828 name = "EUC-KR";
1829 }
1830 else if (uprv_strcmp(locale, "japanese") == 0) {
1831 /* See comment below about eucJP */
1832 name = "eucjis";
1833 }
1834 }
1835 else if (uprv_strcmp(name, "eucjp") == 0) {
1836 /*
1837 ibm-1350 is the best match, but unavailable.
1838 ibm-954 is mostly a superset of ibm-1350.
1839 ibm-33722 is the default for eucJP (similar to Windows).
1840 */
1841 name = "eucjis";
1842 }
1843 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1844 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1845 /*
1846 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1847 */
1848 name = "UTF-8";
1849 }
1850 /*
1851 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1852 * it by falling back to 'US-ASCII' when NULL is returned from this
1853 * function. So, we don't have to worry about it here.
1854 */
1855#endif
1856 /* return NULL when "" is passed in */
1857 if (*name == 0) {
1858 name = NULL;
1859 }
1860 return name;
1861}
1862
1863static const char*
1864getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1865{
1866 char localeBuf[100];
1867 const char *name = NULL;
1868 char *variant = NULL;
1869
1870 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1871 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1872 uprv_strncpy(localeBuf, localeName, localeCapacity);
1873 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1874 name = uprv_strncpy(buffer, name+1, buffCapacity);
1875 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1876 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1877 *variant = 0;
1878 }
1879 name = remapPlatformDependentCodepage(localeBuf, name);
1880 }
1881 return name;
1882}
1883#endif
1884
1885static const char*
1886int_getDefaultCodepage()
1887{
1888#if U_PLATFORM == U_PF_OS400
1889 uint32_t ccsid = 37; /* Default to ibm-37 */
1890 static char codepage[64];
1891 Qwc_JOBI0400_t jobinfo;
1892 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1893
1894 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1895 "* ", " ", &error);
1896
1897 if (error.Bytes_Available == 0) {
1898 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1899 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1900 }
1901 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1902 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1903 }
1904 /* else use the default */
1905 }
1906 sprintf(codepage,"ibm-%d", ccsid);
1907 return codepage;
1908
1909#elif U_PLATFORM == U_PF_OS390
1910 static char codepage[64];
1911
1912 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1913 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1914 codepage[63] = 0; /* NULL terminate */
1915
1916 return codepage;
1917
1918#elif U_PLATFORM_USES_ONLY_WIN32_API
1919 static char codepage[64];
1920 sprintf(codepage, "windows-%d", GetACP());
1921 return codepage;
1922
1923#elif U_POSIX_LOCALE
1924 static char codesetName[100];
1925 const char *localeName = NULL;
1926 const char *name = NULL;
1927
1928 localeName = uprv_getPOSIXIDForDefaultCodepage();
1929 uprv_memset(codesetName, 0, sizeof(codesetName));
1930 /* On Solaris nl_langinfo returns C locale values unless setlocale
1931 * was called earlier.
1932 */
1933#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
1934 /* When available, check nl_langinfo first because it usually gives more
1935 useful names. It depends on LC_CTYPE.
1936 nl_langinfo may use the same buffer as setlocale. */
1937 {
1938 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1939#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1940 /*
1941 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1942 * instead of ASCII.
1943 */
1944 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1945 codeset = remapPlatformDependentCodepage(localeName, codeset);
1946 } else
1947#endif
1948 {
1949 codeset = remapPlatformDependentCodepage(NULL, codeset);
1950 }
1951
1952 if (codeset != NULL) {
1953 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1954 codesetName[sizeof(codesetName)-1] = 0;
1955 return codesetName;
1956 }
1957 }
1958#endif
1959
1960 /* Use setlocale in a nice way, and then check some environment variables.
1961 Maybe the application used setlocale already.
1962 */
1963 uprv_memset(codesetName, 0, sizeof(codesetName));
1964 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1965 if (name) {
1966 /* if we can find the codeset name from setlocale, return that. */
1967 return name;
1968 }
1969
1970 if (*codesetName == 0)
1971 {
1972 /* Everything failed. Return US ASCII (ISO 646). */
1973 (void)uprv_strcpy(codesetName, "US-ASCII");
1974 }
1975 return codesetName;
1976#else
1977 return "US-ASCII";
1978#endif
1979}
1980
1981
1982U_CAPI const char* U_EXPORT2
1983uprv_getDefaultCodepage()
1984{
1985 static char const *name = NULL;
1986 umtx_lock(NULL);
1987 if (name == NULL) {
1988 name = int_getDefaultCodepage();
1989 }
1990 umtx_unlock(NULL);
1991 return name;
1992}
1993#endif /* !U_CHARSET_IS_UTF8 */
1994
1995
1996/* end of platform-specific implementation -------------- */
1997
1998/* version handling --------------------------------------------------------- */
1999
2000U_CAPI void U_EXPORT2
2001u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2002 char *end;
2003 uint16_t part=0;
2004
2005 if(versionArray==NULL) {
2006 return;
2007 }
2008
2009 if(versionString!=NULL) {
2010 for(;;) {
2011 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2012 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2013 break;
2014 }
2015 versionString=end+1;
2016 }
2017 }
2018
2019 while(part<U_MAX_VERSION_LENGTH) {
2020 versionArray[part++]=0;
2021 }
2022}
2023
2024U_CAPI void U_EXPORT2
2025u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2026 if(versionArray!=NULL && versionString!=NULL) {
2027 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2028 int32_t len = u_strlen(versionString);
2029 if(len>U_MAX_VERSION_STRING_LENGTH) {
2030 len = U_MAX_VERSION_STRING_LENGTH;
2031 }
2032 u_UCharsToChars(versionString, versionChars, len);
2033 versionChars[len]=0;
2034 u_versionFromString(versionArray, versionChars);
2035 }
2036}
2037
2038U_CAPI void U_EXPORT2
2039u_versionToString(const UVersionInfo versionArray, char *versionString) {
2040 uint16_t count, part;
2041 uint8_t field;
2042
2043 if(versionString==NULL) {
2044 return;
2045 }
2046
2047 if(versionArray==NULL) {
2048 versionString[0]=0;
2049 return;
2050 }
2051
2052 /* count how many fields need to be written */
2053 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2054 }
2055
2056 if(count <= 1) {
2057 count = 2;
2058 }
2059
2060 /* write the first part */
2061 /* write the decimal field value */
2062 field=versionArray[0];
2063 if(field>=100) {
2064 *versionString++=(char)('0'+field/100);
2065 field%=100;
2066 }
2067 if(field>=10) {
2068 *versionString++=(char)('0'+field/10);
2069 field%=10;
2070 }
2071 *versionString++=(char)('0'+field);
2072
2073 /* write the following parts */
2074 for(part=1; part<count; ++part) {
2075 /* write a dot first */
2076 *versionString++=U_VERSION_DELIMITER;
2077
2078 /* write the decimal field value */
2079 field=versionArray[part];
2080 if(field>=100) {
2081 *versionString++=(char)('0'+field/100);
2082 field%=100;
2083 }
2084 if(field>=10) {
2085 *versionString++=(char)('0'+field/10);
2086 field%=10;
2087 }
2088 *versionString++=(char)('0'+field);
2089 }
2090
2091 /* NUL-terminate */
2092 *versionString=0;
2093}
2094
2095U_CAPI void U_EXPORT2
2096u_getVersion(UVersionInfo versionArray) {
2097 (void)copyright; // Suppress unused variable warning from clang.
2098 u_versionFromString(versionArray, U_ICU_VERSION);
2099}
2100
2101/**
2102 * icucfg.h dependent code
2103 */
2104
2105#if U_ENABLE_DYLOAD
2106
2107#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2108
2109#if HAVE_DLFCN_H
2110
2111#ifdef __MVS__
2112#ifndef __SUSV3
2113#define __SUSV3 1
2114#endif
2115#endif
2116#include <dlfcn.h>
2117#endif
2118
2119U_INTERNAL void * U_EXPORT2
2120uprv_dl_open(const char *libName, UErrorCode *status) {
2121 void *ret = NULL;
2122 if(U_FAILURE(*status)) return ret;
2123 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2124 if(ret==NULL) {
2125#ifdef U_TRACE_DYLOAD
2126 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2127#endif
2128 *status = U_MISSING_RESOURCE_ERROR;
2129 }
2130 return ret;
2131}
2132
2133U_INTERNAL void U_EXPORT2
2134uprv_dl_close(void *lib, UErrorCode *status) {
2135 if(U_FAILURE(*status)) return;
2136 dlclose(lib);
2137}
2138
2139U_INTERNAL UVoidFunction* U_EXPORT2
2140uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2141 union {
2142 UVoidFunction *fp;
2143 void *vp;
2144 } uret;
2145 uret.fp = NULL;
2146 if(U_FAILURE(*status)) return uret.fp;
2147 uret.vp = dlsym(lib, sym);
2148 if(uret.vp == NULL) {
2149#ifdef U_TRACE_DYLOAD
2150 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2151#endif
2152 *status = U_MISSING_RESOURCE_ERROR;
2153 }
2154 return uret.fp;
2155}
2156
2157#else
2158
2159/* null (nonexistent) implementation. */
2160
2161U_INTERNAL void * U_EXPORT2
2162uprv_dl_open(const char *libName, UErrorCode *status) {
2163 if(U_FAILURE(*status)) return NULL;
2164 *status = U_UNSUPPORTED_ERROR;
2165 return NULL;
2166}
2167
2168U_INTERNAL void U_EXPORT2
2169uprv_dl_close(void *lib, UErrorCode *status) {
2170 if(U_FAILURE(*status)) return;
2171 *status = U_UNSUPPORTED_ERROR;
2172 return;
2173}
2174
2175
2176U_INTERNAL UVoidFunction* U_EXPORT2
2177uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2178 if(U_SUCCESS(*status)) {
2179 *status = U_UNSUPPORTED_ERROR;
2180 }
2181 return (UVoidFunction*)NULL;
2182}
2183
2184
2185
2186#endif
2187
2188#elif U_PLATFORM_USES_ONLY_WIN32_API
2189
2190U_INTERNAL void * U_EXPORT2
2191uprv_dl_open(const char *libName, UErrorCode *status) {
2192 HMODULE lib = NULL;
2193
2194 if(U_FAILURE(*status)) return NULL;
2195
2196 lib = LoadLibraryA(libName);
2197
2198 if(lib==NULL) {
2199 *status = U_MISSING_RESOURCE_ERROR;
2200 }
2201
2202 return (void*)lib;
2203}
2204
2205U_INTERNAL void U_EXPORT2
2206uprv_dl_close(void *lib, UErrorCode *status) {
2207 HMODULE handle = (HMODULE)lib;
2208 if(U_FAILURE(*status)) return;
2209
2210 FreeLibrary(handle);
2211
2212 return;
2213}
2214
2215
2216U_INTERNAL UVoidFunction* U_EXPORT2
2217uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2218 HMODULE handle = (HMODULE)lib;
2219 UVoidFunction* addr = NULL;
2220
2221 if(U_FAILURE(*status) || lib==NULL) return NULL;
2222
2223 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2224
2225 if(addr==NULL) {
2226 DWORD lastError = GetLastError();
2227 if(lastError == ERROR_PROC_NOT_FOUND) {
2228 *status = U_MISSING_RESOURCE_ERROR;
2229 } else {
2230 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2231 }
2232 }
2233
2234 return addr;
2235}
2236
2237
2238#else
2239
2240/* No dynamic loading set. */
2241
2242U_INTERNAL void * U_EXPORT2
2243uprv_dl_open(const char *libName, UErrorCode *status) {
2244 (void)libName;
2245 if(U_FAILURE(*status)) return NULL;
2246 *status = U_UNSUPPORTED_ERROR;
2247 return NULL;
2248}
2249
2250U_INTERNAL void U_EXPORT2
2251uprv_dl_close(void *lib, UErrorCode *status) {
2252 (void)lib;
2253 if(U_FAILURE(*status)) return;
2254 *status = U_UNSUPPORTED_ERROR;
2255 return;
2256}
2257
2258
2259U_INTERNAL UVoidFunction* U_EXPORT2
2260uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2261 (void)lib;
2262 (void)sym;
2263 if(U_SUCCESS(*status)) {
2264 *status = U_UNSUPPORTED_ERROR;
2265 }
2266 return (UVoidFunction*)NULL;
2267}
2268
2269#endif /* U_ENABLE_DYLOAD */
2270
2271/*
2272 * Hey, Emacs, please set the following:
2273 *
2274 * Local Variables:
2275 * indent-tabs-mode: nil
2276 * End:
2277 *
2278 */