]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.cpp
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 // Defines _XOPEN_SOURCE for access to POSIX functions.
41 // Must be before any other #includes.
42 #include "uposixdefs.h"
43
44 /* include ICU headers */
45 #include "unicode/utypes.h"
46 #include "unicode/putil.h"
47 #include "unicode/ustring.h"
48 #include "putilimp.h"
49 #include "uassert.h"
50 #include "umutex.h"
51 #include "cmemory.h"
52 #include "cstring.h"
53 #include "locmap.h"
54 #include "ucln_cmn.h"
55
56 /* Include standard headers. */
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <math.h>
61 #include <locale.h>
62 #include <float.h>
63
64 #ifndef U_COMMON_IMPLEMENTATION
65 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
66 #endif
67
68
69 /* include system headers */
70 #if U_PLATFORM_USES_ONLY_WIN32_API
71 /*
72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
74 * to use native APIs as much as possible?
75 */
76 # define WIN32_LEAN_AND_MEAN
77 # define VC_EXTRALEAN
78 # define NOUSER
79 # define NOSERVICE
80 # define NOIME
81 # define NOMCX
82 # include <windows.h>
83 # include "wintz.h"
84 #elif U_PLATFORM == U_PF_OS400
85 # include <float.h>
86 # include <qusec.h> /* error code structure */
87 # include <qusrjobi.h>
88 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
89 # include <mih/testptr.h> /* For uprv_maximumPtr */
90 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
91 # include <Files.h>
92 # include <IntlResources.h>
93 # include <Script.h>
94 # include <Folders.h>
95 # include <MacTypes.h>
96 # include <TextUtils.h>
97 # define ICU_NO_USER_DATA_OVERRIDE 1
98 #elif U_PLATFORM == U_PF_OS390
99 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
100 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD
101 # include <limits.h>
102 # include <unistd.h>
103 #elif U_PLATFORM == U_PF_QNX
104 # include <sys/neutrino.h>
105 #elif U_PLATFORM == U_PF_SOLARIS
106 # ifndef _XPG4_2
107 # define _XPG4_2
108 # endif
109 #endif
110
111 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
112 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
113 #undef __STRICT_ANSI__
114 #endif
115
116 /*
117 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
118 */
119 #include <time.h>
120
121 #if !U_PLATFORM_USES_ONLY_WIN32_API
122 #include <sys/time.h>
123 #endif
124
125 /*
126 * Only include langinfo.h if we have a way to get the codeset. If we later
127 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
128 *
129 */
130
131 #if U_HAVE_NL_LANGINFO_CODESET
132 #include <langinfo.h>
133 #endif
134
135 /**
136 * Simple things (presence of functions, etc) should just go in configure.in and be added to
137 * icucfg.h via autoheader.
138 */
139 #if U_PLATFORM_IMPLEMENTS_POSIX
140 # if U_PLATFORM == U_PF_OS400
141 # define HAVE_DLFCN_H 0
142 # define HAVE_DLOPEN 0
143 # else
144 # ifndef HAVE_DLFCN_H
145 # define HAVE_DLFCN_H 1
146 # endif
147 # ifndef HAVE_DLOPEN
148 # define HAVE_DLOPEN 1
149 # endif
150 # endif
151 # ifndef HAVE_GETTIMEOFDAY
152 # define HAVE_GETTIMEOFDAY 1
153 # endif
154 #else
155 # define HAVE_DLFCN_H 0
156 # define HAVE_DLOPEN 0
157 # define HAVE_GETTIMEOFDAY 0
158 #endif
159
160 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
161
162 /* Define the extension for data files, again... */
163 #define DATA_TYPE "dat"
164
165 /* Leave this copyright notice here! */
166 static const char copyright[] = U_COPYRIGHT_STRING;
167
168 /* floating point implementations ------------------------------------------- */
169
170 /* We return QNAN rather than SNAN*/
171 #define SIGN 0x80000000U
172
/*
 * Overlay of a double with a 64-bit integer, so floating-point constants can
 * be written down by their bit pattern.
 */
typedef union {
    /* The integer member has to come first: a C89 brace initializer only
       initializes the first member of a union. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern returned for NaN (a quiet NaN, not a signaling one). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
180
181 /*---------------------------------------------------------------------------
182 Platform utilities
183 Our general strategy is to assume we're on a POSIX platform. Platforms which
184 are non-POSIX must declare themselves so. The default POSIX implementation
185 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
186 functions).
187 ---------------------------------------------------------------------------*/
188
189 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
190 # undef U_POSIX_LOCALE
191 #else
192 # define U_POSIX_LOCALE 1
193 #endif
194
195 /*
196 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
197 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
198 */
199 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian machines those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
209
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian machines those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
219 #endif /* !IEEE_754 */
220
221 #if IEEE_754
222 static UBool
223 u_signBit(double d) {
224 uint8_t hiByte;
225 #if U_IS_BIG_ENDIAN
226 hiByte = *(uint8_t *)&d;
227 #else
228 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
229 #endif
230 return (hiByte & 0x80) != 0;
231 }
232 #endif
233
234
235
236 #if defined (U_DEBUG_FAKETIME)
237 /* Override the clock to test things without having to move the system clock.
238 * Assumes POSIX gettimeofday() will function
239 */
240 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
241 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
242 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
243 static UMTX fakeClockMutex = NULL;
244
245 static UDate getUTCtime_real() {
246 struct timeval posixTime;
247 gettimeofday(&posixTime, NULL);
248 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
249 }
250
251 static UDate getUTCtime_fake() {
252 umtx_lock(&fakeClockMutex);
253 if(!fakeClock_set) {
254 UDate real = getUTCtime_real();
255 const char *fake_start = getenv("U_FAKETIME_START");
256 if((fake_start!=NULL) && (fake_start[0]!=0)) {
257 sscanf(fake_start,"%lf",&fakeClock_t0);
258 fakeClock_dt = fakeClock_t0 - real;
259 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
260 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
261 fakeClock_t0, fake_start, fakeClock_dt, real);
262 } else {
263 fakeClock_dt = 0;
264 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
265 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
266 }
267 fakeClock_set = TRUE;
268 }
269 umtx_unlock(&fakeClockMutex);
270
271 return getUTCtime_real() + fakeClock_dt;
272 }
273 #endif
274
275 #if U_PLATFORM_USES_ONLY_WIN32_API
276 typedef union {
277 int64_t int64;
278 FILETIME fileTime;
279 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
280
281 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
282 #define EPOCH_BIAS INT64_C(116444736000000000)
283 #define HECTONANOSECOND_PER_MILLISECOND 10000
284
285 #endif
286
287 /*---------------------------------------------------------------------------
288 Universal Implementations
289 These are designed to work on all platforms. Try these, and if they
290 don't work on your platform, then special case your platform with new
291 implementations.
292 ---------------------------------------------------------------------------*/
293
294 U_CAPI UDate U_EXPORT2
295 uprv_getUTCtime()
296 {
297 #if defined(U_DEBUG_FAKETIME)
298 return getUTCtime_fake(); /* Hook for overriding the clock */
299 #else
300 return uprv_getRawUTCtime();
301 #endif
302 }
303
304 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
305 U_CAPI UDate U_EXPORT2
306 uprv_getRawUTCtime()
307 {
308 #if U_PLATFORM == U_PF_CLASSIC_MACOS
309 time_t t, t1, t2;
310 struct tm tmrec;
311
312 uprv_memset( &tmrec, 0, sizeof(tmrec) );
313 tmrec.tm_year = 70;
314 tmrec.tm_mon = 0;
315 tmrec.tm_mday = 1;
316 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
317
318 time(&t);
319 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
320 t2 = mktime(&tmrec); /* seconds of current GMT*/
321 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
322 #elif U_PLATFORM_USES_ONLY_WIN32_API
323
324 FileTimeConversion winTime;
325 GetSystemTimeAsFileTime(&winTime.fileTime);
326 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
327 #else
328
329 #if HAVE_GETTIMEOFDAY
330 struct timeval posixTime;
331 gettimeofday(&posixTime, NULL);
332 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
333 #else
334 time_t epochtime;
335 time(&epochtime);
336 return (UDate)epochtime * U_MILLIS_PER_SECOND;
337 #endif
338
339 #endif
340 }
341
342 /*-----------------------------------------------------------------------------
343 IEEE 754
344 These methods detect and return NaN and infinity values for doubles
345 conforming to IEEE 754. Platforms which support this standard include X86,
346 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
347 If this doesn't work on your platform, you have non-IEEE floating-point, and
348 will need to code your own versions. A naive implementation is to return 0.0
349 for getNaN and getInfinity, and false for isNaN and isInfinite.
350 ---------------------------------------------------------------------------*/
351
352 U_CAPI UBool U_EXPORT2
353 uprv_isNaN(double number)
354 {
355 #if IEEE_754
356 BitPatternConversion convertedNumber;
357 convertedNumber.d64 = number;
358 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
359 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
360
361 #elif U_PLATFORM == U_PF_OS390
362 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
363 sizeof(uint32_t));
364 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
365 sizeof(uint32_t));
366
367 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
368 (lowBits == 0x00000000L);
369
370 #else
371 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
372 /* you'll need to replace this default implementation with what's correct*/
373 /* for your platform.*/
374 return number != number;
375 #endif
376 }
377
378 U_CAPI UBool U_EXPORT2
379 uprv_isInfinite(double number)
380 {
381 #if IEEE_754
382 BitPatternConversion convertedNumber;
383 convertedNumber.d64 = number;
384 /* Infinity is exactly 0x7FF0000000000000U. */
385 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
386 #elif U_PLATFORM == U_PF_OS390
387 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
388 sizeof(uint32_t));
389 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
390 sizeof(uint32_t));
391
392 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
393
394 #else
395 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
396 /* value, you'll need to replace this default implementation with what's*/
397 /* correct for your platform.*/
398 return number == (2.0 * number);
399 #endif
400 }
401
402 U_CAPI UBool U_EXPORT2
403 uprv_isPositiveInfinity(double number)
404 {
405 #if IEEE_754 || U_PLATFORM == U_PF_OS390
406 return (UBool)(number > 0 && uprv_isInfinite(number));
407 #else
408 return uprv_isInfinite(number);
409 #endif
410 }
411
412 U_CAPI UBool U_EXPORT2
413 uprv_isNegativeInfinity(double number)
414 {
415 #if IEEE_754 || U_PLATFORM == U_PF_OS390
416 return (UBool)(number < 0 && uprv_isInfinite(number));
417
418 #else
419 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
420 sizeof(uint32_t));
421 return((highBits & SIGN) && uprv_isInfinite(number));
422
423 #endif
424 }
425
426 U_CAPI double U_EXPORT2
427 uprv_getNaN()
428 {
429 #if IEEE_754 || U_PLATFORM == U_PF_OS390
430 return gNan.d64;
431 #else
432 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
433 /* you'll need to replace this default implementation with what's correct*/
434 /* for your platform.*/
435 return 0.0;
436 #endif
437 }
438
439 U_CAPI double U_EXPORT2
440 uprv_getInfinity()
441 {
442 #if IEEE_754 || U_PLATFORM == U_PF_OS390
443 return gInf.d64;
444 #else
445 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
446 /* value, you'll need to replace this default implementation with what's*/
447 /* correct for your platform.*/
448 return 0.0;
449 #endif
450 }
451
452 U_CAPI double U_EXPORT2
453 uprv_floor(double x)
454 {
455 return floor(x);
456 }
457
458 U_CAPI double U_EXPORT2
459 uprv_ceil(double x)
460 {
461 return ceil(x);
462 }
463
464 U_CAPI double U_EXPORT2
465 uprv_round(double x)
466 {
467 return uprv_floor(x + 0.5);
468 }
469
470 U_CAPI double U_EXPORT2
471 uprv_fabs(double x)
472 {
473 return fabs(x);
474 }
475
476 U_CAPI double U_EXPORT2
477 uprv_modf(double x, double* y)
478 {
479 return modf(x, y);
480 }
481
482 U_CAPI double U_EXPORT2
483 uprv_fmod(double x, double y)
484 {
485 return fmod(x, y);
486 }
487
488 U_CAPI double U_EXPORT2
489 uprv_pow(double x, double y)
490 {
491 /* This is declared as "double pow(double x, double y)" */
492 return pow(x, y);
493 }
494
495 U_CAPI double U_EXPORT2
496 uprv_pow10(int32_t x)
497 {
498 return pow(10.0, (double)x);
499 }
500
501 U_CAPI double U_EXPORT2
502 uprv_fmax(double x, double y)
503 {
504 #if IEEE_754
505 /* first handle NaN*/
506 if(uprv_isNaN(x) || uprv_isNaN(y))
507 return uprv_getNaN();
508
509 /* check for -0 and 0*/
510 if(x == 0.0 && y == 0.0 && u_signBit(x))
511 return y;
512
513 #endif
514
515 /* this should work for all flt point w/o NaN and Inf special cases */
516 return (x > y ? x : y);
517 }
518
519 U_CAPI double U_EXPORT2
520 uprv_fmin(double x, double y)
521 {
522 #if IEEE_754
523 /* first handle NaN*/
524 if(uprv_isNaN(x) || uprv_isNaN(y))
525 return uprv_getNaN();
526
527 /* check for -0 and 0*/
528 if(x == 0.0 && y == 0.0 && u_signBit(y))
529 return y;
530
531 #endif
532
533 /* this should work for all flt point w/o NaN and Inf special cases */
534 return (x > y ? y : x);
535 }
536
537 /**
538 * Truncates the given double.
539 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
540 * This is different than calling floor() or ceil():
541 * floor(3.3) = 3, floor(-3.3) = -4
542 * ceil(3.3) = 4, ceil(-3.3) = -3
543 */
544 U_CAPI double U_EXPORT2
545 uprv_trunc(double d)
546 {
547 #if IEEE_754
548 /* handle error cases*/
549 if(uprv_isNaN(d))
550 return uprv_getNaN();
551 if(uprv_isInfinite(d))
552 return uprv_getInfinity();
553
554 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
555 return ceil(d);
556 else
557 return floor(d);
558
559 #else
560 return d >= 0 ? floor(d) : ceil(d);
561
562 #endif
563 }
564
565 /**
566 * Return the largest positive number that can be represented by an integer
567 * type of arbitrary bit length.
568 */
569 U_CAPI double U_EXPORT2
570 uprv_maxMantissa(void)
571 {
572 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
573 }
574
575 U_CAPI double U_EXPORT2
576 uprv_log(double d)
577 {
578 return log(d);
579 }
580
581 U_CAPI void * U_EXPORT2
582 uprv_maximumPtr(void * base)
583 {
584 #if U_PLATFORM == U_PF_OS400
585 /*
586 * With the provided function we should never be out of range of a given segment
587 * (a traditional/typical segment that is). Our segments have 5 bytes for the
588 * id and 3 bytes for the offset. The key is that the casting takes care of
589 * only retrieving the offset portion minus x1000. Hence, the smallest offset
590 * seen in a program is x001000 and when casted to an int would be 0.
591 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
592 *
593 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
594 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
595 * This function determines the activation based on the pointer that is passed in and
596 * calculates the appropriate maximum available size for
597 * each pointer type (TERASPACE and non-TERASPACE)
598 *
599 * Unlike other operating systems, the pointer model isn't determined at
600 * compile time on i5/OS.
601 */
602 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
603 /* if it is a TERASPACE pointer the max is 2GB - 4k */
604 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
605 }
606 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
607 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
608
609 #else
610 return U_MAX_PTR(base);
611 #endif
612 }
613
614 /*---------------------------------------------------------------------------
615 Platform-specific Implementations
616 Try these, and if they don't work on your platform, then special case your
617 platform with new implementations.
618 ---------------------------------------------------------------------------*/
619
620 /* Generic time zone layer -------------------------------------------------- */
621
622 /* Time zone utilities */
623 U_CAPI void U_EXPORT2
624 uprv_tzset()
625 {
626 #if defined(U_TZSET)
627 U_TZSET();
628 #else
629 /* no initialization*/
630 #endif
631 }
632
633 U_CAPI int32_t U_EXPORT2
634 uprv_timezone()
635 {
636 #ifdef U_TIMEZONE
637 return U_TIMEZONE;
638 #else
639 time_t t, t1, t2;
640 struct tm tmrec;
641 UBool dst_checked;
642 int32_t tdiff = 0;
643
644 time(&t);
645 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
646 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
647 t1 = mktime(&tmrec); /* local time in seconds*/
648 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
649 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
650 tdiff = t2 - t1;
651 /* imitate NT behaviour, which returns same timezone offset to GMT for
652 winter and summer*/
653 if (dst_checked)
654 tdiff += 3600;
655 return tdiff;
656 #endif
657 }
658
659 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
660 some platforms need to have it declared here. */
661
662 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
663 /* RS6000 and others reject char **tzname. */
664 extern U_IMPORT char *U_TZNAME[];
665 #endif
666
667 #if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD)
668 /* These platforms are likely to use Olson timezone IDs. */
669 #define CHECK_LOCALTIME_LINK 1
670 #if U_PLATFORM_IS_DARWIN_BASED
671 #include <tzfile.h>
672 #define TZZONEINFO (TZDIR "/")
673 #else
674 #define TZDEFAULT "/etc/localtime"
675 #define TZZONEINFO "/usr/share/zoneinfo/"
676 #endif
677 #if U_HAVE_DIRENT_H
678 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
679 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
680 symlinked to /etc/localtime, which makes searchForTZFile return
681 'localtime' when it's the first match. */
682 #define TZFILE_SKIP2 "localtime"
683 #define SEARCH_TZFILE
684 #include <dirent.h> /* Needed to search through system timezone files */
685 #endif
686 static char gTimeZoneBuffer[PATH_MAX];
687 static char *gTimeZoneBufferPtr = NULL;
688 #endif
689
690 #if !U_PLATFORM_USES_ONLY_WIN32_API
691 #define isNonDigit(ch) (ch < '0' || '9' < ch)
692 static UBool isValidOlsonID(const char *id) {
693 int32_t idx = 0;
694
695 /* Determine if this is something like Iceland (Olson ID)
696 or AST4ADT (non-Olson ID) */
697 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
698 idx++;
699 }
700
701 /* If we went through the whole string, then it might be okay.
702 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
703 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
704 The rest of the time it could be an Olson ID. George */
705 return (UBool)(id[idx] == 0
706 || uprv_strcmp(id, "PST8PDT") == 0
707 || uprv_strcmp(id, "MST7MDT") == 0
708 || uprv_strcmp(id, "CST6CDT") == 0
709 || uprv_strcmp(id, "EST5EDT") == 0);
710 }
711
712 /* On some Unix-like OS, 'posix' subdirectory in
713 /usr/share/zoneinfo replicates the top-level contents. 'right'
714 subdirectory has the same set of files, but individual files
715 are different from those in the top-level directory or 'posix'
716 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
717 has files for UTC.
718 When the first match for /etc/localtime is in either of them
719 (usually in posix because 'right' has different file contents),
720 or TZ environment variable points to one of them, createTimeZone
721 fails because, say, 'posix/America/New_York' is not an Olson
722 timezone id ('America/New_York' is). So, we have to skip
723 'posix/' and 'right/' at the beginning. */
724 static void skipZoneIDPrefix(const char** id) {
725 if (uprv_strncmp(*id, "posix/", 6) == 0
726 || uprv_strncmp(*id, "right/", 6) == 0)
727 {
728 *id += 6;
729 }
730 }
731 #endif
732
733 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
734
735 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps abbreviated zone names plus an offset to an Olson ID. */
typedef struct OffsetZoneMapping {
    /* Offset in seconds that the row applies to. */
    int32_t offsetSeconds;
    /* 0=U_DAYLIGHT_NONE, 1=daylight in June (U_DAYLIGHT_JUNE),
       2=daylight in December (U_DAYLIGHT_DECEMBER). */
    int32_t daylightType;
    /* Abbreviation used for standard time. */
    const char *stdID;
    /* Abbreviation used for daylight time. */
    const char *dstID;
    /* Olson ID that the row maps to. */
    const char *olsonID;
} OffsetZoneMapping;
743
/* Values stored in OffsetZoneMapping.daylightType. */
enum {
    U_DAYLIGHT_NONE = 0,
    U_DAYLIGHT_JUNE = 1,
    U_DAYLIGHT_DECEMBER = 2
};
745
746 /*
747 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
748 and maps it to an Olson ID.
749 Before adding anything to this list, take a look at
750 icu/source/tools/tzcode/tz.alias
751 Sometimes no daylight savings (0) is important to define due to aliases.
752 This list can be tested with icu/source/test/compat/tzone.pl
753 More values could be added to daylightType to increase precision.
754 */
755 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
756 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
757 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
758 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
759 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
760 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
761 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
762 {-36000, 2, "EST", "EST", "Australia/Sydney"},
763 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
764 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
765 {-34200, 2, "CST", "CST", "Australia/South"},
766 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
767 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
768 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
769 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
770 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
771 {-28800, 2, "WST", "WST", "Australia/West"},
772 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
773 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
774 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
775 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
776 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
777 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
778 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
779 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
780 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
781 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
782 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
783 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
784 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
785 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
786 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
787 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
788 {0, 1, "GMT", "IST", "Europe/Dublin"},
789 {0, 1, "GMT", "BST", "Europe/London"},
790 {0, 0, "WET", "WEST", "Africa/Casablanca"},
791 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
792 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
793 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
794 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
795 {10800, 2, "UYT", "UYST", "America/Montevideo"},
796 {10800, 1, "WGT", "WGST", "America/Godthab"},
797 {10800, 2, "BRT", "BRST", "Brazil/East"},
798 {12600, 1, "NST", "NDT", "America/St_Johns"},
799 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
800 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
801 {14400, 2, "CLT", "CLST", "Chile/Continental"},
802 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
803 {14400, 2, "PYT", "PYST", "America/Asuncion"},
804 {18000, 1, "CST", "CDT", "America/Havana"},
805 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
806 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
807 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
808 {21600, 0, "CST", "CDT", "America/Guatemala"},
809 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
810 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
811 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
812 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
813 {32400, 1, "AKST", "AKDT", "US/Alaska"},
814 {36000, 1, "HAST", "HADT", "US/Aleutian"}
815 };
816
817 /*#define DEBUG_TZNAME*/
818
819 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
820 {
821 int32_t idx;
822 #ifdef DEBUG_TZNAME
823 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
824 #endif
825 for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
826 {
827 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
828 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
829 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
830 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
831 {
832 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
833 }
834 }
835 return NULL;
836 }
837 #endif
838
839 #ifdef SEARCH_TZFILE
840 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
841 #define MAX_READ_SIZE 512
842
843 typedef struct DefaultTZInfo {
844 char* defaultTZBuffer;
845 int64_t defaultTZFileSize;
846 FILE* defaultTZFilePtr;
847 UBool defaultTZstatus;
848 int32_t defaultTZPosition;
849 } DefaultTZInfo;
850
851 /*
852 * This method compares the two files given to see if they are a match.
853 * It is currently use to compare two TZ files.
854 */
855 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
856 FILE* file;
857 int64_t sizeFile;
858 int64_t sizeFileLeft;
859 int32_t sizeFileRead;
860 int32_t sizeFileToRead;
861 char bufferFile[MAX_READ_SIZE];
862 UBool result = TRUE;
863
864 if (tzInfo->defaultTZFilePtr == NULL) {
865 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
866 }
867 file = fopen(TZFileName, "r");
868
869 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
870
871 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
872 /* First check that the file size are equal. */
873 if (tzInfo->defaultTZFileSize == 0) {
874 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
875 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
876 }
877 fseek(file, 0, SEEK_END);
878 sizeFile = ftell(file);
879 sizeFileLeft = sizeFile;
880
881 if (sizeFile != tzInfo->defaultTZFileSize) {
882 result = FALSE;
883 } else {
884 /* Store the data from the files in seperate buffers and
885 * compare each byte to determine equality.
886 */
887 if (tzInfo->defaultTZBuffer == NULL) {
888 rewind(tzInfo->defaultTZFilePtr);
889 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
890 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
891 }
892 rewind(file);
893 while(sizeFileLeft > 0) {
894 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
895 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
896
897 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
898 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
899 result = FALSE;
900 break;
901 }
902 sizeFileLeft -= sizeFileRead;
903 tzInfo->defaultTZPosition += sizeFileRead;
904 }
905 }
906 } else {
907 result = FALSE;
908 }
909
910 if (file != NULL) {
911 fclose(file);
912 }
913
914 return result;
915 }
916 /*
917 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
918 */
919 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
920 #define SKIP1 "."
921 #define SKIP2 ".."
922 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
923 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
924 char curpath[MAX_PATH_SIZE];
925 DIR* dirp = opendir(path);
926 DIR* subDirp = NULL;
927 struct dirent* dirEntry = NULL;
928
929 char* result = NULL;
930 if (dirp == NULL) {
931 return result;
932 }
933
934 /* Save the current path */
935 uprv_memset(curpath, 0, MAX_PATH_SIZE);
936 uprv_strcpy(curpath, path);
937
938 /* Check each entry in the directory. */
939 while((dirEntry = readdir(dirp)) != NULL) {
940 const char* dirName = dirEntry->d_name;
941 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
942 /* Create a newpath with the new entry to test each entry in the directory. */
943 char newpath[MAX_PATH_SIZE];
944 uprv_strcpy(newpath, curpath);
945 uprv_strcat(newpath, dirName);
946
947 if ((subDirp = opendir(newpath)) != NULL) {
948 /* If this new path is a directory, make a recursive call with the newpath. */
949 closedir(subDirp);
950 uprv_strcat(newpath, "/");
951 result = searchForTZFile(newpath, tzInfo);
952 /*
953 Have to get out here. Otherwise, we'd keep looking
954 and return the first match in the top-level directory
955 if there's a match in the top-level. If not, this function
956 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
957 It worked without this in most cases because we have a fallback of calling
958 localtime_r to figure out the default timezone.
959 */
960 if (result != NULL)
961 break;
962 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
963 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
964 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
965 skipZoneIDPrefix(&zoneid);
966 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
967 result = SEARCH_TZFILE_RESULT;
968 /* Get out after the first one found. */
969 break;
970 }
971 }
972 }
973 }
974 closedir(dirp);
975 return result;
976 }
977 #endif
978 U_CAPI const char* U_EXPORT2
979 uprv_tzname(int n)
980 {
981 const char *tzid = NULL;
982 #if U_PLATFORM_USES_ONLY_WIN32_API
983 tzid = uprv_detectWindowsTimeZone();
984
985 if (tzid != NULL) {
986 return tzid;
987 }
988 #else
989
990 /*#if U_PLATFORM_IS_DARWIN_BASED
991 int ret;
992
993 tzid = getenv("TZFILE");
994 if (tzid != NULL) {
995 return tzid;
996 }
997 #endif*/
998
999 /* This code can be temporarily disabled to test tzname resolution later on. */
1000 #ifndef DEBUG_TZNAME
1001 tzid = getenv("TZ");
1002 if (tzid != NULL && isValidOlsonID(tzid))
1003 {
1004 /* This might be a good Olson ID. */
1005 skipZoneIDPrefix(&tzid);
1006 return tzid;
1007 }
1008 /* else U_TZNAME will give a better result. */
1009 #endif
1010
1011 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1012 /* Caller must handle threading issues */
1013 if (gTimeZoneBufferPtr == NULL) {
1014 /*
1015 This is a trick to look at the name of the link to get the Olson ID
1016 because the tzfile contents is underspecified.
1017 This isn't guaranteed to work because it may not be a symlink.
1018 */
1019 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1020 if (0 < ret) {
1021 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1022 gTimeZoneBuffer[ret] = 0;
1023 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1024 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1025 {
1026 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1027 }
1028 } else {
1029 #if defined(SEARCH_TZFILE)
1030 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1031 if (tzInfo != NULL) {
1032 tzInfo->defaultTZBuffer = NULL;
1033 tzInfo->defaultTZFileSize = 0;
1034 tzInfo->defaultTZFilePtr = NULL;
1035 tzInfo->defaultTZstatus = FALSE;
1036 tzInfo->defaultTZPosition = 0;
1037
1038 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1039
1040 /* Free previously allocated memory */
1041 if (tzInfo->defaultTZBuffer != NULL) {
1042 uprv_free(tzInfo->defaultTZBuffer);
1043 }
1044 if (tzInfo->defaultTZFilePtr != NULL) {
1045 fclose(tzInfo->defaultTZFilePtr);
1046 }
1047 uprv_free(tzInfo);
1048 }
1049
1050 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1051 return gTimeZoneBufferPtr;
1052 }
1053 #endif
1054 }
1055 }
1056 else {
1057 return gTimeZoneBufferPtr;
1058 }
1059 #endif
1060 #endif
1061
1062 #ifdef U_TZNAME
1063 #if U_PLATFORM_USES_ONLY_WIN32_API
1064 /* The return value is free'd in timezone.cpp on Windows because
1065 * the other code path returns a pointer to a heap location. */
1066 return uprv_strdup(U_TZNAME[n]);
1067 #else
1068 /*
1069 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1070 So we remap the abbreviation to an olson ID.
1071
1072 Since Windows exposes a little more timezone information,
1073 we normally don't use this code on Windows because
1074 uprv_detectWindowsTimeZone should have already given the correct answer.
1075 */
1076 {
1077 struct tm juneSol, decemberSol;
1078 int daylightType;
1079 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1080 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1081
1082 /* This probing will tell us when daylight savings occurs. */
1083 localtime_r(&juneSolstice, &juneSol);
1084 localtime_r(&decemberSolstice, &decemberSol);
1085 if(decemberSol.tm_isdst > 0) {
1086 daylightType = U_DAYLIGHT_DECEMBER;
1087 } else if(juneSol.tm_isdst > 0) {
1088 daylightType = U_DAYLIGHT_JUNE;
1089 } else {
1090 daylightType = U_DAYLIGHT_NONE;
1091 }
1092 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1093 if (tzid != NULL) {
1094 return tzid;
1095 }
1096 }
1097 return U_TZNAME[n];
1098 #endif
1099 #else
1100 return "";
1101 #endif
1102 }
1103
1104 /* Get and set the ICU data directory --------------------------------------- */
1105
1106 static char *gDataDirectory = NULL;
1107 #if U_POSIX_LOCALE
1108 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1109 #endif
1110
1111 static UBool U_CALLCONV putil_cleanup(void)
1112 {
1113 if (gDataDirectory && *gDataDirectory) {
1114 uprv_free(gDataDirectory);
1115 }
1116 gDataDirectory = NULL;
1117 #if U_POSIX_LOCALE
1118 if (gCorrectedPOSIXLocale) {
1119 uprv_free(gCorrectedPOSIXLocale);
1120 gCorrectedPOSIXLocale = NULL;
1121 }
1122 #endif
1123 return TRUE;
1124 }
1125
1126 /*
1127 * Set the data directory.
1128 * Make a copy of the passed string, and set the global data dir to point to it.
1129 * TODO: see bug #2849, regarding thread safety.
1130 */
1131 U_CAPI void U_EXPORT2
1132 u_setDataDirectory(const char *directory) {
1133 char *newDataDir;
1134 int32_t length;
1135
1136 if(directory==NULL || *directory==0) {
1137 /* A small optimization to prevent the malloc and copy when the
1138 shared library is used, and this is a way to make sure that NULL
1139 is never returned.
1140 */
1141 newDataDir = (char *)"";
1142 }
1143 else {
1144 length=(int32_t)uprv_strlen(directory);
1145 newDataDir = (char *)uprv_malloc(length + 2);
1146 /* Exit out if newDataDir could not be created. */
1147 if (newDataDir == NULL) {
1148 return;
1149 }
1150 uprv_strcpy(newDataDir, directory);
1151
1152 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1153 {
1154 char *p;
1155 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1156 *p = U_FILE_SEP_CHAR;
1157 }
1158 }
1159 #endif
1160 }
1161
1162 umtx_lock(NULL);
1163 if (gDataDirectory && *gDataDirectory) {
1164 uprv_free(gDataDirectory);
1165 }
1166 gDataDirectory = newDataDir;
1167 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1168 umtx_unlock(NULL);
1169 }
1170
1171 U_CAPI UBool U_EXPORT2
1172 uprv_pathIsAbsolute(const char *path)
1173 {
1174 if(!path || !*path) {
1175 return FALSE;
1176 }
1177
1178 if(*path == U_FILE_SEP_CHAR) {
1179 return TRUE;
1180 }
1181
1182 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1183 if(*path == U_FILE_ALT_SEP_CHAR) {
1184 return TRUE;
1185 }
1186 #endif
1187
1188 #if U_PLATFORM_USES_ONLY_WIN32_API
1189 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1190 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1191 path[1] == ':' ) {
1192 return TRUE;
1193 }
1194 #endif
1195
1196 return FALSE;
1197 }
1198
1199 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1200 until some client wrapper makefiles are updated */
1201 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1202 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1203 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1204 # endif
1205 #endif
1206
1207 U_CAPI const char * U_EXPORT2
1208 u_getDataDirectory(void) {
1209 const char *path = NULL;
1210 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1211 char datadir_path_buffer[PATH_MAX];
1212 #endif
1213
1214 /* if we have the directory, then return it immediately */
1215 UMTX_CHECK(NULL, gDataDirectory, path);
1216
1217 if(path) {
1218 return path;
1219 }
1220
1221 /*
1222 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1223 override ICU's data with the ICU_DATA environment variable. This prevents
1224 problems where multiple custom copies of ICU's specific version of data
1225 are installed on a system. Either the application must define the data
1226 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1227 ICU, set the data with udata_setCommonData or trust that all of the
1228 required data is contained in ICU's data library that contains
1229 the entry point defined by U_ICUDATA_ENTRY_POINT.
1230
1231 There may also be some platforms where environment variables
1232 are not allowed.
1233 */
1234 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1235 /* First try to get the environment variable */
1236 path=getenv("ICU_DATA");
1237 # endif
1238
1239 /* ICU_DATA_DIR may be set as a compile option.
1240 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1241 * and is used only when data is built in archive mode eliminating the need
1242 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1243 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1244 * set their own path.
1245 */
1246 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1247 if(path==NULL || *path==0) {
1248 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1249 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1250 # endif
1251 # ifdef ICU_DATA_DIR
1252 path=ICU_DATA_DIR;
1253 # else
1254 path=U_ICU_DATA_DEFAULT_DIR;
1255 # endif
1256 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1257 if (prefix != NULL) {
1258 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1259 path=datadir_path_buffer;
1260 }
1261 # endif
1262 }
1263 #endif
1264
1265 if(path==NULL) {
1266 /* It looks really bad, set it to something. */
1267 path = "";
1268 }
1269
1270 u_setDataDirectory(path);
1271 return gDataDirectory;
1272 }
1273
1274
1275
1276
1277
1278 /* Macintosh-specific locale information ------------------------------------ */
1279 #if U_PLATFORM == U_PF_CLASSIC_MACOS
1280
/*
 * One row of the classic Mac OS locale lookup table. A field holding
 * MAC_LC_MAGIC_NUMBER matches any value during lookup.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER (-5) /* wildcard: matches anything */
#define MAC_LC_INIT_NUMBER (-9)  /* "not yet determined" initial value */

/*
 * Rows are tested in order and the first match wins, so the all-wildcard
 * fallback must stay last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (Hebrew; "is_IS" is Icelandic and belongs to region 21 below)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland [Ask Rich for the data. HS]*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
    /* Faeroe Islands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand*/

    /* fallback is en_US*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1380
1381 #endif
1382
1383 #if U_POSIX_LOCALE
1384 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1385 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1386 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1387 */
1388 static const char *uprv_getPOSIXIDForCategory(int category)
1389 {
1390 const char* posixID = NULL;
1391 if (category == LC_MESSAGES || category == LC_CTYPE) {
1392 /*
1393 * On Solaris two different calls to setlocale can result in
1394 * different values. Only get this value once.
1395 *
1396 * We must check this first because an application can set this.
1397 *
1398 * LC_ALL can't be used because it's platform dependent. The LANG
1399 * environment variable seems to affect LC_CTYPE variable by default.
1400 * Here is what setlocale(LC_ALL, NULL) can return.
1401 * HPUX can return 'C C C C C C C'
1402 * Solaris can return /en_US/C/C/C/C/C on the second try.
1403 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1404 *
1405 * The default codepage detection also needs to use LC_CTYPE.
1406 *
1407 * Do not call setlocale(LC_*, "")! Using an empty string instead
1408 * of NULL, will modify the libc behavior.
1409 */
1410 posixID = setlocale(category, NULL);
1411 if ((posixID == 0)
1412 || (uprv_strcmp("C", posixID) == 0)
1413 || (uprv_strcmp("POSIX", posixID) == 0))
1414 {
1415 /* Maybe we got some garbage. Try something more reasonable */
1416 posixID = getenv("LC_ALL");
1417 if (posixID == 0) {
1418 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1419 if (posixID == 0) {
1420 posixID = getenv("LANG");
1421 }
1422 }
1423 }
1424 }
1425 if ((posixID==0)
1426 || (uprv_strcmp("C", posixID) == 0)
1427 || (uprv_strcmp("POSIX", posixID) == 0))
1428 {
1429 /* Nothing worked. Give it a nice POSIX default value. */
1430 posixID = "en_US_POSIX";
1431 }
1432 return posixID;
1433 }
1434
1435 /* Return just the POSIX id for the default locale, whatever happens to be in
1436 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1437 */
1438 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1439 {
1440 static const char* posixID = NULL;
1441 if (posixID == 0) {
1442 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1443 }
1444 return posixID;
1445 }
1446
1447 /* Return just the POSIX id for the default codepage, whatever happens to be in
1448 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1449 */
1450 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1451 {
1452 static const char* posixID = NULL;
1453 if (posixID == 0) {
1454 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1455 }
1456 return posixID;
1457 }
1458 #endif
1459
1460 /* NOTE: The caller should handle thread safety */
1461 U_CAPI const char* U_EXPORT2
1462 uprv_getDefaultLocaleID()
1463 {
1464 #if U_POSIX_LOCALE
1465 /*
1466 Note that: (a '!' means the ID is improper somehow)
1467 LC_ALL ----> default_loc codepage
1468 --------------------------------------------------------
1469 ab.CD ab CD
1470 ab@CD ab__CD -
1471 ab@CD.EF ab__CD EF
1472
1473 ab_CD.EF@GH ab_CD_GH EF
1474
1475 Some 'improper' ways to do the same as above:
1476 ! ab_CD@GH.EF ab_CD_GH EF
1477 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1478 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1479
1480 _CD@GH _CD_GH -
1481 _CD.EF@GH _CD_GH EF
1482
1483 The variant cannot have dots in it.
1484 The 'rightmost' variant (@xxx) wins.
1485 The leftmost codepage (.xxx) wins.
1486 */
1487 char *correctedPOSIXLocale = 0;
1488 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1489 const char *p;
1490 const char *q;
1491 int32_t len;
1492
1493 /* Format: (no spaces)
1494 ll [ _CC ] [ . MM ] [ @ VV]
1495
1496 l = lang, C = ctry, M = charmap, V = variant
1497 */
1498
1499 if (gCorrectedPOSIXLocale != NULL) {
1500 return gCorrectedPOSIXLocale;
1501 }
1502
1503 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1504 /* assume new locale can't be larger than old one? */
1505 correctedPOSIXLocale = reinterpret_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1506 /* Exit on memory allocation error. */
1507 if (correctedPOSIXLocale == NULL) {
1508 return NULL;
1509 }
1510 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1511 correctedPOSIXLocale[p-posixID] = 0;
1512
1513 /* do not copy after the @ */
1514 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1515 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1516 }
1517 }
1518
1519 /* Note that we scan the *uncorrected* ID. */
1520 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1521 if (correctedPOSIXLocale == NULL) {
1522 correctedPOSIXLocale = reinterpret_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1523 /* Exit on memory allocation error. */
1524 if (correctedPOSIXLocale == NULL) {
1525 return NULL;
1526 }
1527 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1528 correctedPOSIXLocale[p-posixID] = 0;
1529 }
1530 p++;
1531
1532 /* Take care of any special cases here.. */
1533 if (!uprv_strcmp(p, "nynorsk")) {
1534 p = "NY";
1535 /* Don't worry about no__NY. In practice, it won't appear. */
1536 }
1537
1538 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1539 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1540 }
1541 else {
1542 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1543 }
1544
1545 if ((q = uprv_strchr(p, '.')) != NULL) {
1546 /* How big will the resulting string be? */
1547 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1548 uprv_strncat(correctedPOSIXLocale, p, q-p);
1549 correctedPOSIXLocale[len] = 0;
1550 }
1551 else {
1552 /* Anything following the @ sign */
1553 uprv_strcat(correctedPOSIXLocale, p);
1554 }
1555
1556 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1557 * How about 'russian' -> 'ru'?
1558 * Many of the other locales using ISO codes will be handled by the
1559 * canonicalization functions in uloc_getDefault.
1560 */
1561 }
1562
1563 /* Was a correction made? */
1564 if (correctedPOSIXLocale != NULL) {
1565 posixID = correctedPOSIXLocale;
1566 }
1567 else {
1568 /* copy it, just in case the original pointer goes away. See j2395 */
1569 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1570 /* Exit on memory allocation error. */
1571 if (correctedPOSIXLocale == NULL) {
1572 return NULL;
1573 }
1574 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1575 }
1576
1577 if (gCorrectedPOSIXLocale == NULL) {
1578 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1579 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1580 correctedPOSIXLocale = NULL;
1581 }
1582
1583 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1584 uprv_free(correctedPOSIXLocale);
1585 }
1586
1587 return posixID;
1588
1589 #elif U_PLATFORM_USES_ONLY_WIN32_API
1590 UErrorCode status = U_ZERO_ERROR;
1591 LCID id = GetThreadLocale();
1592 const char* locID = uprv_convertToPosix(id, &status);
1593
1594 if (U_FAILURE(status)) {
1595 locID = "en_US";
1596 }
1597 return locID;
1598
1599 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1600 int32_t script = MAC_LC_INIT_NUMBER;
1601 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1602 int32_t region = MAC_LC_INIT_NUMBER;
1603 /* = GetScriptManagerVariable(smRegionCode);*/
1604 int32_t lang = MAC_LC_INIT_NUMBER;
1605 /* = GetScriptManagerVariable(smScriptLang);*/
1606 int32_t date_region = MAC_LC_INIT_NUMBER;
1607 const char* posixID = 0;
1608 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1609 int32_t i;
1610 Intl1Hndl ih;
1611
1612 ih = (Intl1Hndl) GetIntlResource(1);
1613 if (ih)
1614 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1615
1616 for (i = 0; i < count; i++) {
1617 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1618 || (mac_lc_recs[i].script == script))
1619 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1620 || (mac_lc_recs[i].region == region))
1621 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1622 || (mac_lc_recs[i].lang == lang))
1623 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1624 || (mac_lc_recs[i].date_region == date_region))
1625 )
1626 {
1627 posixID = mac_lc_recs[i].posixID;
1628 break;
1629 }
1630 }
1631
1632 return posixID;
1633
1634 #elif U_PLATFORM == U_PF_OS400
1635 /* locales are process scoped and are by definition thread safe */
1636 static char correctedLocale[64];
1637 const char *localeID = getenv("LC_ALL");
1638 char *p;
1639
1640 if (localeID == NULL)
1641 localeID = getenv("LANG");
1642 if (localeID == NULL)
1643 localeID = setlocale(LC_ALL, NULL);
1644 /* Make sure we have something... */
1645 if (localeID == NULL)
1646 return "en_US_POSIX";
1647
1648 /* Extract the locale name from the path. */
1649 if((p = uprv_strrchr(localeID, '/')) != NULL)
1650 {
1651 /* Increment p to start of locale name. */
1652 p++;
1653 localeID = p;
1654 }
1655
1656 /* Copy to work location. */
1657 uprv_strcpy(correctedLocale, localeID);
1658
1659 /* Strip off the '.locale' extension. */
1660 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1661 *p = 0;
1662 }
1663
1664 /* Upper case the locale name. */
1665 T_CString_toUpperCase(correctedLocale);
1666
1667 /* See if we are using the POSIX locale. Any of the
1668 * following are equivalent and use the same QLGPGCMA
1669 * (POSIX) locale.
1670 * QLGPGCMA2 means UCS2
1671 * QLGPGCMA_4 means UTF-32
1672 * QLGPGCMA_8 means UTF-8
1673 */
1674 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1675 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1676 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1677 {
1678 uprv_strcpy(correctedLocale, "en_US_POSIX");
1679 }
1680 else
1681 {
1682 int16_t LocaleLen;
1683
1684 /* Lower case the lang portion. */
1685 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1686 {
1687 *p = uprv_tolower(*p);
1688 }
1689
1690 /* Adjust for Euro. After '_E' add 'URO'. */
1691 LocaleLen = uprv_strlen(correctedLocale);
1692 if (correctedLocale[LocaleLen - 2] == '_' &&
1693 correctedLocale[LocaleLen - 1] == 'E')
1694 {
1695 uprv_strcat(correctedLocale, "URO");
1696 }
1697
1698 /* If using Lotus-based locale then convert to
1699 * equivalent non Lotus.
1700 */
1701 else if (correctedLocale[LocaleLen - 2] == '_' &&
1702 correctedLocale[LocaleLen - 1] == 'L')
1703 {
1704 correctedLocale[LocaleLen - 2] = 0;
1705 }
1706
1707 /* There are separate simplified and traditional
1708 * locales called zh_HK_S and zh_HK_T.
1709 */
1710 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1711 {
1712 uprv_strcpy(correctedLocale, "zh_HK");
1713 }
1714
1715 /* A special zh_CN_GBK locale...
1716 */
1717 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1718 {
1719 uprv_strcpy(correctedLocale, "zh_CN");
1720 }
1721
1722 }
1723
1724 return correctedLocale;
1725 #endif
1726
1727 }
1728
1729 #if !U_CHARSET_IS_UTF8
1730 #if U_POSIX_LOCALE
1731 /*
1732 Due to various platform differences, one platform may specify a charset,
1733 when they really mean a different charset. Remap the names so that they are
1734 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1735 here. Before adding anything to this function, please consider adding unique
1736 names to the ICU alias table in the data directory.
1737 */
1738 static const char*
1739 remapPlatformDependentCodepage(const char *locale, const char *name) {
1740 if (locale != NULL && *locale == 0) {
1741 /* Make sure that an empty locale is handled the same way. */
1742 locale = NULL;
1743 }
1744 if (name == NULL) {
1745 return NULL;
1746 }
1747 #if U_PLATFORM == U_PF_AIX
1748 if (uprv_strcmp(name, "IBM-943") == 0) {
1749 /* Use the ASCII compatible ibm-943 */
1750 name = "Shift-JIS";
1751 }
1752 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1753 /* Use the windows-1252 that contains the Euro */
1754 name = "IBM-5348";
1755 }
1756 #elif U_PLATFORM == U_PF_SOLARIS
1757 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1758 /* Solaris underspecifies the "EUC" name. */
1759 if (uprv_strcmp(locale, "zh_CN") == 0) {
1760 name = "EUC-CN";
1761 }
1762 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1763 name = "EUC-TW";
1764 }
1765 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1766 name = "EUC-KR";
1767 }
1768 }
1769 else if (uprv_strcmp(name, "eucJP") == 0) {
1770 /*
1771 ibm-954 is the best match.
1772 ibm-33722 is the default for eucJP (similar to Windows).
1773 */
1774 name = "eucjis";
1775 }
1776 else if (uprv_strcmp(name, "646") == 0) {
1777 /*
1778 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1779 * ISO-8859-1 instead of US-ASCII(646).
1780 */
1781 name = "ISO-8859-1";
1782 }
1783 #elif U_PLATFORM_IS_DARWIN_BASED
1784 if (locale == NULL && *name == 0) {
1785 /*
1786 No locale was specified, and an empty name was passed in.
1787 This usually indicates that nl_langinfo didn't return valid information.
1788 Mac OS X uses UTF-8 by default (especially the locale data and console).
1789 */
1790 name = "UTF-8";
1791 }
1792 else if (uprv_strcmp(name, "CP949") == 0) {
1793 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1794 name = "EUC-KR";
1795 }
1796 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1797 /*
1798 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1799 */
1800 name = "UTF-8";
1801 }
1802 #elif U_PLATFORM == U_PF_BSD
1803 if (uprv_strcmp(name, "CP949") == 0) {
1804 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1805 name = "EUC-KR";
1806 }
1807 #elif U_PLATFORM == U_PF_HPUX
1808 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1809 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1810 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1811 name = "hkbig5";
1812 }
1813 else if (uprv_strcmp(name, "eucJP") == 0) {
1814 /*
1815 ibm-1350 is the best match, but unavailable.
1816 ibm-954 is mostly a superset of ibm-1350.
1817 ibm-33722 is the default for eucJP (similar to Windows).
1818 */
1819 name = "eucjis";
1820 }
1821 #elif U_PLATFORM == U_PF_LINUX
1822 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1823 /* Linux underspecifies the "EUC" name. */
1824 if (uprv_strcmp(locale, "korean") == 0) {
1825 name = "EUC-KR";
1826 }
1827 else if (uprv_strcmp(locale, "japanese") == 0) {
1828 /* See comment below about eucJP */
1829 name = "eucjis";
1830 }
1831 }
1832 else if (uprv_strcmp(name, "eucjp") == 0) {
1833 /*
1834 ibm-1350 is the best match, but unavailable.
1835 ibm-954 is mostly a superset of ibm-1350.
1836 ibm-33722 is the default for eucJP (similar to Windows).
1837 */
1838 name = "eucjis";
1839 }
1840 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1841 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1842 /*
1843 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1844 */
1845 name = "UTF-8";
1846 }
1847 /*
1848 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1849 * it by falling back to 'US-ASCII' when NULL is returned from this
1850 * function. So, we don't have to worry about it here.
1851 */
1852 #endif
1853 /* return NULL when "" is passed in */
1854 if (*name == 0) {
1855 name = NULL;
1856 }
1857 return name;
1858 }
1859
1860 static const char*
1861 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1862 {
1863 char localeBuf[100];
1864 const char *name = NULL;
1865 char *variant = NULL;
1866
1867 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1868 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1869 uprv_strncpy(localeBuf, localeName, localeCapacity);
1870 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1871 name = uprv_strncpy(buffer, name+1, buffCapacity);
1872 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1873 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1874 *variant = 0;
1875 }
1876 name = remapPlatformDependentCodepage(localeBuf, name);
1877 }
1878 return name;
1879 }
1880 #endif
1881
1882 static const char*
1883 int_getDefaultCodepage()
1884 {
1885 #if U_PLATFORM == U_PF_OS400
1886 uint32_t ccsid = 37; /* Default to ibm-37 */
1887 static char codepage[64];
1888 Qwc_JOBI0400_t jobinfo;
1889 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1890
1891 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1892 "* ", " ", &error);
1893
1894 if (error.Bytes_Available == 0) {
1895 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1896 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1897 }
1898 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1899 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1900 }
1901 /* else use the default */
1902 }
1903 sprintf(codepage,"ibm-%d", ccsid);
1904 return codepage;
1905
1906 #elif U_PLATFORM == U_PF_OS390
1907 static char codepage[64];
1908
1909 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1910 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1911 codepage[63] = 0; /* NULL terminate */
1912
1913 return codepage;
1914
1915 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1916 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1917
1918 #elif U_PLATFORM_USES_ONLY_WIN32_API
1919 static char codepage[64];
1920 sprintf(codepage, "windows-%d", GetACP());
1921 return codepage;
1922
1923 #elif U_POSIX_LOCALE
1924 static char codesetName[100];
1925 const char *localeName = NULL;
1926 const char *name = NULL;
1927
1928 localeName = uprv_getPOSIXIDForDefaultCodepage();
1929 uprv_memset(codesetName, 0, sizeof(codesetName));
1930 #if U_HAVE_NL_LANGINFO_CODESET
1931 /* When available, check nl_langinfo first because it usually gives more
1932 useful names. It depends on LC_CTYPE.
1933 nl_langinfo may use the same buffer as setlocale. */
1934 {
1935 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1936 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1937 /*
1938 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1939 * instead of ASCII.
1940 */
1941 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1942 codeset = remapPlatformDependentCodepage(localeName, codeset);
1943 } else
1944 #endif
1945 {
1946 codeset = remapPlatformDependentCodepage(NULL, codeset);
1947 }
1948
1949 if (codeset != NULL) {
1950 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1951 codesetName[sizeof(codesetName)-1] = 0;
1952 return codesetName;
1953 }
1954 }
1955 #endif
1956
1957 /* Use setlocale in a nice way, and then check some environment variables.
1958 Maybe the application used setlocale already.
1959 */
1960 uprv_memset(codesetName, 0, sizeof(codesetName));
1961 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1962 if (name) {
1963 /* if we can find the codeset name from setlocale, return that. */
1964 return name;
1965 }
1966
1967 if (*codesetName == 0)
1968 {
1969 /* Everything failed. Return US ASCII (ISO 646). */
1970 (void)uprv_strcpy(codesetName, "US-ASCII");
1971 }
1972 return codesetName;
1973 #else
1974 return "US-ASCII";
1975 #endif
1976 }
1977
1978
1979 U_CAPI const char* U_EXPORT2
1980 uprv_getDefaultCodepage()
1981 {
1982 static char const *name = NULL;
1983 umtx_lock(NULL);
1984 if (name == NULL) {
1985 name = int_getDefaultCodepage();
1986 }
1987 umtx_unlock(NULL);
1988 return name;
1989 }
1990 #endif /* !U_CHARSET_IS_UTF8 */
1991
1992
1993 /* end of platform-specific implementation -------------- */
1994
1995 /* version handling --------------------------------------------------------- */
1996
1997 U_CAPI void U_EXPORT2
1998 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1999 char *end;
2000 uint16_t part=0;
2001
2002 if(versionArray==NULL) {
2003 return;
2004 }
2005
2006 if(versionString!=NULL) {
2007 for(;;) {
2008 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2009 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2010 break;
2011 }
2012 versionString=end+1;
2013 }
2014 }
2015
2016 while(part<U_MAX_VERSION_LENGTH) {
2017 versionArray[part++]=0;
2018 }
2019 }
2020
2021 U_CAPI void U_EXPORT2
2022 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2023 if(versionArray!=NULL && versionString!=NULL) {
2024 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2025 int32_t len = u_strlen(versionString);
2026 if(len>U_MAX_VERSION_STRING_LENGTH) {
2027 len = U_MAX_VERSION_STRING_LENGTH;
2028 }
2029 u_UCharsToChars(versionString, versionChars, len);
2030 versionChars[len]=0;
2031 u_versionFromString(versionArray, versionChars);
2032 }
2033 }
2034
2035 U_CAPI void U_EXPORT2
2036 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2037 uint16_t count, part;
2038 uint8_t field;
2039
2040 if(versionString==NULL) {
2041 return;
2042 }
2043
2044 if(versionArray==NULL) {
2045 versionString[0]=0;
2046 return;
2047 }
2048
2049 /* count how many fields need to be written */
2050 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2051 }
2052
2053 if(count <= 1) {
2054 count = 2;
2055 }
2056
2057 /* write the first part */
2058 /* write the decimal field value */
2059 field=versionArray[0];
2060 if(field>=100) {
2061 *versionString++=(char)('0'+field/100);
2062 field%=100;
2063 }
2064 if(field>=10) {
2065 *versionString++=(char)('0'+field/10);
2066 field%=10;
2067 }
2068 *versionString++=(char)('0'+field);
2069
2070 /* write the following parts */
2071 for(part=1; part<count; ++part) {
2072 /* write a dot first */
2073 *versionString++=U_VERSION_DELIMITER;
2074
2075 /* write the decimal field value */
2076 field=versionArray[part];
2077 if(field>=100) {
2078 *versionString++=(char)('0'+field/100);
2079 field%=100;
2080 }
2081 if(field>=10) {
2082 *versionString++=(char)('0'+field/10);
2083 field%=10;
2084 }
2085 *versionString++=(char)('0'+field);
2086 }
2087
2088 /* NUL-terminate */
2089 *versionString=0;
2090 }
2091
2092 U_CAPI void U_EXPORT2
2093 u_getVersion(UVersionInfo versionArray) {
2094 u_versionFromString(versionArray, U_ICU_VERSION);
2095 }
2096
2097 /**
2098 * icucfg.h dependent code
2099 */
2100
2101 #if U_ENABLE_DYLOAD
2102
2103 #if HAVE_DLOPEN && !U_PLATFORM_HAS_WIN32_API
2104
2105 #if HAVE_DLFCN_H
2106
2107 #ifdef __MVS__
2108 #ifndef __SUSV3
2109 #define __SUSV3 1
2110 #endif
2111 #endif
2112 #include <dlfcn.h>
2113 #endif
2114
2115 U_INTERNAL void * U_EXPORT2
2116 uprv_dl_open(const char *libName, UErrorCode *status) {
2117 void *ret = NULL;
2118 if(U_FAILURE(*status)) return ret;
2119 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2120 if(ret==NULL) {
2121 #ifdef U_TRACE_DYLOAD
2122 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2123 #endif
2124 *status = U_MISSING_RESOURCE_ERROR;
2125 }
2126 return ret;
2127 }
2128
2129 U_INTERNAL void U_EXPORT2
2130 uprv_dl_close(void *lib, UErrorCode *status) {
2131 if(U_FAILURE(*status)) return;
2132 dlclose(lib);
2133 }
2134
2135 U_INTERNAL UVoidFunction* U_EXPORT2
2136 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2137 union {
2138 UVoidFunction *fp;
2139 void *vp;
2140 } uret;
2141 uret.fp = NULL;
2142 if(U_FAILURE(*status)) return uret.fp;
2143 uret.vp = dlsym(lib, sym);
2144 if(uret.vp == NULL) {
2145 #ifdef U_TRACE_DYLOAD
2146 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2147 #endif
2148 *status = U_MISSING_RESOURCE_ERROR;
2149 }
2150 return uret.fp;
2151 }
2152
2153 #else
2154
2155 /* null (nonexistent) implementation. */
2156
2157 U_INTERNAL void * U_EXPORT2
2158 uprv_dl_open(const char *libName, UErrorCode *status) {
2159 if(U_FAILURE(*status)) return NULL;
2160 *status = U_UNSUPPORTED_ERROR;
2161 return NULL;
2162 }
2163
2164 U_INTERNAL void U_EXPORT2
2165 uprv_dl_close(void *lib, UErrorCode *status) {
2166 if(U_FAILURE(*status)) return;
2167 *status = U_UNSUPPORTED_ERROR;
2168 return;
2169 }
2170
2171
2172 U_INTERNAL UVoidFunction* U_EXPORT2
2173 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2174 if(U_SUCCESS(*status)) {
2175 *status = U_UNSUPPORTED_ERROR;
2176 }
2177 return (UVoidFunction*)NULL;
2178 }
2179
2180
2181
2182 #endif
2183
2184 #elif U_PLATFORM_HAS_WIN32_API
2185
2186 U_INTERNAL void * U_EXPORT2
2187 uprv_dl_open(const char *libName, UErrorCode *status) {
2188 HMODULE lib = NULL;
2189
2190 if(U_FAILURE(*status)) return NULL;
2191
2192 lib = LoadLibraryA(libName);
2193
2194 if(lib==NULL) {
2195 *status = U_MISSING_RESOURCE_ERROR;
2196 }
2197
2198 return (void*)lib;
2199 }
2200
2201 U_INTERNAL void U_EXPORT2
2202 uprv_dl_close(void *lib, UErrorCode *status) {
2203 HMODULE handle = (HMODULE)lib;
2204 if(U_FAILURE(*status)) return;
2205
2206 FreeLibrary(handle);
2207
2208 return;
2209 }
2210
2211
2212 U_INTERNAL UVoidFunction* U_EXPORT2
2213 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2214 HMODULE handle = (HMODULE)lib;
2215 UVoidFunction* addr = NULL;
2216
2217 if(U_FAILURE(*status) || lib==NULL) return NULL;
2218
2219 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2220
2221 if(addr==NULL) {
2222 DWORD lastError = GetLastError();
2223 if(lastError == ERROR_PROC_NOT_FOUND) {
2224 *status = U_MISSING_RESOURCE_ERROR;
2225 } else {
2226 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2227 }
2228 }
2229
2230 return addr;
2231 }
2232
2233
2234 #else
2235
2236 /* No dynamic loading set. */
2237
2238 U_INTERNAL void * U_EXPORT2
2239 uprv_dl_open(const char *libName, UErrorCode *status) {
2240 if(U_FAILURE(*status)) return NULL;
2241 *status = U_UNSUPPORTED_ERROR;
2242 return NULL;
2243 }
2244
2245 U_INTERNAL void U_EXPORT2
2246 uprv_dl_close(void *lib, UErrorCode *status) {
2247 if(U_FAILURE(*status)) return;
2248 *status = U_UNSUPPORTED_ERROR;
2249 return;
2250 }
2251
2252
2253 U_INTERNAL UVoidFunction* U_EXPORT2
2254 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2255 if(U_SUCCESS(*status)) {
2256 *status = U_UNSUPPORTED_ERROR;
2257 }
2258 return (UVoidFunction*)NULL;
2259 }
2260
2261 #endif /* U_ENABLE_DYLOAD */
2262
2263 /*
2264 * Hey, Emacs, please set the following:
2265 *
2266 * Local Variables:
2267 * indent-tabs-mode: nil
2268 * End:
2269 *
2270 */