]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / putil.cpp
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 // Defines _XOPEN_SOURCE for access to POSIX functions.
41 // Must be before any other #includes.
42 #include "uposixdefs.h"
43
44 /* include ICU headers */
45 #include "unicode/utypes.h"
46 #include "unicode/putil.h"
47 #include "unicode/ustring.h"
48 #include "putilimp.h"
49 #include "uassert.h"
50 #include "umutex.h"
51 #include "cmemory.h"
52 #include "cstring.h"
53 #include "locmap.h"
54 #include "ucln_cmn.h"
55
56 /* Include standard headers. */
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <math.h>
61 #include <locale.h>
62 #include <float.h>
63
64 #ifndef U_COMMON_IMPLEMENTATION
65 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
66 #endif
67
68
69 /* include system headers */
70 #if U_PLATFORM_USES_ONLY_WIN32_API
71 /*
72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
74 * to use native APIs as much as possible?
75 */
76 # define WIN32_LEAN_AND_MEAN
77 # define VC_EXTRALEAN
78 # define NOUSER
79 # define NOSERVICE
80 # define NOIME
81 # define NOMCX
82 # include <windows.h>
83 # include "wintz.h"
84 #elif U_PLATFORM == U_PF_OS400
85 # include <float.h>
86 # include <qusec.h> /* error code structure */
87 # include <qusrjobi.h>
88 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
89 # include <mih/testptr.h> /* For uprv_maximumPtr */
90 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
91 # include <Files.h>
92 # include <IntlResources.h>
93 # include <Script.h>
94 # include <Folders.h>
95 # include <MacTypes.h>
96 # include <TextUtils.h>
97 # define ICU_NO_USER_DATA_OVERRIDE 1
98 #elif U_PLATFORM == U_PF_OS390
99 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
100 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
101 # include <limits.h>
102 # include <unistd.h>
103 # if U_PLATFORM == U_PF_SOLARIS
104 # ifndef _XPG4_2
105 # define _XPG4_2
106 # endif
107 # endif
108 #elif U_PLATFORM == U_PF_QNX
109 # include <sys/neutrino.h>
110 #endif
111
112 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
113 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
114 #undef __STRICT_ANSI__
115 #endif
116
117 /*
118 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
119 */
120 #include <time.h>
121
122 #if !U_PLATFORM_USES_ONLY_WIN32_API
123 #include <sys/time.h>
124 #endif
125
126 /*
127 * Only include langinfo.h if we have a way to get the codeset. If we later
128 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
129 *
130 */
131
132 #if U_HAVE_NL_LANGINFO_CODESET
133 #include <langinfo.h>
134 #endif
135
136 /**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
140 #if U_PLATFORM_IMPLEMENTS_POSIX
141 # if U_PLATFORM == U_PF_OS400
142 # define HAVE_DLFCN_H 0
143 # define HAVE_DLOPEN 0
144 # else
145 # ifndef HAVE_DLFCN_H
146 # define HAVE_DLFCN_H 1
147 # endif
148 # ifndef HAVE_DLOPEN
149 # define HAVE_DLOPEN 1
150 # endif
151 # endif
152 # ifndef HAVE_GETTIMEOFDAY
153 # define HAVE_GETTIMEOFDAY 1
154 # endif
155 #else
156 # define HAVE_DLFCN_H 0
157 # define HAVE_DLOPEN 0
158 # define HAVE_GETTIMEOFDAY 0
159 #endif
160
161 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
162
163 /* Define the extension for data files, again... */
164 #define DATA_TYPE "dat"
165
166 /* Leave this copyright notice here! */
167 static const char copyright[] = U_COPYRIGHT_STRING;
168
169 /* floating point implementations ------------------------------------------- */
170
171 /* We return QNAN rather than SNAN*/
172 #define SIGN 0x80000000U
173
/*
 * Overlay of a double and a 64-bit integer, used to spell floating-point
 * constants as raw bit patterns.
 * The integer member has to come first: a brace initializer without a
 * designator initializes the first member of a union (C89 rule).
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern handed out by uprv_getInfinity(); also the NaN/Infinity threshold. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
181
182 /*---------------------------------------------------------------------------
183 Platform utilities
184 Our general strategy is to assume we're on a POSIX platform. Platforms which
185 are non-POSIX must declare themselves so. The default POSIX implementation
186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187 functions).
188 ---------------------------------------------------------------------------*/
189
190 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
191 # undef U_POSIX_LOCALE
192 #else
193 # define U_POSIX_LOCALE 1
194 #endif
195
196 /*
197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199 */
200 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
210
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they are the first bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
220 #endif /* !IEEE_754 */
221
222 #if IEEE_754
223 static UBool
224 u_signBit(double d) {
225 uint8_t hiByte;
226 #if U_IS_BIG_ENDIAN
227 hiByte = *(uint8_t *)&d;
228 #else
229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230 #endif
231 return (hiByte & 0x80) != 0;
232 }
233 #endif
234
235
236
237 #if defined (U_DEBUG_FAKETIME)
238 /* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
240 */
241 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
242 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
243 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
244 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
245
246 static UDate getUTCtime_real() {
247 struct timeval posixTime;
248 gettimeofday(&posixTime, NULL);
249 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
250 }
251
252 static UDate getUTCtime_fake() {
253 umtx_lock(&fakeClockMutex);
254 if(!fakeClock_set) {
255 UDate real = getUTCtime_real();
256 const char *fake_start = getenv("U_FAKETIME_START");
257 if((fake_start!=NULL) && (fake_start[0]!=0)) {
258 sscanf(fake_start,"%lf",&fakeClock_t0);
259 fakeClock_dt = fakeClock_t0 - real;
260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262 fakeClock_t0, fake_start, fakeClock_dt, real);
263 } else {
264 fakeClock_dt = 0;
265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
267 }
268 fakeClock_set = TRUE;
269 }
270 umtx_unlock(&fakeClockMutex);
271
272 return getUTCtime_real() + fakeClock_dt;
273 }
274 #endif
275
276 #if U_PLATFORM_USES_ONLY_WIN32_API
277 typedef union {
278 int64_t int64;
279 FILETIME fileTime;
280 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
281
282 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
283 #define EPOCH_BIAS INT64_C(116444736000000000)
284 #define HECTONANOSECOND_PER_MILLISECOND 10000
285
286 #endif
287
288 /*---------------------------------------------------------------------------
289 Universal Implementations
290 These are designed to work on all platforms. Try these, and if they
291 don't work on your platform, then special case your platform with new
292 implementations.
293 ---------------------------------------------------------------------------*/
294
295 U_CAPI UDate U_EXPORT2
296 uprv_getUTCtime()
297 {
298 #if defined(U_DEBUG_FAKETIME)
299 return getUTCtime_fake(); /* Hook for overriding the clock */
300 #else
301 return uprv_getRawUTCtime();
302 #endif
303 }
304
305 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306 U_CAPI UDate U_EXPORT2
307 uprv_getRawUTCtime()
308 {
309 #if U_PLATFORM == U_PF_CLASSIC_MACOS
310 time_t t, t1, t2;
311 struct tm tmrec;
312
313 uprv_memset( &tmrec, 0, sizeof(tmrec) );
314 tmrec.tm_year = 70;
315 tmrec.tm_mon = 0;
316 tmrec.tm_mday = 1;
317 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
318
319 time(&t);
320 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
321 t2 = mktime(&tmrec); /* seconds of current GMT*/
322 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
323 #elif U_PLATFORM_USES_ONLY_WIN32_API
324
325 FileTimeConversion winTime;
326 GetSystemTimeAsFileTime(&winTime.fileTime);
327 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
328 #else
329
330 #if HAVE_GETTIMEOFDAY
331 struct timeval posixTime;
332 gettimeofday(&posixTime, NULL);
333 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
334 #else
335 time_t epochtime;
336 time(&epochtime);
337 return (UDate)epochtime * U_MILLIS_PER_SECOND;
338 #endif
339
340 #endif
341 }
342
343 /*-----------------------------------------------------------------------------
344 IEEE 754
345 These methods detect and return NaN and infinity values for doubles
346 conforming to IEEE 754. Platforms which support this standard include X86,
347 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
348 If this doesn't work on your platform, you have non-IEEE floating-point, and
349 will need to code your own versions. A naive implementation is to return 0.0
350 for getNaN and getInfinity, and false for isNaN and isInfinite.
351 ---------------------------------------------------------------------------*/
352
353 U_CAPI UBool U_EXPORT2
354 uprv_isNaN(double number)
355 {
356 #if IEEE_754
357 BitPatternConversion convertedNumber;
358 convertedNumber.d64 = number;
359 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
360 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
361
362 #elif U_PLATFORM == U_PF_OS390
363 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
364 sizeof(uint32_t));
365 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
366 sizeof(uint32_t));
367
368 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
369 (lowBits == 0x00000000L);
370
371 #else
372 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
373 /* you'll need to replace this default implementation with what's correct*/
374 /* for your platform.*/
375 return number != number;
376 #endif
377 }
378
379 U_CAPI UBool U_EXPORT2
380 uprv_isInfinite(double number)
381 {
382 #if IEEE_754
383 BitPatternConversion convertedNumber;
384 convertedNumber.d64 = number;
385 /* Infinity is exactly 0x7FF0000000000000U. */
386 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
387 #elif U_PLATFORM == U_PF_OS390
388 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
389 sizeof(uint32_t));
390 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
391 sizeof(uint32_t));
392
393 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
394
395 #else
396 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
397 /* value, you'll need to replace this default implementation with what's*/
398 /* correct for your platform.*/
399 return number == (2.0 * number);
400 #endif
401 }
402
403 U_CAPI UBool U_EXPORT2
404 uprv_isPositiveInfinity(double number)
405 {
406 #if IEEE_754 || U_PLATFORM == U_PF_OS390
407 return (UBool)(number > 0 && uprv_isInfinite(number));
408 #else
409 return uprv_isInfinite(number);
410 #endif
411 }
412
413 U_CAPI UBool U_EXPORT2
414 uprv_isNegativeInfinity(double number)
415 {
416 #if IEEE_754 || U_PLATFORM == U_PF_OS390
417 return (UBool)(number < 0 && uprv_isInfinite(number));
418
419 #else
420 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
421 sizeof(uint32_t));
422 return((highBits & SIGN) && uprv_isInfinite(number));
423
424 #endif
425 }
426
427 U_CAPI double U_EXPORT2
428 uprv_getNaN()
429 {
430 #if IEEE_754 || U_PLATFORM == U_PF_OS390
431 return gNan.d64;
432 #else
433 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
434 /* you'll need to replace this default implementation with what's correct*/
435 /* for your platform.*/
436 return 0.0;
437 #endif
438 }
439
440 U_CAPI double U_EXPORT2
441 uprv_getInfinity()
442 {
443 #if IEEE_754 || U_PLATFORM == U_PF_OS390
444 return gInf.d64;
445 #else
446 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
447 /* value, you'll need to replace this default implementation with what's*/
448 /* correct for your platform.*/
449 return 0.0;
450 #endif
451 }
452
453 U_CAPI double U_EXPORT2
454 uprv_floor(double x)
455 {
456 return floor(x);
457 }
458
459 U_CAPI double U_EXPORT2
460 uprv_ceil(double x)
461 {
462 return ceil(x);
463 }
464
465 U_CAPI double U_EXPORT2
466 uprv_round(double x)
467 {
468 return uprv_floor(x + 0.5);
469 }
470
471 U_CAPI double U_EXPORT2
472 uprv_fabs(double x)
473 {
474 return fabs(x);
475 }
476
477 U_CAPI double U_EXPORT2
478 uprv_modf(double x, double* y)
479 {
480 return modf(x, y);
481 }
482
483 U_CAPI double U_EXPORT2
484 uprv_fmod(double x, double y)
485 {
486 return fmod(x, y);
487 }
488
489 U_CAPI double U_EXPORT2
490 uprv_pow(double x, double y)
491 {
492 /* This is declared as "double pow(double x, double y)" */
493 return pow(x, y);
494 }
495
496 U_CAPI double U_EXPORT2
497 uprv_pow10(int32_t x)
498 {
499 return pow(10.0, (double)x);
500 }
501
502 U_CAPI double U_EXPORT2
503 uprv_fmax(double x, double y)
504 {
505 #if IEEE_754
506 /* first handle NaN*/
507 if(uprv_isNaN(x) || uprv_isNaN(y))
508 return uprv_getNaN();
509
510 /* check for -0 and 0*/
511 if(x == 0.0 && y == 0.0 && u_signBit(x))
512 return y;
513
514 #endif
515
516 /* this should work for all flt point w/o NaN and Inf special cases */
517 return (x > y ? x : y);
518 }
519
520 U_CAPI double U_EXPORT2
521 uprv_fmin(double x, double y)
522 {
523 #if IEEE_754
524 /* first handle NaN*/
525 if(uprv_isNaN(x) || uprv_isNaN(y))
526 return uprv_getNaN();
527
528 /* check for -0 and 0*/
529 if(x == 0.0 && y == 0.0 && u_signBit(y))
530 return y;
531
532 #endif
533
534 /* this should work for all flt point w/o NaN and Inf special cases */
535 return (x > y ? y : x);
536 }
537
538 /**
539 * Truncates the given double.
540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
541 * This is different than calling floor() or ceil():
542 * floor(3.3) = 3, floor(-3.3) = -4
543 * ceil(3.3) = 4, ceil(-3.3) = -3
544 */
545 U_CAPI double U_EXPORT2
546 uprv_trunc(double d)
547 {
548 #if IEEE_754
549 /* handle error cases*/
550 if(uprv_isNaN(d))
551 return uprv_getNaN();
552 if(uprv_isInfinite(d))
553 return uprv_getInfinity();
554
555 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
556 return ceil(d);
557 else
558 return floor(d);
559
560 #else
561 return d >= 0 ? floor(d) : ceil(d);
562
563 #endif
564 }
565
566 /**
567 * Return the largest positive number that can be represented by an integer
568 * type of arbitrary bit length.
569 */
570 U_CAPI double U_EXPORT2
571 uprv_maxMantissa(void)
572 {
573 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
574 }
575
576 U_CAPI double U_EXPORT2
577 uprv_log(double d)
578 {
579 return log(d);
580 }
581
582 U_CAPI void * U_EXPORT2
583 uprv_maximumPtr(void * base)
584 {
585 #if U_PLATFORM == U_PF_OS400
586 /*
587 * With the provided function we should never be out of range of a given segment
588 * (a traditional/typical segment that is). Our segments have 5 bytes for the
589 * id and 3 bytes for the offset. The key is that the casting takes care of
590 * only retrieving the offset portion minus x1000. Hence, the smallest offset
591 * seen in a program is x001000 and when casted to an int would be 0.
592 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
593 *
594 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
595 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
596 * This function determines the activation based on the pointer that is passed in and
597 * calculates the appropriate maximum available size for
598 * each pointer type (TERASPACE and non-TERASPACE)
599 *
600 * Unlike other operating systems, the pointer model isn't determined at
601 * compile time on i5/OS.
602 */
603 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
604 /* if it is a TERASPACE pointer the max is 2GB - 4k */
605 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
606 }
607 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
608 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
609
610 #else
611 return U_MAX_PTR(base);
612 #endif
613 }
614
615 /*---------------------------------------------------------------------------
616 Platform-specific Implementations
617 Try these, and if they don't work on your platform, then special case your
618 platform with new implementations.
619 ---------------------------------------------------------------------------*/
620
621 /* Generic time zone layer -------------------------------------------------- */
622
623 /* Time zone utilities */
624 U_CAPI void U_EXPORT2
625 uprv_tzset()
626 {
627 #if defined(U_TZSET)
628 U_TZSET();
629 #else
630 /* no initialization*/
631 #endif
632 }
633
634 U_CAPI int32_t U_EXPORT2
635 uprv_timezone()
636 {
637 #ifdef U_TIMEZONE
638 return U_TIMEZONE;
639 #else
640 time_t t, t1, t2;
641 struct tm tmrec;
642 UBool dst_checked;
643 int32_t tdiff = 0;
644
645 time(&t);
646 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
647 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
648 t1 = mktime(&tmrec); /* local time in seconds*/
649 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
650 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
651 tdiff = t2 - t1;
652 /* imitate NT behaviour, which returns same timezone offset to GMT for
653 winter and summer.
654 This does not work on all platforms. For instance, on glibc on Linux
655 and on Mac OS 10.5, tdiff calculated above remains the same
656 regardless of whether DST is in effect or not. However, U_TIMEZONE
657 is defined on those platforms and this code is not reached so that
658 we can leave this alone. If there's a platform behaving
659 like glibc that uses this code, we need to add platform-dependent
660 preprocessor here. */
661 if (dst_checked)
662 tdiff += 3600;
663 return tdiff;
664 #endif
665 }
666
667 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
668 some platforms need to have it declared here. */
669
670 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
671 /* RS6000 and others reject char **tzname. */
672 extern U_IMPORT char *U_TZNAME[];
673 #endif
674
675 #if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
676 /* These platforms are likely to use Olson timezone IDs. */
677 #define CHECK_LOCALTIME_LINK 1
678 #if U_PLATFORM_IS_DARWIN_BASED
679 #include <tzfile.h>
680 #define TZZONEINFO (TZDIR "/")
681 #elif U_PLATFORM == U_PF_SOLARIS
682 #define TZDEFAULT "/etc/localtime"
683 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
684 #define TZ_ENV_CHECK "localtime"
685 #else
686 #define TZDEFAULT "/etc/localtime"
687 #define TZZONEINFO "/usr/share/zoneinfo/"
688 #endif
689 #if U_HAVE_DIRENT_H
690 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
691 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
692 symlinked to /etc/localtime, which makes searchForTZFile return
693 'localtime' when it's the first match. */
694 #define TZFILE_SKIP2 "localtime"
695 #define SEARCH_TZFILE
696 #include <dirent.h> /* Needed to search through system timezone files */
697 #endif
698 static char gTimeZoneBuffer[PATH_MAX];
699 static char *gTimeZoneBufferPtr = NULL;
700 #endif
701
702 #if !U_PLATFORM_USES_ONLY_WIN32_API
703 #define isNonDigit(ch) (ch < '0' || '9' < ch)
704 static UBool isValidOlsonID(const char *id) {
705 int32_t idx = 0;
706
707 /* Determine if this is something like Iceland (Olson ID)
708 or AST4ADT (non-Olson ID) */
709 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
710 idx++;
711 }
712
713 /* If we went through the whole string, then it might be okay.
714 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
715 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
716 The rest of the time it could be an Olson ID. George */
717 return (UBool)(id[idx] == 0
718 || uprv_strcmp(id, "PST8PDT") == 0
719 || uprv_strcmp(id, "MST7MDT") == 0
720 || uprv_strcmp(id, "CST6CDT") == 0
721 || uprv_strcmp(id, "EST5EDT") == 0);
722 }
723
724 /* On some Unix-like OS, 'posix' subdirectory in
725 /usr/share/zoneinfo replicates the top-level contents. 'right'
726 subdirectory has the same set of files, but individual files
727 are different from those in the top-level directory or 'posix'
728 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
729 has files for UTC.
730 When the first match for /etc/localtime is in either of them
731 (usually in posix because 'right' has different file contents),
732 or TZ environment variable points to one of them, createTimeZone
733 fails because, say, 'posix/America/New_York' is not an Olson
734 timezone id ('America/New_York' is). So, we have to skip
735 'posix/' and 'right/' at the beginning. */
736 static void skipZoneIDPrefix(const char** id) {
737 if (uprv_strncmp(*id, "posix/", 6) == 0
738 || uprv_strncmp(*id, "right/", 6) == 0)
739 {
740 *id += 6;
741 }
742 }
743 #endif
744
745 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
746
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)

/* One row of the abbreviation-to-Olson-ID lookup table. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset value matched against the caller's offset */
    int32_t daylightType;   /* one of the U_DAYLIGHT_* values below */
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID reported for a match */
} OffsetZoneMapping;

enum {
    U_DAYLIGHT_NONE = 0,      /* no daylight savings */
    U_DAYLIGHT_JUNE = 1,      /* daylight in June */
    U_DAYLIGHT_DECEMBER = 2   /* daylight in December */
};

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (U_DAYLIGHT_NONE) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, U_DAYLIGHT_DECEMBER, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, U_DAYLIGHT_JUNE,     "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, U_DAYLIGHT_DECEMBER, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, U_DAYLIGHT_JUNE,     "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, U_DAYLIGHT_JUNE,     "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, U_DAYLIGHT_DECEMBER, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, U_DAYLIGHT_DECEMBER, "EST", "EST", "Australia/Sydney"},
    {-36000, U_DAYLIGHT_JUNE,     "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, U_DAYLIGHT_JUNE,     "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, U_DAYLIGHT_DECEMBER, "CST", "CST", "Australia/South"},
    {-32400, U_DAYLIGHT_JUNE,     "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, U_DAYLIGHT_JUNE,     "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, U_DAYLIGHT_DECEMBER, "CWST", "CWST", "Australia/Eucla"},
    {-28800, U_DAYLIGHT_JUNE,     "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, U_DAYLIGHT_JUNE,     "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, U_DAYLIGHT_DECEMBER, "WST", "WST", "Australia/West"},
    {-25200, U_DAYLIGHT_JUNE,     "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, U_DAYLIGHT_JUNE,     "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, U_DAYLIGHT_JUNE,     "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, U_DAYLIGHT_JUNE,     "OMST", "OMSST", "Asia/Omsk"},
    {-18000, U_DAYLIGHT_JUNE,     "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, U_DAYLIGHT_JUNE,     "SAMT", "SAMST", "Europe/Samara"},
    {-14400, U_DAYLIGHT_JUNE,     "AMT", "AMST", "Asia/Yerevan"},
    {-14400, U_DAYLIGHT_JUNE,     "AZT", "AZST", "Asia/Baku"},
    {-10800, U_DAYLIGHT_JUNE,     "AST", "ADT", "Asia/Baghdad"},
    {-10800, U_DAYLIGHT_JUNE,     "MSK", "MSD", "Europe/Moscow"},
    {-10800, U_DAYLIGHT_JUNE,     "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200,  U_DAYLIGHT_NONE,     "EET", "CEST", "Africa/Tripoli"},
    {-7200,  U_DAYLIGHT_JUNE,     "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200,  U_DAYLIGHT_JUNE,     "IST", "IDT", "Asia/Jerusalem"},
    {-3600,  U_DAYLIGHT_NONE,     "CET", "WEST", "Africa/Algiers"},
    {-3600,  U_DAYLIGHT_DECEMBER, "WAT", "WAST", "Africa/Windhoek"},
    {0,      U_DAYLIGHT_JUNE,     "GMT", "IST", "Europe/Dublin"},
    {0,      U_DAYLIGHT_JUNE,     "GMT", "BST", "Europe/London"},
    {0,      U_DAYLIGHT_NONE,     "WET", "WEST", "Africa/Casablanca"},
    {0,      U_DAYLIGHT_NONE,     "WET", "WET", "Africa/El_Aaiun"},
    {3600,   U_DAYLIGHT_JUNE,     "AZOT", "AZOST", "Atlantic/Azores"},
    {3600,   U_DAYLIGHT_JUNE,     "EGT", "EGST", "America/Scoresbysund"},
    {10800,  U_DAYLIGHT_JUNE,     "PMST", "PMDT", "America/Miquelon"},
    {10800,  U_DAYLIGHT_DECEMBER, "UYT", "UYST", "America/Montevideo"},
    {10800,  U_DAYLIGHT_JUNE,     "WGT", "WGST", "America/Godthab"},
    {10800,  U_DAYLIGHT_DECEMBER, "BRT", "BRST", "Brazil/East"},
    {12600,  U_DAYLIGHT_JUNE,     "NST", "NDT", "America/St_Johns"},
    {14400,  U_DAYLIGHT_JUNE,     "AST", "ADT", "Canada/Atlantic"},
    {14400,  U_DAYLIGHT_DECEMBER, "AMT", "AMST", "America/Cuiaba"},
    {14400,  U_DAYLIGHT_DECEMBER, "CLT", "CLST", "Chile/Continental"},
    {14400,  U_DAYLIGHT_DECEMBER, "FKT", "FKST", "Atlantic/Stanley"},
    {14400,  U_DAYLIGHT_DECEMBER, "PYT", "PYST", "America/Asuncion"},
    {18000,  U_DAYLIGHT_JUNE,     "CST", "CDT", "America/Havana"},
    {18000,  U_DAYLIGHT_JUNE,     "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600,  U_DAYLIGHT_DECEMBER, "EAST", "EASST", "Chile/EasterIsland"},
    {21600,  U_DAYLIGHT_NONE,     "CST", "MDT", "Canada/Saskatchewan"},
    {21600,  U_DAYLIGHT_NONE,     "CST", "CDT", "America/Guatemala"},
    {21600,  U_DAYLIGHT_JUNE,     "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200,  U_DAYLIGHT_JUNE,     "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800,  U_DAYLIGHT_NONE,     "PST", "PST", "Pacific/Pitcairn"},
    {28800,  U_DAYLIGHT_JUNE,     "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400,  U_DAYLIGHT_JUNE,     "AKST", "AKDT", "US/Alaska"},
    {36000,  U_DAYLIGHT_JUNE,     "HAST", "HADT", "US/Aleutian"}
};
828
829 /*#define DEBUG_TZNAME*/
830
831 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
832 {
833 int32_t idx;
834 #ifdef DEBUG_TZNAME
835 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
836 #endif
837 for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
838 {
839 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
840 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
841 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
842 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
843 {
844 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
845 }
846 }
847 return NULL;
848 }
849 #endif
850
851 #ifdef SEARCH_TZFILE
852 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
853 #define MAX_READ_SIZE 512
854
855 typedef struct DefaultTZInfo {
856 char* defaultTZBuffer;
857 int64_t defaultTZFileSize;
858 FILE* defaultTZFilePtr;
859 UBool defaultTZstatus;
860 int32_t defaultTZPosition;
861 } DefaultTZInfo;
862
863 /*
864 * This method compares the two files given to see if they are a match.
865 * It is currently use to compare two TZ files.
866 */
867 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
868 FILE* file;
869 int64_t sizeFile;
870 int64_t sizeFileLeft;
871 int32_t sizeFileRead;
872 int32_t sizeFileToRead;
873 char bufferFile[MAX_READ_SIZE];
874 UBool result = TRUE;
875
876 if (tzInfo->defaultTZFilePtr == NULL) {
877 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
878 }
879 file = fopen(TZFileName, "r");
880
881 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
882
883 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
884 /* First check that the file size are equal. */
885 if (tzInfo->defaultTZFileSize == 0) {
886 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
887 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
888 }
889 fseek(file, 0, SEEK_END);
890 sizeFile = ftell(file);
891 sizeFileLeft = sizeFile;
892
893 if (sizeFile != tzInfo->defaultTZFileSize) {
894 result = FALSE;
895 } else {
896 /* Store the data from the files in seperate buffers and
897 * compare each byte to determine equality.
898 */
899 if (tzInfo->defaultTZBuffer == NULL) {
900 rewind(tzInfo->defaultTZFilePtr);
901 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
902 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
903 }
904 rewind(file);
905 while(sizeFileLeft > 0) {
906 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
907 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
908
909 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
910 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
911 result = FALSE;
912 break;
913 }
914 sizeFileLeft -= sizeFileRead;
915 tzInfo->defaultTZPosition += sizeFileRead;
916 }
917 }
918 } else {
919 result = FALSE;
920 }
921
922 if (file != NULL) {
923 fclose(file);
924 }
925
926 return result;
927 }
928 /*
929 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
930 */
931 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
932 #define SKIP1 "."
933 #define SKIP2 ".."
934 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
935 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
936 char curpath[MAX_PATH_SIZE];
937 DIR* dirp = opendir(path);
938 DIR* subDirp = NULL;
939 struct dirent* dirEntry = NULL;
940
941 char* result = NULL;
942 if (dirp == NULL) {
943 return result;
944 }
945
946 /* Save the current path */
947 uprv_memset(curpath, 0, MAX_PATH_SIZE);
948 uprv_strcpy(curpath, path);
949
950 /* Check each entry in the directory. */
951 while((dirEntry = readdir(dirp)) != NULL) {
952 const char* dirName = dirEntry->d_name;
953 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
954 /* Create a newpath with the new entry to test each entry in the directory. */
955 char newpath[MAX_PATH_SIZE];
956 uprv_strcpy(newpath, curpath);
957 uprv_strcat(newpath, dirName);
958
959 if ((subDirp = opendir(newpath)) != NULL) {
960 /* If this new path is a directory, make a recursive call with the newpath. */
961 closedir(subDirp);
962 uprv_strcat(newpath, "/");
963 result = searchForTZFile(newpath, tzInfo);
964 /*
965 Have to get out here. Otherwise, we'd keep looking
966 and return the first match in the top-level directory
967 if there's a match in the top-level. If not, this function
968 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
969 It worked without this in most cases because we have a fallback of calling
970 localtime_r to figure out the default timezone.
971 */
972 if (result != NULL)
973 break;
974 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
975 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
976 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
977 skipZoneIDPrefix(&zoneid);
978 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
979 result = SEARCH_TZFILE_RESULT;
980 /* Get out after the first one found. */
981 break;
982 }
983 }
984 }
985 }
986 closedir(dirp);
987 return result;
988 }
989 #endif
990 U_CAPI const char* U_EXPORT2
991 uprv_tzname(int n)
992 {
993 const char *tzid = NULL;
994 #if U_PLATFORM_USES_ONLY_WIN32_API
995 tzid = uprv_detectWindowsTimeZone();
996
997 if (tzid != NULL) {
998 return tzid;
999 }
1000 #else
1001
1002 /*#if U_PLATFORM_IS_DARWIN_BASED
1003 int ret;
1004
1005 tzid = getenv("TZFILE");
1006 if (tzid != NULL) {
1007 return tzid;
1008 }
1009 #endif*/
1010
1011 /* This code can be temporarily disabled to test tzname resolution later on. */
1012 #ifndef DEBUG_TZNAME
1013 tzid = getenv("TZ");
1014 if (tzid != NULL && isValidOlsonID(tzid)
1015 #if U_PLATFORM == U_PF_SOLARIS
1016 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1017 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1018 #endif
1019 ) {
1020 /* This might be a good Olson ID. */
1021 skipZoneIDPrefix(&tzid);
1022 return tzid;
1023 }
1024 /* else U_TZNAME will give a better result. */
1025 #endif
1026
1027 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1028 /* Caller must handle threading issues */
1029 if (gTimeZoneBufferPtr == NULL) {
1030 /*
1031 This is a trick to look at the name of the link to get the Olson ID
1032 because the tzfile contents is underspecified.
1033 This isn't guaranteed to work because it may not be a symlink.
1034 */
1035 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1036 if (0 < ret) {
1037 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1038 gTimeZoneBuffer[ret] = 0;
1039 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1040 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1041 {
1042 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1043 }
1044 } else {
1045 #if defined(SEARCH_TZFILE)
1046 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1047 if (tzInfo != NULL) {
1048 tzInfo->defaultTZBuffer = NULL;
1049 tzInfo->defaultTZFileSize = 0;
1050 tzInfo->defaultTZFilePtr = NULL;
1051 tzInfo->defaultTZstatus = FALSE;
1052 tzInfo->defaultTZPosition = 0;
1053
1054 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1055
1056 /* Free previously allocated memory */
1057 if (tzInfo->defaultTZBuffer != NULL) {
1058 uprv_free(tzInfo->defaultTZBuffer);
1059 }
1060 if (tzInfo->defaultTZFilePtr != NULL) {
1061 fclose(tzInfo->defaultTZFilePtr);
1062 }
1063 uprv_free(tzInfo);
1064 }
1065
1066 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1067 return gTimeZoneBufferPtr;
1068 }
1069 #endif
1070 }
1071 }
1072 else {
1073 return gTimeZoneBufferPtr;
1074 }
1075 #endif
1076 #endif
1077
1078 #ifdef U_TZNAME
1079 #if U_PLATFORM_USES_ONLY_WIN32_API
1080 /* The return value is free'd in timezone.cpp on Windows because
1081 * the other code path returns a pointer to a heap location. */
1082 return uprv_strdup(U_TZNAME[n]);
1083 #else
1084 /*
1085 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1086 So we remap the abbreviation to an olson ID.
1087
1088 Since Windows exposes a little more timezone information,
1089 we normally don't use this code on Windows because
1090 uprv_detectWindowsTimeZone should have already given the correct answer.
1091 */
1092 {
1093 struct tm juneSol, decemberSol;
1094 int daylightType;
1095 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1096 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1097
1098 /* This probing will tell us when daylight savings occurs. */
1099 localtime_r(&juneSolstice, &juneSol);
1100 localtime_r(&decemberSolstice, &decemberSol);
1101 if(decemberSol.tm_isdst > 0) {
1102 daylightType = U_DAYLIGHT_DECEMBER;
1103 } else if(juneSol.tm_isdst > 0) {
1104 daylightType = U_DAYLIGHT_JUNE;
1105 } else {
1106 daylightType = U_DAYLIGHT_NONE;
1107 }
1108 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1109 if (tzid != NULL) {
1110 return tzid;
1111 }
1112 }
1113 return U_TZNAME[n];
1114 #endif
1115 #else
1116 return "";
1117 #endif
1118 }
1119
1120 /* Get and set the ICU data directory --------------------------------------- */
1121
1122 static char *gDataDirectory = NULL;
1123 #if U_POSIX_LOCALE
1124 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1125 #endif
1126
1127 static UBool U_CALLCONV putil_cleanup(void)
1128 {
1129 if (gDataDirectory && *gDataDirectory) {
1130 uprv_free(gDataDirectory);
1131 }
1132 gDataDirectory = NULL;
1133 #if U_POSIX_LOCALE
1134 if (gCorrectedPOSIXLocale) {
1135 uprv_free(gCorrectedPOSIXLocale);
1136 gCorrectedPOSIXLocale = NULL;
1137 }
1138 #endif
1139 return TRUE;
1140 }
1141
1142 /*
1143 * Set the data directory.
1144 * Make a copy of the passed string, and set the global data dir to point to it.
1145 * TODO: see bug #2849, regarding thread safety.
1146 */
1147 U_CAPI void U_EXPORT2
1148 u_setDataDirectory(const char *directory) {
1149 char *newDataDir;
1150 int32_t length;
1151
1152 if(directory==NULL || *directory==0) {
1153 /* A small optimization to prevent the malloc and copy when the
1154 shared library is used, and this is a way to make sure that NULL
1155 is never returned.
1156 */
1157 newDataDir = (char *)"";
1158 }
1159 else {
1160 length=(int32_t)uprv_strlen(directory);
1161 newDataDir = (char *)uprv_malloc(length + 2);
1162 /* Exit out if newDataDir could not be created. */
1163 if (newDataDir == NULL) {
1164 return;
1165 }
1166 uprv_strcpy(newDataDir, directory);
1167
1168 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1169 {
1170 char *p;
1171 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1172 *p = U_FILE_SEP_CHAR;
1173 }
1174 }
1175 #endif
1176 }
1177
1178 umtx_lock(NULL);
1179 if (gDataDirectory && *gDataDirectory) {
1180 uprv_free(gDataDirectory);
1181 }
1182 gDataDirectory = newDataDir;
1183 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1184 umtx_unlock(NULL);
1185 }
1186
1187 U_CAPI UBool U_EXPORT2
1188 uprv_pathIsAbsolute(const char *path)
1189 {
1190 if(!path || !*path) {
1191 return FALSE;
1192 }
1193
1194 if(*path == U_FILE_SEP_CHAR) {
1195 return TRUE;
1196 }
1197
1198 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1199 if(*path == U_FILE_ALT_SEP_CHAR) {
1200 return TRUE;
1201 }
1202 #endif
1203
1204 #if U_PLATFORM_USES_ONLY_WIN32_API
1205 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1206 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1207 path[1] == ':' ) {
1208 return TRUE;
1209 }
1210 #endif
1211
1212 return FALSE;
1213 }
1214
1215 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1216 until some client wrapper makefiles are updated */
1217 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1218 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1219 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1220 # endif
1221 #endif
1222
1223 U_CAPI const char * U_EXPORT2
1224 u_getDataDirectory(void) {
1225 const char *path = NULL;
1226 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1227 char datadir_path_buffer[PATH_MAX];
1228 #endif
1229
1230 /* if we have the directory, then return it immediately */
1231 UMTX_CHECK(NULL, gDataDirectory, path);
1232
1233 if(path) {
1234 return path;
1235 }
1236
1237 /*
1238 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1239 override ICU's data with the ICU_DATA environment variable. This prevents
1240 problems where multiple custom copies of ICU's specific version of data
1241 are installed on a system. Either the application must define the data
1242 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1243 ICU, set the data with udata_setCommonData or trust that all of the
1244 required data is contained in ICU's data library that contains
1245 the entry point defined by U_ICUDATA_ENTRY_POINT.
1246
1247 There may also be some platforms where environment variables
1248 are not allowed.
1249 */
1250 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1251 /* First try to get the environment variable */
1252 path=getenv("ICU_DATA");
1253 # endif
1254
1255 /* ICU_DATA_DIR may be set as a compile option.
1256 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1257 * and is used only when data is built in archive mode eliminating the need
1258 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1259 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1260 * set their own path.
1261 */
1262 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1263 if(path==NULL || *path==0) {
1264 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1265 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1266 # endif
1267 # ifdef ICU_DATA_DIR
1268 path=ICU_DATA_DIR;
1269 # else
1270 path=U_ICU_DATA_DEFAULT_DIR;
1271 # endif
1272 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1273 if (prefix != NULL) {
1274 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1275 path=datadir_path_buffer;
1276 }
1277 # endif
1278 }
1279 #endif
1280
1281 if(path==NULL) {
1282 /* It looks really bad, set it to something. */
1283 path = "";
1284 }
1285
1286 u_setDataDirectory(path);
1287 return gDataDirectory;
1288 }
1289
1290
1291
1292
1293
1294 /* Macintosh-specific locale information ------------------------------------ */
1295 #if U_PLATFORM == U_PF_CLASSIC_MACOS
1296
/*
 * One row of the classic Mac OS locale lookup table. A field holding
 * MAC_LC_MAGIC_NUMBER acts as a wildcard when the table is searched.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5
#define MAC_LC_INIT_NUMBER -9

/*
 * Maps the system date-region code to a POSIX locale ID. Rows are matched in
 * order, so the all-wildcard fallback must stay last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },  /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },  /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },  /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },  /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },  /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },  /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },  /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },  /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },  /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: Hebrew. This row used to carry "is_IS", which is Icelandic. */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland === */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "" },*/ /* Malta === */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "" },*/ /* Cyprus === */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey === */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "" },*/ /* Hindi system for India */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "" },*/ /* Pakistan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "" },*/ /* Lapland [Ask Rich for the data. HS] */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "" },*/ /* Faeroe Islands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1396
1397 #endif
1398
1399 #if U_POSIX_LOCALE
1400 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1403 */
1404 static const char *uprv_getPOSIXIDForCategory(int category)
1405 {
1406 const char* posixID = NULL;
1407 if (category == LC_MESSAGES || category == LC_CTYPE) {
1408 /*
1409 * On Solaris two different calls to setlocale can result in
1410 * different values. Only get this value once.
1411 *
1412 * We must check this first because an application can set this.
1413 *
1414 * LC_ALL can't be used because it's platform dependent. The LANG
1415 * environment variable seems to affect LC_CTYPE variable by default.
1416 * Here is what setlocale(LC_ALL, NULL) can return.
1417 * HPUX can return 'C C C C C C C'
1418 * Solaris can return /en_US/C/C/C/C/C on the second try.
1419 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1420 *
1421 * The default codepage detection also needs to use LC_CTYPE.
1422 *
1423 * Do not call setlocale(LC_*, "")! Using an empty string instead
1424 * of NULL, will modify the libc behavior.
1425 */
1426 posixID = setlocale(category, NULL);
1427 if ((posixID == 0)
1428 || (uprv_strcmp("C", posixID) == 0)
1429 || (uprv_strcmp("POSIX", posixID) == 0))
1430 {
1431 /* Maybe we got some garbage. Try something more reasonable */
1432 posixID = getenv("LC_ALL");
1433 if (posixID == 0) {
1434 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1435 if (posixID == 0) {
1436 posixID = getenv("LANG");
1437 }
1438 }
1439 }
1440 }
1441 if ((posixID==0)
1442 || (uprv_strcmp("C", posixID) == 0)
1443 || (uprv_strcmp("POSIX", posixID) == 0))
1444 {
1445 /* Nothing worked. Give it a nice POSIX default value. */
1446 posixID = "en_US_POSIX";
1447 }
1448 return posixID;
1449 }
1450
1451 /* Return just the POSIX id for the default locale, whatever happens to be in
1452 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1453 */
1454 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1455 {
1456 static const char* posixID = NULL;
1457 if (posixID == 0) {
1458 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1459 }
1460 return posixID;
1461 }
1462
1463 #if !U_CHARSET_IS_UTF8
1464 /* Return just the POSIX id for the default codepage, whatever happens to be in
1465 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1466 */
1467 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1468 {
1469 static const char* posixID = NULL;
1470 if (posixID == 0) {
1471 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1472 }
1473 return posixID;
1474 }
1475 #endif
1476 #endif
1477
1478 /* NOTE: The caller should handle thread safety */
1479 U_CAPI const char* U_EXPORT2
1480 uprv_getDefaultLocaleID()
1481 {
1482 #if U_POSIX_LOCALE
1483 /*
1484 Note that: (a '!' means the ID is improper somehow)
1485 LC_ALL ----> default_loc codepage
1486 --------------------------------------------------------
1487 ab.CD ab CD
1488 ab@CD ab__CD -
1489 ab@CD.EF ab__CD EF
1490
1491 ab_CD.EF@GH ab_CD_GH EF
1492
1493 Some 'improper' ways to do the same as above:
1494 ! ab_CD@GH.EF ab_CD_GH EF
1495 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1496 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1497
1498 _CD@GH _CD_GH -
1499 _CD.EF@GH _CD_GH EF
1500
1501 The variant cannot have dots in it.
1502 The 'rightmost' variant (@xxx) wins.
1503 The leftmost codepage (.xxx) wins.
1504 */
1505 char *correctedPOSIXLocale = 0;
1506 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1507 const char *p;
1508 const char *q;
1509 int32_t len;
1510
1511 /* Format: (no spaces)
1512 ll [ _CC ] [ . MM ] [ @ VV]
1513
1514 l = lang, C = ctry, M = charmap, V = variant
1515 */
1516
1517 if (gCorrectedPOSIXLocale != NULL) {
1518 return gCorrectedPOSIXLocale;
1519 }
1520
1521 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1522 /* assume new locale can't be larger than old one? */
1523 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1524 /* Exit on memory allocation error. */
1525 if (correctedPOSIXLocale == NULL) {
1526 return NULL;
1527 }
1528 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1529 correctedPOSIXLocale[p-posixID] = 0;
1530
1531 /* do not copy after the @ */
1532 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1533 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1534 }
1535 }
1536
1537 /* Note that we scan the *uncorrected* ID. */
1538 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1539 if (correctedPOSIXLocale == NULL) {
1540 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1541 /* Exit on memory allocation error. */
1542 if (correctedPOSIXLocale == NULL) {
1543 return NULL;
1544 }
1545 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1546 correctedPOSIXLocale[p-posixID] = 0;
1547 }
1548 p++;
1549
1550 /* Take care of any special cases here.. */
1551 if (!uprv_strcmp(p, "nynorsk")) {
1552 p = "NY";
1553 /* Don't worry about no__NY. In practice, it won't appear. */
1554 }
1555
1556 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1557 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1558 }
1559 else {
1560 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1561 }
1562
1563 if ((q = uprv_strchr(p, '.')) != NULL) {
1564 /* How big will the resulting string be? */
1565 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1566 uprv_strncat(correctedPOSIXLocale, p, q-p);
1567 correctedPOSIXLocale[len] = 0;
1568 }
1569 else {
1570 /* Anything following the @ sign */
1571 uprv_strcat(correctedPOSIXLocale, p);
1572 }
1573
1574 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1575 * How about 'russian' -> 'ru'?
1576 * Many of the other locales using ISO codes will be handled by the
1577 * canonicalization functions in uloc_getDefault.
1578 */
1579 }
1580
1581 /* Was a correction made? */
1582 if (correctedPOSIXLocale != NULL) {
1583 posixID = correctedPOSIXLocale;
1584 }
1585 else {
1586 /* copy it, just in case the original pointer goes away. See j2395 */
1587 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1588 /* Exit on memory allocation error. */
1589 if (correctedPOSIXLocale == NULL) {
1590 return NULL;
1591 }
1592 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1593 }
1594
1595 if (gCorrectedPOSIXLocale == NULL) {
1596 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1597 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1598 correctedPOSIXLocale = NULL;
1599 }
1600
1601 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1602 uprv_free(correctedPOSIXLocale);
1603 }
1604
1605 return posixID;
1606
1607 #elif U_PLATFORM_USES_ONLY_WIN32_API
1608 UErrorCode status = U_ZERO_ERROR;
1609 LCID id = GetThreadLocale();
1610 const char* locID = uprv_convertToPosix(id, &status);
1611
1612 if (U_FAILURE(status)) {
1613 locID = "en_US";
1614 }
1615 return locID;
1616
1617 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1618 int32_t script = MAC_LC_INIT_NUMBER;
1619 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1620 int32_t region = MAC_LC_INIT_NUMBER;
1621 /* = GetScriptManagerVariable(smRegionCode);*/
1622 int32_t lang = MAC_LC_INIT_NUMBER;
1623 /* = GetScriptManagerVariable(smScriptLang);*/
1624 int32_t date_region = MAC_LC_INIT_NUMBER;
1625 const char* posixID = 0;
1626 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1627 int32_t i;
1628 Intl1Hndl ih;
1629
1630 ih = (Intl1Hndl) GetIntlResource(1);
1631 if (ih)
1632 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1633
1634 for (i = 0; i < count; i++) {
1635 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1636 || (mac_lc_recs[i].script == script))
1637 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1638 || (mac_lc_recs[i].region == region))
1639 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1640 || (mac_lc_recs[i].lang == lang))
1641 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1642 || (mac_lc_recs[i].date_region == date_region))
1643 )
1644 {
1645 posixID = mac_lc_recs[i].posixID;
1646 break;
1647 }
1648 }
1649
1650 return posixID;
1651
1652 #elif U_PLATFORM == U_PF_OS400
1653 /* locales are process scoped and are by definition thread safe */
1654 static char correctedLocale[64];
1655 const char *localeID = getenv("LC_ALL");
1656 char *p;
1657
1658 if (localeID == NULL)
1659 localeID = getenv("LANG");
1660 if (localeID == NULL)
1661 localeID = setlocale(LC_ALL, NULL);
1662 /* Make sure we have something... */
1663 if (localeID == NULL)
1664 return "en_US_POSIX";
1665
1666 /* Extract the locale name from the path. */
1667 if((p = uprv_strrchr(localeID, '/')) != NULL)
1668 {
1669 /* Increment p to start of locale name. */
1670 p++;
1671 localeID = p;
1672 }
1673
1674 /* Copy to work location. */
1675 uprv_strcpy(correctedLocale, localeID);
1676
1677 /* Strip off the '.locale' extension. */
1678 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1679 *p = 0;
1680 }
1681
1682 /* Upper case the locale name. */
1683 T_CString_toUpperCase(correctedLocale);
1684
1685 /* See if we are using the POSIX locale. Any of the
1686 * following are equivalent and use the same QLGPGCMA
1687 * (POSIX) locale.
1688 * QLGPGCMA2 means UCS2
1689 * QLGPGCMA_4 means UTF-32
1690 * QLGPGCMA_8 means UTF-8
1691 */
1692 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1693 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1694 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1695 {
1696 uprv_strcpy(correctedLocale, "en_US_POSIX");
1697 }
1698 else
1699 {
1700 int16_t LocaleLen;
1701
1702 /* Lower case the lang portion. */
1703 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1704 {
1705 *p = uprv_tolower(*p);
1706 }
1707
1708 /* Adjust for Euro. After '_E' add 'URO'. */
1709 LocaleLen = uprv_strlen(correctedLocale);
1710 if (correctedLocale[LocaleLen - 2] == '_' &&
1711 correctedLocale[LocaleLen - 1] == 'E')
1712 {
1713 uprv_strcat(correctedLocale, "URO");
1714 }
1715
1716 /* If using Lotus-based locale then convert to
1717 * equivalent non Lotus.
1718 */
1719 else if (correctedLocale[LocaleLen - 2] == '_' &&
1720 correctedLocale[LocaleLen - 1] == 'L')
1721 {
1722 correctedLocale[LocaleLen - 2] = 0;
1723 }
1724
1725 /* There are separate simplified and traditional
1726 * locales called zh_HK_S and zh_HK_T.
1727 */
1728 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1729 {
1730 uprv_strcpy(correctedLocale, "zh_HK");
1731 }
1732
1733 /* A special zh_CN_GBK locale...
1734 */
1735 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1736 {
1737 uprv_strcpy(correctedLocale, "zh_CN");
1738 }
1739
1740 }
1741
1742 return correctedLocale;
1743 #endif
1744
1745 }
1746
1747 #if !U_CHARSET_IS_UTF8
1748 #if U_POSIX_LOCALE
1749 /*
1750 Due to various platform differences, one platform may specify a charset,
1751 when they really mean a different charset. Remap the names so that they are
1752 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1753 here. Before adding anything to this function, please consider adding unique
1754 names to the ICU alias table in the data directory.
1755 */
1756 static const char*
1757 remapPlatformDependentCodepage(const char *locale, const char *name) {
1758 if (locale != NULL && *locale == 0) {
1759 /* Make sure that an empty locale is handled the same way. */
1760 locale = NULL;
1761 }
1762 if (name == NULL) {
1763 return NULL;
1764 }
1765 #if U_PLATFORM == U_PF_AIX
1766 if (uprv_strcmp(name, "IBM-943") == 0) {
1767 /* Use the ASCII compatible ibm-943 */
1768 name = "Shift-JIS";
1769 }
1770 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1771 /* Use the windows-1252 that contains the Euro */
1772 name = "IBM-5348";
1773 }
1774 #elif U_PLATFORM == U_PF_SOLARIS
1775 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1776 /* Solaris underspecifies the "EUC" name. */
1777 if (uprv_strcmp(locale, "zh_CN") == 0) {
1778 name = "EUC-CN";
1779 }
1780 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1781 name = "EUC-TW";
1782 }
1783 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1784 name = "EUC-KR";
1785 }
1786 }
1787 else if (uprv_strcmp(name, "eucJP") == 0) {
1788 /*
1789 ibm-954 is the best match.
1790 ibm-33722 is the default for eucJP (similar to Windows).
1791 */
1792 name = "eucjis";
1793 }
1794 else if (uprv_strcmp(name, "646") == 0) {
1795 /*
1796 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1797 * ISO-8859-1 instead of US-ASCII(646).
1798 */
1799 name = "ISO-8859-1";
1800 }
1801 #elif U_PLATFORM_IS_DARWIN_BASED
1802 if (locale == NULL && *name == 0) {
1803 /*
1804 No locale was specified, and an empty name was passed in.
1805 This usually indicates that nl_langinfo didn't return valid information.
1806 Mac OS X uses UTF-8 by default (especially the locale data and console).
1807 */
1808 name = "UTF-8";
1809 }
1810 else if (uprv_strcmp(name, "CP949") == 0) {
1811 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1812 name = "EUC-KR";
1813 }
1814 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1815 /*
1816 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1817 */
1818 name = "UTF-8";
1819 }
1820 #elif U_PLATFORM == U_PF_BSD
1821 if (uprv_strcmp(name, "CP949") == 0) {
1822 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1823 name = "EUC-KR";
1824 }
1825 #elif U_PLATFORM == U_PF_HPUX
1826 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1827 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1828 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1829 name = "hkbig5";
1830 }
1831 else if (uprv_strcmp(name, "eucJP") == 0) {
1832 /*
1833 ibm-1350 is the best match, but unavailable.
1834 ibm-954 is mostly a superset of ibm-1350.
1835 ibm-33722 is the default for eucJP (similar to Windows).
1836 */
1837 name = "eucjis";
1838 }
1839 #elif U_PLATFORM == U_PF_LINUX
1840 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1841 /* Linux underspecifies the "EUC" name. */
1842 if (uprv_strcmp(locale, "korean") == 0) {
1843 name = "EUC-KR";
1844 }
1845 else if (uprv_strcmp(locale, "japanese") == 0) {
1846 /* See comment below about eucJP */
1847 name = "eucjis";
1848 }
1849 }
1850 else if (uprv_strcmp(name, "eucjp") == 0) {
1851 /*
1852 ibm-1350 is the best match, but unavailable.
1853 ibm-954 is mostly a superset of ibm-1350.
1854 ibm-33722 is the default for eucJP (similar to Windows).
1855 */
1856 name = "eucjis";
1857 }
1858 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1859 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1860 /*
1861 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1862 */
1863 name = "UTF-8";
1864 }
1865 /*
1866 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1867 * it by falling back to 'US-ASCII' when NULL is returned from this
1868 * function. So, we don't have to worry about it here.
1869 */
1870 #endif
1871 /* return NULL when "" is passed in */
1872 if (*name == 0) {
1873 name = NULL;
1874 }
1875 return name;
1876 }
1877
1878 static const char*
1879 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1880 {
1881 char localeBuf[100];
1882 const char *name = NULL;
1883 char *variant = NULL;
1884
1885 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1886 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1887 uprv_strncpy(localeBuf, localeName, localeCapacity);
1888 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1889 name = uprv_strncpy(buffer, name+1, buffCapacity);
1890 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1891 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1892 *variant = 0;
1893 }
1894 name = remapPlatformDependentCodepage(localeBuf, name);
1895 }
1896 return name;
1897 }
1898 #endif
1899
1900 static const char*
1901 int_getDefaultCodepage()
1902 {
1903 #if U_PLATFORM == U_PF_OS400
1904 uint32_t ccsid = 37; /* Default to ibm-37 */
1905 static char codepage[64];
1906 Qwc_JOBI0400_t jobinfo;
1907 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1908
1909 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1910 "* ", " ", &error);
1911
1912 if (error.Bytes_Available == 0) {
1913 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1914 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1915 }
1916 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1917 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1918 }
1919 /* else use the default */
1920 }
1921 sprintf(codepage,"ibm-%d", ccsid);
1922 return codepage;
1923
1924 #elif U_PLATFORM == U_PF_OS390
1925 static char codepage[64];
1926
1927 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1928 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1929 codepage[63] = 0; /* NULL terminate */
1930
1931 return codepage;
1932
1933 #elif U_PLATFORM == U_PF_CLASSIC_MACOS
1934 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1935
1936 #elif U_PLATFORM_USES_ONLY_WIN32_API
1937 static char codepage[64];
1938 sprintf(codepage, "windows-%d", GetACP());
1939 return codepage;
1940
1941 #elif U_POSIX_LOCALE
1942 static char codesetName[100];
1943 const char *localeName = NULL;
1944 const char *name = NULL;
1945
1946 localeName = uprv_getPOSIXIDForDefaultCodepage();
1947 uprv_memset(codesetName, 0, sizeof(codesetName));
1948 #if U_HAVE_NL_LANGINFO_CODESET
1949 /* When available, check nl_langinfo first because it usually gives more
1950 useful names. It depends on LC_CTYPE.
1951 nl_langinfo may use the same buffer as setlocale. */
1952 {
1953 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1954 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1955 /*
1956 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1957 * instead of ASCII.
1958 */
1959 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1960 codeset = remapPlatformDependentCodepage(localeName, codeset);
1961 } else
1962 #endif
1963 {
1964 codeset = remapPlatformDependentCodepage(NULL, codeset);
1965 }
1966
1967 if (codeset != NULL) {
1968 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1969 codesetName[sizeof(codesetName)-1] = 0;
1970 return codesetName;
1971 }
1972 }
1973 #endif
1974
1975 /* Use setlocale in a nice way, and then check some environment variables.
1976 Maybe the application used setlocale already.
1977 */
1978 uprv_memset(codesetName, 0, sizeof(codesetName));
1979 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1980 if (name) {
1981 /* if we can find the codeset name from setlocale, return that. */
1982 return name;
1983 }
1984
1985 if (*codesetName == 0)
1986 {
1987 /* Everything failed. Return US ASCII (ISO 646). */
1988 (void)uprv_strcpy(codesetName, "US-ASCII");
1989 }
1990 return codesetName;
1991 #else
1992 return "US-ASCII";
1993 #endif
1994 }
1995
1996
1997 U_CAPI const char* U_EXPORT2
1998 uprv_getDefaultCodepage()
1999 {
2000 static char const *name = NULL;
2001 umtx_lock(NULL);
2002 if (name == NULL) {
2003 name = int_getDefaultCodepage();
2004 }
2005 umtx_unlock(NULL);
2006 return name;
2007 }
2008 #endif /* !U_CHARSET_IS_UTF8 */
2009
2010
2011 /* end of platform-specific implementation -------------- */
2012
2013 /* version handling --------------------------------------------------------- */
2014
2015 U_CAPI void U_EXPORT2
2016 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2017 char *end;
2018 uint16_t part=0;
2019
2020 if(versionArray==NULL) {
2021 return;
2022 }
2023
2024 if(versionString!=NULL) {
2025 for(;;) {
2026 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2027 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2028 break;
2029 }
2030 versionString=end+1;
2031 }
2032 }
2033
2034 while(part<U_MAX_VERSION_LENGTH) {
2035 versionArray[part++]=0;
2036 }
2037 }
2038
2039 U_CAPI void U_EXPORT2
2040 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2041 if(versionArray!=NULL && versionString!=NULL) {
2042 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2043 int32_t len = u_strlen(versionString);
2044 if(len>U_MAX_VERSION_STRING_LENGTH) {
2045 len = U_MAX_VERSION_STRING_LENGTH;
2046 }
2047 u_UCharsToChars(versionString, versionChars, len);
2048 versionChars[len]=0;
2049 u_versionFromString(versionArray, versionChars);
2050 }
2051 }
2052
2053 U_CAPI void U_EXPORT2
2054 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2055 uint16_t count, part;
2056 uint8_t field;
2057
2058 if(versionString==NULL) {
2059 return;
2060 }
2061
2062 if(versionArray==NULL) {
2063 versionString[0]=0;
2064 return;
2065 }
2066
2067 /* count how many fields need to be written */
2068 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2069 }
2070
2071 if(count <= 1) {
2072 count = 2;
2073 }
2074
2075 /* write the first part */
2076 /* write the decimal field value */
2077 field=versionArray[0];
2078 if(field>=100) {
2079 *versionString++=(char)('0'+field/100);
2080 field%=100;
2081 }
2082 if(field>=10) {
2083 *versionString++=(char)('0'+field/10);
2084 field%=10;
2085 }
2086 *versionString++=(char)('0'+field);
2087
2088 /* write the following parts */
2089 for(part=1; part<count; ++part) {
2090 /* write a dot first */
2091 *versionString++=U_VERSION_DELIMITER;
2092
2093 /* write the decimal field value */
2094 field=versionArray[part];
2095 if(field>=100) {
2096 *versionString++=(char)('0'+field/100);
2097 field%=100;
2098 }
2099 if(field>=10) {
2100 *versionString++=(char)('0'+field/10);
2101 field%=10;
2102 }
2103 *versionString++=(char)('0'+field);
2104 }
2105
2106 /* NUL-terminate */
2107 *versionString=0;
2108 }
2109
2110 U_CAPI void U_EXPORT2
2111 u_getVersion(UVersionInfo versionArray) {
2112 u_versionFromString(versionArray, U_ICU_VERSION);
2113 }
2114
2115 /**
2116 * icucfg.h dependent code
2117 */
2118
2119 #if U_ENABLE_DYLOAD
2120
2121 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2122
2123 #if HAVE_DLFCN_H
2124
2125 #ifdef __MVS__
2126 #ifndef __SUSV3
2127 #define __SUSV3 1
2128 #endif
2129 #endif
2130 #include <dlfcn.h>
2131 #endif
2132
2133 U_INTERNAL void * U_EXPORT2
2134 uprv_dl_open(const char *libName, UErrorCode *status) {
2135 void *ret = NULL;
2136 if(U_FAILURE(*status)) return ret;
2137 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2138 if(ret==NULL) {
2139 #ifdef U_TRACE_DYLOAD
2140 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2141 #endif
2142 *status = U_MISSING_RESOURCE_ERROR;
2143 }
2144 return ret;
2145 }
2146
2147 U_INTERNAL void U_EXPORT2
2148 uprv_dl_close(void *lib, UErrorCode *status) {
2149 if(U_FAILURE(*status)) return;
2150 dlclose(lib);
2151 }
2152
2153 U_INTERNAL UVoidFunction* U_EXPORT2
2154 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2155 union {
2156 UVoidFunction *fp;
2157 void *vp;
2158 } uret;
2159 uret.fp = NULL;
2160 if(U_FAILURE(*status)) return uret.fp;
2161 uret.vp = dlsym(lib, sym);
2162 if(uret.vp == NULL) {
2163 #ifdef U_TRACE_DYLOAD
2164 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2165 #endif
2166 *status = U_MISSING_RESOURCE_ERROR;
2167 }
2168 return uret.fp;
2169 }
2170
2171 #else
2172
2173 /* null (nonexistent) implementation. */
2174
2175 U_INTERNAL void * U_EXPORT2
2176 uprv_dl_open(const char *libName, UErrorCode *status) {
2177 if(U_FAILURE(*status)) return NULL;
2178 *status = U_UNSUPPORTED_ERROR;
2179 return NULL;
2180 }
2181
2182 U_INTERNAL void U_EXPORT2
2183 uprv_dl_close(void *lib, UErrorCode *status) {
2184 if(U_FAILURE(*status)) return;
2185 *status = U_UNSUPPORTED_ERROR;
2186 return;
2187 }
2188
2189
2190 U_INTERNAL UVoidFunction* U_EXPORT2
2191 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2192 if(U_SUCCESS(*status)) {
2193 *status = U_UNSUPPORTED_ERROR;
2194 }
2195 return (UVoidFunction*)NULL;
2196 }
2197
2198
2199
2200 #endif
2201
2202 #elif U_PLATFORM_USES_ONLY_WIN32_API
2203
2204 U_INTERNAL void * U_EXPORT2
2205 uprv_dl_open(const char *libName, UErrorCode *status) {
2206 HMODULE lib = NULL;
2207
2208 if(U_FAILURE(*status)) return NULL;
2209
2210 lib = LoadLibraryA(libName);
2211
2212 if(lib==NULL) {
2213 *status = U_MISSING_RESOURCE_ERROR;
2214 }
2215
2216 return (void*)lib;
2217 }
2218
2219 U_INTERNAL void U_EXPORT2
2220 uprv_dl_close(void *lib, UErrorCode *status) {
2221 HMODULE handle = (HMODULE)lib;
2222 if(U_FAILURE(*status)) return;
2223
2224 FreeLibrary(handle);
2225
2226 return;
2227 }
2228
2229
2230 U_INTERNAL UVoidFunction* U_EXPORT2
2231 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2232 HMODULE handle = (HMODULE)lib;
2233 UVoidFunction* addr = NULL;
2234
2235 if(U_FAILURE(*status) || lib==NULL) return NULL;
2236
2237 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2238
2239 if(addr==NULL) {
2240 DWORD lastError = GetLastError();
2241 if(lastError == ERROR_PROC_NOT_FOUND) {
2242 *status = U_MISSING_RESOURCE_ERROR;
2243 } else {
2244 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2245 }
2246 }
2247
2248 return addr;
2249 }
2250
2251
2252 #else
2253
2254 /* No dynamic loading set. */
2255
2256 U_INTERNAL void * U_EXPORT2
2257 uprv_dl_open(const char *libName, UErrorCode *status) {
2258 if(U_FAILURE(*status)) return NULL;
2259 *status = U_UNSUPPORTED_ERROR;
2260 return NULL;
2261 }
2262
2263 U_INTERNAL void U_EXPORT2
2264 uprv_dl_close(void *lib, UErrorCode *status) {
2265 if(U_FAILURE(*status)) return;
2266 *status = U_UNSUPPORTED_ERROR;
2267 return;
2268 }
2269
2270
2271 U_INTERNAL UVoidFunction* U_EXPORT2
2272 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2273 if(U_SUCCESS(*status)) {
2274 *status = U_UNSUPPORTED_ERROR;
2275 }
2276 return (UVoidFunction*)NULL;
2277 }
2278
2279 #endif /* U_ENABLE_DYLOAD */
2280
2281 /*
2282 * Hey, Emacs, please set the following:
2283 *
2284 * Local Variables:
2285 * indent-tabs-mode: nil
2286 * End:
2287 *
2288 */