]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.c
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / common / putil.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 /* Define _XOPEN_SOURCE for Solaris and friends. */
41 /* NetBSD needs it to be >= 4 */
42 #if !defined(_XOPEN_SOURCE)
43 #if __STDC_VERSION__ >= 199901L
44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45 #define _XOPEN_SOURCE 600
46 #else
47 #define _XOPEN_SOURCE 4
48 #endif
49 #endif
50
51 /* Make sure things like readlink and such functions work.
52 Poorly upgraded Solaris machines can't have this defined.
53 Cleanly installed Solaris can use this #define.
54 */
55 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
56 #define _XOPEN_SOURCE_EXTENDED 1
57 #endif
58
59 /* include ICU headers */
60 #include "unicode/utypes.h"
61 #include "unicode/putil.h"
62 #include "unicode/ustring.h"
63 #include "putilimp.h"
64 #include "uassert.h"
65 #include "umutex.h"
66 #include "cmemory.h"
67 #include "cstring.h"
68 #include "locmap.h"
69 #include "ucln_cmn.h"
70
71 /* Include standard headers. */
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #include <math.h>
76 #include <locale.h>
77 #include <float.h>
78 #include <time.h>
79
80 /* include system headers */
81 #ifdef U_WINDOWS
82 # define WIN32_LEAN_AND_MEAN
83 # define VC_EXTRALEAN
84 # define NOUSER
85 # define NOSERVICE
86 # define NOIME
87 # define NOMCX
88 # include <windows.h>
89 # include "wintz.h"
90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
91 /* tzset isn't defined in strict ANSI on Cygwin. */
92 # undef __STRICT_ANSI__
93 #elif defined(OS400)
94 # include <float.h>
95 # include <qusec.h> /* error code structure */
96 # include <qusrjobi.h>
97 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
98 # include <mih/testptr.h> /* For uprv_maximumPtr */
99 #elif defined(XP_MAC)
100 # include <Files.h>
101 # include <IntlResources.h>
102 # include <Script.h>
103 # include <Folders.h>
104 # include <MacTypes.h>
105 # include <TextUtils.h>
106 # define ICU_NO_USER_DATA_OVERRIDE 1
107 #elif defined(OS390)
108 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
110 #include <limits.h>
111 #include <unistd.h>
112 #elif defined(U_QNX)
113 #include <sys/neutrino.h>
114 #endif
115
116 #if defined(U_DARWIN)
117 #include <TargetConditionals.h>
118 #endif
119
120 #ifndef U_WINDOWS
121 #include <sys/time.h>
122 #endif
123
124 /*
125 * Only include langinfo.h if we have a way to get the codeset. If we later
126 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
127 *
128 */
129
130 #if U_HAVE_NL_LANGINFO_CODESET
131 #include <langinfo.h>
132 #endif
133
134 /* Define the extension for data files, again... */
135 #define DATA_TYPE "dat"
136
137 /* Leave this copyright notice here! */
138 static const char copyright[] = U_COPYRIGHT_STRING;
139
140 /* floating point implementations ------------------------------------------- */
141
142 /* We return QNAN rather than SNAN*/
143 #define SIGN 0x80000000U
144
145 /* Make it easy to define certain types of constants */
/*
 * Overlay of a 64-bit integer and a double, used to spell out doubles as raw
 * bit patterns.  The integer member has to come first: a C89 initializer
 * always initializes the first member of a union.
 */
typedef union {
    int64_t i64;
    double  d64;
} BitPatternConversion;

/* Bit pattern handed out by uprv_getNaN(): a quiet NaN on IEEE 754 platforms. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };

/* Bit pattern handed out by uprv_getInfinity(): +infinity on IEEE 754 platforms. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
152
153 /*---------------------------------------------------------------------------
154 Platform utilities
155 Our general strategy is to assume we're on a POSIX platform. Platforms which
156 are non-POSIX must declare themselves so. The default POSIX implementation
157 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
158 functions).
159 ---------------------------------------------------------------------------*/
160
161 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
162 # undef U_POSIX_LOCALE
163 #else
164 # define U_POSIX_LOCALE 1
165 #endif
166
167 /*
168 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
169 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
170 */
#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object, otherwise
 * they are the last n bytes.  Only compiled when IEEE_754 is not set.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
#endif
182
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object, otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
192
193 #if defined (U_DEBUG_FAKETIME)
194 /* Override the clock to test things without having to move the system clock.
195 * Assumes POSIX gettimeofday() will function
196 */
197 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
198 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
199 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
200 static UMTX fakeClockMutex = NULL;
201
202 static UDate getUTCtime_real() {
203 struct timeval posixTime;
204 gettimeofday(&posixTime, NULL);
205 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
206 }
207
208 static UDate getUTCtime_fake() {
209 umtx_lock(&fakeClockMutex);
210 if(!fakeClock_set) {
211 UDate real = getUTCtime_real();
212 const char *fake_start = getenv("U_FAKETIME_START");
213 if(fake_start!=NULL) {
214 sscanf(fake_start,"%lf",&fakeClock_t0);
215 }
216 fakeClock_dt = fakeClock_t0 - real;
217 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
218 "U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
219 fakeClock_t0, fake_start, fakeClock_dt, real);
220 fakeClock_set = TRUE;
221 }
222 umtx_unlock(&fakeClockMutex);
223
224 return getUTCtime_real() + fakeClock_dt;
225 }
226 #endif
227
228 #if defined(U_WINDOWS)
229 typedef union {
230 int64_t int64;
231 FILETIME fileTime;
232 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
233
234 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
235 #define EPOCH_BIAS INT64_C(116444736000000000)
236 #define HECTONANOSECOND_PER_MILLISECOND 10000
237
238 #endif
239
240 /*---------------------------------------------------------------------------
241 Universal Implementations
242 These are designed to work on all platforms. Try these, and if they
243 don't work on your platform, then special case your platform with new
244 implementations.
245 ---------------------------------------------------------------------------*/
246
247 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
248 U_CAPI UDate U_EXPORT2
249 uprv_getUTCtime()
250 {
251 #if defined(U_DEBUG_FAKETIME)
252 return getUTCtime_fake(); /* Hook for overriding the clock */
253 #elif defined(XP_MAC)
254 time_t t, t1, t2;
255 struct tm tmrec;
256
257 uprv_memset( &tmrec, 0, sizeof(tmrec) );
258 tmrec.tm_year = 70;
259 tmrec.tm_mon = 0;
260 tmrec.tm_mday = 1;
261 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
262
263 time(&t);
264 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
265 t2 = mktime(&tmrec); /* seconds of current GMT*/
266 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
267 #elif defined(U_WINDOWS)
268
269 FileTimeConversion winTime;
270 GetSystemTimeAsFileTime(&winTime.fileTime);
271 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
272 #else
273 /*
274 struct timeval posixTime;
275 gettimeofday(&posixTime, NULL);
276 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
277 */
278 time_t epochtime;
279 time(&epochtime);
280 return (UDate)epochtime * U_MILLIS_PER_SECOND;
281 #endif
282 }
283
284 /*-----------------------------------------------------------------------------
285 IEEE 754
286 These methods detect and return NaN and infinity values for doubles
287 conforming to IEEE 754. Platforms which support this standard include X86,
288 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
289 If this doesn't work on your platform, you have non-IEEE floating-point, and
290 will need to code your own versions. A naive implementation is to return 0.0
291 for getNaN and getInfinity, and false for isNaN and isInfinite.
292 ---------------------------------------------------------------------------*/
293
294 U_CAPI UBool U_EXPORT2
295 uprv_isNaN(double number)
296 {
297 #if IEEE_754
298 BitPatternConversion convertedNumber;
299 convertedNumber.d64 = number;
300 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
301 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
302
303 #elif defined(OS390)
304 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
305 sizeof(uint32_t));
306 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
307 sizeof(uint32_t));
308
309 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
310 (lowBits == 0x00000000L);
311
312 #else
313 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
314 /* you'll need to replace this default implementation with what's correct*/
315 /* for your platform.*/
316 return number != number;
317 #endif
318 }
319
320 U_CAPI UBool U_EXPORT2
321 uprv_isInfinite(double number)
322 {
323 #if IEEE_754
324 BitPatternConversion convertedNumber;
325 convertedNumber.d64 = number;
326 /* Infinity is exactly 0x7FF0000000000000U. */
327 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
328 #elif defined(OS390)
329 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
330 sizeof(uint32_t));
331 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
332 sizeof(uint32_t));
333
334 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
335
336 #else
337 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
338 /* value, you'll need to replace this default implementation with what's*/
339 /* correct for your platform.*/
340 return number == (2.0 * number);
341 #endif
342 }
343
344 U_CAPI UBool U_EXPORT2
345 uprv_isPositiveInfinity(double number)
346 {
347 #if IEEE_754 || defined(OS390)
348 return (UBool)(number > 0 && uprv_isInfinite(number));
349 #else
350 return uprv_isInfinite(number);
351 #endif
352 }
353
354 U_CAPI UBool U_EXPORT2
355 uprv_isNegativeInfinity(double number)
356 {
357 #if IEEE_754 || defined(OS390)
358 return (UBool)(number < 0 && uprv_isInfinite(number));
359
360 #else
361 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
362 sizeof(uint32_t));
363 return((highBits & SIGN) && uprv_isInfinite(number));
364
365 #endif
366 }
367
368 U_CAPI double U_EXPORT2
369 uprv_getNaN()
370 {
371 #if IEEE_754 || defined(OS390)
372 return gNan.d64;
373 #else
374 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
375 /* you'll need to replace this default implementation with what's correct*/
376 /* for your platform.*/
377 return 0.0;
378 #endif
379 }
380
381 U_CAPI double U_EXPORT2
382 uprv_getInfinity()
383 {
384 #if IEEE_754 || defined(OS390)
385 return gInf.d64;
386 #else
387 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
388 /* value, you'll need to replace this default implementation with what's*/
389 /* correct for your platform.*/
390 return 0.0;
391 #endif
392 }
393
394 U_CAPI double U_EXPORT2
395 uprv_floor(double x)
396 {
397 return floor(x);
398 }
399
400 U_CAPI double U_EXPORT2
401 uprv_ceil(double x)
402 {
403 return ceil(x);
404 }
405
406 U_CAPI double U_EXPORT2
407 uprv_round(double x)
408 {
409 return uprv_floor(x + 0.5);
410 }
411
412 U_CAPI double U_EXPORT2
413 uprv_fabs(double x)
414 {
415 return fabs(x);
416 }
417
418 U_CAPI double U_EXPORT2
419 uprv_modf(double x, double* y)
420 {
421 return modf(x, y);
422 }
423
424 U_CAPI double U_EXPORT2
425 uprv_fmod(double x, double y)
426 {
427 return fmod(x, y);
428 }
429
430 U_CAPI double U_EXPORT2
431 uprv_pow(double x, double y)
432 {
433 /* This is declared as "double pow(double x, double y)" */
434 return pow(x, y);
435 }
436
437 U_CAPI double U_EXPORT2
438 uprv_pow10(int32_t x)
439 {
440 return pow(10.0, (double)x);
441 }
442
443 U_CAPI double U_EXPORT2
444 uprv_fmax(double x, double y)
445 {
446 #if IEEE_754
447 int32_t lowBits;
448
449 /* first handle NaN*/
450 if(uprv_isNaN(x) || uprv_isNaN(y))
451 return uprv_getNaN();
452
453 /* check for -0 and 0*/
454 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
455 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
456 return y;
457
458 #endif
459
460 /* this should work for all flt point w/o NaN and Infpecial cases */
461 return (x > y ? x : y);
462 }
463
464 U_CAPI double U_EXPORT2
465 uprv_fmin(double x, double y)
466 {
467 #if IEEE_754
468 int32_t lowBits;
469
470 /* first handle NaN*/
471 if(uprv_isNaN(x) || uprv_isNaN(y))
472 return uprv_getNaN();
473
474 /* check for -0 and 0*/
475 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
476 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
477 return y;
478
479 #endif
480
481 /* this should work for all flt point w/o NaN and Inf special cases */
482 return (x > y ? y : x);
483 }
484
485 /**
486 * Truncates the given double.
487 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
488 * This is different than calling floor() or ceil():
489 * floor(3.3) = 3, floor(-3.3) = -4
490 * ceil(3.3) = 4, ceil(-3.3) = -3
491 */
492 U_CAPI double U_EXPORT2
493 uprv_trunc(double d)
494 {
495 #if IEEE_754
496 int32_t lowBits;
497
498 /* handle error cases*/
499 if(uprv_isNaN(d))
500 return uprv_getNaN();
501 if(uprv_isInfinite(d))
502 return uprv_getInfinity();
503
504 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
505 if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
506 return ceil(d);
507 else
508 return floor(d);
509
510 #else
511 return d >= 0 ? floor(d) : ceil(d);
512
513 #endif
514 }
515
516 /**
517 * Return the largest positive number that can be represented by an integer
518 * type of arbitrary bit length.
519 */
520 U_CAPI double U_EXPORT2
521 uprv_maxMantissa(void)
522 {
523 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
524 }
525
526 U_CAPI double U_EXPORT2
527 uprv_log(double d)
528 {
529 return log(d);
530 }
531
532 U_CAPI void * U_EXPORT2
533 uprv_maximumPtr(void * base)
534 {
535 #if defined(OS400)
536 /*
537 * With the provided function we should never be out of range of a given segment
538 * (a traditional/typical segment that is). Our segments have 5 bytes for the
539 * id and 3 bytes for the offset. The key is that the casting takes care of
540 * only retrieving the offset portion minus x1000. Hence, the smallest offset
541 * seen in a program is x001000 and when casted to an int would be 0.
542 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
543 *
544 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
545 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
546 * This function determines the activation based on the pointer that is passed in and
547 * calculates the appropriate maximum available size for
548 * each pointer type (TERASPACE and non-TERASPACE)
549 *
550 * Unlike other operating systems, the pointer model isn't determined at
551 * compile time on i5/OS.
552 */
553 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
554 /* if it is a TERASPACE pointer the max is 2GB - 4k */
555 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
556 }
557 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
558 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
559
560 #else
561 return U_MAX_PTR(base);
562 #endif
563 }
564
565 /*---------------------------------------------------------------------------
566 Platform-specific Implementations
567 Try these, and if they don't work on your platform, then special case your
568 platform with new implementations.
569 ---------------------------------------------------------------------------*/
570
571 /* Generic time zone layer -------------------------------------------------- */
572
573 /* Time zone utilities */
574 U_CAPI void U_EXPORT2
575 uprv_tzset()
576 {
577 #ifdef U_TZSET
578 U_TZSET();
579 #else
580 /* no initialization*/
581 #endif
582 }
583
584 U_CAPI int32_t U_EXPORT2
585 uprv_timezone()
586 {
587 #ifdef U_TIMEZONE
588 return U_TIMEZONE;
589 #else
590 time_t t, t1, t2;
591 struct tm tmrec;
592 UBool dst_checked;
593 int32_t tdiff = 0;
594
595 time(&t);
596 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
597 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
598 t1 = mktime(&tmrec); /* local time in seconds*/
599 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
600 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
601 tdiff = t2 - t1;
602 /* imitate NT behaviour, which returns same timezone offset to GMT for
603 winter and summer*/
604 if (dst_checked)
605 tdiff += 3600;
606 return tdiff;
607 #endif
608 }
609
610 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
611 some platforms need to have it declared here. */
612
613 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
614 /* RS6000 and others reject char **tzname. */
615 extern U_IMPORT char *U_TZNAME[];
616 #endif
617
618 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
619 /* These platforms are likely to use Olson timezone IDs. */
620 #define CHECK_LOCALTIME_LINK 1
621 #if defined(U_DARWIN)
622 #include <tzfile.h>
623 #define TZZONEINFO (TZDIR "/")
624 #else
625 #define TZDEFAULT "/etc/localtime"
626 #define TZZONEINFO "/usr/share/zoneinfo/"
627 #endif
628 static char gTimeZoneBuffer[PATH_MAX];
629 static char *gTimeZoneBufferPtr = NULL;
630 #endif
631
632 #ifndef U_WINDOWS
633 #define isNonDigit(ch) (ch < '0' || '9' < ch)
634 static UBool isValidOlsonID(const char *id) {
635 int32_t idx = 0;
636
637 /* Determine if this is something like Iceland (Olson ID)
638 or AST4ADT (non-Olson ID) */
639 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
640 idx++;
641 }
642
643 /* If we went through the whole string, then it might be okay.
644 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
645 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
646 The rest of the time it could be an Olson ID. George */
647 return (UBool)(id[idx] == 0
648 || uprv_strcmp(id, "PST8PDT") == 0
649 || uprv_strcmp(id, "MST7MDT") == 0
650 || uprv_strcmp(id, "CST6CDT") == 0
651 || uprv_strcmp(id, "EST5EDT") == 0);
652 }
653 #endif
654
655 #if defined(U_TZNAME) && !defined(U_WINDOWS)
656
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)((offset)*3600)

/* One row of the abbreviation-to-Olson-ID lookup table. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* compared against the offset passed to remapShortTimeZone() */
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID returned on a match */
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};

/*#define DEBUG_TZNAME*/

/*
 * Looks up the Olson ID for a pair of time zone abbreviations.
 *
 * stdID/dstID   standard and daylight abbreviations; must not be NULL
 * daylightType  daylight pattern, matched against the table's daylightType
 * offset        offset in seconds, matched against the table's offsetSeconds
 *
 * Returns the olsonID of the first row whose four key fields all match, or
 * NULL when no row matches.
 */
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
{
    /* Cast the quotient, not just the first sizeof, so the loop compares
       two signed values. */
    const int32_t mappingCount =
        (int32_t)(sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]));
    int32_t idx;
#ifdef DEBUG_TZNAME
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
#endif
    for (idx = 0; idx < mappingCount; idx++)
    {
        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
        {
            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
        }
    }
    return NULL;
}
757 #endif
758
759 U_CAPI const char* U_EXPORT2
760 uprv_tzname(int n)
761 {
762 const char *tzid = NULL;
763 #ifdef U_WINDOWS
764 tzid = uprv_detectWindowsTimeZone();
765
766 if (tzid != NULL) {
767 return tzid;
768 }
769 #else
770
771 /*#if defined(U_DARWIN)
772 int ret;
773
774 tzid = getenv("TZFILE");
775 if (tzid != NULL) {
776 return tzid;
777 }
778 #endif*/
779
780 /* This code can be temporarily disabled to test tzname resolution later on. */
781 #ifndef DEBUG_TZNAME
782 tzid = getenv("TZ");
783 if (tzid != NULL && isValidOlsonID(tzid))
784 {
785 /* This might be a good Olson ID. */
786 if (uprv_strncmp(tzid, "posix/", 6) == 0
787 || uprv_strncmp(tzid, "right/", 6) == 0)
788 {
789 /* Remove the posix/ or right/ prefix. */
790 tzid += 6;
791 }
792 return tzid;
793 }
794 /* else U_TZNAME will give a better result. */
795 #endif
796
797 #if defined(CHECK_LOCALTIME_LINK)
798 /* Caller must handle threading issues */
799 if (gTimeZoneBufferPtr == NULL) {
800 /*
801 This is a trick to look at the name of the link to get the Olson ID
802 because the tzfile contents is underspecified.
803 This isn't guaranteed to work because it may not be a symlink.
804 */
805 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
806 if (0 < ret) {
807 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
808 gTimeZoneBuffer[ret] = 0;
809 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
810 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
811 {
812 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
813 }
814 }
815 }
816 else {
817 return gTimeZoneBufferPtr;
818 }
819 #endif
820 #endif
821
822 #ifdef U_TZNAME
823 #if !defined(U_WINDOWS)
824 /*
825 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
826 So we remap the abbreviation to an olson ID.
827
828 Since Windows exposes a little more timezone information,
829 we normally don't use this code on Windows because
830 uprv_detectWindowsTimeZone should have already given the correct answer.
831 */
832 {
833 struct tm juneSol, decemberSol;
834 int daylightType;
835 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
836 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
837
838 /* This probing will tell us when daylight savings occurs. */
839 localtime_r(&juneSolstice, &juneSol);
840 localtime_r(&decemberSolstice, &decemberSol);
841 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
842 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
843 if (tzid != NULL) {
844 return tzid;
845 }
846 }
847 #endif
848 return U_TZNAME[n];
849 #else
850 return "";
851 #endif
852 }
853
854 /* Get and set the ICU data directory --------------------------------------- */
855
856 static char *gDataDirectory = NULL;
857 #if U_POSIX_LOCALE
858 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
859 #endif
860
861 static UBool U_CALLCONV putil_cleanup(void)
862 {
863 if (gDataDirectory && *gDataDirectory) {
864 uprv_free(gDataDirectory);
865 }
866 gDataDirectory = NULL;
867 #if U_POSIX_LOCALE
868 if (gCorrectedPOSIXLocale) {
869 uprv_free(gCorrectedPOSIXLocale);
870 gCorrectedPOSIXLocale = NULL;
871 }
872 #endif
873 return TRUE;
874 }
875
876 /*
877 * Set the data directory.
878 * Make a copy of the passed string, and set the global data dir to point to it.
879 * TODO: see bug #2849, regarding thread safety.
880 */
881 U_CAPI void U_EXPORT2
882 u_setDataDirectory(const char *directory) {
883 char *newDataDir;
884 int32_t length;
885
886 if(directory==NULL || *directory==0) {
887 /* A small optimization to prevent the malloc and copy when the
888 shared library is used, and this is a way to make sure that NULL
889 is never returned.
890 */
891 newDataDir = (char *)"";
892 }
893 else {
894 length=(int32_t)uprv_strlen(directory);
895 newDataDir = (char *)uprv_malloc(length + 2);
896 /* Exit out if newDataDir could not be created. */
897 if (newDataDir == NULL) {
898 return;
899 }
900 uprv_strcpy(newDataDir, directory);
901
902 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
903 {
904 char *p;
905 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
906 *p = U_FILE_SEP_CHAR;
907 }
908 }
909 #endif
910 }
911
912 umtx_lock(NULL);
913 if (gDataDirectory && *gDataDirectory) {
914 uprv_free(gDataDirectory);
915 }
916 gDataDirectory = newDataDir;
917 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
918 umtx_unlock(NULL);
919 }
920
921 U_CAPI UBool U_EXPORT2
922 uprv_pathIsAbsolute(const char *path)
923 {
924 if(!path || !*path) {
925 return FALSE;
926 }
927
928 if(*path == U_FILE_SEP_CHAR) {
929 return TRUE;
930 }
931
932 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
933 if(*path == U_FILE_ALT_SEP_CHAR) {
934 return TRUE;
935 }
936 #endif
937
938 #if defined(U_WINDOWS)
939 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
940 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
941 path[1] == ':' ) {
942 return TRUE;
943 }
944 #endif
945
946 return FALSE;
947 }
948
949 U_CAPI const char * U_EXPORT2
950 u_getDataDirectory(void) {
951 const char *path = NULL;
952 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
953 const char *simulator_root = NULL;
954 char datadir_path_buffer[PATH_MAX];
955 #endif
956
957 /* if we have the directory, then return it immediately */
958 UMTX_CHECK(NULL, gDataDirectory, path);
959
960 if(path) {
961 return path;
962 }
963
964 /*
965 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
966 override ICU's data with the ICU_DATA environment variable. This prevents
967 problems where multiple custom copies of ICU's specific version of data
968 are installed on a system. Either the application must define the data
969 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
970 ICU, set the data with udata_setCommonData or trust that all of the
971 required data is contained in ICU's data library that contains
972 the entry point defined by U_ICUDATA_ENTRY_POINT.
973
974 There may also be some platforms where environment variables
975 are not allowed.
976 */
977 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
978 /* First try to get the environment variable */
979 path=getenv("ICU_DATA");
980 # endif
981
982 /* ICU_DATA_DIR may be set as a compile option */
983 # ifdef ICU_DATA_DIR
984 if(path==NULL || *path==0) {
985 path=ICU_DATA_DIR;
986 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
987 simulator_root=getenv("IPHONE_SIMULATOR_ROOT");
988 if (simulator_root != NULL) {
989 (void) strlcpy(datadir_path_buffer, simulator_root, PATH_MAX);
990 (void) strlcat(datadir_path_buffer, path, PATH_MAX);
991 path=datadir_path_buffer;
992 }
993 #endif
994 }
995 # endif
996
997 if(path==NULL) {
998 /* It looks really bad, set it to something. */
999 path = "";
1000 }
1001
1002 u_setDataDirectory(path);
1003 return gDataDirectory;
1004 }
1005
1006
1007
1008
1009
1010 /* Macintosh-specific locale information ------------------------------------ */
1011 #ifdef XP_MAC
1012
/*
 * One row of the classic Mac OS locale lookup table.  A row matches when
 * every field is either the wildcard MAC_LC_MAGIC_NUMBER or equal to the
 * value being looked up; posixID is the locale reported for a matching row.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER (-5)   /* wildcard: matches any value */
#define MAC_LC_INIT_NUMBER  (-9)   /* "not yet determined" initial value */

/*
 * Rows are searched in order and the first match wins, so the all-wildcard
 * fallback row must stay last.  Date regions without a row (22 Malta,
 * 23 Cyprus, 33 Hindi system for India, 34 Pakistan, 46 Lapland,
 * 47 Faeroe Islands) have no POSIX mapping yet and use the fallback.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  0, "en_US" }, /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  1, "fr_FR" }, /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  2, "en_GB" }, /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  3, "de_DE" }, /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  4, "it_IT" }, /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  5, "nl_NL" }, /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  6, "fr_BE" }, /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  7, "sv_SE" }, /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  9, "da_DK" }, /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: this row used to say "is_IS", which is Iceland (see 21). */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1112
1113 #endif
1114
1115 #if U_POSIX_LOCALE
/*
 * Return the raw POSIX locale ID, whatever happens to be in it.
 * The value is determined once and cached in a function-local static.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    const char *candidate;

    if (posixID != NULL) {
        /* Already determined by an earlier call. */
        return posixID;
    }

    /*
     * On Solaris two different calls to setlocale can result in
     * different values. Only get this value once.
     *
     * setlocale is consulted first because an application can set it.
     *
     * LC_ALL can't be used because it's platform dependent. The LANG
     * environment variable seems to affect LC_CTYPE variable by default.
     * Here is what setlocale(LC_ALL, NULL) can return.
     * HPUX can return 'C C C C C C C'
     * Solaris can return /en_US/C/C/C/C/C on the second try.
     * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
     *
     * The default codepage detection also needs to use LC_CTYPE.
     *
     * Do not call setlocale(LC_*, "")! Using an empty string instead
     * of NULL, will modify the libc behavior.
     */
    candidate = setlocale(LC_CTYPE, NULL);
    if (candidate == NULL
        || uprv_strcmp("C", candidate) == 0
        || uprv_strcmp("POSIX", candidate) == 0)
    {
        /* Possibly garbage; consult the environment instead, in this order. */
        candidate = getenv("LC_ALL");
        if (candidate == NULL) {
            candidate = getenv("LC_CTYPE");
        }
        if (candidate == NULL) {
            candidate = getenv("LANG");
        }
    }

    if (candidate == NULL
        || uprv_strcmp("C", candidate) == 0
        || uprv_strcmp("POSIX", candidate) == 0)
    {
        /* Nothing usable was found; use a well-defined POSIX default. */
        candidate = "en_US_POSIX";
    }

    posixID = candidate;
    return posixID;
}
1165 #endif
1166
1167 /* NOTE: The caller should handle thread safety */
1168 U_CAPI const char* U_EXPORT2
1169 uprv_getDefaultLocaleID()
1170 {
1171 #if U_POSIX_LOCALE
1172 /*
1173 Note that: (a '!' means the ID is improper somehow)
1174 LC_ALL ----> default_loc codepage
1175 --------------------------------------------------------
1176 ab.CD ab CD
1177 ab@CD ab__CD -
1178 ab@CD.EF ab__CD EF
1179
1180 ab_CD.EF@GH ab_CD_GH EF
1181
1182 Some 'improper' ways to do the same as above:
1183 ! ab_CD@GH.EF ab_CD_GH EF
1184 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1185 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1186
1187 _CD@GH _CD_GH -
1188 _CD.EF@GH _CD_GH EF
1189
1190 The variant cannot have dots in it.
1191 The 'rightmost' variant (@xxx) wins.
1192 The leftmost codepage (.xxx) wins.
1193 */
1194 char *correctedPOSIXLocale = 0;
1195 const char* posixID = uprv_getPOSIXID();
1196 const char *p;
1197 const char *q;
1198 int32_t len;
1199
1200 /* Format: (no spaces)
1201 ll [ _CC ] [ . MM ] [ @ VV]
1202
1203 l = lang, C = ctry, M = charmap, V = variant
1204 */
1205
1206 if (gCorrectedPOSIXLocale != NULL) {
1207 return gCorrectedPOSIXLocale;
1208 }
1209
1210 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1211 /* assume new locale can't be larger than old one? */
1212 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1213 /* Exit on memory allocation error. */
1214 if (correctedPOSIXLocale == NULL) {
1215 return NULL;
1216 }
1217 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1218 correctedPOSIXLocale[p-posixID] = 0;
1219
1220 /* do not copy after the @ */
1221 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1222 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1223 }
1224 }
1225
1226 /* Note that we scan the *uncorrected* ID. */
1227 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1228 if (correctedPOSIXLocale == NULL) {
1229 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1230 /* Exit on memory allocation error. */
1231 if (correctedPOSIXLocale == NULL) {
1232 return NULL;
1233 }
1234 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1235 correctedPOSIXLocale[p-posixID] = 0;
1236 }
1237 p++;
1238
1239 /* Take care of any special cases here.. */
1240 if (!uprv_strcmp(p, "nynorsk")) {
1241 p = "NY";
1242 /* Don't worry about no__NY. In practice, it won't appear. */
1243 }
1244
1245 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1246 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1247 }
1248 else {
1249 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1250 }
1251
1252 if ((q = uprv_strchr(p, '.')) != NULL) {
1253 /* How big will the resulting string be? */
1254 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1255 uprv_strncat(correctedPOSIXLocale, p, q-p);
1256 correctedPOSIXLocale[len] = 0;
1257 }
1258 else {
1259 /* Anything following the @ sign */
1260 uprv_strcat(correctedPOSIXLocale, p);
1261 }
1262
1263 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1264 * How about 'russian' -> 'ru'?
1265 * Many of the other locales using ISO codes will be handled by the
1266 * canonicalization functions in uloc_getDefault.
1267 */
1268 }
1269
1270 /* Was a correction made? */
1271 if (correctedPOSIXLocale != NULL) {
1272 posixID = correctedPOSIXLocale;
1273 }
1274 else {
1275 /* copy it, just in case the original pointer goes away. See j2395 */
1276 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1277 /* Exit on memory allocation error. */
1278 if (correctedPOSIXLocale == NULL) {
1279 return NULL;
1280 }
1281 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1282 }
1283
1284 if (gCorrectedPOSIXLocale == NULL) {
1285 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1286 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1287 correctedPOSIXLocale = NULL;
1288 }
1289
1290 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1291 uprv_free(correctedPOSIXLocale);
1292 }
1293
1294 return posixID;
1295
1296 #elif defined(U_WINDOWS)
1297 UErrorCode status = U_ZERO_ERROR;
1298 LCID id = GetThreadLocale();
1299 const char* locID = uprv_convertToPosix(id, &status);
1300
1301 if (U_FAILURE(status)) {
1302 locID = "en_US";
1303 }
1304 return locID;
1305
1306 #elif defined(XP_MAC)
1307 int32_t script = MAC_LC_INIT_NUMBER;
1308 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1309 int32_t region = MAC_LC_INIT_NUMBER;
1310 /* = GetScriptManagerVariable(smRegionCode);*/
1311 int32_t lang = MAC_LC_INIT_NUMBER;
1312 /* = GetScriptManagerVariable(smScriptLang);*/
1313 int32_t date_region = MAC_LC_INIT_NUMBER;
1314 const char* posixID = 0;
1315 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1316 int32_t i;
1317 Intl1Hndl ih;
1318
1319 ih = (Intl1Hndl) GetIntlResource(1);
1320 if (ih)
1321 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1322
1323 for (i = 0; i < count; i++) {
1324 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1325 || (mac_lc_recs[i].script == script))
1326 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1327 || (mac_lc_recs[i].region == region))
1328 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1329 || (mac_lc_recs[i].lang == lang))
1330 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1331 || (mac_lc_recs[i].date_region == date_region))
1332 )
1333 {
1334 posixID = mac_lc_recs[i].posixID;
1335 break;
1336 }
1337 }
1338
1339 return posixID;
1340
1341 #elif defined(OS400)
1342 /* locales are process scoped and are by definition thread safe */
1343 static char correctedLocale[64];
1344 const char *localeID = getenv("LC_ALL");
1345 char *p;
1346
1347 if (localeID == NULL)
1348 localeID = getenv("LANG");
1349 if (localeID == NULL)
1350 localeID = setlocale(LC_ALL, NULL);
1351 /* Make sure we have something... */
1352 if (localeID == NULL)
1353 return "en_US_POSIX";
1354
1355 /* Extract the locale name from the path. */
1356 if((p = uprv_strrchr(localeID, '/')) != NULL)
1357 {
1358 /* Increment p to start of locale name. */
1359 p++;
1360 localeID = p;
1361 }
1362
1363 /* Copy to work location. */
1364 uprv_strcpy(correctedLocale, localeID);
1365
1366 /* Strip off the '.locale' extension. */
1367 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1368 *p = 0;
1369 }
1370
1371 /* Upper case the locale name. */
1372 T_CString_toUpperCase(correctedLocale);
1373
1374 /* See if we are using the POSIX locale. Any of the
1375 * following are equivalent and use the same QLGPGCMA
1376 * (POSIX) locale.
1377 * QLGPGCMA2 means UCS2
1378 * QLGPGCMA_4 means UTF-32
1379 * QLGPGCMA_8 means UTF-8
1380 */
1381 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1382 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1383 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1384 {
1385 uprv_strcpy(correctedLocale, "en_US_POSIX");
1386 }
1387 else
1388 {
1389 int16_t LocaleLen;
1390
1391 /* Lower case the lang portion. */
1392 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1393 {
1394 *p = uprv_tolower(*p);
1395 }
1396
1397 /* Adjust for Euro. After '_E' add 'URO'. */
1398 LocaleLen = uprv_strlen(correctedLocale);
1399 if (correctedLocale[LocaleLen - 2] == '_' &&
1400 correctedLocale[LocaleLen - 1] == 'E')
1401 {
1402 uprv_strcat(correctedLocale, "URO");
1403 }
1404
1405 /* If using Lotus-based locale then convert to
1406 * equivalent non Lotus.
1407 */
1408 else if (correctedLocale[LocaleLen - 2] == '_' &&
1409 correctedLocale[LocaleLen - 1] == 'L')
1410 {
1411 correctedLocale[LocaleLen - 2] = 0;
1412 }
1413
1414 /* There are separate simplified and traditional
1415 * locales called zh_HK_S and zh_HK_T.
1416 */
1417 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1418 {
1419 uprv_strcpy(correctedLocale, "zh_HK");
1420 }
1421
1422 /* A special zh_CN_GBK locale...
1423 */
1424 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1425 {
1426 uprv_strcpy(correctedLocale, "zh_CN");
1427 }
1428
1429 }
1430
1431 return correctedLocale;
1432 #endif
1433
1434 }
1435
1436 #if U_POSIX_LOCALE
/*
Platforms differ in what they mean by a given charset name: one platform may
report a name while really meaning a different charset.  This function maps
such names onto ones that ICU interprets correctly.  Only conflicting or
ambiguous aliases belong here; before extending it, consider adding a unique
name to the ICU alias table in the data directory instead.

locale may be NULL or empty (both are treated as "no locale").
Returns NULL when name is NULL or ends up empty, otherwise the possibly
remapped name.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* An empty locale is handled exactly like a missing one. */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }
#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * Solaris reports 646 as its default codepage, yet its C library
         * routines behave as if it were ISO-8859-1 rather than US-ASCII (646).
         */
        name = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#endif
    /* An empty name is reported as NULL. */
    return (name[0] == 0) ? NULL : name;
}
1538
/*
 * Extract the codepage part of a POSIX locale ID of the form
 * "locale.codepage[@variant]".
 *
 * The text after the first '.' is copied into buffer (at most buffCapacity
 * bytes, always NUL-terminated), cut at the first '@', and then passed
 * through remapPlatformDependentCodepage() together with the text before
 * the '.'.
 *
 * Returns NULL when localeName is NULL or contains no '.'; otherwise
 * whatever remapPlatformDependentCodepage() returns.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    char *variant;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Keep the part before the dot as the locale, truncated to localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Everything after the dot is the codepage candidate. */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */

    /* A trailing "@variant" is not part of the codepage name. */
    variant = uprv_strchr(buffer, '@');
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, buffer);
}
1559 #endif
1560
/*
 * Work out the name of the platform's default codepage.
 *
 * Every branch returns a pointer to a string constant or to a function-local
 * static buffer, so the result must not be freed and is overwritten by a
 * later call.  When no platform branch applies, "US-ASCII" is returned.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned, so format it with %u rather than %d. */
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /*
     * "%63s" would be a MINIMUM field width: it pads the name to 63
     * characters and then appends the option string, overrunning the buffer.
     * Use a precision instead, sized so that the name plus the option string
     * plus the terminating NUL fit in codepage exactly.
     */
    sprintf(codepage,"%.*s" UCNV_SWAP_LFNL_OPTION_STRING,
            (int)(sizeof(codepage) - sizeof(UCNV_SWAP_LFNL_OPTION_STRING)),
            nl_langinfo(CODESET));
    codepage[sizeof(codepage)-1] = 0; /* NULL terminate */
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() yields an unsigned code page identifier. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        /* Discard whatever the failed attempt above left behind. */
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1645
1646
1647 U_CAPI const char* U_EXPORT2
1648 uprv_getDefaultCodepage()
1649 {
1650 static char const *name = NULL;
1651 umtx_lock(NULL);
1652 if (name == NULL) {
1653 name = int_getDefaultCodepage();
1654 }
1655 umtx_unlock(NULL);
1656 return name;
1657 }
1658
1659
1660 /* end of platform-specific implementation -------------- */
1661
1662 /* version handling --------------------------------------------------------- */
1663
1664 U_CAPI void U_EXPORT2
1665 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1666 char *end;
1667 uint16_t part=0;
1668
1669 if(versionArray==NULL) {
1670 return;
1671 }
1672
1673 if(versionString!=NULL) {
1674 for(;;) {
1675 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1676 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1677 break;
1678 }
1679 versionString=end+1;
1680 }
1681 }
1682
1683 while(part<U_MAX_VERSION_LENGTH) {
1684 versionArray[part++]=0;
1685 }
1686 }
1687
1688 U_CAPI void U_EXPORT2
1689 u_versionToString(UVersionInfo versionArray, char *versionString) {
1690 uint16_t count, part;
1691 uint8_t field;
1692
1693 if(versionString==NULL) {
1694 return;
1695 }
1696
1697 if(versionArray==NULL) {
1698 versionString[0]=0;
1699 return;
1700 }
1701
1702 /* count how many fields need to be written */
1703 for(count=4; count>0 && versionArray[count-1]==0; --count) {
1704 }
1705
1706 if(count <= 1) {
1707 count = 2;
1708 }
1709
1710 /* write the first part */
1711 /* write the decimal field value */
1712 field=versionArray[0];
1713 if(field>=100) {
1714 *versionString++=(char)('0'+field/100);
1715 field%=100;
1716 }
1717 if(field>=10) {
1718 *versionString++=(char)('0'+field/10);
1719 field%=10;
1720 }
1721 *versionString++=(char)('0'+field);
1722
1723 /* write the following parts */
1724 for(part=1; part<count; ++part) {
1725 /* write a dot first */
1726 *versionString++=U_VERSION_DELIMITER;
1727
1728 /* write the decimal field value */
1729 field=versionArray[part];
1730 if(field>=100) {
1731 *versionString++=(char)('0'+field/100);
1732 field%=100;
1733 }
1734 if(field>=10) {
1735 *versionString++=(char)('0'+field/10);
1736 field%=10;
1737 }
1738 *versionString++=(char)('0'+field);
1739 }
1740
1741 /* NUL-terminate */
1742 *versionString=0;
1743 }
1744
1745 U_CAPI void U_EXPORT2
1746 u_getVersion(UVersionInfo versionArray) {
1747 u_versionFromString(versionArray, U_ICU_VERSION);
1748 }
1749
1750 /*
1751 * Hey, Emacs, please set the following:
1752 *
1753 * Local Variables:
1754 * indent-tabs-mode: nil
1755 * End:
1756 *
1757 */