]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / common / putil.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 /* Define _XOPEN_SOURCE for Solaris and friends. */
41 /* NetBSD needs it to be >= 4 */
42 #if !defined(_XOPEN_SOURCE)
43 #if __STDC_VERSION__ >= 199901L
44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45 #define _XOPEN_SOURCE 600
46 #else
47 #define _XOPEN_SOURCE 4
48 #endif
49 #endif
50
51 /* Make sure things like readlink and such functions work.
52 Poorly upgraded Solaris machines can't have this defined.
53 Cleanly installed Solaris can use this #define.
54 */
55 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
56 #define _XOPEN_SOURCE_EXTENDED 1
57 #endif
58
59 /* include ICU headers */
60 #include "unicode/utypes.h"
61 #include "unicode/putil.h"
62 #include "unicode/ustring.h"
63 #include "putilimp.h"
64 #include "uassert.h"
65 #include "umutex.h"
66 #include "cmemory.h"
67 #include "cstring.h"
68 #include "locmap.h"
69 #include "ucln_cmn.h"
70
71 /* Include standard headers. */
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #include <math.h>
76 #include <locale.h>
77 #include <float.h>
78 #include <time.h>
79
80 /* include system headers */
81 #ifdef U_WINDOWS
82 # define WIN32_LEAN_AND_MEAN
83 # define VC_EXTRALEAN
84 # define NOUSER
85 # define NOSERVICE
86 # define NOIME
87 # define NOMCX
88 # include <windows.h>
89 # include "wintz.h"
90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
91 /* tzset isn't defined in strict ANSI on Cygwin. */
92 # undef __STRICT_ANSI__
93 #elif defined(OS400)
94 # include <float.h>
95 # include <qusec.h> /* error code structure */
96 # include <qusrjobi.h>
97 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
98 # include <mih/testptr.h> /* For uprv_maximumPtr */
99 #elif defined(XP_MAC)
100 # include <Files.h>
101 # include <IntlResources.h>
102 # include <Script.h>
103 # include <Folders.h>
104 # include <MacTypes.h>
105 # include <TextUtils.h>
106 # define ICU_NO_USER_DATA_OVERRIDE 1
107 #elif defined(OS390)
108 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
110 #include <limits.h>
111 #include <unistd.h>
112 #elif defined(U_QNX)
113 #include <sys/neutrino.h>
114 #elif defined(U_SOLARIS)
115 # ifndef _XPG4_2
116 # define _XPG4_2
117 # endif
118 #endif
119
120
121 #if defined(U_DARWIN)
122 #include <TargetConditionals.h>
123 #endif
124
125 #ifndef U_WINDOWS
126 #include <sys/time.h>
127 #endif
128
129 /*
130 * Only include langinfo.h if we have a way to get the codeset. If we later
131 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
132 *
133 */
134
135 #if U_HAVE_NL_LANGINFO_CODESET
136 #include <langinfo.h>
137 #endif
138
139 /**
140 * Simple things (presence of functions, etc) should just go in configure.in and be added to
141 * icucfg.h via autoheader.
142 */
143 #if defined(HAVE_CONFIG_H)
144 #include "icucfg.h"
145 #endif
146
147 /* Define the extension for data files, again... */
148 #define DATA_TYPE "dat"
149
150 /* Leave this copyright notice here! */
151 static const char copyright[] = U_COPYRIGHT_STRING;
152
153 /* floating point implementations ------------------------------------------- */
154
155 /* We return QNAN rather than SNAN*/
156 #define SIGN 0x80000000U
157
/*
 * Helper union for spelling a double constant as a raw 64-bit pattern.
 * The integer member has to stay first: a C89 brace initializer only
 * initializes the first member of a union.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Pattern 0x7FF8000000000000; its d64 view is what uprv_getNaN() returns. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };

/* Pattern 0x7FF0000000000000; its d64 view is what uprv_getInfinity() returns. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
165
166 /*---------------------------------------------------------------------------
167 Platform utilities
168 Our general strategy is to assume we're on a POSIX platform. Platforms which
169 are non-POSIX must declare themselves so. The default POSIX implementation
170 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
171 functions).
172 ---------------------------------------------------------------------------*/
173
174 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
175 # undef U_POSIX_LOCALE
176 #else
177 # define U_POSIX_LOCALE 1
178 #endif
179
180 /*
181 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
182 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
183 */
184 #if !IEEE_754
/*
 * Returns a pointer to the n bytes at the high-order end of *d:
 * the first n bytes in memory on a big-endian host, the last n otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
194
/*
 * Returns a pointer to the n bytes at the low-order end of *d:
 * the last n bytes in memory on a big-endian host, the first n otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
204 #endif /* !IEEE_754 */
205
206 #if IEEE_754
207 static UBool
208 u_signBit(double d) {
209 uint8_t hiByte;
210 #if U_IS_BIG_ENDIAN
211 hiByte = *(uint8_t *)&d;
212 #else
213 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
214 #endif
215 return (hiByte & 0x80) != 0;
216 }
217 #endif
218
219
220
221 #if defined (U_DEBUG_FAKETIME)
222 /* Override the clock to test things without having to move the system clock.
223 * Assumes POSIX gettimeofday() will function
224 */
225 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
226 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
227 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
228 static UMTX fakeClockMutex = NULL;
229
230 static UDate getUTCtime_real() {
231 struct timeval posixTime;
232 gettimeofday(&posixTime, NULL);
233 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
234 }
235
236 static UDate getUTCtime_fake() {
237 umtx_lock(&fakeClockMutex);
238 if(!fakeClock_set) {
239 UDate real = getUTCtime_real();
240 const char *fake_start = getenv("U_FAKETIME_START");
241 if((fake_start!=NULL) && (fake_start[0]!=0)) {
242 sscanf(fake_start,"%lf",&fakeClock_t0);
243 fakeClock_dt = fakeClock_t0 - real;
244 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
245 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
246 fakeClock_t0, fake_start, fakeClock_dt, real);
247 } else {
248 fakeClock_dt = 0;
249 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
250 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
251 }
252 fakeClock_set = TRUE;
253 }
254 umtx_unlock(&fakeClockMutex);
255
256 return getUTCtime_real() + fakeClock_dt;
257 }
258 #endif
259
260 #if defined(U_WINDOWS)
261 typedef union {
262 int64_t int64;
263 FILETIME fileTime;
264 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
265
266 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
267 #define EPOCH_BIAS INT64_C(116444736000000000)
268 #define HECTONANOSECOND_PER_MILLISECOND 10000
269
270 #endif
271
272 /*---------------------------------------------------------------------------
273 Universal Implementations
274 These are designed to work on all platforms. Try these, and if they
275 don't work on your platform, then special case your platform with new
276 implementations.
277 ---------------------------------------------------------------------------*/
278
279 U_CAPI UDate U_EXPORT2
280 uprv_getUTCtime()
281 {
282 #if defined(U_DEBUG_FAKETIME)
283 return getUTCtime_fake(); /* Hook for overriding the clock */
284 #else
285 return uprv_getRawUTCtime();
286 #endif
287 }
288
289 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
290 U_CAPI UDate U_EXPORT2
291 uprv_getRawUTCtime()
292 {
293 #if defined(XP_MAC)
294 time_t t, t1, t2;
295 struct tm tmrec;
296
297 uprv_memset( &tmrec, 0, sizeof(tmrec) );
298 tmrec.tm_year = 70;
299 tmrec.tm_mon = 0;
300 tmrec.tm_mday = 1;
301 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
302
303 time(&t);
304 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
305 t2 = mktime(&tmrec); /* seconds of current GMT*/
306 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
307 #elif defined(U_WINDOWS)
308
309 FileTimeConversion winTime;
310 GetSystemTimeAsFileTime(&winTime.fileTime);
311 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
312 #else
313
314 #if defined(HAVE_GETTIMEOFDAY)
315 struct timeval posixTime;
316 gettimeofday(&posixTime, NULL);
317 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
318 #else
319 time_t epochtime;
320 time(&epochtime);
321 return (UDate)epochtime * U_MILLIS_PER_SECOND;
322 #endif
323
324 #endif
325 }
326
327 /*-----------------------------------------------------------------------------
328 IEEE 754
329 These methods detect and return NaN and infinity values for doubles
330 conforming to IEEE 754. Platforms which support this standard include X86,
331 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
332 If this doesn't work on your platform, you have non-IEEE floating-point, and
333 will need to code your own versions. A naive implementation is to return 0.0
334 for getNaN and getInfinity, and false for isNaN and isInfinite.
335 ---------------------------------------------------------------------------*/
336
337 U_CAPI UBool U_EXPORT2
338 uprv_isNaN(double number)
339 {
340 #if IEEE_754
341 BitPatternConversion convertedNumber;
342 convertedNumber.d64 = number;
343 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
344 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
345
346 #elif defined(OS390)
347 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
348 sizeof(uint32_t));
349 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
350 sizeof(uint32_t));
351
352 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
353 (lowBits == 0x00000000L);
354
355 #else
356 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
357 /* you'll need to replace this default implementation with what's correct*/
358 /* for your platform.*/
359 return number != number;
360 #endif
361 }
362
363 U_CAPI UBool U_EXPORT2
364 uprv_isInfinite(double number)
365 {
366 #if IEEE_754
367 BitPatternConversion convertedNumber;
368 convertedNumber.d64 = number;
369 /* Infinity is exactly 0x7FF0000000000000U. */
370 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
371 #elif defined(OS390)
372 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
373 sizeof(uint32_t));
374 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
375 sizeof(uint32_t));
376
377 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
378
379 #else
380 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
381 /* value, you'll need to replace this default implementation with what's*/
382 /* correct for your platform.*/
383 return number == (2.0 * number);
384 #endif
385 }
386
387 U_CAPI UBool U_EXPORT2
388 uprv_isPositiveInfinity(double number)
389 {
390 #if IEEE_754 || defined(OS390)
391 return (UBool)(number > 0 && uprv_isInfinite(number));
392 #else
393 return uprv_isInfinite(number);
394 #endif
395 }
396
397 U_CAPI UBool U_EXPORT2
398 uprv_isNegativeInfinity(double number)
399 {
400 #if IEEE_754 || defined(OS390)
401 return (UBool)(number < 0 && uprv_isInfinite(number));
402
403 #else
404 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
405 sizeof(uint32_t));
406 return((highBits & SIGN) && uprv_isInfinite(number));
407
408 #endif
409 }
410
411 U_CAPI double U_EXPORT2
412 uprv_getNaN()
413 {
414 #if IEEE_754 || defined(OS390)
415 return gNan.d64;
416 #else
417 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
418 /* you'll need to replace this default implementation with what's correct*/
419 /* for your platform.*/
420 return 0.0;
421 #endif
422 }
423
424 U_CAPI double U_EXPORT2
425 uprv_getInfinity()
426 {
427 #if IEEE_754 || defined(OS390)
428 return gInf.d64;
429 #else
430 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
431 /* value, you'll need to replace this default implementation with what's*/
432 /* correct for your platform.*/
433 return 0.0;
434 #endif
435 }
436
437 U_CAPI double U_EXPORT2
438 uprv_floor(double x)
439 {
440 return floor(x);
441 }
442
443 U_CAPI double U_EXPORT2
444 uprv_ceil(double x)
445 {
446 return ceil(x);
447 }
448
449 U_CAPI double U_EXPORT2
450 uprv_round(double x)
451 {
452 return uprv_floor(x + 0.5);
453 }
454
455 U_CAPI double U_EXPORT2
456 uprv_fabs(double x)
457 {
458 return fabs(x);
459 }
460
461 U_CAPI double U_EXPORT2
462 uprv_modf(double x, double* y)
463 {
464 return modf(x, y);
465 }
466
467 U_CAPI double U_EXPORT2
468 uprv_fmod(double x, double y)
469 {
470 return fmod(x, y);
471 }
472
473 U_CAPI double U_EXPORT2
474 uprv_pow(double x, double y)
475 {
476 /* This is declared as "double pow(double x, double y)" */
477 return pow(x, y);
478 }
479
480 U_CAPI double U_EXPORT2
481 uprv_pow10(int32_t x)
482 {
483 return pow(10.0, (double)x);
484 }
485
486 U_CAPI double U_EXPORT2
487 uprv_fmax(double x, double y)
488 {
489 #if IEEE_754
490 /* first handle NaN*/
491 if(uprv_isNaN(x) || uprv_isNaN(y))
492 return uprv_getNaN();
493
494 /* check for -0 and 0*/
495 if(x == 0.0 && y == 0.0 && u_signBit(x))
496 return y;
497
498 #endif
499
500 /* this should work for all flt point w/o NaN and Inf special cases */
501 return (x > y ? x : y);
502 }
503
504 U_CAPI double U_EXPORT2
505 uprv_fmin(double x, double y)
506 {
507 #if IEEE_754
508 /* first handle NaN*/
509 if(uprv_isNaN(x) || uprv_isNaN(y))
510 return uprv_getNaN();
511
512 /* check for -0 and 0*/
513 if(x == 0.0 && y == 0.0 && u_signBit(y))
514 return y;
515
516 #endif
517
518 /* this should work for all flt point w/o NaN and Inf special cases */
519 return (x > y ? y : x);
520 }
521
522 /**
523 * Truncates the given double.
524 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
525 * This is different than calling floor() or ceil():
526 * floor(3.3) = 3, floor(-3.3) = -4
527 * ceil(3.3) = 4, ceil(-3.3) = -3
528 */
529 U_CAPI double U_EXPORT2
530 uprv_trunc(double d)
531 {
532 #if IEEE_754
533 /* handle error cases*/
534 if(uprv_isNaN(d))
535 return uprv_getNaN();
536 if(uprv_isInfinite(d))
537 return uprv_getInfinity();
538
539 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
540 return ceil(d);
541 else
542 return floor(d);
543
544 #else
545 return d >= 0 ? floor(d) : ceil(d);
546
547 #endif
548 }
549
550 /**
551 * Return the largest positive number that can be represented by an integer
552 * type of arbitrary bit length.
553 */
554 U_CAPI double U_EXPORT2
555 uprv_maxMantissa(void)
556 {
557 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
558 }
559
560 U_CAPI double U_EXPORT2
561 uprv_log(double d)
562 {
563 return log(d);
564 }
565
566 U_CAPI void * U_EXPORT2
567 uprv_maximumPtr(void * base)
568 {
569 #if defined(OS400)
570 /*
571 * With the provided function we should never be out of range of a given segment
572 * (a traditional/typical segment that is). Our segments have 5 bytes for the
573 * id and 3 bytes for the offset. The key is that the casting takes care of
574 * only retrieving the offset portion minus x1000. Hence, the smallest offset
575 * seen in a program is x001000 and when casted to an int would be 0.
576 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
577 *
578 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
579 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
580 * This function determines the activation based on the pointer that is passed in and
581 * calculates the appropriate maximum available size for
582 * each pointer type (TERASPACE and non-TERASPACE)
583 *
584 * Unlike other operating systems, the pointer model isn't determined at
585 * compile time on i5/OS.
586 */
587 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
588 /* if it is a TERASPACE pointer the max is 2GB - 4k */
589 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
590 }
591 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
592 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
593
594 #else
595 return U_MAX_PTR(base);
596 #endif
597 }
598
599 /*---------------------------------------------------------------------------
600 Platform-specific Implementations
601 Try these, and if they don't work on your platform, then special case your
602 platform with new implementations.
603 ---------------------------------------------------------------------------*/
604
605 /* Generic time zone layer -------------------------------------------------- */
606
607 /* Time zone utilities */
608 U_CAPI void U_EXPORT2
609 uprv_tzset()
610 {
611 #ifdef U_TZSET
612 U_TZSET();
613 #else
614 /* no initialization*/
615 #endif
616 }
617
618 U_CAPI int32_t U_EXPORT2
619 uprv_timezone()
620 {
621 #ifdef U_TIMEZONE
622 return U_TIMEZONE;
623 #else
624 time_t t, t1, t2;
625 struct tm tmrec;
626 UBool dst_checked;
627 int32_t tdiff = 0;
628
629 time(&t);
630 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
631 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
632 t1 = mktime(&tmrec); /* local time in seconds*/
633 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
634 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
635 tdiff = t2 - t1;
636 /* imitate NT behaviour, which returns same timezone offset to GMT for
637 winter and summer*/
638 if (dst_checked)
639 tdiff += 3600;
640 return tdiff;
641 #endif
642 }
643
644 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
645 some platforms need to have it declared here. */
646
647 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
648 /* RS6000 and others reject char **tzname. */
649 extern U_IMPORT char *U_TZNAME[];
650 #endif
651
652 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
653 /* These platforms are likely to use Olson timezone IDs. */
654 #define CHECK_LOCALTIME_LINK 1
655 #if defined(U_DARWIN)
656 #include <tzfile.h>
657 #define TZZONEINFO (TZDIR "/")
658 #else
659 #define TZDEFAULT "/etc/localtime"
660 #define TZZONEINFO "/usr/share/zoneinfo/"
661 #endif
662 #if U_HAVE_DIRENT_H
663 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
664 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
665 symlinked to /etc/localtime, which makes searchForTZFile return
666 'localtime' when it's the first match. */
667 #define TZFILE_SKIP2 "localtime"
668 #define SEARCH_TZFILE
669 #include <dirent.h> /* Needed to search through system timezone files */
670 #endif
671 static char gTimeZoneBuffer[PATH_MAX];
672 static char *gTimeZoneBufferPtr = NULL;
673 #endif
674
675 #ifndef U_WINDOWS
676 #define isNonDigit(ch) (ch < '0' || '9' < ch)
677 static UBool isValidOlsonID(const char *id) {
678 int32_t idx = 0;
679
680 /* Determine if this is something like Iceland (Olson ID)
681 or AST4ADT (non-Olson ID) */
682 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
683 idx++;
684 }
685
686 /* If we went through the whole string, then it might be okay.
687 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
688 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
689 The rest of the time it could be an Olson ID. George */
690 return (UBool)(id[idx] == 0
691 || uprv_strcmp(id, "PST8PDT") == 0
692 || uprv_strcmp(id, "MST7MDT") == 0
693 || uprv_strcmp(id, "CST6CDT") == 0
694 || uprv_strcmp(id, "EST5EDT") == 0);
695 }
696
/*
 * Advances *id past a leading "posix/" or "right/", if present.
 *
 * On some Unix-like systems /usr/share/zoneinfo has a 'posix' subdirectory
 * that replicates the top-level contents, and a 'right' subdirectory with the
 * same file names but TAI (Int'l Atomic Time) based contents. When the match
 * for /etc/localtime is found in one of them, or TZ points into one of them,
 * createTimeZone fails because, say, 'posix/America/New_York' is not an Olson
 * timezone id ('America/New_York' is). Hence the prefix has to be dropped.
 */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) == 0
        || uprv_strncmp(zone, "right/", 6) == 0)
    {
        *id = zone + 6;
    }
}
716 #endif
717
718 #if defined(U_TZNAME) && !defined(U_WINDOWS)
719
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)

/* One row of the abbreviation-to-Olson-ID lookup table below. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/*
This table disambiguates a set of abbreviated timezone IDs and offsets
and maps each combination to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    /* offset, daylightType, stdID, dstID, olsonID */
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT",  "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST",  "NZDT",  "Pacific/Auckland"},
    {-43200, 1, "ANAT",  "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT",  "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST",  "LHST",  "Australia/Lord_Howe"},
    {-36000, 2, "EST",   "EST",   "Australia/Sydney"},
    {-36000, 1, "SAKT",  "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT",  "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST",   "CST",   "Australia/South"},
    {-32400, 1, "YAKT",  "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT",  "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST",  "CWST",  "Australia/Eucla"},
    {-28800, 1, "IRKT",  "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT",  "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST",   "WST",   "Australia/West"},
    {-25200, 1, "HOVT",  "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT",  "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT",  "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST",  "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT",  "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT",  "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT",   "AMST",  "Asia/Yerevan"},
    {-14400, 1, "AZT",   "AZST",  "Asia/Baku"},
    {-10800, 1, "AST",   "ADT",   "Asia/Baghdad"},
    {-10800, 1, "MSK",   "MSD",   "Europe/Moscow"},
    {-10800, 1, "VOLT",  "VOLST", "Europe/Volgograd"},
    {-7200,  0, "EET",   "CEST",  "Africa/Tripoli"},
    {-7200,  1, "EET",   "EEST",  "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200,  1, "IST",   "IDT",   "Asia/Jerusalem"},
    {-3600,  0, "CET",   "WEST",  "Africa/Algiers"},
    {-3600,  2, "WAT",   "WAST",  "Africa/Windhoek"},
    {0,      1, "GMT",   "IST",   "Europe/Dublin"},
    {0,      1, "GMT",   "BST",   "Europe/London"},
    {0,      0, "WET",   "WEST",  "Africa/Casablanca"},
    {0,      0, "WET",   "WET",   "Africa/El_Aaiun"},
    {3600,   1, "AZOT",  "AZOST", "Atlantic/Azores"},
    {3600,   1, "EGT",   "EGST",  "America/Scoresbysund"},
    {10800,  1, "PMST",  "PMDT",  "America/Miquelon"},
    {10800,  2, "UYT",   "UYST",  "America/Montevideo"},
    {10800,  1, "WGT",   "WGST",  "America/Godthab"},
    {10800,  2, "BRT",   "BRST",  "Brazil/East"},
    {12600,  1, "NST",   "NDT",   "America/St_Johns"},
    {14400,  1, "AST",   "ADT",   "Canada/Atlantic"},
    {14400,  2, "AMT",   "AMST",  "America/Cuiaba"},
    {14400,  2, "CLT",   "CLST",  "Chile/Continental"},
    {14400,  2, "FKT",   "FKST",  "Atlantic/Stanley"},
    {14400,  2, "PYT",   "PYST",  "America/Asuncion"},
    {18000,  1, "CST",   "CDT",   "America/Havana"},
    {18000,  1, "EST",   "EDT",   "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600,  2, "EAST",  "EASST", "Chile/EasterIsland"},
    {21600,  0, "CST",   "MDT",   "Canada/Saskatchewan"},
    {21600,  0, "CST",   "CDT",   "America/Guatemala"},
    {21600,  1, "CST",   "CDT",   "US/Central"}, /* Conflicts with Mexico/General */
    {25200,  1, "MST",   "MDT",   "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800,  0, "PST",   "PST",   "Pacific/Pitcairn"},
    {28800,  1, "PST",   "PDT",   "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400,  1, "AKST",  "AKDT",  "US/Alaska"},
    {36000,  1, "HAST",  "HADT",  "US/Aleutian"}
};
799
800 /*#define DEBUG_TZNAME*/
801
802 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
803 {
804 int32_t idx;
805 #ifdef DEBUG_TZNAME
806 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
807 #endif
808 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
809 {
810 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
811 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
812 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
813 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
814 {
815 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
816 }
817 }
818 return NULL;
819 }
820 #endif
821
822 #ifdef SEARCH_TZFILE
823 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
824 #define MAX_READ_SIZE 512
825
826 typedef struct DefaultTZInfo {
827 char* defaultTZBuffer;
828 int64_t defaultTZFileSize;
829 FILE* defaultTZFilePtr;
830 UBool defaultTZstatus;
831 int32_t defaultTZPosition;
832 } DefaultTZInfo;
833
834 /*
835 * This method compares the two files given to see if they are a match.
836 * It is currently use to compare two TZ files.
837 */
838 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
839 FILE* file;
840 int64_t sizeFile;
841 int64_t sizeFileLeft;
842 int32_t sizeFileRead;
843 int32_t sizeFileToRead;
844 char bufferFile[MAX_READ_SIZE];
845 UBool result = TRUE;
846
847 if (tzInfo->defaultTZFilePtr == NULL) {
848 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
849 }
850 file = fopen(TZFileName, "r");
851
852 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
853
854 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
855 /* First check that the file size are equal. */
856 if (tzInfo->defaultTZFileSize == 0) {
857 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
858 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
859 }
860 fseek(file, 0, SEEK_END);
861 sizeFile = ftell(file);
862 sizeFileLeft = sizeFile;
863
864 if (sizeFile != tzInfo->defaultTZFileSize) {
865 result = FALSE;
866 } else {
867 /* Store the data from the files in seperate buffers and
868 * compare each byte to determine equality.
869 */
870 if (tzInfo->defaultTZBuffer == NULL) {
871 rewind(tzInfo->defaultTZFilePtr);
872 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
873 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
874 }
875 rewind(file);
876 while(sizeFileLeft > 0) {
877 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
878 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
879
880 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
881 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
882 result = FALSE;
883 break;
884 }
885 sizeFileLeft -= sizeFileRead;
886 tzInfo->defaultTZPosition += sizeFileRead;
887 }
888 }
889 } else {
890 result = FALSE;
891 }
892
893 if (file != NULL) {
894 fclose(file);
895 }
896
897 return result;
898 }
899 /*
900 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
901 */
902 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
903 #define SKIP1 "."
904 #define SKIP2 ".."
905 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
906 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
907 char curpath[MAX_PATH_SIZE];
908 DIR* dirp = opendir(path);
909 DIR* subDirp = NULL;
910 struct dirent* dirEntry = NULL;
911
912 char* result = NULL;
913 if (dirp == NULL) {
914 return result;
915 }
916
917 /* Save the current path */
918 uprv_memset(curpath, 0, MAX_PATH_SIZE);
919 uprv_strcpy(curpath, path);
920
921 /* Check each entry in the directory. */
922 while((dirEntry = readdir(dirp)) != NULL) {
923 const char* dirName = dirEntry->d_name;
924 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
925 /* Create a newpath with the new entry to test each entry in the directory. */
926 char newpath[MAX_PATH_SIZE];
927 uprv_strcpy(newpath, curpath);
928 uprv_strcat(newpath, dirName);
929
930 if ((subDirp = opendir(newpath)) != NULL) {
931 /* If this new path is a directory, make a recursive call with the newpath. */
932 closedir(subDirp);
933 uprv_strcat(newpath, "/");
934 result = searchForTZFile(newpath, tzInfo);
935 /*
936 Have to get out here. Otherwise, we'd keep looking
937 and return the first match in the top-level directory
938 if there's a match in the top-level. If not, this function
939 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
940 It worked without this in most cases because we have a fallback of calling
941 localtime_r to figure out the default timezone.
942 */
943 if (result != NULL)
944 break;
945 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
946 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
947 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
948 skipZoneIDPrefix(&zoneid);
949 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
950 result = SEARCH_TZFILE_RESULT;
951 /* Get out after the first one found. */
952 break;
953 }
954 }
955 }
956 }
957 closedir(dirp);
958 return result;
959 }
960 #endif
961 U_CAPI const char* U_EXPORT2
962 uprv_tzname(int n)
963 {
964 const char *tzid = NULL;
965 #ifdef U_WINDOWS
966 tzid = uprv_detectWindowsTimeZone();
967
968 if (tzid != NULL) {
969 return tzid;
970 }
971 #else
972
973 /*#if defined(U_DARWIN)
974 int ret;
975
976 tzid = getenv("TZFILE");
977 if (tzid != NULL) {
978 return tzid;
979 }
980 #endif*/
981
982 /* This code can be temporarily disabled to test tzname resolution later on. */
983 #ifndef DEBUG_TZNAME
984 tzid = getenv("TZ");
985 if (tzid != NULL && isValidOlsonID(tzid))
986 {
987 /* This might be a good Olson ID. */
988 skipZoneIDPrefix(&tzid);
989 return tzid;
990 }
991 /* else U_TZNAME will give a better result. */
992 #endif
993
994 #if defined(CHECK_LOCALTIME_LINK)
995 /* Caller must handle threading issues */
996 if (gTimeZoneBufferPtr == NULL) {
997 /*
998 This is a trick to look at the name of the link to get the Olson ID
999 because the tzfile contents is underspecified.
1000 This isn't guaranteed to work because it may not be a symlink.
1001 */
1002 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1003 if (0 < ret) {
1004 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1005 gTimeZoneBuffer[ret] = 0;
1006 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1007 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1008 {
1009 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1010 }
1011 } else {
1012 #if defined(SEARCH_TZFILE)
1013 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1014 if (tzInfo != NULL) {
1015 tzInfo->defaultTZBuffer = NULL;
1016 tzInfo->defaultTZFileSize = 0;
1017 tzInfo->defaultTZFilePtr = NULL;
1018 tzInfo->defaultTZstatus = FALSE;
1019 tzInfo->defaultTZPosition = 0;
1020
1021 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1022
1023 /* Free previously allocated memory */
1024 if (tzInfo->defaultTZBuffer != NULL) {
1025 uprv_free(tzInfo->defaultTZBuffer);
1026 }
1027 if (tzInfo->defaultTZFilePtr != NULL) {
1028 fclose(tzInfo->defaultTZFilePtr);
1029 }
1030 uprv_free(tzInfo);
1031 }
1032
1033 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1034 return gTimeZoneBufferPtr;
1035 }
1036 #endif
1037 }
1038 }
1039 else {
1040 return gTimeZoneBufferPtr;
1041 }
1042 #endif
1043 #endif
1044
1045 #ifdef U_TZNAME
1046 #ifdef U_WINDOWS
1047 /* The return value is free'd in timezone.cpp on Windows because
1048 * the other code path returns a pointer to a heap location. */
1049 return uprv_strdup(U_TZNAME[n]);
1050 #else
1051 /*
1052 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1053 So we remap the abbreviation to an olson ID.
1054
1055 Since Windows exposes a little more timezone information,
1056 we normally don't use this code on Windows because
1057 uprv_detectWindowsTimeZone should have already given the correct answer.
1058 */
1059 {
1060 struct tm juneSol, decemberSol;
1061 int daylightType;
1062 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1063 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1064
1065 /* This probing will tell us when daylight savings occurs. */
1066 localtime_r(&juneSolstice, &juneSol);
1067 localtime_r(&decemberSolstice, &decemberSol);
1068 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1069 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1070 if (tzid != NULL) {
1071 return tzid;
1072 }
1073 }
1074 return U_TZNAME[n];
1075 #endif
1076 #else
1077 return "";
1078 #endif
1079 }
1080
1081 /* Get and set the ICU data directory --------------------------------------- */
1082
1083 static char *gDataDirectory = NULL;
1084 #if U_POSIX_LOCALE
1085 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1086 #endif
1087
1088 static UBool U_CALLCONV putil_cleanup(void)
1089 {
1090 if (gDataDirectory && *gDataDirectory) {
1091 uprv_free(gDataDirectory);
1092 }
1093 gDataDirectory = NULL;
1094 #if U_POSIX_LOCALE
1095 if (gCorrectedPOSIXLocale) {
1096 uprv_free(gCorrectedPOSIXLocale);
1097 gCorrectedPOSIXLocale = NULL;
1098 }
1099 #endif
1100 return TRUE;
1101 }
1102
1103 /*
1104 * Set the data directory.
1105 * Make a copy of the passed string, and set the global data dir to point to it.
1106 * TODO: see bug #2849, regarding thread safety.
1107 */
1108 U_CAPI void U_EXPORT2
1109 u_setDataDirectory(const char *directory) {
1110 char *newDataDir;
1111 int32_t length;
1112
1113 if(directory==NULL || *directory==0) {
1114 /* A small optimization to prevent the malloc and copy when the
1115 shared library is used, and this is a way to make sure that NULL
1116 is never returned.
1117 */
1118 newDataDir = (char *)"";
1119 }
1120 else {
1121 length=(int32_t)uprv_strlen(directory);
1122 newDataDir = (char *)uprv_malloc(length + 2);
1123 /* Exit out if newDataDir could not be created. */
1124 if (newDataDir == NULL) {
1125 return;
1126 }
1127 uprv_strcpy(newDataDir, directory);
1128
1129 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1130 {
1131 char *p;
1132 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1133 *p = U_FILE_SEP_CHAR;
1134 }
1135 }
1136 #endif
1137 }
1138
1139 umtx_lock(NULL);
1140 if (gDataDirectory && *gDataDirectory) {
1141 uprv_free(gDataDirectory);
1142 }
1143 gDataDirectory = newDataDir;
1144 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1145 umtx_unlock(NULL);
1146 }
1147
1148 U_CAPI UBool U_EXPORT2
1149 uprv_pathIsAbsolute(const char *path)
1150 {
1151 if(!path || !*path) {
1152 return FALSE;
1153 }
1154
1155 if(*path == U_FILE_SEP_CHAR) {
1156 return TRUE;
1157 }
1158
1159 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1160 if(*path == U_FILE_ALT_SEP_CHAR) {
1161 return TRUE;
1162 }
1163 #endif
1164
1165 #if defined(U_WINDOWS)
1166 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1167 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1168 path[1] == ':' ) {
1169 return TRUE;
1170 }
1171 #endif
1172
1173 return FALSE;
1174 }
1175
1176 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1177 until some client wrapper makefiles are updated */
1178 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1179 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1180 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1181 # endif
1182 #endif
1183
1184 U_CAPI const char * U_EXPORT2
1185 u_getDataDirectory(void) {
1186 const char *path = NULL;
1187 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1188 char datadir_path_buffer[PATH_MAX];
1189 #endif
1190
1191 /* if we have the directory, then return it immediately */
1192 UMTX_CHECK(NULL, gDataDirectory, path);
1193
1194 if(path) {
1195 return path;
1196 }
1197
1198 /*
1199 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1200 override ICU's data with the ICU_DATA environment variable. This prevents
1201 problems where multiple custom copies of ICU's specific version of data
1202 are installed on a system. Either the application must define the data
1203 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1204 ICU, set the data with udata_setCommonData or trust that all of the
1205 required data is contained in ICU's data library that contains
1206 the entry point defined by U_ICUDATA_ENTRY_POINT.
1207
1208 There may also be some platforms where environment variables
1209 are not allowed.
1210 */
1211 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1212 /* First try to get the environment variable */
1213 path=getenv("ICU_DATA");
1214 # endif
1215
1216 /* ICU_DATA_DIR may be set as a compile option.
1217 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1218 * and is used only when data is built in archive mode eliminating the need
1219 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1220 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1221 * set their own path.
1222 */
1223 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1224 if(path==NULL || *path==0) {
1225 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1226 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1227 # endif
1228 # ifdef ICU_DATA_DIR
1229 path=ICU_DATA_DIR;
1230 # else
1231 path=U_ICU_DATA_DEFAULT_DIR;
1232 # endif
1233 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1234 if (prefix != NULL) {
1235 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1236 path=datadir_path_buffer;
1237 }
1238 # endif
1239 }
1240 #endif
1241
1242 if(path==NULL) {
1243 /* It looks really bad, set it to something. */
1244 path = "";
1245 }
1246
1247 u_setDataDirectory(path);
1248 return gDataDirectory;
1249 }
1250
1251
1252
1253
1254
1255 /* Macintosh-specific locale information ------------------------------------ */
1256 #ifdef XP_MAC
1257
/*
 * One row of the classic Mac OS locale mapping table. A field holding
 * MAC_LC_MAGIC_NUMBER acts as a wildcard that matches any value.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5  /* wildcard value inside mac_lc_recs */
#define MAC_LC_INIT_NUMBER -9   /* "not yet determined" value for lookups */

/*
 * Maps the Mac date-region code to a POSIX locale ID. Rows are searched in
 * order and the first match wins, so the all-wildcard en_US row must stay last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (was "is_IS", which is Icelandic and already used for region 21)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland [Ask Rich for the data. HS]*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
    /* Faeroe Islands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand*/

    /* fallback is en_US*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1357
1358 #endif
1359
1360 #if U_POSIX_LOCALE
1361 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1362 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1363 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1364 */
1365 static const char *uprv_getPOSIXIDForCategory(int category)
1366 {
1367 const char* posixID = NULL;
1368 if (category == LC_MESSAGES || category == LC_CTYPE) {
1369 /*
1370 * On Solaris two different calls to setlocale can result in
1371 * different values. Only get this value once.
1372 *
1373 * We must check this first because an application can set this.
1374 *
1375 * LC_ALL can't be used because it's platform dependent. The LANG
1376 * environment variable seems to affect LC_CTYPE variable by default.
1377 * Here is what setlocale(LC_ALL, NULL) can return.
1378 * HPUX can return 'C C C C C C C'
1379 * Solaris can return /en_US/C/C/C/C/C on the second try.
1380 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1381 *
1382 * The default codepage detection also needs to use LC_CTYPE.
1383 *
1384 * Do not call setlocale(LC_*, "")! Using an empty string instead
1385 * of NULL, will modify the libc behavior.
1386 */
1387 posixID = setlocale(category, NULL);
1388 if ((posixID == 0)
1389 || (uprv_strcmp("C", posixID) == 0)
1390 || (uprv_strcmp("POSIX", posixID) == 0))
1391 {
1392 /* Maybe we got some garbage. Try something more reasonable */
1393 posixID = getenv("LC_ALL");
1394 if (posixID == 0) {
1395 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1396 if (posixID == 0) {
1397 posixID = getenv("LANG");
1398 }
1399 }
1400 }
1401 }
1402 if ((posixID==0)
1403 || (uprv_strcmp("C", posixID) == 0)
1404 || (uprv_strcmp("POSIX", posixID) == 0))
1405 {
1406 /* Nothing worked. Give it a nice POSIX default value. */
1407 posixID = "en_US_POSIX";
1408 }
1409 return posixID;
1410 }
1411
/* Return just the POSIX id for the default locale, whatever happens to be in
 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
 * The lookup happens on the first call only; later calls return the cached pointer.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1423
/* Return just the POSIX id for the default codepage, whatever happens to be in
 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
 * The lookup happens on the first call only; later calls return the cached pointer.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1435 #endif
1436
1437 /* NOTE: The caller should handle thread safety */
1438 U_CAPI const char* U_EXPORT2
1439 uprv_getDefaultLocaleID()
1440 {
1441 #if U_POSIX_LOCALE
1442 /*
1443 Note that: (a '!' means the ID is improper somehow)
1444 LC_ALL ----> default_loc codepage
1445 --------------------------------------------------------
1446 ab.CD ab CD
1447 ab@CD ab__CD -
1448 ab@CD.EF ab__CD EF
1449
1450 ab_CD.EF@GH ab_CD_GH EF
1451
1452 Some 'improper' ways to do the same as above:
1453 ! ab_CD@GH.EF ab_CD_GH EF
1454 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1455 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1456
1457 _CD@GH _CD_GH -
1458 _CD.EF@GH _CD_GH EF
1459
1460 The variant cannot have dots in it.
1461 The 'rightmost' variant (@xxx) wins.
1462 The leftmost codepage (.xxx) wins.
1463 */
1464 char *correctedPOSIXLocale = 0;
1465 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1466 const char *p;
1467 const char *q;
1468 int32_t len;
1469
1470 /* Format: (no spaces)
1471 ll [ _CC ] [ . MM ] [ @ VV]
1472
1473 l = lang, C = ctry, M = charmap, V = variant
1474 */
1475
1476 if (gCorrectedPOSIXLocale != NULL) {
1477 return gCorrectedPOSIXLocale;
1478 }
1479
1480 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1481 /* assume new locale can't be larger than old one? */
1482 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1483 /* Exit on memory allocation error. */
1484 if (correctedPOSIXLocale == NULL) {
1485 return NULL;
1486 }
1487 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1488 correctedPOSIXLocale[p-posixID] = 0;
1489
1490 /* do not copy after the @ */
1491 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1492 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1493 }
1494 }
1495
1496 /* Note that we scan the *uncorrected* ID. */
1497 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1498 if (correctedPOSIXLocale == NULL) {
1499 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1500 /* Exit on memory allocation error. */
1501 if (correctedPOSIXLocale == NULL) {
1502 return NULL;
1503 }
1504 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1505 correctedPOSIXLocale[p-posixID] = 0;
1506 }
1507 p++;
1508
1509 /* Take care of any special cases here.. */
1510 if (!uprv_strcmp(p, "nynorsk")) {
1511 p = "NY";
1512 /* Don't worry about no__NY. In practice, it won't appear. */
1513 }
1514
1515 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1516 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1517 }
1518 else {
1519 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1520 }
1521
1522 if ((q = uprv_strchr(p, '.')) != NULL) {
1523 /* How big will the resulting string be? */
1524 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1525 uprv_strncat(correctedPOSIXLocale, p, q-p);
1526 correctedPOSIXLocale[len] = 0;
1527 }
1528 else {
1529 /* Anything following the @ sign */
1530 uprv_strcat(correctedPOSIXLocale, p);
1531 }
1532
1533 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1534 * How about 'russian' -> 'ru'?
1535 * Many of the other locales using ISO codes will be handled by the
1536 * canonicalization functions in uloc_getDefault.
1537 */
1538 }
1539
1540 /* Was a correction made? */
1541 if (correctedPOSIXLocale != NULL) {
1542 posixID = correctedPOSIXLocale;
1543 }
1544 else {
1545 /* copy it, just in case the original pointer goes away. See j2395 */
1546 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1547 /* Exit on memory allocation error. */
1548 if (correctedPOSIXLocale == NULL) {
1549 return NULL;
1550 }
1551 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1552 }
1553
1554 if (gCorrectedPOSIXLocale == NULL) {
1555 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1556 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1557 correctedPOSIXLocale = NULL;
1558 }
1559
1560 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1561 uprv_free(correctedPOSIXLocale);
1562 }
1563
1564 return posixID;
1565
1566 #elif defined(U_WINDOWS)
1567 UErrorCode status = U_ZERO_ERROR;
1568 LCID id = GetThreadLocale();
1569 const char* locID = uprv_convertToPosix(id, &status);
1570
1571 if (U_FAILURE(status)) {
1572 locID = "en_US";
1573 }
1574 return locID;
1575
1576 #elif defined(XP_MAC)
1577 int32_t script = MAC_LC_INIT_NUMBER;
1578 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1579 int32_t region = MAC_LC_INIT_NUMBER;
1580 /* = GetScriptManagerVariable(smRegionCode);*/
1581 int32_t lang = MAC_LC_INIT_NUMBER;
1582 /* = GetScriptManagerVariable(smScriptLang);*/
1583 int32_t date_region = MAC_LC_INIT_NUMBER;
1584 const char* posixID = 0;
1585 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1586 int32_t i;
1587 Intl1Hndl ih;
1588
1589 ih = (Intl1Hndl) GetIntlResource(1);
1590 if (ih)
1591 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1592
1593 for (i = 0; i < count; i++) {
1594 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1595 || (mac_lc_recs[i].script == script))
1596 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1597 || (mac_lc_recs[i].region == region))
1598 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1599 || (mac_lc_recs[i].lang == lang))
1600 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1601 || (mac_lc_recs[i].date_region == date_region))
1602 )
1603 {
1604 posixID = mac_lc_recs[i].posixID;
1605 break;
1606 }
1607 }
1608
1609 return posixID;
1610
1611 #elif defined(OS400)
1612 /* locales are process scoped and are by definition thread safe */
1613 static char correctedLocale[64];
1614 const char *localeID = getenv("LC_ALL");
1615 char *p;
1616
1617 if (localeID == NULL)
1618 localeID = getenv("LANG");
1619 if (localeID == NULL)
1620 localeID = setlocale(LC_ALL, NULL);
1621 /* Make sure we have something... */
1622 if (localeID == NULL)
1623 return "en_US_POSIX";
1624
1625 /* Extract the locale name from the path. */
1626 if((p = uprv_strrchr(localeID, '/')) != NULL)
1627 {
1628 /* Increment p to start of locale name. */
1629 p++;
1630 localeID = p;
1631 }
1632
1633 /* Copy to work location. */
1634 uprv_strcpy(correctedLocale, localeID);
1635
1636 /* Strip off the '.locale' extension. */
1637 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1638 *p = 0;
1639 }
1640
1641 /* Upper case the locale name. */
1642 T_CString_toUpperCase(correctedLocale);
1643
1644 /* See if we are using the POSIX locale. Any of the
1645 * following are equivalent and use the same QLGPGCMA
1646 * (POSIX) locale.
1647 * QLGPGCMA2 means UCS2
1648 * QLGPGCMA_4 means UTF-32
1649 * QLGPGCMA_8 means UTF-8
1650 */
1651 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1652 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1653 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1654 {
1655 uprv_strcpy(correctedLocale, "en_US_POSIX");
1656 }
1657 else
1658 {
1659 int16_t LocaleLen;
1660
1661 /* Lower case the lang portion. */
1662 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1663 {
1664 *p = uprv_tolower(*p);
1665 }
1666
1667 /* Adjust for Euro. After '_E' add 'URO'. */
1668 LocaleLen = uprv_strlen(correctedLocale);
1669 if (correctedLocale[LocaleLen - 2] == '_' &&
1670 correctedLocale[LocaleLen - 1] == 'E')
1671 {
1672 uprv_strcat(correctedLocale, "URO");
1673 }
1674
1675 /* If using Lotus-based locale then convert to
1676 * equivalent non Lotus.
1677 */
1678 else if (correctedLocale[LocaleLen - 2] == '_' &&
1679 correctedLocale[LocaleLen - 1] == 'L')
1680 {
1681 correctedLocale[LocaleLen - 2] = 0;
1682 }
1683
1684 /* There are separate simplified and traditional
1685 * locales called zh_HK_S and zh_HK_T.
1686 */
1687 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1688 {
1689 uprv_strcpy(correctedLocale, "zh_HK");
1690 }
1691
1692 /* A special zh_CN_GBK locale...
1693 */
1694 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1695 {
1696 uprv_strcpy(correctedLocale, "zh_CN");
1697 }
1698
1699 }
1700
1701 return correctedLocale;
1702 #endif
1703
1704 }
1705
1706 #if !U_CHARSET_IS_UTF8
1707 #if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale may be NULL or empty (both are treated as "no locale").
Returns the remapped name, the unchanged name when no rule applies, or NULL
when name is NULL or the empty string.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* Make sure that an empty locale is handled the same way as a missing one. */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        return "Shift-JIS";
    }
    if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        return "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            return "EUC-CN";
        }
        if (uprv_strcmp(locale, "zh_TW") == 0) {
            return "EUC-TW";
        }
        if (uprv_strcmp(locale, "ko_KR") == 0) {
            return "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        return "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        return "UTF-8";
    }
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
#elif defined(U_BSD)
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        return "hkbig5";
    }
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            return "EUC-KR";
        }
        if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            return "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif

    /* No rule applied: return NULL when "" is passed in, else the name as given. */
    return (name[0] == 0) ? NULL : name;
}
1836
/*
 * Extracts the codepage part of a POSIX locale ID ("ll_CC.codepage@variant").
 * The text after the first '.' is copied into `buffer` (capacity
 * `buffCapacity`), cut at any '@', and then passed together with the part
 * before the '.' through remapPlatformDependentCodepage().
 * Returns that function's result, or NULL when localeName is NULL or has no '.'.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    char *variant;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Locale part: everything before the dot, truncated to fit localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Codepage part: everything after the dot, without a trailing @variant. */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */
    variant = uprv_strchr(buffer, '@');
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, buffer);
}
1857 #endif
1858
/*
 * Determines the name of the platform's default codepage.
 * Every branch returns either a string literal or a pointer to a
 * function-local static buffer, so the caller must not free the result.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "* ", " ", &error);

    if (error.Bytes_Available == 0) {
        /* Prefer the job CCSID; 0xFFFF makes us fall through to the job's default CCSID. */
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    sprintf(codepage,"ibm-%d", ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];

    /* Leave room for the option suffix that is appended below. */
    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NULL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    sprintf(codepage, "windows-%d", GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = uprv_getPOSIXIDForDefaultCodepage();
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        const char *remapLocale = NULL;
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII. The locale is only passed along when it is not en_US_POSIX.
         */
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            remapLocale = localeName;
        }
#endif
        codeset = remapPlatformDependentCodepage(remapLocale, codeset);

        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name != NULL) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (codesetName[0] == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1954
1955
1956 U_CAPI const char* U_EXPORT2
1957 uprv_getDefaultCodepage()
1958 {
1959 static char const *name = NULL;
1960 umtx_lock(NULL);
1961 if (name == NULL) {
1962 name = int_getDefaultCodepage();
1963 }
1964 umtx_unlock(NULL);
1965 return name;
1966 }
1967 #endif /* !U_CHARSET_IS_UTF8 */
1968
1969
1970 /* end of platform-specific implementation -------------- */
1971
1972 /* version handling --------------------------------------------------------- */
1973
1974 U_CAPI void U_EXPORT2
1975 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1976 char *end;
1977 uint16_t part=0;
1978
1979 if(versionArray==NULL) {
1980 return;
1981 }
1982
1983 if(versionString!=NULL) {
1984 for(;;) {
1985 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1986 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1987 break;
1988 }
1989 versionString=end+1;
1990 }
1991 }
1992
1993 while(part<U_MAX_VERSION_LENGTH) {
1994 versionArray[part++]=0;
1995 }
1996 }
1997
1998 U_CAPI void U_EXPORT2
1999 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2000 if(versionArray!=NULL && versionString!=NULL) {
2001 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2002 int32_t len = u_strlen(versionString);
2003 if(len>U_MAX_VERSION_STRING_LENGTH) {
2004 len = U_MAX_VERSION_STRING_LENGTH;
2005 }
2006 u_UCharsToChars(versionString, versionChars, len);
2007 versionChars[len]=0;
2008 u_versionFromString(versionArray, versionChars);
2009 }
2010 }
2011
2012 U_CAPI void U_EXPORT2
2013 u_versionToString(UVersionInfo versionArray, char *versionString) {
2014 uint16_t count, part;
2015 uint8_t field;
2016
2017 if(versionString==NULL) {
2018 return;
2019 }
2020
2021 if(versionArray==NULL) {
2022 versionString[0]=0;
2023 return;
2024 }
2025
2026 /* count how many fields need to be written */
2027 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2028 }
2029
2030 if(count <= 1) {
2031 count = 2;
2032 }
2033
2034 /* write the first part */
2035 /* write the decimal field value */
2036 field=versionArray[0];
2037 if(field>=100) {
2038 *versionString++=(char)('0'+field/100);
2039 field%=100;
2040 }
2041 if(field>=10) {
2042 *versionString++=(char)('0'+field/10);
2043 field%=10;
2044 }
2045 *versionString++=(char)('0'+field);
2046
2047 /* write the following parts */
2048 for(part=1; part<count; ++part) {
2049 /* write a dot first */
2050 *versionString++=U_VERSION_DELIMITER;
2051
2052 /* write the decimal field value */
2053 field=versionArray[part];
2054 if(field>=100) {
2055 *versionString++=(char)('0'+field/100);
2056 field%=100;
2057 }
2058 if(field>=10) {
2059 *versionString++=(char)('0'+field/10);
2060 field%=10;
2061 }
2062 *versionString++=(char)('0'+field);
2063 }
2064
2065 /* NUL-terminate */
2066 *versionString=0;
2067 }
2068
2069 U_CAPI void U_EXPORT2
2070 u_getVersion(UVersionInfo versionArray) {
2071 u_versionFromString(versionArray, U_ICU_VERSION);
2072 }
2073
2074 /**
2075 * icucfg.h dependent code
2076 */
2077
2078 #if U_ENABLE_DYLOAD
2079
2080 #if defined(U_CHECK_DYLOAD)
2081
2082 #if defined(HAVE_DLOPEN)
2083
2084 #ifdef HAVE_DLFCN_H
2085 #ifdef __MVS__
2086 #ifndef __SUSV3
2087 #define __SUSV3 1
2088 #endif
2089 #endif
2090 #include <dlfcn.h>
2091 #endif
2092
2093 U_INTERNAL void * U_EXPORT2
2094 uprv_dl_open(const char *libName, UErrorCode *status) {
2095 void *ret = NULL;
2096 if(U_FAILURE(*status)) return ret;
2097 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2098 if(ret==NULL) {
2099 #ifndef U_TRACE_DYLOAD
2100 perror("dlopen");
2101 #endif
2102 *status = U_MISSING_RESOURCE_ERROR;
2103 }
2104 return ret;
2105 }
2106
2107 U_INTERNAL void U_EXPORT2
2108 uprv_dl_close(void *lib, UErrorCode *status) {
2109 if(U_FAILURE(*status)) return;
2110 dlclose(lib);
2111 }
2112
2113 U_INTERNAL void* U_EXPORT2
2114 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2115 void *ret = NULL;
2116 if(U_FAILURE(*status)) return ret;
2117 ret = dlsym(lib, sym);
2118 if(ret == NULL) {
2119 *status = U_MISSING_RESOURCE_ERROR;
2120 }
2121 return ret;
2122 }
2123
2124 #else
2125
2126 /* null (nonexistent) implementation. */
2127
2128 U_INTERNAL void * U_EXPORT2
2129 uprv_dl_open(const char *libName, UErrorCode *status) {
2130 if(U_FAILURE(*status)) return NULL;
2131 *status = U_UNSUPPORTED_ERROR;
2132 return NULL;
2133 }
2134
2135 U_INTERNAL void U_EXPORT2
2136 uprv_dl_close(void *lib, UErrorCode *status) {
2137 if(U_FAILURE(*status)) return;
2138 *status = U_UNSUPPORTED_ERROR;
2139 return;
2140 }
2141
2142
2143 U_INTERNAL void* U_EXPORT2
2144 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2145 if(U_FAILURE(*status)) return NULL;
2146 *status = U_UNSUPPORTED_ERROR;
2147 return NULL;
2148 }
2149
2150
2151
2152 #endif
2153
2154 #elif defined U_WINDOWS
2155
2156 U_INTERNAL void * U_EXPORT2
2157 uprv_dl_open(const char *libName, UErrorCode *status) {
2158 HMODULE lib = NULL;
2159
2160 if(U_FAILURE(*status)) return NULL;
2161
2162 lib = LoadLibrary(libName);
2163
2164 if(lib==NULL) {
2165 *status = U_MISSING_RESOURCE_ERROR;
2166 }
2167
2168 return (void*)lib;
2169 }
2170
2171 U_INTERNAL void U_EXPORT2
2172 uprv_dl_close(void *lib, UErrorCode *status) {
2173 HMODULE handle = (HMODULE)lib;
2174 if(U_FAILURE(*status)) return;
2175
2176 FreeLibrary(handle);
2177
2178 return;
2179 }
2180
2181
2182 U_INTERNAL void* U_EXPORT2
2183 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2184 HMODULE handle = (HMODULE)lib;
2185 void * addr = NULL;
2186
2187 if(U_FAILURE(*status) || lib==NULL) return NULL;
2188
2189 addr = GetProcAddress(handle, sym);
2190
2191 if(addr==NULL) {
2192 DWORD lastError = GetLastError();
2193 if(lastError == ERROR_PROC_NOT_FOUND) {
2194 *status = U_MISSING_RESOURCE_ERROR;
2195 } else {
2196 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2197 }
2198 }
2199
2200 return addr;
2201 }
2202
2203
2204 #else
2205
2206 /* No dynamic loading set. */
2207
2208 U_INTERNAL void * U_EXPORT2
2209 uprv_dl_open(const char *libName, UErrorCode *status) {
2210 if(U_FAILURE(*status)) return NULL;
2211 *status = U_UNSUPPORTED_ERROR;
2212 return NULL;
2213 }
2214
2215 U_INTERNAL void U_EXPORT2
2216 uprv_dl_close(void *lib, UErrorCode *status) {
2217 if(U_FAILURE(*status)) return;
2218 *status = U_UNSUPPORTED_ERROR;
2219 return;
2220 }
2221
2222
2223 U_INTERNAL void* U_EXPORT2
2224 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2225 if(U_FAILURE(*status)) return NULL;
2226 *status = U_UNSUPPORTED_ERROR;
2227 return NULL;
2228 }
2229
2230
2231 #endif
2232
2233 #endif /* U_ENABLE_DYLOAD */
2234
2235 /*
2236 * Hey, Emacs, please set the following:
2237 *
2238 * Local Variables:
2239 * indent-tabs-mode: nil
2240 * End:
2241 *
2242 */