]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.c
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / common / putil.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
37 */
38
#if !defined(PTX)

/*
 * Feature-test macros, defined here ahead of the #include lines below.
 * _XOPEN_SOURCE is wanted by Solaris and friends; NetBSD needs it to be >= 4.
 */
#if !defined(_XOPEN_SOURCE)
#   define _XOPEN_SOURCE 4
#endif

/* Linux and glibc: define __USE_POSIX and __USE_XOPEN unless already set. */
#if !defined(__USE_POSIX)
#   define __USE_POSIX
#endif
#if !defined(__USE_XOPEN)
#   define __USE_XOPEN
#endif

#endif /* !PTX */
56
57 /* include ICU headers */
58 #include "unicode/utypes.h"
59 #include "unicode/putil.h"
60 #include "unicode/ustring.h"
61 #include "putilimp.h"
62 #include "uassert.h"
63 #include "umutex.h"
64 #include "cmemory.h"
65 #include "cstring.h"
66 #include "locmap.h"
67 #include "ucln_cmn.h"
68 #include "udataswp.h"
69
70 /* include system headers */
71 #ifdef WIN32
72 # define WIN32_LEAN_AND_MEAN
73 # define VC_EXTRALEAN
74 # define NOUSER
75 # define NOSERVICE
76 # define NOIME
77 # define NOMCX
78 # include <windows.h>
79 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
80 /* tzset isn't defined in strict ANSI on Cygwin. */
81 # undef __STRICT_ANSI__
82 #elif defined(OS2)
83 # define INCL_DOSMISC
84 # define INCL_DOSERRORS
85 # define INCL_DOSMODULEMGR
86 # include <os2.h>
87 #elif defined(OS400)
88 # include <float.h>
89 # include <qusec.h> /* error code structure */
90 # include <qusrjobi.h>
91 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
92 #elif defined(XP_MAC)
93 # include <Files.h>
94 # include <IntlResources.h>
95 # include <Script.h>
96 # include <Folders.h>
97 # include <MacTypes.h>
98 # include <TextUtils.h>
99 #elif defined(OS390)
100 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
101 #elif defined(U_AIX)
102 #elif defined(U_SOLARIS) || defined(U_LINUX)
103 #elif defined(U_HPUX)
104 #elif defined(U_DARWIN)
105 #include <sys/file.h>
106 #include <sys/param.h>
107 #elif defined(U_QNX)
108 #include <sys/neutrino.h>
109 #endif
110
111 /* Include standard headers. */
112 #include <stdio.h>
113 #include <stdlib.h>
114 #include <string.h>
115 #include <math.h>
116 #include <locale.h>
117 #include <float.h>
118 #include <time.h>
119
120 /*
121 * Only include langinfo.h if we have a way to get the codeset. If we later
122 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
123 *
124 */
125
126 #if U_HAVE_NL_LANGINFO_CODESET
127 #include <langinfo.h>
128 #endif
129
130 /* Define the extension for data files, again... */
131 #define DATA_TYPE "dat"
132
133 /* Leave this copyright notice here! */
134 static const char copyright[] = U_COPYRIGHT_STRING;
135
136 /* floating point implementations ------------------------------------------- */
137
/* Mask for the most significant bit of a 32-bit word. */
#define SIGN 0x80000000U

/*
 * With GCC the NaN/Infinity helpers work on the whole 64-bit pattern at once.
 * This is an optimization for when u_topNBytesOfDouble and
 * u_bottomNBytesOfDouble can't be properly optimized by the compiler.
 */
#ifdef __GNUC__
#   define USE_64BIT_DOUBLE_OPTIMIZATION 1
#else
#   define USE_64BIT_DOUBLE_OPTIMIZATION 0
#endif

#if !USE_64BIT_DOUBLE_OPTIMIZATION

/*
 * Top 16 bits of the NaN and Infinity patterns; uprv_getNaN() and
 * uprv_getInfinity() write them into a zeroed double on first use.
 * We return QNAN rather than SNAN.
 */
#   if IEEE_754
#       define NAN_TOP ((int16_t)0x7FF8)
#       define INF_TOP ((int16_t)0x7FF0)
#   elif defined(OS390)
#       define NAN_TOP ((int16_t)0x7F08)
#       define INF_TOP ((int16_t)0x3F00)
#   endif

/* Lazily built values plus their "already built" flags. */
static UBool fgNaNInitialized = FALSE;
static UBool fgInfInitialized = FALSE;
static double gNan;
static double gInf;
static double * const fgNan = &gNan;
static double * const fgInf = &gInf;

#else

/*
 * Compile-time 64-bit patterns for a quiet NaN and +Infinity
 * (gcc 3.2 has an optimization bug).
 * NOTE(review): fgNan/fgInf view int64_t objects through double pointers;
 * confirm this stays safe with the optimization levels in use.
 */
static const int64_t gNan64 = 0x7FF8000000000000LL;
static const int64_t gInf64 = 0x7FF0000000000000LL;
static const double * const fgNan = (const double *)(&gNan64);
static const double * const fgInf = (const double *)(&gInf64);

#endif
174
175 /*---------------------------------------------------------------------------
176 Platform utilities
177 Our general strategy is to assume we're on a POSIX platform. Platforms which
178 are non-POSIX must declare themselves so. The default POSIX implementation
179 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
180 functions).
181 ---------------------------------------------------------------------------*/
182
/* Every platform except Windows, classic Mac, OS/400 and OS/2 is treated as POSIX. */
#if !(defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2))
#   define U_POSIX_LOCALE 1
#else
#   undef U_POSIX_LOCALE
#endif
188
/* Utilities to get at the raw bytes of a double. */

/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian machines those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return (firstByte + sizeof(double)) - n;
#endif
}
199
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian machines those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return (firstByte + sizeof(double)) - n;
#else
    return firstByte;
#endif
}
209
210 /*---------------------------------------------------------------------------
211 Universal Implementations
212 These are designed to work on all platforms. Try these, and if they don't
213 work on your platform, then special case your platform with new
214 implementations.
215 ---------------------------------------------------------------------------*/
216
217 /* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
218 U_CAPI UDate U_EXPORT2
219 uprv_getUTCtime()
220 {
221 #ifdef XP_MAC
222 time_t t, t1, t2;
223 struct tm tmrec;
224
225 uprv_memset( &tmrec, 0, sizeof(tmrec) );
226 tmrec.tm_year = 70;
227 tmrec.tm_mon = 0;
228 tmrec.tm_mday = 1;
229 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
230
231 time(&t);
232 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
233 t2 = mktime(&tmrec); /* seconds of current GMT*/
234 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
235 #else
236 time_t epochtime;
237 time(&epochtime);
238 return (UDate)epochtime * U_MILLIS_PER_SECOND;
239 #endif
240 }
241
242 /*-----------------------------------------------------------------------------
243 IEEE 754
244 These methods detect and return NaN and infinity values for doubles
245 conforming to IEEE 754. Platforms which support this standard include X86,
246 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
247 If this doesn't work on your platform, you have non-IEEE floating-point, and
248 will need to code your own versions. A naive implementation is to return 0.0
249 for getNaN and getInfinity, and false for isNaN and isInfinite.
250 ---------------------------------------------------------------------------*/
251
252 U_CAPI UBool U_EXPORT2
253 uprv_isNaN(double number)
254 {
255 #if IEEE_754
256 #if USE_64BIT_DOUBLE_OPTIMIZATION
257 /* gcc 3.2 has an optimization bug */
258 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
259 return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) > gInf64);
260
261 #else
262 /* This should work in theory, but it doesn't, so we resort to the more*/
263 /* complicated method below.*/
264 /* return number != number;*/
265
266 /* You can't return number == getNaN() because, by definition, NaN != x for*/
267 /* all x, including NaN (that is, NaN != NaN). So instead, we compare*/
268 /* against the known bit pattern. We must be careful of endianism here.*/
269 /* The pattern we are looking for id:*/
270
271 /* 7FFy yyyy yyyy yyyy (some y non-zero)*/
272
273 /* There are two different kinds of NaN, but we ignore the distinction*/
274 /* here. Note that the y value must be non-zero; if it is zero, then we*/
275 /* have infinity.*/
276
277 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
278 sizeof(uint32_t));
279 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
280 sizeof(uint32_t));
281
282 return (UBool)(((highBits & 0x7FF00000L) == 0x7FF00000L) &&
283 (((highBits & 0x000FFFFFL) != 0) || (lowBits != 0)));
284 #endif
285
286 #elif defined(OS390)
287 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
288 sizeof(uint32_t));
289 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
290 sizeof(uint32_t));
291
292 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
293 (lowBits == 0x00000000L);
294
295 #else
296 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
297 /* you'll need to replace this default implementation with what's correct*/
298 /* for your platform.*/
299 return number != number;
300 #endif
301 }
302
303 U_CAPI UBool U_EXPORT2
304 uprv_isInfinite(double number)
305 {
306 #if IEEE_754
307 #if USE_64BIT_DOUBLE_OPTIMIZATION
308 /* gcc 3.2 has an optimization bug */
309 return (UBool)(((*((int64_t *)&number)) & U_INT64_MAX) == gInf64);
310 #else
311
312 /* We know the top bit is the sign bit, so we mask that off in a copy of */
313 /* the number and compare against infinity. [LIU]*/
314 /* The following approach doesn't work for some reason, so we go ahead and */
315 /* scrutinize the pattern itself. */
316 /* double a = number; */
317 /* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
318 /* return a == uprv_getInfinity();*/
319 /* Instead, We want to see either:*/
320
321 /* 7FF0 0000 0000 0000*/
322 /* FFF0 0000 0000 0000*/
323
324 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
325 sizeof(uint32_t));
326 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
327 sizeof(uint32_t));
328
329 return (UBool)(((highBits & ~SIGN) == 0x7FF00000U) &&
330 (lowBits == 0x00000000U));
331 #endif
332
333 #elif defined(OS390)
334 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
335 sizeof(uint32_t));
336 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
337 sizeof(uint32_t));
338
339 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
340
341 #else
342 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
343 /* value, you'll need to replace this default implementation with what's*/
344 /* correct for your platform.*/
345 return number == (2.0 * number);
346 #endif
347 }
348
349 U_CAPI UBool U_EXPORT2
350 uprv_isPositiveInfinity(double number)
351 {
352 #if IEEE_754 || defined(OS390)
353 return (UBool)(number > 0 && uprv_isInfinite(number));
354 #else
355 return uprv_isInfinite(number);
356 #endif
357 }
358
359 U_CAPI UBool U_EXPORT2
360 uprv_isNegativeInfinity(double number)
361 {
362 #if IEEE_754 || defined(OS390)
363 return (UBool)(number < 0 && uprv_isInfinite(number));
364
365 #else
366 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
367 sizeof(uint32_t));
368 return((highBits & SIGN) && uprv_isInfinite(number));
369
370 #endif
371 }
372
373 U_CAPI double U_EXPORT2
374 uprv_getNaN()
375 {
376 #if IEEE_754 || defined(OS390)
377 #if !USE_64BIT_DOUBLE_OPTIMIZATION
378 if (!fgNaNInitialized) {
379 /* This variable is always initialized with the same value,
380 so a mutex isn't needed. */
381 int i;
382 int8_t* p = (int8_t*)fgNan;
383 for(i = 0; i < sizeof(double); ++i)
384 *p++ = 0;
385 *(int16_t*)u_topNBytesOfDouble(fgNan, sizeof(NAN_TOP)) = NAN_TOP;
386 fgNaNInitialized = TRUE;
387 }
388 #endif
389 return *fgNan;
390 #else
391 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
392 /* you'll need to replace this default implementation with what's correct*/
393 /* for your platform.*/
394 return 0.0;
395 #endif
396 }
397
398 U_CAPI double U_EXPORT2
399 uprv_getInfinity()
400 {
401 #if IEEE_754 || defined(OS390)
402 #if !USE_64BIT_DOUBLE_OPTIMIZATION
403 if (!fgInfInitialized)
404 {
405 /* This variable is always initialized with the same value,
406 so a mutex isn't needed. */
407 int i;
408 int8_t* p = (int8_t*)fgInf;
409 for(i = 0; i < sizeof(double); ++i)
410 *p++ = 0;
411 *(int16_t*)u_topNBytesOfDouble(fgInf, sizeof(INF_TOP)) = INF_TOP;
412 fgInfInitialized = TRUE;
413 }
414 #endif
415 return *fgInf;
416 #else
417 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
418 /* value, you'll need to replace this default implementation with what's*/
419 /* correct for your platform.*/
420 return 0.0;
421 #endif
422 }
423
424 U_CAPI double U_EXPORT2
425 uprv_floor(double x)
426 {
427 return floor(x);
428 }
429
430 U_CAPI double U_EXPORT2
431 uprv_ceil(double x)
432 {
433 return ceil(x);
434 }
435
436 U_CAPI double U_EXPORT2
437 uprv_round(double x)
438 {
439 return uprv_floor(x + 0.5);
440 }
441
442 U_CAPI double U_EXPORT2
443 uprv_fabs(double x)
444 {
445 return fabs(x);
446 }
447
448 U_CAPI double U_EXPORT2
449 uprv_modf(double x, double* y)
450 {
451 return modf(x, y);
452 }
453
454 U_CAPI double U_EXPORT2
455 uprv_fmod(double x, double y)
456 {
457 return fmod(x, y);
458 }
459
460 U_CAPI double U_EXPORT2
461 uprv_pow(double x, double y)
462 {
463 /* This is declared as "double pow(double x, double y)" */
464 return pow(x, y);
465 }
466
467 U_CAPI double U_EXPORT2
468 uprv_pow10(int32_t x)
469 {
470 return pow(10.0, (double)x);
471 }
472
473 U_CAPI double U_EXPORT2
474 uprv_fmax(double x, double y)
475 {
476 #if IEEE_754
477 int32_t lowBits;
478
479 /* first handle NaN*/
480 if(uprv_isNaN(x) || uprv_isNaN(y))
481 return uprv_getNaN();
482
483 /* check for -0 and 0*/
484 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
485 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
486 return y;
487
488 #endif
489
490 /* this should work for all flt point w/o NaN and Infpecial cases */
491 return (x > y ? x : y);
492 }
493
494 U_CAPI int32_t U_EXPORT2
495 uprv_max(int32_t x, int32_t y)
496 {
497 return (x > y ? x : y);
498 }
499
500 U_CAPI double U_EXPORT2
501 uprv_fmin(double x, double y)
502 {
503 #if IEEE_754
504 int32_t lowBits;
505
506 /* first handle NaN*/
507 if(uprv_isNaN(x) || uprv_isNaN(y))
508 return uprv_getNaN();
509
510 /* check for -0 and 0*/
511 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
512 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
513 return y;
514
515 #endif
516
517 /* this should work for all flt point w/o NaN and Inf special cases */
518 return (x > y ? y : x);
519 }
520
521 U_CAPI int32_t U_EXPORT2
522 uprv_min(int32_t x, int32_t y)
523 {
524 return (x > y ? y : x);
525 }
526
527 /**
528 * Truncates the given double.
529 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
530 * This is different than calling floor() or ceil():
531 * floor(3.3) = 3, floor(-3.3) = -4
532 * ceil(3.3) = 4, ceil(-3.3) = -3
533 */
534 U_CAPI double U_EXPORT2
535 uprv_trunc(double d)
536 {
537 #if IEEE_754
538 int32_t lowBits;
539
540 /* handle error cases*/
541 if(uprv_isNaN(d))
542 return uprv_getNaN();
543 if(uprv_isInfinite(d))
544 return uprv_getInfinity();
545
546 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
547 if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
548 return ceil(d);
549 else
550 return floor(d);
551
552 #else
553 return d >= 0 ? floor(d) : ceil(d);
554
555 #endif
556 }
557
558 /**
559 * Return the largest positive number that can be represented by an integer
560 * type of arbitrary bit length.
561 */
562 U_CAPI double U_EXPORT2
563 uprv_maxMantissa(void)
564 {
565 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
566 }
567
568 /**
569 * Return the floor of the log base 10 of a given double.
570 * This method compensates for inaccuracies which arise naturally when
571 * computing logs, and always give the correct value. The parameter
572 * must be positive and finite.
573 * (Thanks to Alan Liu for supplying this function.)
574 */
575 U_CAPI int16_t U_EXPORT2
576 uprv_log10(double d)
577 {
578 #ifdef OS400
579 /* We don't use the normal implementation because you can't underflow */
580 /* a double otherwise an underflow exception occurs */
581 return log10(d);
582 #else
583 /* The reason this routine is needed is that simply taking the*/
584 /* log and dividing by log10 yields a result which may be off*/
585 /* by 1 due to rounding errors. For example, the naive log10*/
586 /* of 1.0e300 taken this way is 299, rather than 300.*/
587 double alog10 = log(d) / log(10.0);
588 int16_t ailog10 = (int16_t) floor(alog10);
589
590 /* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
591 if (alog10 > 0 && d >= pow(10.0, (double)(ailog10 + 1)))
592 ++ailog10;
593
594 /* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
595 else if (alog10 < 0 && d < pow(10.0, (double)(ailog10)))
596 --ailog10;
597
598 return ailog10;
599 #endif
600 }
601
602 U_CAPI double U_EXPORT2
603 uprv_log(double d)
604 {
605 return log(d);
606 }
607
#if 0
/* This isn't used. If it's readded, readd putiltst.c tests */

/*
 * Counts the digits that follow the decimal point of x, judged from its
 * "%+.9g" rendering; never returns a negative count.
 */
U_CAPI int32_t U_EXPORT2
uprv_digitsAfterDecimal(double x)
{
    char text[20];
    char *cursor = text;
    int32_t length, pointIndex, digitCount;
    int32_t exponent = 0;

    /* cheat and use the string-format routine to get a string representation */
    /* (it handles mathematical inaccuracy better than we can), then find out */
    /* how many characters are to the right of the decimal point */
    length = sprintf(text, "%+.9g", x);
    do {
        ++cursor;
    } while (isdigit(*cursor));

    pointIndex = (int32_t)(cursor - text);
    digitCount = (int32_t)(length - pointIndex - 1);

    /* if the number's string representation is in scientific notation, find */
    /* the exponent and take it into account */
    cursor = uprv_strchr(text, 'e');
    if (cursor != 0) {
        int16_t exponentIndex = (int16_t)(cursor - text);

        digitCount -= length - exponentIndex;
        exponent = (int32_t)(atol(cursor + 1));
    }

    /* the string representation may still have spurious decimal digits in it, */
    /* so we cut off at the ninth digit to the right of the decimal, and have */
    /* to search backward from there to the first non-zero digit */
    if (digitCount > 9) {
        for (digitCount = 9;
             digitCount > 0 && text[pointIndex + digitCount] == '0';
             --digitCount) {
        }
    }

    digitCount -= exponent;
    return (digitCount < 0) ? 0 : digitCount;
}
#endif
653
654 /*---------------------------------------------------------------------------
655 Platform-specific Implementations
656 Try these, and if they don't work on your platform, then special case your
657 platform with new implementations.
658 ---------------------------------------------------------------------------*/
659
660 /* Win32 time zone detection ------------------------------------------------ */
661
662 #ifdef WIN32
663
664 /*
665 This code attempts to detect the Windows time zone, as set in the
666 Windows Date and Time control panel. It attempts to work on
667 multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized
668 installs. It works by directly interrogating the registry and
669 comparing the data there with the data returned by the
670 GetTimeZoneInformation API, along with some other strategies. The
671 registry contains time zone data under one of two keys (depending on
672 the flavor of Windows):
673
674 HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\
675 HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\
676
677 Under this key are several subkeys, one for each time zone. These
678 subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time"
679 on WinNT/2k/XP. There are some other wrinkles; see the code for
680 details. The subkey name is NOT LOCALIZED, allowing us to support
681 localized installs.
682
683 Under the subkey are data values. We care about:
684
685 Std Standard time display name, localized
686 TZI Binary block of data
687
688 The TZI data is of particular interest. It contains the offset, two
689 more offsets for standard and daylight time, and the start and end
690 rules. This is the same data returned by the GetTimeZoneInformation
691 API. The API may modify the data on the way out, so we have to be
692 careful, but essentially we do a binary comparison against the TZI
693 blocks of various registry keys. When we find a match, we know what
694 time zone Windows is set to. Since the registry key is not
695 localized, we can then translate the key through a simple table
696 lookup into the corresponding ICU time zone.
697
698 This strategy doesn't always work because there are zones which
699 share an offset and rules, so more than one TZI block will match.
700 For example, both Tokyo and Seoul are at GMT+9 with no DST rules;
701 their TZI blocks are identical. For these cases, we fall back to a
702 name lookup. We attempt to match the display name as stored in the
703 registry for the current zone to the display name stored in the
704 registry for various Windows zones. By comparing the registry data
705 directly we avoid conversion complications.
706
707 Author: Alan Liu
708 Since: ICU 2.6
709 Based on original code by Carl Brown <cbrown@xnetinc.com>
710 */
711
712 /**
713 * Layout of the binary registry data under the "TZI" key.
714 */
715 typedef struct {
716 LONG Bias;
717 LONG StandardBias;
718 LONG DaylightBias; /* Tweaked by GetTimeZoneInformation */
719 SYSTEMTIME StandardDate;
720 SYSTEMTIME DaylightDate;
721 } TZI;
722
/* One row of the Windows-to-ICU zone table: an ICU zone ID and the Windows ID it stands for. */
typedef struct {
    const char *icuid;
    const char *winid;
} WindowsICUMap;
727
728 /**
729 * Mapping between Windows zone IDs and ICU zone IDs. This list has
730 * been mechanically checked; all zone offsets match (most important)
731 * and city names match the display city names (where possible). The
732 * presence or absence of DST differs in some cases, but this is
733 * acceptable as long as the zone is semantically the same (which has
734 * been manually checked).
735 *
736 * Windows 9x/Me zone IDs are listed as "Pacific" rather than "Pacific
737 * Standard Time", which is seen in NT/2k/XP. This is fixed-up at
738 * runtime as needed. The one exception is "Mexico Standard Time 2",
739 * which is not present on Windows 9x/Me.
740 *
741 * Zones that are not unique under Offset+Rules should be grouped
742 * together for efficiency (see code below). In addition, rules MUST
743 * be grouped so that all zones of a single offset are together.
744 *
745 * Comments list S(tandard) or D(aylight), as declared by Windows,
746 * followed by the display name (data from Windows XP).
747 *
748 * NOTE: Etc/GMT+12 is CORRECT for offset GMT-12:00. Consult
749 * documentation elsewhere for an explanation.
750 */
751 static const WindowsICUMap ZONE_MAP[] = {
752 "Etc/GMT+12", "Dateline", /* S (GMT-12:00) International Date Line West */
753
754 "Pacific/Apia", "Samoa", /* S (GMT-11:00) Midway Island, Samoa */
755
756 "Pacific/Honolulu", "Hawaiian", /* S (GMT-10:00) Hawaii */
757
758 "America/Anchorage", "Alaskan", /* D (GMT-09:00) Alaska */
759
760 "America/Los_Angeles", "Pacific", /* D (GMT-08:00) Pacific Time (US & Canada); Tijuana */
761
762 "America/Phoenix", "US Mountain", /* S (GMT-07:00) Arizona */
763 "America/Denver", "Mountain", /* D (GMT-07:00) Mountain Time (US & Canada) */
764 "America/Chihuahua", "Mexico Standard Time 2", /* D (GMT-07:00) Chihuahua, La Paz, Mazatlan */
765
766 "America/Managua", "Central America", /* S (GMT-06:00) Central America */
767 "America/Regina", "Canada Central", /* S (GMT-06:00) Saskatchewan */
768 "America/Mexico_City", "Mexico", /* D (GMT-06:00) Guadalajara, Mexico City, Monterrey */
769 "America/Chicago", "Central", /* D (GMT-06:00) Central Time (US & Canada) */
770
771 "America/Indianapolis", "US Eastern", /* S (GMT-05:00) Indiana (East) */
772 "America/Bogota", "SA Pacific", /* S (GMT-05:00) Bogota, Lima, Quito */
773 "America/New_York", "Eastern", /* D (GMT-05:00) Eastern Time (US & Canada) */
774
775 "America/Caracas", "SA Western", /* S (GMT-04:00) Caracas, La Paz */
776 "America/Santiago", "Pacific SA", /* D (GMT-04:00) Santiago */
777 "America/Halifax", "Atlantic", /* D (GMT-04:00) Atlantic Time (Canada) */
778
779 "America/St_Johns", "Newfoundland", /* D (GMT-03:30) Newfoundland */
780
781 "America/Buenos_Aires", "SA Eastern", /* S (GMT-03:00) Buenos Aires, Georgetown */
782 "America/Godthab", "Greenland", /* D (GMT-03:00) Greenland */
783 "America/Sao_Paulo", "E. South America", /* D (GMT-03:00) Brasilia */
784
785 "America/Noronha", "Mid-Atlantic", /* D (GMT-02:00) Mid-Atlantic */
786
787 "Atlantic/Cape_Verde", "Cape Verde", /* S (GMT-01:00) Cape Verde Is. */
788 "Atlantic/Azores", "Azores", /* D (GMT-01:00) Azores */
789
790 "Africa/Casablanca", "Greenwich", /* S (GMT) Casablanca, Monrovia */
791 "Europe/London", "GMT", /* D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London */
792
793 "Africa/Lagos", "W. Central Africa", /* S (GMT+01:00) West Central Africa */
794 "Europe/Berlin", "W. Europe", /* D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna */
795 "Europe/Paris", "Romance", /* D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris */
796 "Europe/Sarajevo", "Central European", /* D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb */
797 "Europe/Belgrade", "Central Europe", /* D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague */
798
799 "Africa/Johannesburg", "South Africa", /* S (GMT+02:00) Harare, Pretoria */
800 "Asia/Jerusalem", "Israel", /* S (GMT+02:00) Jerusalem */
801 "Europe/Istanbul", "GTB", /* D (GMT+02:00) Athens, Istanbul, Minsk */
802 "Europe/Helsinki", "FLE", /* D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius */
803 "Africa/Cairo", "Egypt", /* D (GMT+02:00) Cairo */
804 "Europe/Bucharest", "E. Europe", /* D (GMT+02:00) Bucharest */
805
806 "Africa/Nairobi", "E. Africa", /* S (GMT+03:00) Nairobi */
807 "Asia/Riyadh", "Arab", /* S (GMT+03:00) Kuwait, Riyadh */
808 "Europe/Moscow", "Russian", /* D (GMT+03:00) Moscow, St. Petersburg, Volgograd */
809 "Asia/Baghdad", "Arabic", /* D (GMT+03:00) Baghdad */
810
811 "Asia/Tehran", "Iran", /* D (GMT+03:30) Tehran */
812
813 "Asia/Muscat", "Arabian", /* S (GMT+04:00) Abu Dhabi, Muscat */
814 "Asia/Tbilisi", "Caucasus", /* D (GMT+04:00) Baku, Tbilisi, Yerevan */
815
816 "Asia/Kabul", "Afghanistan", /* S (GMT+04:30) Kabul */
817
818 "Asia/Karachi", "West Asia", /* S (GMT+05:00) Islamabad, Karachi, Tashkent */
819 "Asia/Yekaterinburg", "Ekaterinburg", /* D (GMT+05:00) Ekaterinburg */
820
821 "Asia/Calcutta", "India", /* S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi */
822
823 "Asia/Katmandu", "Nepal", /* S (GMT+05:45) Kathmandu */
824
825 "Asia/Colombo", "Sri Lanka", /* S (GMT+06:00) Sri Jayawardenepura */
826 "Asia/Dhaka", "Central Asia", /* S (GMT+06:00) Astana, Dhaka */
827 "Asia/Novosibirsk", "N. Central Asia", /* D (GMT+06:00) Almaty, Novosibirsk */
828
829 "Asia/Rangoon", "Myanmar", /* S (GMT+06:30) Rangoon */
830
831 "Asia/Bangkok", "SE Asia", /* S (GMT+07:00) Bangkok, Hanoi, Jakarta */
832 "Asia/Krasnoyarsk", "North Asia", /* D (GMT+07:00) Krasnoyarsk */
833
834 "Australia/Perth", "W. Australia", /* S (GMT+08:00) Perth */
835 "Asia/Taipei", "Taipei", /* S (GMT+08:00) Taipei */
836 "Asia/Singapore", "Singapore", /* S (GMT+08:00) Kuala Lumpur, Singapore */
837 "Asia/Hong_Kong", "China", /* S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi */
838 "Asia/Irkutsk", "North Asia East", /* D (GMT+08:00) Irkutsk, Ulaan Bataar */
839
840 "Asia/Tokyo", "Tokyo", /* S (GMT+09:00) Osaka, Sapporo, Tokyo */
841 "Asia/Seoul", "Korea", /* S (GMT+09:00) Seoul */
842 "Asia/Yakutsk", "Yakutsk", /* D (GMT+09:00) Yakutsk */
843
844 "Australia/Darwin", "AUS Central", /* S (GMT+09:30) Darwin */
845 "Australia/Adelaide", "Cen. Australia", /* D (GMT+09:30) Adelaide */
846
847 "Pacific/Guam", "West Pacific", /* S (GMT+10:00) Guam, Port Moresby */
848 "Australia/Brisbane", "E. Australia", /* S (GMT+10:00) Brisbane */
849 "Asia/Vladivostok", "Vladivostok", /* D (GMT+10:00) Vladivostok */
850 "Australia/Hobart", "Tasmania", /* D (GMT+10:00) Hobart */
851 "Australia/Sydney", "AUS Eastern", /* D (GMT+10:00) Canberra, Melbourne, Sydney */
852
853 "Asia/Magadan", "Central Pacific", /* S (GMT+11:00) Magadan, Solomon Is., New Caledonia */
854
855 "Pacific/Fiji", "Fiji", /* S (GMT+12:00) Fiji, Kamchatka, Marshall Is. */
856 "Pacific/Auckland", "New Zealand", /* D (GMT+12:00) Auckland, Wellington */
857
858 "Pacific/Tongatapu", "Tonga", /* S (GMT+13:00) Nuku'alofa */
859 NULL, NULL
860 };
861
/* One row of the fallback table: a Windows zone ID and the alternate ID to try instead. */
typedef struct {
    const char *winid;
    const char *altwinid;
} WindowsZoneRemap;
866
867 /**
868 * If a lookup fails, we attempt to remap certain Windows ids to
869 * alternate Windows ids. If the alternate listed here begins with
870 * '-', we use it as is (without the '-'). If it begins with '+', we
871 * append a " Standard Time" if appropriate.
872 */
873 static const WindowsZoneRemap ZONE_REMAP[] = {
874 "Central European", "-Warsaw",
875 "Central Europe", "-Prague Bratislava",
876 "China", "-Beijing",
877
878 "Greenwich", "+GMT",
879 "GTB", "+GFT",
880 "Arab", "+Saudi Arabia",
881 "SE Asia", "+Bangkok",
882 "AUS Eastern", "+Sydney",
883 NULL, NULL,
884 };
885
/**
 * Flavor of Windows, from our perspective. Not a real OS version, but
 * rather the flavor of the layout of the time zone information in the
 * registry.
 */
enum {
    WIN_9X_ME_TYPE = 0,
    WIN_NT_TYPE    = 1,
    WIN_2K_XP_TYPE = 2
};

/**
 * Various registry keys and key fragments.
 */
static const char CURRENT_ZONE_REGKEY[]  = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
static const char STANDARD_NAME_REGKEY[] = "StandardName";
static const char STANDARD_TIME_REGKEY[] = " Standard Time";
static const char TZI_REGKEY[]           = "TZI";
static const char STD_REGKEY[]           = "Std";

/**
 * HKLM subkeys used to probe for the flavor of Windows. Note that we
 * specifically check for the "GMT" zone subkey; this is present on NT, but
 * on XP has become "GMT Standard Time". We need to discriminate between
 * these cases. There is no entry for WIN_2K_XP_TYPE.
 */
static const char* const WIN_TYPE_PROBE_REGKEY[] = {
    /* WIN_9X_ME_TYPE */
    "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones",

    /* WIN_NT_TYPE */
    "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT"

    /* otherwise: WIN_2K_XP_TYPE */
};

/**
 * The time zone root subkeys (under HKLM) for different flavors of Windows.
 * WIN_NT_TYPE and WIN_2K_XP_TYPE share the second entry.
 */
static const char* const TZ_REGKEY[] = {
    /* WIN_9X_ME_TYPE */
    "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\",

    /* WIN_NT_TYPE | WIN_2K_XP_TYPE */
    "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"
};
933
934 /**
935 * Auxiliary Windows time zone function. Attempts to open the given
936 * Windows time zone ID as a registry key. Returns ERROR_SUCCESS if
937 * successful. Caller must close the registry key. Handles
938 * variations in the resource layout in different flavors of Windows.
939 *
940 * @param hkey output parameter to receive opened registry key
941 * @param winid Windows zone ID, e.g., "Pacific", without the
942 * " Standard Time" suffix (if any). Special case "Mexico Standard Time 2"
943 * allowed.
944 * @param winType Windows flavor (WIN_9X_ME_TYPE, etc.)
945 * @return ERROR_SUCCESS upon success
946 */
947 static LONG openTZRegKey(HKEY *hkey, const char* winid, int winType) {
948 LONG result;
949 char subKeyName[96];
950 char* name;
951 int i;
952
953 uprv_strcpy(subKeyName, TZ_REGKEY[(winType == WIN_9X_ME_TYPE) ? 0 : 1]);
954 name = &subKeyName[strlen(subKeyName)];
955 uprv_strcat(subKeyName, winid);
956 if (winType != WIN_9X_ME_TYPE) {
957 /* Don't modify "Mexico Standard Time 2", which does not occur
958 on WIN_9X_ME_TYPE. Also, if the type is WIN_NT_TYPE, then
959 in practice this means the GMT key is not followed by
960 " Standard Time", so don't append in that case. */
961 int isMexico2 = (winid[uprv_strlen(winid)- 1] == '2');
962 if (!isMexico2 &&
963 !(winType == WIN_NT_TYPE && uprv_strcmp(winid, "GMT") == 0)) {
964 uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
965 }
966 }
967 result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
968 subKeyName,
969 0,
970 KEY_QUERY_VALUE,
971 hkey);
972
973 if (result != ERROR_SUCCESS) {
974 /* If the primary lookup fails, try to remap the Windows zone
975 ID, according to the remapping table. */
976 for (i=0; ZONE_REMAP[i].winid; ++i) {
977 if (uprv_strcmp(winid, ZONE_REMAP[i].winid) == 0) {
978 uprv_strcpy(name, ZONE_REMAP[i].altwinid + 1);
979 if (*(ZONE_REMAP[i].altwinid) == '+' &&
980 winType != WIN_9X_ME_TYPE) {
981 uprv_strcat(subKeyName, STANDARD_TIME_REGKEY);
982 }
983 result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
984 subKeyName,
985 0,
986 KEY_QUERY_VALUE,
987 hkey);
988 break;
989 }
990 }
991 }
992
993 return result;
994 }
995
996 /**
997 * Main Windows time zone detection function. Returns the Windows
998 * time zone, translated to an ICU time zone, or NULL upon failure.
999 */
1000 static const char* detectWindowsTimeZone() {
1001 int winType;
1002 LONG result;
1003 HKEY hkey;
1004 TZI tziKey;
1005 TZI tziReg;
1006 DWORD cbData = sizeof(TZI);
1007 TIME_ZONE_INFORMATION apiTZI;
1008 char stdName[32];
1009 DWORD stdNameSize;
1010 char stdRegName[64];
1011 DWORD stdRegNameSize;
1012 int firstMatch, lastMatch;
1013 int j;
1014
1015 /* Detect the version of windows by trying to open a sequence of
1016 probe keys. We don't use the OS version API because what we
1017 really want to know is how the registry is laid out.
1018 Specifically, is it 9x/Me or not, and is it "GMT" or "GMT
1019 Standard Time". */
1020 for (winType=0; winType<2; ++winType) {
1021 result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
1022 WIN_TYPE_PROBE_REGKEY[winType],
1023 0,
1024 KEY_QUERY_VALUE,
1025 &hkey);
1026 RegCloseKey(hkey);
1027 if (result == ERROR_SUCCESS) {
1028 break;
1029 }
1030 }
1031
1032 /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it
1033 to TZI. We could also interrogate the registry directly; we do
1034 this below if needed. */
1035 uprv_memset(&apiTZI, 0, sizeof(apiTZI));
1036 GetTimeZoneInformation(&apiTZI);
1037 tziKey.Bias = apiTZI.Bias;
1038 uprv_memcpy((char *)&tziKey.StandardDate, (char*)&apiTZI.StandardDate,
1039 sizeof(apiTZI.StandardDate));
1040 uprv_memcpy((char *)&tziKey.DaylightDate, (char*)&apiTZI.DaylightDate,
1041 sizeof(apiTZI.DaylightDate));
1042
1043 /* For each zone that can be identified by Offset+Rules, see if we
1044 have a match. Continue scanning after finding a match,
1045 recording the index of the first and the last match. We have
1046 to do this because some zones are not unique under
1047 Offset+Rules. */
1048 firstMatch = lastMatch = -1;
1049 for (j=0; ZONE_MAP[j].icuid; j++) {
1050 result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
1051 if (result == ERROR_SUCCESS) {
1052 result = RegQueryValueEx(hkey,
1053 TZI_REGKEY,
1054 NULL,
1055 NULL,
1056 (LPBYTE)&tziReg,
1057 &cbData);
1058 }
1059 RegCloseKey(hkey);
1060 if (result == ERROR_SUCCESS) {
1061 /* Assume that offsets are grouped together, and bail out
1062 when we've scanned everything with a matching
1063 offset. */
1064 if (firstMatch >= 0 && tziKey.Bias != tziReg.Bias) {
1065 break;
1066 }
1067 /* Windows alters the DaylightBias in some situations.
1068 Using the bias and the rules suffices, so overwrite
1069 these unreliable fields. */
1070 tziKey.StandardBias = tziReg.StandardBias;
1071 tziKey.DaylightBias = tziReg.DaylightBias;
1072 if (uprv_memcmp((char *)&tziKey, (char*)&tziReg,
1073 sizeof(tziKey)) == 0) {
1074 if (firstMatch < 0) {
1075 firstMatch = j;
1076 }
1077 lastMatch = j;
1078 }
1079 }
1080 }
1081
1082 /* This should never happen; if it does it means our table doesn't
1083 match Windows AT ALL, perhaps because this is post-XP? */
1084 if (firstMatch < 0) {
1085 return NULL;
1086 }
1087
1088 if (firstMatch != lastMatch) {
1089 /* Offset+Rules lookup yielded >= 2 matches. Try to match the
1090 localized display name. Get the name from the registry
1091 (not the API). This avoids conversion issues. Use the
1092 standard name, since Windows modifies the daylight name to
1093 match the standard name if there is no DST. */
1094 result = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
1095 CURRENT_ZONE_REGKEY,
1096 0,
1097 KEY_QUERY_VALUE,
1098 &hkey);
1099 if (result == ERROR_SUCCESS) {
1100 stdNameSize = sizeof(stdName);
1101 result = RegQueryValueEx(hkey,
1102 (LPTSTR)STANDARD_NAME_REGKEY,
1103 NULL,
1104 NULL,
1105 (LPBYTE)stdName,
1106 &stdNameSize);
1107 RegCloseKey(hkey);
1108
1109 /* Scan through the Windows time zone data in the registry
1110 again (just the range of zones with matching TZIs) and
1111 look for a standard display name match. */
1112 for (j=firstMatch; j<=lastMatch; j++) {
1113 result = openTZRegKey(&hkey, ZONE_MAP[j].winid, winType);
1114 if (result == ERROR_SUCCESS) {
1115 stdRegNameSize = sizeof(stdRegName);
1116 result = RegQueryValueEx(hkey,
1117 (LPTSTR)STD_REGKEY,
1118 NULL,
1119 NULL,
1120 (LPBYTE)stdRegName,
1121 &stdRegNameSize);
1122 }
1123 RegCloseKey(hkey);
1124 if (result == ERROR_SUCCESS &&
1125 stdRegNameSize == stdNameSize &&
1126 uprv_memcmp(stdName, stdRegName, stdNameSize) == 0) {
1127 firstMatch = j; /* record the match */
1128 break;
1129 }
1130 }
1131 } else {
1132 RegCloseKey(hkey); /* should never get here */
1133 }
1134 }
1135
1136 return ZONE_MAP[firstMatch].icuid;
1137 }
1138
1139 #endif /*WIN32*/
1140
1141 /* Generic time zone layer -------------------------------------------------- */
1142
1143 /* Time zone utilities */
1144 U_CAPI void U_EXPORT2
1145 uprv_tzset()
1146 {
1147 #ifdef U_TZSET
1148 U_TZSET();
1149 #else
1150 /* no initialization*/
1151 #endif
1152 }
1153
1154 U_CAPI int32_t U_EXPORT2
1155 uprv_timezone()
1156 {
1157 #ifdef U_TIMEZONE
1158 return U_TIMEZONE;
1159 #else
1160 time_t t, t1, t2;
1161 struct tm tmrec;
1162 UBool dst_checked;
1163 int32_t tdiff = 0;
1164
1165 time(&t);
1166 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
1167 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
1168 t1 = mktime(&tmrec); /* local time in seconds*/
1169 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
1170 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
1171 tdiff = t2 - t1;
1172 /* imitate NT behaviour, which returns same timezone offset to GMT for
1173 winter and summer*/
1174 if (dst_checked)
1175 tdiff += 3600;
1176 return tdiff;
1177 #endif
1178 }
1179
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
   some platforms need to have it declared here. */

#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
/* RS6000 and others reject char **tzname. */
extern U_IMPORT char *U_TZNAME[];
#endif

#if defined(U_DARWIN) /* For Mac OS X */
/* Symbolic link that uprv_tzname() resolves with readlink(). */
#define TZZONELINK "/etc/localtime"
/* Prefix that uprv_tzname() requires on the link target and skips over. */
#define TZZONEINFO "/usr/share/zoneinfo/"
/* Buffer that uprv_tzname() allocates to receive the link target. */
static char *gTimeZoneBuffer = NULL; /* Heap allocated */
#endif
1193
1194 U_CAPI const char* U_EXPORT2
1195 uprv_tzname(int n)
1196 {
1197 #ifdef WIN32
1198 char* id = (char*) detectWindowsTimeZone();
1199 if (id != NULL) {
1200 return id;
1201 }
1202 #endif
1203
1204 #if defined(U_DARWIN)
1205 int ret;
1206
1207 char *tzenv;
1208
1209 tzenv = getenv("TZFILE");
1210 if (tzenv != NULL) {
1211 return tzenv;
1212 }
1213
1214 #if 0
1215 /* TZ is often set to "PST8PDT" or similar, so we cannot use it. Alan */
1216 tzenv = getenv("TZ");
1217 if (tzenv != NULL) {
1218 return tzenv;
1219 }
1220 #endif
1221
1222 /* Caller must handle threading issues */
1223 if (gTimeZoneBuffer == NULL) {
1224 gTimeZoneBuffer = (char *) uprv_malloc(MAXPATHLEN + 2);
1225
1226 ret = readlink(TZZONELINK, gTimeZoneBuffer, MAXPATHLEN + 2);
1227 if (0 < ret) {
1228 gTimeZoneBuffer[ret] = '\0';
1229 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, sizeof(TZZONEINFO) - 1) == 0) {
1230 return (gTimeZoneBuffer += sizeof(TZZONEINFO) - 1);
1231 }
1232 }
1233
1234 uprv_free(gTimeZoneBuffer);
1235 gTimeZoneBuffer = NULL;
1236 }
1237 #endif
1238
1239 #ifdef U_TZNAME
1240 return U_TZNAME[n];
1241 #else
1242 return "";
1243 #endif
1244 }
1245
1246 /* Get and set the ICU data directory --------------------------------------- */
1247
/* Heap copy of the data directory stored by u_setDataDirectory();
   freed by putil_cleanup(). */
static char *gDataDirectory = NULL;
#if U_POSIX_LOCALE
/* Cached result of uprv_getDefaultLocaleID(); freed by putil_cleanup(). */
 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
1252
1253 static UBool U_CALLCONV putil_cleanup(void)
1254 {
1255 if (gDataDirectory) {
1256 uprv_free(gDataDirectory);
1257 gDataDirectory = NULL;
1258 }
1259 #if U_POSIX_LOCALE
1260 if (gCorrectedPOSIXLocale) {
1261 uprv_free(gCorrectedPOSIXLocale);
1262 gCorrectedPOSIXLocale = NULL;
1263 }
1264 #endif
1265 return TRUE;
1266 }
1267
1268 /*
1269 * Set the data directory.
1270 * Make a copy of the passed string, and set the global data dir to point to it.
1271 * TODO: see bug #2849, regarding thread safety.
1272 */
1273 U_CAPI void U_EXPORT2
1274 u_setDataDirectory(const char *directory) {
1275 char *newDataDir;
1276 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1277 char *p;
1278 #endif
1279 int32_t length;
1280
1281 if(directory==NULL) {
1282 directory = "";
1283 }
1284 length=(int32_t)uprv_strlen(directory);
1285 newDataDir = (char *)uprv_malloc(length + 2);
1286 uprv_strcpy(newDataDir, directory);
1287
1288 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1289 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1290 *p = U_FILE_SEP_CHAR;
1291 }
1292 #endif
1293
1294 umtx_lock(NULL);
1295 if (gDataDirectory) {
1296 uprv_free(gDataDirectory);
1297 }
1298 gDataDirectory = newDataDir;
1299 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1300 umtx_unlock(NULL);
1301 }
1302
1303 U_CAPI UBool U_EXPORT2
1304 uprv_pathIsAbsolute(const char *path)
1305 {
1306 if(!path || !*path) {
1307 return FALSE;
1308 }
1309
1310 if(*path == U_FILE_SEP_CHAR) {
1311 return TRUE;
1312 }
1313
1314 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1315 if(*path == U_FILE_ALT_SEP_CHAR) {
1316 return TRUE;
1317 }
1318 #endif
1319
1320 #if defined(WIN32)
1321 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1322 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1323 path[1] == ':' ) {
1324 return TRUE;
1325 }
1326 #endif
1327
1328 return FALSE;
1329 }
1330
1331 U_CAPI const char * U_EXPORT2
1332 u_getDataDirectory(void) {
1333 const char *path = NULL;
1334 char pathBuffer[1024];
1335 const char *dataDir;
1336
1337 /* if we have the directory, then return it immediately */
1338 umtx_lock(NULL);
1339 dataDir = gDataDirectory;
1340 umtx_unlock(NULL);
1341
1342 if(dataDir) {
1343 return dataDir;
1344 }
1345
1346 /* we need to look for it */
1347 pathBuffer[0] = 0; /* Shuts up compiler warnings about unreferenced */
1348 /* variables when the code using it is ifdefed out */
1349 # if !defined(XP_MAC)
1350 /* first try to get the environment variable */
1351 path=getenv("ICU_DATA");
1352 # else /* XP_MAC */
1353 {
1354 OSErr myErr;
1355 short vRef;
1356 long dir,newDir;
1357 int16_t volNum;
1358 Str255 xpath;
1359 FSSpec spec;
1360 short len;
1361 Handle full;
1362
1363 xpath[0]=0;
1364
1365 myErr = HGetVol(xpath, &volNum, &dir);
1366
1367 if(myErr == noErr) {
1368 myErr = FindFolder(volNum, kApplicationSupportFolderType, TRUE, &vRef, &dir);
1369 newDir=-1;
1370 if (myErr == noErr) {
1371 myErr = DirCreate(volNum,
1372 dir,
1373 "\pICU",
1374 &newDir);
1375 if( (myErr == noErr) || (myErr == dupFNErr) ) {
1376 spec.vRefNum = volNum;
1377 spec.parID = dir;
1378 uprv_memcpy(spec.name, "\pICU", 4);
1379
1380 myErr = FSpGetFullPath(&spec, &len, &full);
1381 if(full != NULL)
1382 {
1383 HLock(full);
1384 uprv_memcpy(pathBuffer, ((char*)(*full)), len);
1385 pathBuffer[len] = 0;
1386 path = pathBuffer;
1387 DisposeHandle(full);
1388 }
1389 }
1390 }
1391 }
1392 }
1393 # endif
1394
1395
1396 # if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
1397 /* next, try to read the path from the registry */
1398 if(path==NULL || *path==0) {
1399 HKEY key;
1400
1401 if(ERROR_SUCCESS==RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE, &key)) {
1402 DWORD type=REG_EXPAND_SZ, size=sizeof(pathBuffer);
1403
1404 if(ERROR_SUCCESS==RegQueryValueEx(key, "Path", NULL, &type, (unsigned char *)pathBuffer, &size) && size>1) {
1405 if(type==REG_EXPAND_SZ) {
1406 /* replace environment variable references by their values */
1407 char temporaryPath[1024];
1408
1409 /* copy the path with variables to the temporary one */
1410 uprv_memcpy(temporaryPath, pathBuffer, size);
1411
1412 /* do the replacement and store it in the pathBuffer */
1413 size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
1414 if(size>0 && size<sizeof(pathBuffer)) {
1415 path=pathBuffer;
1416 }
1417 } else if(type==REG_SZ) {
1418 path=pathBuffer;
1419 }
1420 }
1421 RegCloseKey(key);
1422 }
1423 }
1424 # endif
1425
1426 /* ICU_DATA_DIR may be set as a compile option */
1427 # ifdef ICU_DATA_DIR
1428 if(path==NULL || *path==0) {
1429 path=ICU_DATA_DIR;
1430 }
1431 # endif
1432
1433 if(path==NULL) {
1434 /* It looks really bad, set it to something. */
1435 path = "";
1436 }
1437
1438 u_setDataDirectory(path);
1439 return gDataDirectory;
1440 }
1441
1442
1443
1444
1445
1446 /* Macintosh-specific locale information ------------------------------------ */
1447 #ifdef XP_MAC
1448
/* One row of the Macintosh locale table.  The first four fields are match
   criteria compared by uprv_getDefaultLocaleID(); posixID is the locale ID
   returned for the first row whose criteria all match. */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
/* Wildcard: a table field holding this value matches any system value. */
#define MAC_LC_MAGIC_NUMBER -5
/* Initial value of the system-side variables before they are queried. */
#define MAC_LC_INIT_NUMBER -9
1461
1462 static const mac_lc_rec mac_lc_recs[] = {
1463 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
1464 /* United States*/
1465 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
1466 /* France*/
1467 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
1468 /* Great Britain*/
1469 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
1470 /* Germany*/
1471 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
1472 /* Italy*/
1473 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
1474 /* Metherlands*/
1475 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
1476 /* French for Belgium or Lxembourg*/
1477 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
1478 /* Sweden*/
1479 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
1480 /* Denmark*/
1481 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
1482 /* Portugal*/
1483 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
1484 /* French Canada*/
1485 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
1486 /* Israel*/
1487 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
1488 /* Japan*/
1489 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
1490 /* Australia*/
1491 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
1492 /* the Arabic world (?)*/
1493 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
1494 /* Finland*/
1495 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
1496 /* French for Switzerland*/
1497 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
1498 /* German for Switzerland*/
1499 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
1500 /* Greece*/
1501 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
1502 /* Iceland ===*/
1503 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1504 /* Malta ===*/
1505 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1506 /* Cyprus ===*/
1507 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
1508 /* Turkey ===*/
1509 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
1510 /* Croatian system for Yugoslavia*/
1511 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1512 /* Hindi system for India*/
1513 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1514 /* Pakistan*/
1515 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
1516 /* Lithuania*/
1517 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
1518 /* Poland*/
1519 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
1520 /* Hungary*/
1521 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
1522 /* Estonia*/
1523 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
1524 /* Latvia*/
1525 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1526 /* Lapland [Ask Rich for the data. HS]*/
1527 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1528 /* Faeroe Islands*/
1529 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
1530 /* Iran*/
1531 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
1532 /* Russia*/
1533 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
1534 /* Ireland*/
1535 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
1536 /* Korea*/
1537 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
1538 /* People's Republic of China*/
1539 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
1540 /* Taiwan*/
1541 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
1542 /* Thailand*/
1543
1544 /* fallback is en_US*/
1545 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
1546 MAC_LC_MAGIC_NUMBER, "en_US"
1547 };
1548
1549 #endif
1550
1551 #if U_POSIX_LOCALE
/* Return just the POSIX id, whatever happens to be in it.
 *
 * The raw value is looked up once, in this order: the LC_ALL environment
 * variable, the LANG environment variable, setlocale(LC_ALL, NULL).  An
 * environment variable that is set but empty counts as not set, as in the
 * OS/2 branch of uprv_getDefaultLocaleID().
 *
 * The value is then normalized:
 *   - nothing found                       -> "en_US"
 *   - "C" or "POSIX", or a value containing
 *     a space or a '/'                    -> "en_US_POSIX"
 * "POSIX" is handled like "C" to agree with the OS/2 and OS/400 branches
 * of uprv_getDefaultLocaleID().
 *
 * The result is cached in a function-local static; the function is not
 * protected against concurrent first calls.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    if (posixID == 0) {
        posixID = getenv("LC_ALL");
        if (posixID == 0 || *posixID == 0) {
            posixID = getenv("LANG");
            if (posixID == 0 || *posixID == 0) {
                /*
                * On Solaris two different calls to setlocale can result in
                * different values. Only get this value once.
                */
                posixID = setlocale(LC_ALL, NULL);
            }
        }
    }

    if (posixID==0 || *posixID==0)
    {
        /* Nothing worked.  Give it a nice value. */
        posixID = "en_US";
    }
    else if ((uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0)
        || (uprv_strchr(posixID, ' ') != NULL)
        || (uprv_strchr(posixID, '/') != NULL))
    {   /* HPUX returns 'C C C C C C C' */
        /* Solaris can return /en_US/C/C/C/C/C on the second try. */
        /* Maybe we got some garbage.  Give it a nice value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
1585 #endif
1586
1587 /* NOTE: The caller should handle thread safety */
1588 U_CAPI const char* U_EXPORT2
1589 uprv_getDefaultLocaleID()
1590 {
1591 #if U_POSIX_LOCALE
1592 /*
1593 Note that: (a '!' means the ID is improper somehow)
1594 LC_ALL ----> default_loc codepage
1595 --------------------------------------------------------
1596 ab.CD ab CD
1597 ab@CD ab__CD -
1598 ab@CD.EF ab__CD EF
1599
1600 ab_CD.EF@GH ab_CD_GH EF
1601
1602 Some 'improper' ways to do the same as above:
1603 ! ab_CD@GH.EF ab_CD_GH EF
1604 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1605 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1606
1607 _CD@GH _CD_GH -
1608 _CD.EF@GH _CD_GH EF
1609
1610 The variant cannot have dots in it.
1611 The 'rightmost' variant (@xxx) wins.
1612 The leftmost codepage (.xxx) wins.
1613 */
1614 char *correctedPOSIXLocale = 0;
1615 const char* posixID = uprv_getPOSIXID();
1616 const char *p;
1617 const char *q;
1618 int32_t len;
1619
1620 /* Format: (no spaces)
1621 ll [ _CC ] [ . MM ] [ @ VV]
1622
1623 l = lang, C = ctry, M = charmap, V = variant
1624 */
1625
1626 if (gCorrectedPOSIXLocale != NULL) {
1627 return gCorrectedPOSIXLocale;
1628 }
1629
1630 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1631 /* assume new locale can't be larger than old one? */
1632 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1633 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1634 correctedPOSIXLocale[p-posixID] = 0;
1635
1636 /* do not copy after the @ */
1637 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1638 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1639 }
1640 }
1641
1642 /* Note that we scan the *uncorrected* ID. */
1643 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1644 if (correctedPOSIXLocale == NULL) {
1645 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1646 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1647 correctedPOSIXLocale[p-posixID] = 0;
1648 }
1649 p++;
1650
1651 /* Take care of any special cases here.. */
1652 if (!uprv_strcmp(p, "nynorsk")) {
1653 p = "NY";
1654
1655 /* Should we assume no_NO_NY instead of possible no__NY?
1656 * if (!uprv_strcmp(correctedPOSIXLocale, "no")) {
1657 * uprv_strcpy(correctedPOSIXLocale, "no_NO");
1658 * }
1659 */
1660 }
1661
1662 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1663 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1664 }
1665 else {
1666 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1667 }
1668
1669 if ((q = uprv_strchr(p, '.')) != NULL) {
1670 /* How big will the resulting string be? */
1671 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1672 uprv_strncat(correctedPOSIXLocale, p, q-p);
1673 correctedPOSIXLocale[len] = 0;
1674 }
1675 else {
1676 /* Anything following the @ sign */
1677 uprv_strcat(correctedPOSIXLocale, p);
1678 }
1679
1680 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1681 * How about 'russian' -> 'ru'?
1682 */
1683 }
1684
1685 /* Was a correction made? */
1686 if (correctedPOSIXLocale != NULL) {
1687 posixID = correctedPOSIXLocale;
1688 }
1689 else {
1690 /* copy it, just in case the original pointer goes away. See j2395 */
1691 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1692 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1693 }
1694
1695 if (gCorrectedPOSIXLocale == NULL) {
1696 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1697 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1698 correctedPOSIXLocale = NULL;
1699 }
1700
1701 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1702 uprv_free(correctedPOSIXLocale);
1703 }
1704
1705 return posixID;
1706
1707 #elif defined(WIN32)
1708 UErrorCode status = U_ZERO_ERROR;
1709 LCID id = GetThreadLocale();
1710 const char* locID = uprv_convertToPosix(id, &status);
1711
1712 if (U_FAILURE(status)) {
1713 locID = "en_US";
1714 }
1715 return locID;
1716
1717 #elif defined(XP_MAC)
1718 int32_t script = MAC_LC_INIT_NUMBER;
1719 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1720 int32_t region = MAC_LC_INIT_NUMBER;
1721 /* = GetScriptManagerVariable(smRegionCode);*/
1722 int32_t lang = MAC_LC_INIT_NUMBER;
1723 /* = GetScriptManagerVariable(smScriptLang);*/
1724 int32_t date_region = MAC_LC_INIT_NUMBER;
1725 const char* posixID = 0;
1726 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1727 int32_t i;
1728 Intl1Hndl ih;
1729
1730 ih = (Intl1Hndl) GetIntlResource(1);
1731 if (ih)
1732 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1733
1734 for (i = 0; i < count; i++) {
1735 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1736 || (mac_lc_recs[i].script == script))
1737 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1738 || (mac_lc_recs[i].region == region))
1739 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1740 || (mac_lc_recs[i].lang == lang))
1741 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1742 || (mac_lc_recs[i].date_region == date_region))
1743 )
1744 {
1745 posixID = mac_lc_recs[i].posixID;
1746 break;
1747 }
1748 }
1749
1750 return posixID;
1751
1752 #elif defined(OS2)
1753 char * locID;
1754
1755 locID = getenv("LC_ALL");
1756 if (!locID || !*locID)
1757 locID = getenv("LANG");
1758 if (!locID || !*locID) {
1759 locID = "en_US";
1760 }
1761 if (!stricmp(locID, "c") || !stricmp(locID, "posix") ||
1762 !stricmp(locID, "univ"))
1763 locID = "en_US_POSIX";
1764 return locID;
1765
1766 #elif defined(OS400)
1767 /* locales are process scoped and are by definition thread safe */
1768 static char correctedLocale[64];
1769 const char *localeID = getenv("LC_ALL");
1770 char *p;
1771
1772 if (localeID == NULL)
1773 localeID = getenv("LANG");
1774 if (localeID == NULL)
1775 localeID = setlocale(LC_ALL, NULL);
1776 /* Make sure we have something... */
1777 if (localeID == NULL)
1778 return "en_US_POSIX";
1779
1780 /* Extract the locale name from the path. */
1781 if((p = uprv_strrchr(localeID, '/')) != NULL)
1782 {
1783 /* Increment p to start of locale name. */
1784 p++;
1785 localeID = p;
1786 }
1787
1788 /* Copy to work location. */
1789 uprv_strcpy(correctedLocale, localeID);
1790
1791 /* Strip off the '.locale' extension. */
1792 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1793 *p = 0;
1794 }
1795
1796 /* Upper case the locale name. */
1797 T_CString_toUpperCase(correctedLocale);
1798
1799 /* See if we are using the POSIX locale. Any of the
1800 * following are equivalent and use the same QLGPGCMA
1801 * (POSIX) locale.
1802 */
1803 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1804 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1805 (uprv_strcmp("QLGPGCMA", correctedLocale) == 0))
1806 {
1807 uprv_strcpy(correctedLocale, "en_US_POSIX");
1808 }
1809 else
1810 {
1811 int16_t LocaleLen;
1812
1813 /* Lower case the lang portion. */
1814 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1815 {
1816 *p = uprv_tolower(*p);
1817 }
1818
1819 /* Adjust for Euro. After '_E' add 'URO'. */
1820 LocaleLen = uprv_strlen(correctedLocale);
1821 if (correctedLocale[LocaleLen - 2] == '_' &&
1822 correctedLocale[LocaleLen - 1] == 'E')
1823 {
1824 uprv_strcat(correctedLocale, "URO");
1825 }
1826
1827 /* If using Lotus-based locale then convert to
1828 * equivalent non Lotus.
1829 */
1830 else if (correctedLocale[LocaleLen - 2] == '_' &&
1831 correctedLocale[LocaleLen - 1] == 'L')
1832 {
1833 correctedLocale[LocaleLen - 2] = 0;
1834 }
1835
1836 /* There are separate simplified and traditional
1837 * locales called zh_HK_S and zh_HK_T.
1838 */
1839 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1840 {
1841 uprv_strcpy(correctedLocale, "zh_HK");
1842 }
1843
1844 /* A special zh_CN_GBK locale...
1845 */
1846 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1847 {
1848 uprv_strcpy(correctedLocale, "zh_CN");
1849 }
1850
1851 }
1852
1853 return correctedLocale;
1854 #endif
1855
1856 }
1857
1858
/**
 * Determines the name of the platform's default codepage.
 *
 * - OS/400: "ibm-<ccsid>" from the job's CCSID (default CCSID 37).
 * - OS/390: nl_langinfo(CODESET) followed by UCNV_SWAP_LFNL_OPTION_STRING.
 * - XP_MAC: the fixed name "ibm-1275".
 * - Windows: "windows-<ANSI code page>".
 * - POSIX: the part after '.' in the name returned by
 *   setlocale(LC_CTYPE, ""), else nl_langinfo() where available, else the
 *   part after '.' in uprv_getPOSIXID(), else "US-ASCII".  Note that the
 *   setlocale() call changes the process's LC_CTYPE setting.
 * - Otherwise: "US-ASCII".
 *
 * @return a string literal or a pointer to a function-local static buffer
 * that the next call overwrites; not owned by the caller
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37;                            /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    /* NOTE(review): the two string arguments after "JOBI0400" look like
       fixed-width, blank-padded fields whose padding may have been lost
       in this copy of the source -- confirm against the QUSRJOBI API. */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "* ", " ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned: print it with a matching conversion */
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* the precision limits the codeset name so that name + option string
       + NUL always fit into the buffer */
    sprintf(codepage,"%.*s" UCNV_SWAP_LFNL_OPTION_STRING,
            (int)(sizeof(codepage) - sizeof(UCNV_SWAP_LFNL_OPTION_STRING)),
            nl_langinfo(CODESET));
    return codepage;

#elif defined(XP_MAC)
    return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(WIN32)
    static char codepage[64];
    /* GetACP() returns an unsigned value */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    char *name = NULL;
    char *euro = NULL;
    const char *localeName = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Check setlocale before the environment variables
       because the application may have set it first */
    /* setlocale needs "" and not NULL for Linux and Solaris */
    localeName = setlocale(LC_CTYPE, "");
    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, '@'))) != NULL) {
           *euro = 0;
        }
        /* if we can find the codeset name from setlocale, return that. */
        if (*name) {
            return name;
        }
    }

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        /* an empty answer carries no information: fall through to the
           remaining strategies instead of returning "" */
        if (codeset != NULL && *codeset != 0) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Try a locale specified by the user.
       This is usually underspecified and usually checked by setlocale already. */
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    localeName = uprv_getPOSIXID();
    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* strip the locale name and look at the suffix only */
        name = uprv_strncpy(codesetName, name+1, sizeof(codesetName));
        codesetName[sizeof(codesetName)-1] = 0;
        if ((euro = (uprv_strchr(name, '@'))) != NULL) {
           *euro = 0;
        }
        /* if we can find the codeset name, return that. */
        if (*name) {
            return name;
        }
    }

    if (*codesetName == 0)
    {
        /* if the table lookup failed, return US ASCII (ISO 646). */
        uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1966
1967
1968 U_CAPI const char* U_EXPORT2
1969 uprv_getDefaultCodepage()
1970 {
1971 static char const *name = NULL;
1972 umtx_lock(NULL);
1973 if (name == NULL) {
1974 name = int_getDefaultCodepage();
1975 }
1976 umtx_unlock(NULL);
1977 return name;
1978 }
1979
1980
1981 /* end of platform-specific implementation -------------- */
1982
1983 /* version handling --------------------------------------------------------- */
1984
1985 U_CAPI void U_EXPORT2
1986 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1987 char *end;
1988 uint16_t part=0;
1989
1990 if(versionArray==NULL) {
1991 return;
1992 }
1993
1994 if(versionString!=NULL) {
1995 for(;;) {
1996 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1997 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1998 break;
1999 }
2000 versionString=end+1;
2001 }
2002 }
2003
2004 while(part<U_MAX_VERSION_LENGTH) {
2005 versionArray[part++]=0;
2006 }
2007 }
2008
2009 U_CAPI void U_EXPORT2
2010 u_versionToString(UVersionInfo versionArray, char *versionString) {
2011 uint16_t count, part;
2012 uint8_t field;
2013
2014 if(versionString==NULL) {
2015 return;
2016 }
2017
2018 if(versionArray==NULL) {
2019 versionString[0]=0;
2020 return;
2021 }
2022
2023 /* count how many fields need to be written */
2024 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2025 }
2026
2027 if(count <= 1) {
2028 count = 2;
2029 }
2030
2031 /* write the first part */
2032 /* write the decimal field value */
2033 field=versionArray[0];
2034 if(field>=100) {
2035 *versionString++=(char)('0'+field/100);
2036 field%=100;
2037 }
2038 if(field>=10) {
2039 *versionString++=(char)('0'+field/10);
2040 field%=10;
2041 }
2042 *versionString++=(char)('0'+field);
2043
2044 /* write the following parts */
2045 for(part=1; part<count; ++part) {
2046 /* write a dot first */
2047 *versionString++=U_VERSION_DELIMITER;
2048
2049 /* write the decimal field value */
2050 field=versionArray[part];
2051 if(field>=100) {
2052 *versionString++=(char)('0'+field/100);
2053 field%=100;
2054 }
2055 if(field>=10) {
2056 *versionString++=(char)('0'+field/10);
2057 field%=10;
2058 }
2059 *versionString++=(char)('0'+field);
2060 }
2061
2062 /* NUL-terminate */
2063 *versionString=0;
2064 }
2065
2066 U_CAPI void U_EXPORT2
2067 u_getVersion(UVersionInfo versionArray) {
2068 u_versionFromString(versionArray, U_ICU_VERSION);
2069 }
2070
2071 /*
2072 * Hey, Emacs, please set the following:
2073 *
2074 * Local Variables:
2075 * indent-tabs-mode: nil
2076 * End:
2077 *
2078 */