2 ******************************************************************************
4 * Copyright (C) 1997-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
41 /* Define _XOPEN_SOURCE for Solaris and friends. */
42 /* NetBSD needs it to be >= 4 */
44 #define _XOPEN_SOURCE 4
47 /* Define __USE_POSIX and __USE_XOPEN for Linux and glibc. */
57 /* include ICU headers */
58 #include "unicode/utypes.h"
59 #include "unicode/putil.h"
60 #include "unicode/ustring.h"
70 /* include system headers */
72 # define WIN32_LEAN_AND_MEAN
79 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
80 /* tzset isn't defined in strict ANSI on Cygwin. */
81 # undef __STRICT_ANSI__
84 # define INCL_DOSERRORS
85 # define INCL_DOSMODULEMGR
89 # include <qusec.h> /* error code structure */
90 # include <qusrjobi.h>
91 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
94 # include <IntlResources.h>
97 # include <MacTypes.h>
98 # include <TextUtils.h>
100 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
102 #elif defined(U_SOLARIS) || defined(U_LINUX)
103 #elif defined(U_HPUX)
104 #elif defined(U_DARWIN)
105 #include <sys/file.h>
106 #include <sys/param.h>
108 #include <sys/neutrino.h>
111 /* Include standard headers. */
121 * Only include langinfo.h if we have a way to get the codeset. If we later
122 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
126 #if U_HAVE_NL_LANGINFO_CODESET
127 #include <langinfo.h>
130 /* Define the extension for data files, again... */
131 #define DATA_TYPE "dat"
133 /* Leave this copyright notice here! */
134 static const char copyright
[] = U_COPYRIGHT_STRING
;
136 /* floating point implementations ------------------------------------------- */
138 /* We return QNAN rather than SNAN*/
139 #define SIGN 0x80000000U
140 #if defined(__GNUC__)
142 This is an optimization for when u_topNBytesOfDouble
143 and u_bottomNBytesOfDouble can't be properly optimized by the compiler.
145 #define USE_64BIT_DOUBLE_OPTIMIZATION 1
147 #define USE_64BIT_DOUBLE_OPTIMIZATION 0
150 #if USE_64BIT_DOUBLE_OPTIMIZATION
151 /* gcc 3.2 has an optimization bug */
152 static const int64_t gNan64
= 0x7FF8000000000000LL
;
153 static const int64_t gInf64
= 0x7FF0000000000000LL
;
154 static const double * const fgNan
= (const double *)(&gNan64
);
155 static const double * const fgInf
= (const double *)(&gInf64
);
159 #define NAN_TOP ((int16_t)0x7FF8)
160 #define INF_TOP ((int16_t)0x7FF0)
162 #define NAN_TOP ((int16_t)0x7F08)
163 #define INF_TOP ((int16_t)0x3F00)
167 static UBool fgNaNInitialized
= FALSE
;
168 static UBool fgInfInitialized
= FALSE
;
171 static double * const fgNan
= &gNan
;
172 static double * const fgInf
= &gInf
;
175 /*---------------------------------------------------------------------------
177 Our general strategy is to assume we're on a POSIX platform. Platforms which
178 are non-POSIX must declare themselves so. The default POSIX implementation
179 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
181 ---------------------------------------------------------------------------*/
183 #if defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2)
184 # undef U_POSIX_LOCALE
186 # define U_POSIX_LOCALE 1
189 /* Utilities to get the bits from a double */
191 u_topNBytesOfDouble(double* d
, int n
)
196 return (char*)(d
+ 1) - n
;
201 u_bottomNBytesOfDouble(double* d
, int n
)
204 return (char*)(d
+ 1) - n
;
210 /*---------------------------------------------------------------------------
211 Universal Implementations
212 These are designed to work on all platforms. Try these, and if they don't
213 work on your platform, then special case your platform with new
215 ---------------------------------------------------------------------------*/
217 /* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
218 U_CAPI UDate U_EXPORT2
225 uprv_memset( &tmrec
, 0, sizeof(tmrec
) );
229 t1
= mktime(&tmrec
); /* seconds of 1/1/1970*/
232 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
233 t2
= mktime(&tmrec
); /* seconds of current GMT*/
234 return (UDate
)(t2
- t1
) * U_MILLIS_PER_SECOND
; /* GMT (or UTC) in seconds since 1970*/
238 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
242 /*-----------------------------------------------------------------------------
244 These methods detect and return NaN and infinity values for doubles
245 conforming to IEEE 754. Platforms which support this standard include X86,
246 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
247 If this doesn't work on your platform, you have non-IEEE floating-point, and
248 will need to code your own versions. A naive implementation is to return 0.0
249 for getNaN and getInfinity, and false for isNaN and isInfinite.
250 ---------------------------------------------------------------------------*/
252 U_CAPI UBool U_EXPORT2
253 uprv_isNaN(double number
)
256 #if USE_64BIT_DOUBLE_OPTIMIZATION
257 /* gcc 3.2 has an optimization bug */
258 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
259 return (UBool
)(((*((int64_t *)&number
)) & U_INT64_MAX
) > gInf64
);
262 /* This should work in theory, but it doesn't, so we resort to the more*/
263 /* complicated method below.*/
264 /* return number != number;*/
266 /* You can't return number == getNaN() because, by definition, NaN != x for*/
267 /* all x, including NaN (that is, NaN != NaN). So instead, we compare*/
268 /* against the known bit pattern. We must be careful of endianism here.*/
269 /* The pattern we are looking for is:*/
271 /* 7FFy yyyy yyyy yyyy (some y non-zero)*/
273 /* There are two different kinds of NaN, but we ignore the distinction*/
274 /* here. Note that the y value must be non-zero; if it is zero, then we*/
277 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
279 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
282 return (UBool
)(((highBits
& 0x7FF00000L
) == 0x7FF00000L
) &&
283 (((highBits
& 0x000FFFFFL
) != 0) || (lowBits
!= 0)));
287 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
289 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
292 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
293 (lowBits
== 0x00000000L
);
296 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
297 /* you'll need to replace this default implementation with what's correct*/
298 /* for your platform.*/
299 return number
!= number
;
303 U_CAPI UBool U_EXPORT2
304 uprv_isInfinite(double number
)
307 #if USE_64BIT_DOUBLE_OPTIMIZATION
308 /* gcc 3.2 has an optimization bug */
309 return (UBool
)(((*((int64_t *)&number
)) & U_INT64_MAX
) == gInf64
);
312 /* We know the top bit is the sign bit, so we mask that off in a copy of */
313 /* the number and compare against infinity. [LIU]*/
314 /* The following approach doesn't work for some reason, so we go ahead and */
315 /* scrutinize the pattern itself. */
316 /* double a = number; */
317 /* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
318 /* return a == uprv_getInfinity();*/
319 /* Instead, We want to see either:*/
321 /* 7FF0 0000 0000 0000*/
322 /* FFF0 0000 0000 0000*/
324 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
326 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
329 return (UBool
)(((highBits
& ~SIGN
) == 0x7FF00000U
) &&
330 (lowBits
== 0x00000000U
));
334 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
336 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
339 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
342 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
343 /* value, you'll need to replace this default implementation with what's*/
344 /* correct for your platform.*/
345 return number
== (2.0 * number
);
/*
 * Reports whether number is positive infinity.
 *
 * When IEEE_754 is non-zero or OS390 is defined, the result is TRUE only if
 * number compares greater than zero and uprv_isInfinite(number) is true.
 * The final return statement forwards uprv_isInfinite(number) with no sign
 * test; presumably it is the non-IEEE fallback branch (the directives that
 * separate the two returns are not visible in this excerpt -- confirm).
 */
349 U_CAPI UBool U_EXPORT2
350 uprv_isPositiveInfinity(double number
)
352 #if IEEE_754 || defined(OS390)
353 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
355 return uprv_isInfinite(number
);
/*
 * Reports whether number is negative infinity.
 *
 * When IEEE_754 is non-zero or OS390 is defined, the result is TRUE only if
 * number compares less than zero and uprv_isInfinite(number) is true.
 * The remaining code reads a 32-bit word of the double through
 * u_topNBytesOfDouble and requires the SIGN bit to be set in addition to
 * uprv_isInfinite(number); presumably this is the fallback branch (the
 * separating directives and the end of the highBits initializer are not
 * visible in this excerpt -- confirm).
 */
359 U_CAPI UBool U_EXPORT2
360 uprv_isNegativeInfinity(double number
)
362 #if IEEE_754 || defined(OS390)
363 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
366 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
368 return((highBits
& SIGN
) && uprv_isInfinite(number
));
373 U_CAPI
double U_EXPORT2
376 #if IEEE_754 || defined(OS390)
377 #if !USE_64BIT_DOUBLE_OPTIMIZATION
378 if (!fgNaNInitialized
) {
379 /* This variable is always initialized with the same value,
380 so a mutex isn't needed. */
382 int8_t* p
= (int8_t*)fgNan
;
383 for(i
= 0; i
< sizeof(double); ++i
)
385 *(int16_t*)u_topNBytesOfDouble(fgNan
, sizeof(NAN_TOP
)) = NAN_TOP
;
386 fgNaNInitialized
= TRUE
;
391 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
392 /* you'll need to replace this default implementation with what's correct*/
393 /* for your platform.*/
398 U_CAPI
double U_EXPORT2
401 #if IEEE_754 || defined(OS390)
402 #if !USE_64BIT_DOUBLE_OPTIMIZATION
403 if (!fgInfInitialized
)
405 /* This variable is always initialized with the same value,
406 so a mutex isn't needed. */
408 int8_t* p
= (int8_t*)fgInf
;
409 for(i
= 0; i
< sizeof(double); ++i
)
411 *(int16_t*)u_topNBytesOfDouble(fgInf
, sizeof(INF_TOP
)) = INF_TOP
;
412 fgInfInitialized
= TRUE
;
417 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
418 /* value, you'll need to replace this default implementation with what's*/
419 /* correct for your platform.*/
424 U_CAPI
double U_EXPORT2
430 U_CAPI
double U_EXPORT2
436 U_CAPI
double U_EXPORT2
439 return uprv_floor(x
+ 0.5);
442 U_CAPI
double U_EXPORT2
448 U_CAPI
double U_EXPORT2
449 uprv_modf(double x
, double* y
)
454 U_CAPI
double U_EXPORT2
455 uprv_fmod(double x
, double y
)
460 U_CAPI
double U_EXPORT2
461 uprv_pow(double x
, double y
)
463 /* This is declared as "double pow(double x, double y)" */
/*
 * Returns 10 raised to the power x.
 * The integer exponent is converted to double and handed to pow().
 */
467 U_CAPI
double U_EXPORT2
468 uprv_pow10(int32_t x
)
470 return pow(10.0, (double)x
);
/*
 * Returns the larger of two doubles.
 *
 * If either argument is NaN (per uprv_isNaN), uprv_getNaN() is returned.
 * When both arguments compare equal to 0.0, a 32-bit word of x obtained
 * through u_bottomNBytesOfDouble is tested against SIGN to tell -0 from +0;
 * the statement selected by that test is not visible in this excerpt.
 * In every other case the result is chosen with an ordinary x > y comparison.
 *
 * NOTE(review): confirm that the word returned by u_bottomNBytesOfDouble is
 * the one that actually carries the sign bit on every supported byte order.
 */
473 U_CAPI
double U_EXPORT2
474 uprv_fmax(double x
, double y
)
479 /* first handle NaN: a NaN operand makes the result NaN */
480 if(uprv_isNaN(x
) || uprv_isNaN(y
))
481 return uprv_getNaN();
483 /* check for -0 and 0 (they compare equal, so inspect the bits of x) */
484 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&x
, sizeof(uint32_t));
485 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
490 /* this should work for all flt point w/o NaN and Inf special cases */
491 return (x
> y
? x
: y
);
/*
 * Returns the larger of the two 32-bit signed integers x and y.
 */
494 U_CAPI
int32_t U_EXPORT2
495 uprv_max(int32_t x
, int32_t y
)
497 return (x
> y
? x
: y
);
/*
 * Returns the smaller of two doubles.
 *
 * If either argument is NaN (per uprv_isNaN), uprv_getNaN() is returned.
 * When both arguments compare equal to 0.0, a 32-bit word of y obtained
 * through u_bottomNBytesOfDouble is tested against SIGN to tell -0 from +0;
 * the statement selected by that test is not visible in this excerpt.
 * In every other case the result is chosen with an ordinary x > y comparison.
 *
 * NOTE(review): confirm that the word returned by u_bottomNBytesOfDouble is
 * the one that actually carries the sign bit on every supported byte order.
 */
500 U_CAPI
double U_EXPORT2
501 uprv_fmin(double x
, double y
)
506 /* first handle NaN: a NaN operand makes the result NaN */
507 if(uprv_isNaN(x
) || uprv_isNaN(y
))
508 return uprv_getNaN();
510 /* check for -0 and 0 (they compare equal, so inspect the bits of y) */
511 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&y
, sizeof(uint32_t));
512 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
517 /* this should work for all flt point w/o NaN and Inf special cases */
518 return (x
> y
? y
: x
);
/*
 * Returns the smaller of the two 32-bit signed integers x and y.
 */
521 U_CAPI
int32_t U_EXPORT2
522 uprv_min(int32_t x
, int32_t y
)
524 return (x
> y
? y
: x
);
528 * Truncates the given double.
529 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
530 * This is different than calling floor() or ceil():
531 * floor(3.3) = 3, floor(-3.3) = -4
532 * ceil(3.3) = 4, ceil(-3.3) = -3
534 U_CAPI
double U_EXPORT2
540 /* handle error cases*/
542 return uprv_getNaN();
543 if(uprv_isInfinite(d
))
544 return uprv_getInfinity();
546 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&d
, sizeof(uint32_t));
547 if( (d
== 0.0 && (lowBits
& SIGN
)) || d
< 0)
553 return d
>= 0 ? floor(d
) : ceil(d
);
559 * Return the largest positive number that can be represented by an integer
560 * type of arbitrary bit length.
/*
 * Evaluates pow(2.0, DBL_MANT_DIG + 1.0) - 1.0 in double arithmetic and
 * returns the result.
 *
 * NOTE(review): on radix-2 doubles, 2^(DBL_MANT_DIG+1) - 1 needs one more
 * significand bit than a double provides, so the subtraction presumably
 * rounds rather than yielding an exact all-ones value -- confirm whether the
 * exponent was meant to be DBL_MANT_DIG before relying on the exact result.
 */
562 U_CAPI
double U_EXPORT2
563 uprv_maxMantissa(void)
565 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
569 * Return the floor of the log base 10 of a given double.
570 * This method compensates for inaccuracies which arise naturally when
571 * computing logs, and always give the correct value. The parameter
572 * must be positive and finite.
573 * (Thanks to Alan Liu for supplying this function.)
575 U_CAPI
int16_t U_EXPORT2
579 /* We don't use the normal implementation because you can't underflow */
580 /* a double otherwise an underflow exception occurs */
583 /* The reason this routine is needed is that simply taking the*/
584 /* log and dividing by log10 yields a result which may be off*/
585 /* by 1 due to rounding errors. For example, the naive log10*/
586 /* of 1.0e300 taken this way is 299, rather than 300.*/
587 double alog10
= log(d
) / log(10.0);
588 int16_t ailog10
= (int16_t) floor(alog10
);
590 /* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
591 if (alog10
> 0 && d
>= pow(10.0, (double)(ailog10
+ 1)))
594 /* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
595 else if (alog10
< 0 && d
< pow(10.0, (double)(ailog10
)))
602 U_CAPI
double U_EXPORT2
609 /* This isn't used. If it's re-added, re-add the putiltst.c tests */
610 U_CAPI
int32_t U_EXPORT2
611 uprv_digitsAfterDecimal(double x
)
614 int32_t numDigits
, bytesWritten
;
616 int32_t ptPos
, exponent
;
618 /* cheat and use the string-format routine to get a string representation*/
619 /* (it handles mathematical inaccuracy better than we can), then find out how */
620 /* many characters are to the right of the decimal point */
621 bytesWritten
= sprintf(buffer
, "%+.9g", x
);
622 while (isdigit(*(++p
))) {
625 ptPos
= (int32_t)(p
- buffer
);
626 numDigits
= (int32_t)(bytesWritten
- ptPos
- 1);
628 /* if the number's string representation is in scientific notation, find */
629 /* the exponent and take it into account*/
631 p
= uprv_strchr(buffer
, 'e');
633 int16_t expPos
= (int16_t)(p
- buffer
);
634 numDigits
-= bytesWritten
- expPos
;
635 exponent
= (int32_t)(atol(p
+ 1));
638 /* the string representation may still have spurious decimal digits in it, */
639 /* so we cut off at the ninth digit to the right of the decimal, and have */
640 /* to search backward from there to the first non-zero digit*/
643 while (numDigits
> 0 && buffer
[ptPos
+ numDigits
] == '0')
646 numDigits
-= exponent
;
654 /*---------------------------------------------------------------------------
655 Platform-specific Implementations
656 Try these, and if they don't work on your platform, then special case your
657 platform with new implementations.
658 ---------------------------------------------------------------------------*/
660 /* Win32 time zone detection ------------------------------------------------ */
665 This code attempts to detect the Windows time zone, as set in the
666 Windows Date and Time control panel. It attempts to work on
667 multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized
668 installs. It works by directly interrogating the registry and
669 comparing the data there with the data returned by the
670 GetTimeZoneInformation API, along with some other strategies. The
671 registry contains time zone data under one of two keys (depending on
672 the flavor of Windows):
674 HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\
675 HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\
677 Under this key are several subkeys, one for each time zone. These
678 subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time"
679 on WinNT/2k/XP. There are some other wrinkles; see the code for
680 details. The subkey name is NOT LOCALIZED, allowing us to support
683 Under the subkey are data values. We care about:
685 Std Standard time display name, localized
686 TZI Binary block of data
688 The TZI data is of particular interest. It contains the offset, two
689 more offsets for standard and daylight time, and the start and end
690 rules. This is the same data returned by the GetTimeZoneInformation
691 API. The API may modify the data on the way out, so we have to be
692 careful, but essentially we do a binary comparison against the TZI
693 blocks of various registry keys. When we find a match, we know what
694 time zone Windows is set to. Since the registry key is not
695 localized, we can then translate the key through a simple table
696 lookup into the corresponding ICU time zone.
698 This strategy doesn't always work because there are zones which
699 share an offset and rules, so more than one TZI block will match.
700 For example, both Tokyo and Seoul are at GMT+9 with no DST rules;
701 their TZI blocks are identical. For these cases, we fall back to a
702 name lookup. We attempt to match the display name as stored in the
703 registry for the current zone to the display name stored in the
704 registry for various Windows zones. By comparing the registry data
705 directly we avoid conversion complications.
709 Based on original code by Carl Brown <cbrown@xnetinc.com>
713 * Layout of the binary registry data under the "TZI" key.
718 LONG DaylightBias
; /* Tweaked by GetTimeZoneInformation */
719 SYSTEMTIME StandardDate
;
720 SYSTEMTIME DaylightDate
;
729 * Mapping between Windows zone IDs and ICU zone IDs. This list has
730 * been mechanically checked; all zone offsets match (most important)
731 * and city names match the display city names (where possible). The
732 * presence or absence of DST differs in some cases, but this is
733 * acceptable as long as the zone is semantically the same (which has
734 * been manually checked).
736 * Windows 9x/Me zone IDs are listed as "Pacific" rather than "Pacific
737 * Standard Time", which is seen in NT/2k/XP. This is fixed-up at
738 * runtime as needed. The one exception is "Mexico Standard Time 2",
739 * which is not present on Windows 9x/Me.
741 * Zones that are not unique under Offset+Rules should be grouped
742 * together for efficiency (see code below). In addition, rules MUST
743 * be grouped so that all zones of a single offset are together.
745 * Comments list S(tandard) or D(aylight), as declared by Windows,
746 * followed by the display name (data from Windows XP).
748 * NOTE: Etc/GMT+12 is CORRECT for offset GMT-12:00. Consult
749 * documentation elsewhere for an explanation.
751 static const WindowsICUMap ZONE_MAP
[] = {
752 "Etc/GMT+12", "Dateline", /* S (GMT-12:00) International Date Line West */
754 "Pacific/Apia", "Samoa", /* S (GMT-11:00) Midway Island, Samoa */
756 "Pacific/Honolulu", "Hawaiian", /* S (GMT-10:00) Hawaii */
758 "America/Anchorage", "Alaskan", /* D (GMT-09:00) Alaska */
760 "America/Los_Angeles", "Pacific", /* D (GMT-08:00) Pacific Time (US & Canada); Tijuana */
762 "America/Phoenix", "US Mountain", /* S (GMT-07:00) Arizona */
763 "America/Denver", "Mountain", /* D (GMT-07:00) Mountain Time (US & Canada) */
764 "America/Chihuahua", "Mexico Standard Time 2", /* D (GMT-07:00) Chihuahua, La Paz, Mazatlan */
766 "America/Managua", "Central America", /* S (GMT-06:00) Central America */
767 "America/Regina", "Canada Central", /* S (GMT-06:00) Saskatchewan */
768 "America/Mexico_City", "Mexico", /* D (GMT-06:00) Guadalajara, Mexico City, Monterrey */
769 "America/Chicago", "Central", /* D (GMT-06:00) Central Time (US & Canada) */
771 "America/Indianapolis", "US Eastern", /* S (GMT-05:00) Indiana (East) */
772 "America/Bogota", "SA Pacific", /* S (GMT-05:00) Bogota, Lima, Quito */
773 "America/New_York", "Eastern", /* D (GMT-05:00) Eastern Time (US & Canada) */
775 "America/Caracas", "SA Western", /* S (GMT-04:00) Caracas, La Paz */
776 "America/Santiago", "Pacific SA", /* D (GMT-04:00) Santiago */
777 "America/Halifax", "Atlantic", /* D (GMT-04:00) Atlantic Time (Canada) */
779 "America/St_Johns", "Newfoundland", /* D (GMT-03:30) Newfoundland */
781 "America/Buenos_Aires", "SA Eastern", /* S (GMT-03:00) Buenos Aires, Georgetown */
782 "America/Godthab", "Greenland", /* D (GMT-03:00) Greenland */
783 "America/Sao_Paulo", "E. South America", /* D (GMT-03:00) Brasilia */
785 "America/Noronha", "Mid-Atlantic", /* D (GMT-02:00) Mid-Atlantic */
787 "Atlantic/Cape_Verde", "Cape Verde", /* S (GMT-01:00) Cape Verde Is. */
788 "Atlantic/Azores", "Azores", /* D (GMT-01:00) Azores */
790 "Africa/Casablanca", "Greenwich", /* S (GMT) Casablanca, Monrovia */
791 "Europe/London", "GMT", /* D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London */
793 "Africa/Lagos", "W. Central Africa", /* S (GMT+01:00) West Central Africa */
794 "Europe/Berlin", "W. Europe", /* D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna */
795 "Europe/Paris", "Romance", /* D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris */
796 "Europe/Sarajevo", "Central European", /* D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb */
797 "Europe/Belgrade", "Central Europe", /* D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague */
799 "Africa/Johannesburg", "South Africa", /* S (GMT+02:00) Harare, Pretoria */
800 "Asia/Jerusalem", "Israel", /* S (GMT+02:00) Jerusalem */
801 "Europe/Istanbul", "GTB", /* D (GMT+02:00) Athens, Istanbul, Minsk */
802 "Europe/Helsinki", "FLE", /* D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius */
803 "Africa/Cairo", "Egypt", /* D (GMT+02:00) Cairo */
804 "Europe/Bucharest", "E. Europe", /* D (GMT+02:00) Bucharest */
806 "Africa/Nairobi", "E. Africa", /* S (GMT+03:00) Nairobi */
807 "Asia/Riyadh", "Arab", /* S (GMT+03:00) Kuwait, Riyadh */
808 "Europe/Moscow", "Russian", /* D (GMT+03:00) Moscow, St. Petersburg, Volgograd */
809 "Asia/Baghdad", "Arabic", /* D (GMT+03:00) Baghdad */
811 "Asia/Tehran", "Iran", /* D (GMT+03:30) Tehran */
813 "Asia/Muscat", "Arabian", /* S (GMT+04:00) Abu Dhabi, Muscat */
814 "Asia/Tbilisi", "Caucasus", /* D (GMT+04:00) Baku, Tbilisi, Yerevan */
816 "Asia/Kabul", "Afghanistan", /* S (GMT+04:30) Kabul */
818 "Asia/Karachi", "West Asia", /* S (GMT+05:00) Islamabad, Karachi, Tashkent */
819 "Asia/Yekaterinburg", "Ekaterinburg", /* D (GMT+05:00) Ekaterinburg */
821 "Asia/Calcutta", "India", /* S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi */
823 "Asia/Katmandu", "Nepal", /* S (GMT+05:45) Kathmandu */
825 "Asia/Colombo", "Sri Lanka", /* S (GMT+06:00) Sri Jayawardenepura */
826 "Asia/Dhaka", "Central Asia", /* S (GMT+06:00) Astana, Dhaka */
827 "Asia/Novosibirsk", "N. Central Asia", /* D (GMT+06:00) Almaty, Novosibirsk */
829 "Asia/Rangoon", "Myanmar", /* S (GMT+06:30) Rangoon */
831 "Asia/Bangkok", "SE Asia", /* S (GMT+07:00) Bangkok, Hanoi, Jakarta */
832 "Asia/Krasnoyarsk", "North Asia", /* D (GMT+07:00) Krasnoyarsk */
834 "Australia/Perth", "W. Australia", /* S (GMT+08:00) Perth */
835 "Asia/Taipei", "Taipei", /* S (GMT+08:00) Taipei */
836 "Asia/Singapore", "Singapore", /* S (GMT+08:00) Kuala Lumpur, Singapore */
837 "Asia/Hong_Kong", "China", /* S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi */
838 "Asia/Irkutsk", "North Asia East", /* D (GMT+08:00) Irkutsk, Ulaan Bataar */
840 "Asia/Tokyo", "Tokyo", /* S (GMT+09:00) Osaka, Sapporo, Tokyo */
841 "Asia/Seoul", "Korea", /* S (GMT+09:00) Seoul */
842 "Asia/Yakutsk", "Yakutsk", /* D (GMT+09:00) Yakutsk */
844 "Australia/Darwin", "AUS Central", /* S (GMT+09:30) Darwin */
845 "Australia/Adelaide", "Cen. Australia", /* D (GMT+09:30) Adelaide */
847 "Pacific/Guam", "West Pacific", /* S (GMT+10:00) Guam, Port Moresby */
848 "Australia/Brisbane", "E. Australia", /* S (GMT+10:00) Brisbane */
849 "Asia/Vladivostok", "Vladivostok", /* D (GMT+10:00) Vladivostok */
850 "Australia/Hobart", "Tasmania", /* D (GMT+10:00) Hobart */
851 "Australia/Sydney", "AUS Eastern", /* D (GMT+10:00) Canberra, Melbourne, Sydney */
853 "Asia/Magadan", "Central Pacific", /* S (GMT+11:00) Magadan, Solomon Is., New Caledonia */
855 "Pacific/Fiji", "Fiji", /* S (GMT+12:00) Fiji, Kamchatka, Marshall Is. */
856 "Pacific/Auckland", "New Zealand", /* D (GMT+12:00) Auckland, Wellington */
858 "Pacific/Tongatapu", "Tonga", /* S (GMT+13:00) Nuku'alofa */
864 const char* altwinid
;
868 * If a lookup fails, we attempt to remap certain Windows ids to
869 * alternate Windows ids. If the alternate listed here begins with
870 * '-', we use it as is (without the '-'). If it begins with '+', we
871 * append a " Standard Time" if appropriate.
873 static const WindowsZoneRemap ZONE_REMAP
[] = {
874 "Central European", "-Warsaw",
875 "Central Europe", "-Prague Bratislava",
880 "Arab", "+Saudi Arabia",
881 "SE Asia", "+Bangkok",
882 "AUS Eastern", "+Sydney",
887 * Various registry keys and key fragments.
889 static const char CURRENT_ZONE_REGKEY
[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
890 static const char STANDARD_NAME_REGKEY
[] = "StandardName";
891 static const char STANDARD_TIME_REGKEY
[] = " Standard Time";
892 static const char TZI_REGKEY
[] = "TZI";
893 static const char STD_REGKEY
[] = "Std";
896 * HKLM subkeys used to probe for the flavor of Windows. Note that we
897 * specifically check for the "GMT" zone subkey; this is present on
898 * NT, but on XP has become "GMT Standard Time". We need to
899 * discriminate between these cases.
901 static const char* const WIN_TYPE_PROBE_REGKEY
[] = {
903 "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones",
906 "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT"
908 /* otherwise: WIN_2K_XP_TYPE */
912 * The time zone root subkeys (under HKLM) for different flavors of
915 static const char* const TZ_REGKEY
[] = {
917 "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\",
919 /* WIN_NT_TYPE | WIN_2K_XP_TYPE */
920 "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"
924 * Flavor of Windows, from our perspective. Not a real OS version,
925 * but rather the flavor of the layout of the time zone information in
935 * Auxiliary Windows time zone function. Attempts to open the given
936 * Windows time zone ID as a registry key. Returns ERROR_SUCCESS if
937 * successful. Caller must close the registry key. Handles
938 * variations in the resource layout in different flavors of Windows.
940 * @param hkey output parameter to receive opened registry key
941 * @param winid Windows zone ID, e.g., "Pacific", without the
942 * " Standard Time" suffix (if any). Special case "Mexico Standard Time 2"
944 * @param winType Windows flavor (WIN_9X_ME_TYPE, etc.)
945 * @return ERROR_SUCCESS upon success
947 static LONG
openTZRegKey(HKEY
*hkey
, const char* winid
, int winType
) {
953 uprv_strcpy(subKeyName
, TZ_REGKEY
[(winType
== WIN_9X_ME_TYPE
) ? 0 : 1]);
954 name
= &subKeyName
[strlen(subKeyName
)];
955 uprv_strcat(subKeyName
, winid
);
956 if (winType
!= WIN_9X_ME_TYPE
) {
957 /* Don't modify "Mexico Standard Time 2", which does not occur
958 on WIN_9X_ME_TYPE. Also, if the type is WIN_NT_TYPE, then
959 in practice this means the GMT key is not followed by
960 " Standard Time", so don't append in that case. */
961 int isMexico2
= (winid
[uprv_strlen(winid
)- 1] == '2');
963 !(winType
== WIN_NT_TYPE
&& uprv_strcmp(winid
, "GMT") == 0)) {
964 uprv_strcat(subKeyName
, STANDARD_TIME_REGKEY
);
967 result
= RegOpenKeyEx(HKEY_LOCAL_MACHINE
,
973 if (result
!= ERROR_SUCCESS
) {
974 /* If the primary lookup fails, try to remap the Windows zone
975 ID, according to the remapping table. */
976 for (i
=0; ZONE_REMAP
[i
].winid
; ++i
) {
977 if (uprv_strcmp(winid
, ZONE_REMAP
[i
].winid
) == 0) {
978 uprv_strcpy(name
, ZONE_REMAP
[i
].altwinid
+ 1);
979 if (*(ZONE_REMAP
[i
].altwinid
) == '+' &&
980 winType
!= WIN_9X_ME_TYPE
) {
981 uprv_strcat(subKeyName
, STANDARD_TIME_REGKEY
);
983 result
= RegOpenKeyEx(HKEY_LOCAL_MACHINE
,
997 * Main Windows time zone detection function. Returns the Windows
998 * time zone, translated to an ICU time zone, or NULL upon failure.
1000 static const char* detectWindowsTimeZone() {
1006 DWORD cbData
= sizeof(TZI
);
1007 TIME_ZONE_INFORMATION apiTZI
;
1010 char stdRegName
[64];
1011 DWORD stdRegNameSize
;
1012 int firstMatch
, lastMatch
;
1015 /* Detect the version of windows by trying to open a sequence of
1016 probe keys. We don't use the OS version API because what we
1017 really want to know is how the registry is laid out.
1018 Specifically, is it 9x/Me or not, and is it "GMT" or "GMT
1020 for (winType
=0; winType
<2; ++winType
) {
1021 result
= RegOpenKeyEx(HKEY_LOCAL_MACHINE
,
1022 WIN_TYPE_PROBE_REGKEY
[winType
],
1027 if (result
== ERROR_SUCCESS
) {
1032 /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it
1033 to TZI. We could also interrogate the registry directly; we do
1034 this below if needed. */
1035 uprv_memset(&apiTZI
, 0, sizeof(apiTZI
));
1036 GetTimeZoneInformation(&apiTZI
);
1037 tziKey
.Bias
= apiTZI
.Bias
;
1038 uprv_memcpy((char *)&tziKey
.StandardDate
, (char*)&apiTZI
.StandardDate
,
1039 sizeof(apiTZI
.StandardDate
));
1040 uprv_memcpy((char *)&tziKey
.DaylightDate
, (char*)&apiTZI
.DaylightDate
,
1041 sizeof(apiTZI
.DaylightDate
));
1043 /* For each zone that can be identified by Offset+Rules, see if we
1044 have a match. Continue scanning after finding a match,
1045 recording the index of the first and the last match. We have
1046 to do this because some zones are not unique under
1048 firstMatch
= lastMatch
= -1;
1049 for (j
=0; ZONE_MAP
[j
].icuid
; j
++) {
1050 result
= openTZRegKey(&hkey
, ZONE_MAP
[j
].winid
, winType
);
1051 if (result
== ERROR_SUCCESS
) {
1052 result
= RegQueryValueEx(hkey
,
1060 if (result
== ERROR_SUCCESS
) {
1061 /* Assume that offsets are grouped together, and bail out
1062 when we've scanned everything with a matching
1064 if (firstMatch
>= 0 && tziKey
.Bias
!= tziReg
.Bias
) {
1067 /* Windows alters the DaylightBias in some situations.
1068 Using the bias and the rules suffices, so overwrite
1069 these unreliable fields. */
1070 tziKey
.StandardBias
= tziReg
.StandardBias
;
1071 tziKey
.DaylightBias
= tziReg
.DaylightBias
;
1072 if (uprv_memcmp((char *)&tziKey
, (char*)&tziReg
,
1073 sizeof(tziKey
)) == 0) {
1074 if (firstMatch
< 0) {
1082 /* This should never happen; if it does it means our table doesn't
1083 match Windows AT ALL, perhaps because this is post-XP? */
1084 if (firstMatch
< 0) {
1088 if (firstMatch
!= lastMatch
) {
1089 /* Offset+Rules lookup yielded >= 2 matches. Try to match the
1090 localized display name. Get the name from the registry
1091 (not the API). This avoids conversion issues. Use the
1092 standard name, since Windows modifies the daylight name to
1093 match the standard name if there is no DST. */
1094 result
= RegOpenKeyEx(HKEY_LOCAL_MACHINE
,
1095 CURRENT_ZONE_REGKEY
,
1099 if (result
== ERROR_SUCCESS
) {
1100 stdNameSize
= sizeof(stdName
);
1101 result
= RegQueryValueEx(hkey
,
1102 (LPTSTR
)STANDARD_NAME_REGKEY
,
1109 /* Scan through the Windows time zone data in the registry
1110 again (just the range of zones with matching TZIs) and
1111 look for a standard display name match. */
1112 for (j
=firstMatch
; j
<=lastMatch
; j
++) {
1113 result
= openTZRegKey(&hkey
, ZONE_MAP
[j
].winid
, winType
);
1114 if (result
== ERROR_SUCCESS
) {
1115 stdRegNameSize
= sizeof(stdRegName
);
1116 result
= RegQueryValueEx(hkey
,
1124 if (result
== ERROR_SUCCESS
&&
1125 stdRegNameSize
== stdNameSize
&&
1126 uprv_memcmp(stdName
, stdRegName
, stdNameSize
) == 0) {
1127 firstMatch
= j
; /* record the match */
1132 RegCloseKey(hkey
); /* should never get here */
1136 return ZONE_MAP
[firstMatch
].icuid
;
1141 /* Generic time zone layer -------------------------------------------------- */
1143 /* Time zone utilities */
1144 U_CAPI
void U_EXPORT2
1150 /* no initialization*/
1154 U_CAPI
int32_t U_EXPORT2
1166 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
1167 dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
1168 t1
= mktime(&tmrec
); /* local time in seconds*/
1169 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
1170 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
1172 /* imitate NT behaviour, which returns same timezone offset to GMT for
1180 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
1181 some platforms need to have it declared here. */
1183 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
1184 /* RS6000 and others reject char **tzname. */
1185 extern U_IMPORT
char *U_TZNAME
[];
1188 #if defined(U_DARWIN) /* For Mac OS X */
1189 #define TZZONELINK "/etc/localtime"
1190 #define TZZONEINFO "/usr/share/zoneinfo/"
1191 static char *gTimeZoneBuffer
= NULL
; /* Heap allocated */
/*
 * Time-zone name lookup returning a const char* (presumably uprv_tzname; the
 * line carrying the function name is not visible in this excerpt).
 * Visible steps: take the ID from detectWindowsTimeZone(); under U_DARWIN
 * read the TZFILE and TZ environment variables and resolve the TZZONELINK
 * symbolic link, returning the part of its target that follows TZZONEINFO.
 */
1194 U_CAPI
const char* U_EXPORT2
/* Zone ID as reported by detectWindowsTimeZone(). */
1198 char* id
= (char*) detectWindowsTimeZone();
1204 #if defined(U_DARWIN)
/* First environment variable consulted in this branch. */
1209 tzenv
= getenv("TZFILE");
1210 if (tzenv
!= NULL
) {
1215 /* TZ is often set to "PST8PDT" or similar, so we cannot use it. Alan */
1216 tzenv
= getenv("TZ");
1217 if (tzenv
!= NULL
) {
1222 /* Caller must handle threading issues */
/* Allocate the link-target buffer on first use; it is kept in a file-level global. */
1223 if (gTimeZoneBuffer
== NULL
) {
1224 gTimeZoneBuffer
= (char *) uprv_malloc(MAXPATHLEN
+ 2);
/*
 * NOTE(review): readlink is given the full allocation size (MAXPATHLEN + 2)
 * and the terminator is then stored at index ret. If ret equals
 * MAXPATHLEN + 2 that store is one byte past the allocation, and if a guard
 * on a negative ret exists it is not visible in this excerpt. Confirm, and
 * consider passing one byte less than the buffer size.
 */
1226 ret
= readlink(TZZONELINK
, gTimeZoneBuffer
, MAXPATHLEN
+ 2);
1228 gTimeZoneBuffer
[ret
] = '\0';
/* Accept the link target only when it starts with the TZZONEINFO prefix. */
1229 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO
, sizeof(TZZONEINFO
) - 1) == 0) {
/*
 * NOTE(review): the += advances the global itself past the prefix, so
 * gTimeZoneBuffer no longer holds the address returned by uprv_malloc.
 * Any later uprv_free of it, or a repeated call that reuses it as a
 * buffer of MAXPATHLEN + 2 bytes, would be wrong. Confirm.
 */
1230 return (gTimeZoneBuffer
+= sizeof(TZZONEINFO
) - 1);
/* Release the buffer and reset the global (presumably the non-matching path). */
1234 uprv_free(gTimeZoneBuffer
);
1235 gTimeZoneBuffer
= NULL
;
1246 /* Get and set the ICU data directory --------------------------------------- */
1248 static char *gDataDirectory
= NULL
;
1250 static char *gCorrectedPOSIXLocale
= NULL
; /* Heap allocated */
/*
 * Cleanup callback for this file. Frees the heap strings held in
 * gDataDirectory and gCorrectedPOSIXLocale when they are set and resets both
 * globals to NULL. It is the function handed to ucln_common_registerCleanup
 * together with UCLN_COMMON_PUTIL elsewhere in this file.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1253 static UBool U_CALLCONV
putil_cleanup(void)
/* Drop the stored data directory path. */
1255 if (gDataDirectory
) {
1256 uprv_free(gDataDirectory
);
1257 gDataDirectory
= NULL
;
/* Drop the cached corrected POSIX locale ID. */
1260 if (gCorrectedPOSIXLocale
) {
1261 uprv_free(gCorrectedPOSIXLocale
);
1262 gCorrectedPOSIXLocale
= NULL
;
1269 * Set the data directory.
1270 * Make a copy of the passed string, and set the global data dir to point to it.
1271 * TODO: see bug #2849, regarding thread safety.
/*
 * Store a private copy of the given path as the ICU data directory.
 * The copy is made in a fresh allocation of length + 2 bytes; where
 * U_FILE_SEP_CHAR and U_FILE_ALT_SEP_CHAR differ, every alternate separator
 * in the copy is rewritten to U_FILE_SEP_CHAR. The previously stored path is
 * freed, gDataDirectory is pointed at the copy, and putil_cleanup is
 * registered so that it can free the copy later.
 *
 * directory: path to copy. A NULL argument is special-cased below.
 * NOTE(review): the value substituted for NULL is not visible in this excerpt.
 */
1273 U_CAPI
void U_EXPORT2
1274 u_setDataDirectory(const char *directory
) {
1276 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1281 if(directory
==NULL
) {
/*
 * Duplicate the caller string.
 * NOTE(review): no NULL check of the uprv_malloc result is visible before
 * uprv_strcpy writes through it. Confirm.
 */
1284 length
=(int32_t)uprv_strlen(directory
);
1285 newDataDir
= (char *)uprv_malloc(length
+ 2);
1286 uprv_strcpy(newDataDir
, directory
);
1288 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
/* Normalize separators in place: each pass replaces the first remaining alternate separator. */
1289 while(p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) {
1290 *p
= U_FILE_SEP_CHAR
;
/* Replace the old value with the new copy. */
1295 if (gDataDirectory
) {
1296 uprv_free(gDataDirectory
);
1298 gDataDirectory
= newDataDir
;
1299 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
/*
 * Classify a path as absolute or not, returning a UBool.
 * The visible tests are, in order: NULL or empty path, a leading
 * U_FILE_SEP_CHAR, a leading U_FILE_ALT_SEP_CHAR (only where the two
 * separators differ), and a leading ASCII letter combined with a further
 * condition.
 * NOTE(review): the value returned by each branch and the rest of the
 * letter test are not visible in this excerpt.
 */
1303 U_CAPI UBool U_EXPORT2
1304 uprv_pathIsAbsolute(const char *path
)
/* Guard against a NULL pointer and an empty string before reading path[0]. */
1306 if(!path
|| !*path
) {
/* Leading primary separator. */
1310 if(*path
== U_FILE_SEP_CHAR
) {
1314 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
/* Leading alternate separator. */
1315 if(*path
== U_FILE_ALT_SEP_CHAR
) {
/* First character is an ASCII letter, upper or lower case (presumably a drive letter check, TODO confirm). */
1321 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
1322 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
/*
 * Return the ICU data directory.
 * The stored gDataDirectory is consulted first. Otherwise a path is searched
 * for in platform-dependent places: the ICU_DATA environment variable when
 * XP_MAC is not defined, a folder located through the Mac file manager calls
 * below, the Windows registry when ICU_ENABLE_DEPRECATED_WIN_REGISTRY is
 * defined, and the ICU_DATA_DIR compile-time option. The result is passed to
 * u_setDataDirectory and the stored gDataDirectory is returned.
 * NOTE(review): several conditions and assignments between the visible lines
 * are missing from this excerpt, so the exact precedence cannot be confirmed.
 */
1331 U_CAPI
const char * U_EXPORT2
1332 u_getDataDirectory(void) {
1333 const char *path
= NULL
;
/* Scratch buffer that receives the path in the Mac and registry branches. */
1334 char pathBuffer
[1024];
1335 const char *dataDir
;
1337 /* if we have the directory, then return it immediately */
1339 dataDir
= gDataDirectory
;
1346 /* we need to look for it */
1347 pathBuffer
[0] = 0; /* Shuts up compiler warnings about unreferenced */
1348 /* variables when the code using it is ifdefed out */
1349 # if !defined(XP_MAC)
1350 /* first try to get the environment variable */
1351 path
=getenv("ICU_DATA");
/* Mac file manager lookup: current volume, then the application support folder. */
1365 myErr
= HGetVol(xpath
, &volNum
, &dir
);
1367 if(myErr
== noErr
) {
1368 myErr
= FindFolder(volNum
, kApplicationSupportFolderType
, TRUE
, &vRef
, &dir
);
1370 if (myErr
== noErr
) {
1371 myErr
= DirCreate(volNum
,
/* A duplicate-name error from DirCreate is accepted as well as success. */
1375 if( (myErr
== noErr
) || (myErr
== dupFNErr
) ) {
1376 spec
.vRefNum
= volNum
;
1378 uprv_memcpy(spec
.name
, "\pICU", 4);
1380 myErr
= FSpGetFullPath(&spec
, &len
, &full
);
/*
 * NOTE(review): len bytes are copied into the 1024-byte pathBuffer and a
 * terminator is stored at index len; no bound check on len is visible in
 * this excerpt. Confirm.
 */
1384 uprv_memcpy(pathBuffer
, ((char*)(*full
)), len
);
1385 pathBuffer
[len
] = 0;
1387 DisposeHandle(full
);
1396 # if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
1397 /* next, try to read the path from the registry */
1398 if(path
==NULL
|| *path
==0) {
1401 if(ERROR_SUCCESS
==RegOpenKeyEx(HKEY_LOCAL_MACHINE
, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE
, &key
)) {
/* size starts as the capacity of pathBuffer and is updated by the query. */
1402 DWORD type
=REG_EXPAND_SZ
, size
=sizeof(pathBuffer
);
1404 if(ERROR_SUCCESS
==RegQueryValueEx(key
, "Path", NULL
, &type
, (unsigned char *)pathBuffer
, &size
) && size
>1) {
1405 if(type
==REG_EXPAND_SZ
) {
1406 /* replace environment variable references by their values */
/* Same capacity as pathBuffer, so the copy of size bytes below fits. */
1407 char temporaryPath
[1024];
1409 /* copy the path with variables to the temporary one */
1410 uprv_memcpy(temporaryPath
, pathBuffer
, size
);
1412 /* do the replacement and store it in the pathBuffer */
1413 size
=ExpandEnvironmentStrings(temporaryPath
, pathBuffer
, sizeof(pathBuffer
));
/* The expansion is used only when it succeeded and fit into pathBuffer. */
1414 if(size
>0 && size
<sizeof(pathBuffer
)) {
1417 } else if(type
==REG_SZ
) {
1426 /* ICU_DATA_DIR may be set as a compile option */
1427 # ifdef ICU_DATA_DIR
1428 if(path
==NULL
|| *path
==0) {
1434 /* It looks really bad, set it to something. */
/* Store whatever was found and hand back the stored copy. */
1438 u_setDataDirectory(path
);
1439 return gDataDirectory
;
1446 /* Macintosh-specific locale information ------------------------------------ */
1453 int32_t date_region
;
1454 const char* posixID
;
1457 /* Todo: This will be updated with a newer version from www.unicode.org web
1458 page when it's available.*/
1459 #define MAC_LC_MAGIC_NUMBER -5
1460 #define MAC_LC_INIT_NUMBER -9
1462 static const mac_lc_rec mac_lc_recs
[] = {
1463 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 0, "en_US",
1465 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 1, "fr_FR",
1467 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 2, "en_GB",
1469 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 3, "de_DE",
1471 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 4, "it_IT",
1473 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 5, "nl_NL",
1475 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 6, "fr_BE",
1476 /* French for Belgium or Luxembourg*/
1477 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 7, "sv_SE",
1479 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 9, "da_DK",
1481 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 10, "pt_PT",
1483 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 11, "fr_CA",
1485 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 13, "is_IS",
1487 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 14, "ja_JP",
1489 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 15, "en_AU",
1491 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 16, "ar_AE",
1492 /* the Arabic world (?)*/
1493 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 17, "fi_FI",
1495 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 18, "fr_CH",
1496 /* French for Switzerland*/
1497 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 19, "de_CH",
1498 /* German for Switzerland*/
1499 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 20, "el_GR",
1501 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 21, "is_IS",
1503 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1505 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1507 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 24, "tr_TR",
1509 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 25, "sh_YU",
1510 /* Croatian system for Yugoslavia*/
1511 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1512 /* Hindi system for India*/
1513 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1515 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 41, "lt_LT",
1517 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 42, "pl_PL",
1519 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 43, "hu_HU",
1521 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 44, "et_EE",
1523 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 45, "lv_LV",
1525 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1526 /* Lapland [Ask Rich for the data. HS]*/
1527 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1529 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 48, "fa_IR",
1531 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 49, "ru_RU",
1533 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 50, "en_IE",
1535 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 51, "ko_KR",
1537 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 52, "zh_CN",
1538 /* People's Republic of China*/
1539 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 53, "zh_TW",
1541 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 54, "th_TH",
1544 /* fallback is en_US*/
1545 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
,
1546 MAC_LC_MAGIC_NUMBER
, "en_US"
/*
 * Determine the raw POSIX locale ID of the process.
 * The visible sources are the LC_ALL and LANG environment variables and
 * setlocale(LC_ALL, NULL). The value is kept in a function-local static.
 * An ID equal to C, or one containing a space or a slash, is replaced by
 * en_US_POSIX.
 * NOTE(review): the conditions that order these lookups and the return
 * statement are not visible in this excerpt.
 */
1552 /* Return just the POSIX id, whatever happens to be in it */
1553 static const char *uprv_getPOSIXID(void)
/* Persists across calls because it is static. */
1555 static const char* posixID
= NULL
;
1557 posixID
= getenv("LC_ALL");
1559 posixID
= getenv("LANG");
1562 * On Solaris two different calls to setlocale can result in
1563 * different values. Only get this value once.
1565 posixID
= setlocale(LC_ALL
, NULL
);
1572 /* Nothing worked. Give it a nice value. */
/* Reject the plain C locale and composite answers that contain spaces or slashes. */
1575 else if ((uprv_strcmp("C", posixID
) == 0)
1576 || (uprv_strchr(posixID
, ' ') != NULL
)
1577 || (uprv_strchr(posixID
, '/') != NULL
))
1578 { /* HPUX returns 'C C C C C C C' */
1579 /* Solaris can return /en_US/C/C/C/C/C on the second try. */
1580 /* Maybe we got some garbage. Give it a nice value. */
1581 posixID
= "en_US_POSIX";
/*
 * Return the default locale ID of the process as an ICU-style string.
 * The body is selected per platform. The first branch starts from
 * uprv_getPOSIXID(), strips the codepage that follows a dot, folds the
 * variant that follows the last at-sign into the ID with underscores, and
 * caches the result in gCorrectedPOSIXLocale. The WIN32 branch converts the
 * thread LCID with uprv_convertToPosix. The XP_MAC branch searches
 * mac_lc_recs. A further branch reads LC_ALL and LANG directly, and the
 * OS400 branch rewrites the locale object name in a static buffer.
 * NOTE(review): many lines, including some comment delimiters and every
 * return except the ones shown, are missing from this excerpt.
 */
1587 /* NOTE: The caller should handle thread safety */
1588 U_CAPI
const char* U_EXPORT2
1589 uprv_getDefaultLocaleID()
1593 Note that: (a '!' means the ID is improper somehow)
1594 LC_ALL ----> default_loc codepage
1595 --------------------------------------------------------
1600 ab_CD.EF@GH ab_CD_GH EF
1602 Some 'improper' ways to do the same as above:
1603 ! ab_CD@GH.EF ab_CD_GH EF
1604 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1605 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1610 The variant cannot have dots in it.
1611 The 'rightmost' variant (@xxx) wins.
1612 The leftmost codepage (.xxx) wins.
/* Working copy of the ID; stays null until a correction is needed. */
1614 char *correctedPOSIXLocale
= 0;
1615 const char* posixID
= uprv_getPOSIXID();
1620 /* Format: (no spaces)
1621 ll [ _CC ] [ . MM ] [ @ VV]
1623 l = lang, C = ctry, M = charmap, V = variant
1626 if (gCorrectedPOSIXLocale
!= NULL
) {
1627 return gCorrectedPOSIXLocale
;
1630 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1631 /* assume new locale can't be larger than old one? */
/*
 * NOTE(review): the buffer has exactly uprv_strlen(posixID) bytes, with no
 * extra byte for the terminator. The prefix copy below fits because it stops
 * at the dot, but the variant handling further down appends to this same
 * buffer and can need more room (for example an input with an empty
 * codepage directly before the at-sign). Confirm and enlarge.
 */
1632 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
));
1633 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1634 correctedPOSIXLocale
[p
-posixID
] = 0;
1636 /* do not copy after the @ */
1637 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1638 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1642 /* Note that we scan the *uncorrected* ID. */
/* The last at-sign of the original ID marks the variant. */
1643 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1644 if (correctedPOSIXLocale
== NULL
) {
/*
 * NOTE(review): this allocation is uprv_strlen(posixID) bytes with no room
 * for the terminator, yet the code below appends one or two underscores and
 * then the variant. Per the inline example, aa@b becomes aa__b, which is
 * longer than the input, so the appends write past the allocation. Looks
 * like a heap overflow; confirm and size the buffer as the input length
 * plus two.
 */
1645 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
));
1646 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1647 correctedPOSIXLocale
[p
-posixID
] = 0;
1651 /* Take care of any special cases here.. */
1652 if (!uprv_strcmp(p
, "nynorsk")) {
1655 /* Should we assume no_NO_NY instead of possible no__NY?
1656 * if (!uprv_strcmp(correctedPOSIXLocale, "no")) {
1657 * uprv_strcpy(correctedPOSIXLocale, "no_NO");
1662 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1663 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1666 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
/* A dot inside the variant ends it: only the part before the dot is appended. */
1669 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1670 /* How big will the resulting string be? */
1671 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1672 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1673 correctedPOSIXLocale
[len
] = 0;
1676 /* Anything following the @ sign */
1677 uprv_strcat(correctedPOSIXLocale
, p
);
1680 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1681 * How about 'russian' -> 'ru'?
1685 /* Was a correction made? */
1686 if (correctedPOSIXLocale
!= NULL
) {
1687 posixID
= correctedPOSIXLocale
;
1690 /* copy it, just in case the original pointer goes away. See j2395 */
/* This allocation does include the extra byte for the terminator. */
1691 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1692 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
/* Publish the string in the global only if nothing is stored there yet, and arrange for cleanup. */
1695 if (gCorrectedPOSIXLocale
== NULL
) {
1696 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1697 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1698 correctedPOSIXLocale
= NULL
;
1701 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1702 uprv_free(correctedPOSIXLocale
);
1707 #elif defined(WIN32)
/* Convert the locale identifier of the calling thread to a POSIX-style ID. */
1708 UErrorCode status
= U_ZERO_ERROR
;
1709 LCID id
= GetThreadLocale();
1710 const char* locID
= uprv_convertToPosix(id
, &status
);
1712 if (U_FAILURE(status
)) {
1717 #elif defined(XP_MAC)
/* All four keys start at MAC_LC_INIT_NUMBER; only date_region is assigned in the visible lines. */
1718 int32_t script
= MAC_LC_INIT_NUMBER
;
1719 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1720 int32_t region
= MAC_LC_INIT_NUMBER
;
1721 /* = GetScriptManagerVariable(smRegionCode);*/
1722 int32_t lang
= MAC_LC_INIT_NUMBER
;
1723 /* = GetScriptManagerVariable(smScriptLang);*/
1724 int32_t date_region
= MAC_LC_INIT_NUMBER
;
1725 const char* posixID
= 0;
1726 int32_t count
= sizeof(mac_lc_recs
) / sizeof(mac_lc_rec
);
1730 ih
= (Intl1Hndl
) GetIntlResource(1);
/* The region code is taken from the high byte of intl1Vers. */
1732 date_region
= ((uint16_t)(*ih
)->intl1Vers
) >> 8;
/* Table scan: a field matches when the table holds MAC_LC_MAGIC_NUMBER or the same value as the key. */
1734 for (i
= 0; i
< count
; i
++) {
1735 if ( ((mac_lc_recs
[i
].script
== MAC_LC_MAGIC_NUMBER
)
1736 || (mac_lc_recs
[i
].script
== script
))
1737 && ((mac_lc_recs
[i
].region
== MAC_LC_MAGIC_NUMBER
)
1738 || (mac_lc_recs
[i
].region
== region
))
1739 && ((mac_lc_recs
[i
].lang
== MAC_LC_MAGIC_NUMBER
)
1740 || (mac_lc_recs
[i
].lang
== lang
))
1741 && ((mac_lc_recs
[i
].date_region
== MAC_LC_MAGIC_NUMBER
)
1742 || (mac_lc_recs
[i
].date_region
== date_region
))
1745 posixID
= mac_lc_recs
[i
].posixID
;
/* Another platform branch (its opening directive is not visible): LC_ALL first, then LANG. */
1755 locID
= getenv("LC_ALL");
1756 if (!locID
|| !*locID
)
1757 locID
= getenv("LANG");
1758 if (!locID
|| !*locID
) {
/* Case-insensitive aliases that are mapped to en_US_POSIX. */
1761 if (!stricmp(locID
, "c") || !stricmp(locID
, "posix") ||
1762 !stricmp(locID
, "univ"))
1763 locID
= "en_US_POSIX";
1766 #elif defined(OS400)
1767 /* locales are process scoped and are by definition thread safe */
1768 static char correctedLocale
[64];
1769 const char *localeID
= getenv("LC_ALL");
1772 if (localeID
== NULL
)
1773 localeID
= getenv("LANG");
1774 if (localeID
== NULL
)
1775 localeID
= setlocale(LC_ALL
, NULL
);
1776 /* Make sure we have something... */
1777 if (localeID
== NULL
)
1778 return "en_US_POSIX";
1780 /* Extract the locale name from the path. */
1781 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1783 /* Increment p to start of locale name. */
1788 /* Copy to work location. */
/*
 * NOTE(review): unbounded uprv_strcpy into the 64-byte static buffer. The
 * source comes from the environment or setlocale, and no length check is
 * visible in this excerpt. Confirm.
 */
1789 uprv_strcpy(correctedLocale
, localeID
);
1791 /* Strip off the '.locale' extension. */
1792 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1796 /* Upper case the locale name. */
1797 T_CString_toUpperCase(correctedLocale
);
1799 /* See if we are using the POSIX locale. Any of the
1800 * following are equivalent and use the same QLGPGCMA
1803 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1804 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1805 (uprv_strcmp("QLGPGCMA", correctedLocale
) == 0))
1807 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1813 /* Lower case the lang portion. */
1814 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1816 *p
= uprv_tolower(*p
);
1819 /* Adjust for Euro. After '_E' add 'URO'. */
/*
 * NOTE(review): the suffix tests below index LocaleLen - 2 and LocaleLen - 1.
 * For a name shorter than two characters these indexes fall before the
 * start of the buffer; no length guard is visible in this excerpt. Confirm.
 */
1820 LocaleLen
= uprv_strlen(correctedLocale
);
1821 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1822 correctedLocale
[LocaleLen
- 1] == 'E')
1824 uprv_strcat(correctedLocale
, "URO");
1827 /* If using Lotus-based locale then convert to
1828 * equivalent non Lotus.
1830 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1831 correctedLocale
[LocaleLen
- 1] == 'L')
1833 correctedLocale
[LocaleLen
- 2] = 0;
1836 /* There are separate simplified and traditional
1837 * locales called zh_HK_S and zh_HK_T.
1839 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1841 uprv_strcpy(correctedLocale
, "zh_HK");
1844 /* A special zh_CN_GBK locale...
1846 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1848 uprv_strcpy(correctedLocale
, "zh_CN");
1853 return correctedLocale
;
/*
 * Platform-specific lookup of the default codepage name; its result is
 * consumed by uprv_getDefaultCodepage.
 * Visible branches: a job-information query that formats an ibm-NNN name
 * from a CCSID (presumably the OS400 branch; its opening directive is not
 * visible), OS390 via nl_langinfo(CODESET), XP_MAC with a fixed name, WIN32
 * via GetACP, and U_POSIX_LOCALE, which tries setlocale(LC_CTYPE), then
 * nl_langinfo, then the ID from uprv_getPOSIXID, and finally falls back to
 * US-ASCII. Each branch keeps its answer in a static buffer.
 * NOTE(review): the return type and the return statements are not visible
 * in this excerpt.
 */
1860 int_getDefaultCodepage()
1863 uint32_t ccsid
= 37; /* Default to ibm-37 */
1864 static char codepage
[64];
1865 Qwc_JOBI0400_t jobinfo
;
1866 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
/* Retrieve the job information in JOBI0400 format. */
1868 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
/* Only when the call reported no error: prefer the job CCSID, then the default CCSID, skipping the 0xFFFF value. */
1871 if (error
.Bytes_Available
== 0) {
1872 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
1873 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
1875 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
1876 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
1878 /* else use the default */
/* NOTE(review): ccsid is a uint32_t but is formatted with the signed int conversion. Confirm the intended specifier. */
1880 sprintf(codepage
,"ibm-%d", ccsid
);
1883 #elif defined(OS390)
1884 static char codepage
[64];
/* NOTE(review): unbounded sprintf of the nl_langinfo result plus the option suffix into 64 bytes. Confirm the maximum length. */
1885 sprintf(codepage
,"%s" UCNV_SWAP_LFNL_OPTION_STRING
, nl_langinfo(CODESET
));
1888 #elif defined(XP_MAC)
1889 return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1891 #elif defined(WIN32)
1892 static char codepage
[64];
/* Name built from the active code page number. */
1893 sprintf(codepage
, "windows-%d", GetACP());
1896 #elif U_POSIX_LOCALE
1897 static char codesetName
[100];
1900 const char *localeName
= NULL
;
1902 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1904 /* Check setlocale before the environment variables
1905 because the application may have set it first */
1906 /* setlocale needs "" and not NULL for Linux and Solaris */
1907 localeName
= setlocale(LC_CTYPE
, "");
1908 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
1909 /* strip the locale name and look at the suffix only */
/* Bounded copy; the explicit store on the next line guarantees termination when the source fills the buffer. */
1910 name
= uprv_strncpy(codesetName
, name
+1, sizeof(codesetName
));
1911 codesetName
[sizeof(codesetName
)-1] = 0;
/* Look for an at-sign modifier inside the copied suffix. */
1912 if ((euro
= (uprv_strchr(name
, '@'))) != NULL
) {
1915 /* if we can find the codeset name from setlocale, return that. */
1921 #if U_HAVE_NL_LANGINFO_CODESET
/* Start the second attempt from an empty buffer. */
1923 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1925 /* When available, check nl_langinfo first because it usually gives more
1926 useful names. It depends on LC_CTYPE and not LANG or LC_ALL */
1928 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
1929 if (codeset
!= NULL
) {
1930 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
1931 codesetName
[sizeof(codesetName
)-1] = 0;
1937 /* Try a locale specified by the user.
1938 This is usually underspecified and usually checked by setlocale already. */
/* Start the third attempt from an empty buffer. */
1940 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1942 localeName
= uprv_getPOSIXID();
1943 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
1944 /* strip the locale name and look at the suffix only */
1945 name
= uprv_strncpy(codesetName
, name
+1, sizeof(codesetName
));
1946 codesetName
[sizeof(codesetName
)-1] = 0;
1947 if ((euro
= (uprv_strchr(name
, '@'))) != NULL
) {
1950 /* if we can find the codeset name, return that. */
/* Still empty after every attempt: use the fixed fallback name. */
1956 if (*codesetName
== 0)
1958 /* if the table lookup failed, return US ASCII (ISO 646). */
1959 uprv_strcpy(codesetName
, "US-ASCII");
/*
 * Public accessor for the default codepage name.
 * Holds the name in a function-local static pointer that is filled from
 * int_getDefaultCodepage().
 * NOTE(review): the condition guarding the assignment and the return
 * statement are not visible in this excerpt; presumably the lookup runs only
 * while the pointer is still NULL.
 */
1968 U_CAPI
const char* U_EXPORT2
1969 uprv_getDefaultCodepage()
/* Persists across calls because it is static. */
1971 static char const *name
= NULL
;
1974 name
= int_getDefaultCodepage();
1981 /* end of platform-specific implementation -------------- */
1983 /* version handling --------------------------------------------------------- */
/*
 * Parse a delimited decimal version string into versionArray.
 * Fields are read with uprv_strtoul in base 10 and stored one per array
 * slot. Parsing stops when no digits were consumed, when
 * U_MAX_VERSION_LENGTH fields have been stored, or when the character after
 * a field is not U_VERSION_DELIMITER. Slots that were not filled are set to
 * zero.
 *
 * versionArray:  destination; a NULL value is special-cased by the first test.
 * versionString: text to parse; parsing is skipped when it is NULL, in which
 *                case the zero-fill loop below is the only visible write.
 */
1985 U_CAPI
void U_EXPORT2
1986 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
1990 if(versionArray
==NULL
) {
1994 if(versionString
!=NULL
) {
/* NOTE(review): the cast to uint8_t silently truncates field values above 255. */
1996 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
/* Note that part is incremented as a side effect of the second test. */
1997 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
/* Step over the delimiter to the start of the next field. */
2000 versionString
=end
+1;
/* Zero the remaining slots. */
2004 while(part
<U_MAX_VERSION_LENGTH
) {
2005 versionArray
[part
++]=0;
/*
 * Format versionArray as decimal fields separated by U_VERSION_DELIMITER
 * and write them through versionString.
 * Trailing zero fields are not counted by the loop below. Each field is
 * emitted digit by digit: hundreds, tens, ones.
 *
 * versionArray:  source fields; a NULL value is special-cased.
 * versionString: output buffer; a NULL value is special-cased. No buffer
 *                size is passed, so the caller has to supply enough room.
 * NOTE(review): the conditions around the hundreds and tens digits, any
 * minimum field count, and the final terminator are not visible in this
 * excerpt.
 */
2009 U_CAPI
void U_EXPORT2
2010 u_versionToString(UVersionInfo versionArray
, char *versionString
) {
2011 uint16_t count
, part
;
2014 if(versionString
==NULL
) {
2018 if(versionArray
==NULL
) {
2023 /* count how many fields need to be written */
/* NOTE(review): the literal 4 is used here while u_versionFromString uses U_MAX_VERSION_LENGTH. Confirm that they agree. */
2024 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
2031 /* write the first part */
2032 /* write the decimal field value */
2033 field
=versionArray
[0];
2035 *versionString
++=(char)('0'+field
/100);
2039 *versionString
++=(char)('0'+field
/10);
2042 *versionString
++=(char)('0'+field
);
2044 /* write the following parts */
2045 for(part
=1; part
<count
; ++part
) {
2046 /* write a dot first */
2047 *versionString
++=U_VERSION_DELIMITER
;
2049 /* write the decimal field value */
2050 field
=versionArray
[part
];
2052 *versionString
++=(char)('0'+field
/100);
2056 *versionString
++=(char)('0'+field
/10);
2059 *versionString
++=(char)('0'+field
);
/*
 * Fill versionArray with the version of this ICU build by parsing the
 * U_ICU_VERSION string with u_versionFromString.
 */
2066 U_CAPI
void U_EXPORT2
2067 u_getVersion(UVersionInfo versionArray
) {
2068 u_versionFromString(versionArray
, U_ICU_VERSION
);
2072 * Hey, Emacs, please set the following:
2075 * indent-tabs-mode: nil