2 ******************************************************************************
4 * Copyright (C) 1997-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
40 /* Define _XOPEN_SOURCE for Solaris and friends. */
41 /* NetBSD needs it to be >= 4 */
42 #if !defined(_XOPEN_SOURCE)
43 #if __STDC_VERSION__ >= 199901L
44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45 #define _XOPEN_SOURCE 600
47 #define _XOPEN_SOURCE 4
51 /* Make sure things like readlink and such functions work.
52 Poorly upgraded Solaris machines can't have this defined.
53 Cleanly installed Solaris can use this #define.
55 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
56 #define _XOPEN_SOURCE_EXTENDED 1
59 /* include ICU headers */
60 #include "unicode/utypes.h"
61 #include "unicode/putil.h"
62 #include "unicode/ustring.h"
71 /* Include standard headers. */
80 /* include system headers */
82 # define WIN32_LEAN_AND_MEAN
90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
91 /* tzset isn't defined in strict ANSI on Cygwin. */
92 # undef __STRICT_ANSI__
95 # include <qusec.h> /* error code structure */
96 # include <qusrjobi.h>
97 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
98 # include <mih/testptr.h> /* For uprv_maximumPtr */
101 # include <IntlResources.h>
103 # include <Folders.h>
104 # include <MacTypes.h>
105 # include <TextUtils.h>
106 # define ICU_NO_USER_DATA_OVERRIDE 1
108 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
113 #include <sys/neutrino.h>
116 #if defined(U_DARWIN)
117 #include <TargetConditionals.h>
121 #include <sys/time.h>
125 * Only include langinfo.h if we have a way to get the codeset. If we later
126 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
130 #if U_HAVE_NL_LANGINFO_CODESET
131 #include <langinfo.h>
134 /* Define the extension for data files, again... */
135 #define DATA_TYPE "dat"
137 /* Leave this copyright notice here! */
138 static const char copyright
[] = U_COPYRIGHT_STRING
;
140 /* floating point implementations ------------------------------------------- */
142 /* We return QNAN rather than SNAN*/
143 #define SIGN 0x80000000U
145 /* Make it easy to define certain types of constants */
147 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
149 } BitPatternConversion
;
150 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
151 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
153 /*---------------------------------------------------------------------------
155 Our general strategy is to assume we're on a POSIX platform. Platforms which
156 are non-POSIX must declare themselves so. The default POSIX implementation
157 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
159 ---------------------------------------------------------------------------*/
161 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
162 # undef U_POSIX_LOCALE
164 # define U_POSIX_LOCALE 1
168 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
169 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
173 u_topNBytesOfDouble(double* d
, int n
)
178 return (char*)(d
+ 1) - n
;
184 u_bottomNBytesOfDouble(double* d
, int n
)
187 return (char*)(d
+ 1) - n
;
193 #if defined (U_DEBUG_FAKETIME)
194 /* Override the clock to test things without having to move the system clock.
195 * Assumes POSIX gettimeofday() will function
197 UDate fakeClock_t0
= 0; /** Time to start the clock from **/
198 UDate fakeClock_dt
= 0; /** Offset (fake time - real time) **/
199 UBool fakeClock_set
= FALSE
; /** True if fake clock has spun up **/
200 static UMTX fakeClockMutex
= NULL
;
202 static UDate
getUTCtime_real() {
203 struct timeval posixTime
;
204 gettimeofday(&posixTime
, NULL
);
205 return (UDate
)(((int64_t)posixTime
.tv_sec
* U_MILLIS_PER_SECOND
) + (posixTime
.tv_usec
/1000));
208 static UDate
getUTCtime_fake() {
209 umtx_lock(&fakeClockMutex
);
211 UDate real
= getUTCtime_real();
212 const char *fake_start
= getenv("U_FAKETIME_START");
213 if(fake_start
!=NULL
) {
214 sscanf(fake_start
,"%lf",&fakeClock_t0
);
216 fakeClock_dt
= fakeClock_t0
- real
;
217 fprintf(stderr
,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
218 "U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
219 fakeClock_t0
, fake_start
, fakeClock_dt
, real
);
220 fakeClock_set
= TRUE
;
222 umtx_unlock(&fakeClockMutex
);
224 return getUTCtime_real() + fakeClock_dt
;
228 #if defined(U_WINDOWS)
232 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
234 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
235 #define EPOCH_BIAS INT64_C(116444736000000000)
236 #define HECTONANOSECOND_PER_MILLISECOND 10000
240 /*---------------------------------------------------------------------------
241 Universal Implementations
242 These are designed to work on all platforms. Try these, and if they
243 don't work on your platform, then special case your platform with new
245 ---------------------------------------------------------------------------*/
247 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
248 U_CAPI UDate U_EXPORT2
251 #if defined(U_DEBUG_FAKETIME)
252 return getUTCtime_fake(); /* Hook for overriding the clock */
253 #elif defined(XP_MAC)
257 uprv_memset( &tmrec
, 0, sizeof(tmrec
) );
261 t1
= mktime(&tmrec
); /* seconds of 1/1/1970*/
264 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
265 t2
= mktime(&tmrec
); /* seconds of current GMT*/
266 return (UDate
)(t2
- t1
) * U_MILLIS_PER_SECOND
; /* GMT (or UTC) in seconds since 1970*/
267 #elif defined(U_WINDOWS)
269 FileTimeConversion winTime
;
270 GetSystemTimeAsFileTime(&winTime
.fileTime
);
271 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
274 struct timeval posixTime;
275 gettimeofday(&posixTime, NULL);
276 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
280 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
284 /*-----------------------------------------------------------------------------
286 These methods detect and return NaN and infinity values for doubles
287 conforming to IEEE 754. Platforms which support this standard include X86,
288 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
289 If this doesn't work on your platform, you have non-IEEE floating-point, and
290 will need to code your own versions. A naive implementation is to return 0.0
291 for getNaN and getInfinity, and false for isNaN and isInfinite.
292 ---------------------------------------------------------------------------*/
294 U_CAPI UBool U_EXPORT2
295 uprv_isNaN(double number
)
298 BitPatternConversion convertedNumber
;
299 convertedNumber
.d64
= number
;
300 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
301 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
304 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
306 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
309 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
310 (lowBits
== 0x00000000L
);
313 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
314 /* you'll need to replace this default implementation with what's correct*/
315 /* for your platform.*/
316 return number
!= number
;
320 U_CAPI UBool U_EXPORT2
321 uprv_isInfinite(double number
)
324 BitPatternConversion convertedNumber
;
325 convertedNumber
.d64
= number
;
326 /* Infinity is exactly 0x7FF0000000000000U. */
327 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
329 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
331 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
334 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
337 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
338 /* value, you'll need to replace this default implementation with what's*/
339 /* correct for your platform.*/
340 return number
== (2.0 * number
);
344 U_CAPI UBool U_EXPORT2
345 uprv_isPositiveInfinity(double number
)
347 #if IEEE_754 || defined(OS390)
348 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
350 return uprv_isInfinite(number
);
354 U_CAPI UBool U_EXPORT2
355 uprv_isNegativeInfinity(double number
)
357 #if IEEE_754 || defined(OS390)
358 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
361 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
363 return((highBits
& SIGN
) && uprv_isInfinite(number
));
368 U_CAPI
double U_EXPORT2
371 #if IEEE_754 || defined(OS390)
374 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
375 /* you'll need to replace this default implementation with what's correct*/
376 /* for your platform.*/
381 U_CAPI
double U_EXPORT2
384 #if IEEE_754 || defined(OS390)
387 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
388 /* value, you'll need to replace this default implementation with what's*/
389 /* correct for your platform.*/
394 U_CAPI
double U_EXPORT2
400 U_CAPI
double U_EXPORT2
406 U_CAPI
double U_EXPORT2
409 return uprv_floor(x
+ 0.5);
412 U_CAPI
double U_EXPORT2
418 U_CAPI
double U_EXPORT2
419 uprv_modf(double x
, double* y
)
424 U_CAPI
double U_EXPORT2
425 uprv_fmod(double x
, double y
)
430 U_CAPI
double U_EXPORT2
431 uprv_pow(double x
, double y
)
433 /* This is declared as "double pow(double x, double y)" */
437 U_CAPI
double U_EXPORT2
438 uprv_pow10(int32_t x
)
440 return pow(10.0, (double)x
);
443 U_CAPI
double U_EXPORT2
444 uprv_fmax(double x
, double y
)
449 /* first handle NaN*/
450 if(uprv_isNaN(x
) || uprv_isNaN(y
))
451 return uprv_getNaN();
453 /* check for -0 and 0*/
454 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&x
, sizeof(uint32_t));
455 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
460 /* this should work for all flt point w/o NaN and Inf special cases */
461 return (x
> y
? x
: y
);
464 U_CAPI
double U_EXPORT2
465 uprv_fmin(double x
, double y
)
470 /* first handle NaN*/
471 if(uprv_isNaN(x
) || uprv_isNaN(y
))
472 return uprv_getNaN();
474 /* check for -0 and 0*/
475 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&y
, sizeof(uint32_t));
476 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
481 /* this should work for all flt point w/o NaN and Inf special cases */
482 return (x
> y
? y
: x
);
486 * Truncates the given double.
487 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
488 * This is different than calling floor() or ceil():
489 * floor(3.3) = 3, floor(-3.3) = -4
490 * ceil(3.3) = 4, ceil(-3.3) = -3
492 U_CAPI
double U_EXPORT2
498 /* handle error cases*/
500 return uprv_getNaN();
501 if(uprv_isInfinite(d
))
502 return uprv_getInfinity();
504 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&d
, sizeof(uint32_t));
505 if( (d
== 0.0 && (lowBits
& SIGN
)) || d
< 0)
511 return d
>= 0 ? floor(d
) : ceil(d
);
517 * Return the largest positive number that can be represented by an integer
518 * type of arbitrary bit length.
520 U_CAPI
double U_EXPORT2
521 uprv_maxMantissa(void)
523 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
526 U_CAPI
double U_EXPORT2
532 U_CAPI
void * U_EXPORT2
533 uprv_maximumPtr(void * base
)
537 * With the provided function we should never be out of range of a given segment
538 * (a traditional/typical segment that is). Our segments have 5 bytes for the
539 * id and 3 bytes for the offset. The key is that the casting takes care of
540 * only retrieving the offset portion minus x1000. Hence, the smallest offset
541 * seen in a program is x001000 and when casted to an int would be 0.
542 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
544 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
545 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
546 * This function determines the activation based on the pointer that is passed in and
547 * calculates the appropriate maximum available size for
548 * each pointer type (TERASPACE and non-TERASPACE)
550 * Unlike other operating systems, the pointer model isn't determined at
551 * compile time on i5/OS.
553 if ((base
!= NULL
) && (_TESTPTR(base
, _C_TERASPACE_CHECK
))) {
554 /* if it is a TERASPACE pointer the max is 2GB - 4k */
555 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0x7fffefff)));
557 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
558 return ((void *)(((char *)base
)-((uint32_t)(base
))+((uint32_t)0xffefff)));
561 return U_MAX_PTR(base
);
565 /*---------------------------------------------------------------------------
566 Platform-specific Implementations
567 Try these, and if they don't work on your platform, then special case your
568 platform with new implementations.
569 ---------------------------------------------------------------------------*/
571 /* Generic time zone layer -------------------------------------------------- */
573 /* Time zone utilities */
574 U_CAPI
void U_EXPORT2
580 /* no initialization*/
584 U_CAPI
int32_t U_EXPORT2
596 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
597 dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
598 t1
= mktime(&tmrec
); /* local time in seconds*/
599 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
600 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
602 /* imitate NT behaviour, which returns same timezone offset to GMT for
610 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
611 some platforms need to have it declared here. */
613 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
614 /* RS6000 and others reject char **tzname. */
615 extern U_IMPORT
char *U_TZNAME
[];
618 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
619 /* These platforms are likely to use Olson timezone IDs. */
620 #define CHECK_LOCALTIME_LINK 1
621 #if defined(U_DARWIN)
623 #define TZZONEINFO (TZDIR "/")
625 #define TZDEFAULT "/etc/localtime"
626 #define TZZONEINFO "/usr/share/zoneinfo/"
628 static char gTimeZoneBuffer
[PATH_MAX
];
629 static char *gTimeZoneBufferPtr
= NULL
;
633 #define isNonDigit(ch) (ch < '0' || '9' < ch)
634 static UBool
isValidOlsonID(const char *id
) {
637 /* Determine if this is something like Iceland (Olson ID)
638 or AST4ADT (non-Olson ID) */
639 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
643 /* If we went through the whole string, then it might be okay.
644 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
645 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
646 The rest of the time it could be an Olson ID. George */
647 return (UBool
)(id
[idx
] == 0
648 || uprv_strcmp(id
, "PST8PDT") == 0
649 || uprv_strcmp(id
, "MST7MDT") == 0
650 || uprv_strcmp(id
, "CST6CDT") == 0
651 || uprv_strcmp(id
, "EST5EDT") == 0);
655 #if defined(U_TZNAME) && !defined(U_WINDOWS)
657 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
658 typedef struct OffsetZoneMapping
{
659 int32_t offsetSeconds
;
660 int32_t daylightType
; /* 1=daylight in June, 2=daylight in December*/
667 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
668 and maps it to an Olson ID.
669 Before adding anything to this list, take a look at
670 icu/source/tools/tzcode/tz.alias
671 Sometimes no daylight savings (0) is important to define due to aliases.
672 This list can be tested with icu/source/test/compat/tzone.pl
673 More values could be added to daylightType to increase precision.
675 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS
[] = {
676 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
677 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
678 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
679 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
680 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
681 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
682 {-36000, 2, "EST", "EST", "Australia/Sydney"},
683 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
684 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
685 {-34200, 2, "CST", "CST", "Australia/South"},
686 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
687 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
688 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
689 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
690 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
691 {-28800, 2, "WST", "WST", "Australia/West"},
692 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
693 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
694 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
695 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
696 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
697 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
698 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
699 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
700 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
701 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
702 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
703 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
704 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
705 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
706 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
707 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
708 {0, 1, "GMT", "IST", "Europe/Dublin"},
709 {0, 1, "GMT", "BST", "Europe/London"},
710 {0, 0, "WET", "WEST", "Africa/Casablanca"},
711 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
712 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
713 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
714 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
715 {10800, 2, "UYT", "UYST", "America/Montevideo"},
716 {10800, 1, "WGT", "WGST", "America/Godthab"},
717 {10800, 2, "BRT", "BRST", "Brazil/East"},
718 {12600, 1, "NST", "NDT", "America/St_Johns"},
719 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
720 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
721 {14400, 2, "CLT", "CLST", "Chile/Continental"},
722 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
723 {14400, 2, "PYT", "PYST", "America/Asuncion"},
724 {18000, 1, "CST", "CDT", "America/Havana"},
725 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
726 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
727 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
728 {21600, 0, "CST", "CDT", "America/Guatemala"},
729 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
730 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
731 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
732 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
733 {32400, 1, "AKST", "AKDT", "US/Alaska"},
734 {36000, 1, "HAST", "HADT", "US/Aleutian"}
737 /*#define DEBUG_TZNAME*/
739 static const char* remapShortTimeZone(const char *stdID
, const char *dstID
, int32_t daylightType
, int32_t offset
)
743 fprintf(stderr
, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID
, dstID
, daylightType
, offset
);
745 for (idx
= 0; idx
< (int32_t)sizeof(OFFSET_ZONE_MAPPINGS
)/sizeof(OFFSET_ZONE_MAPPINGS
[0]); idx
++)
747 if (offset
== OFFSET_ZONE_MAPPINGS
[idx
].offsetSeconds
748 && daylightType
== OFFSET_ZONE_MAPPINGS
[idx
].daylightType
749 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].stdID
, stdID
) == 0
750 && strcmp(OFFSET_ZONE_MAPPINGS
[idx
].dstID
, dstID
) == 0)
752 return OFFSET_ZONE_MAPPINGS
[idx
].olsonID
;
759 U_CAPI
const char* U_EXPORT2
762 const char *tzid
= NULL
;
764 tzid
= uprv_detectWindowsTimeZone();
771 /*#if defined(U_DARWIN)
774 tzid = getenv("TZFILE");
780 /* This code can be temporarily disabled to test tzname resolution later on. */
783 if (tzid
!= NULL
&& isValidOlsonID(tzid
))
785 /* This might be a good Olson ID. */
786 if (uprv_strncmp(tzid
, "posix/", 6) == 0
787 || uprv_strncmp(tzid
, "right/", 6) == 0)
789 /* Remove the posix/ or right/ prefix. */
794 /* else U_TZNAME will give a better result. */
797 #if defined(CHECK_LOCALTIME_LINK)
798 /* Caller must handle threading issues */
799 if (gTimeZoneBufferPtr
== NULL
) {
801 This is a trick to look at the name of the link to get the Olson ID
802 because the tzfile contents is underspecified.
803 This isn't guaranteed to work because it may not be a symlink.
805 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
));
807 int32_t tzZoneInfoLen
= uprv_strlen(TZZONEINFO
);
808 gTimeZoneBuffer
[ret
] = 0;
809 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO
, tzZoneInfoLen
) == 0
810 && isValidOlsonID(gTimeZoneBuffer
+ tzZoneInfoLen
))
812 return (gTimeZoneBufferPtr
= gTimeZoneBuffer
+ tzZoneInfoLen
);
817 return gTimeZoneBufferPtr
;
823 #if !defined(U_WINDOWS)
825 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
826 So we remap the abbreviation to an olson ID.
828 Since Windows exposes a little more timezone information,
829 we normally don't use this code on Windows because
830 uprv_detectWindowsTimeZone should have already given the correct answer.
833 struct tm juneSol
, decemberSol
;
835 static const time_t juneSolstice
=1182478260; /*2007-06-21 18:11 UT*/
836 static const time_t decemberSolstice
=1198332540; /*2007-12-22 06:09 UT*/
838 /* This probing will tell us when daylight savings occurs. */
839 localtime_r(&juneSolstice
, &juneSol
);
840 localtime_r(&decemberSolstice
, &decemberSol
);
841 daylightType
= ((decemberSol
.tm_isdst
> 0) << 1) | (juneSol
.tm_isdst
> 0);
842 tzid
= remapShortTimeZone(U_TZNAME
[0], U_TZNAME
[1], daylightType
, uprv_timezone());
854 /* Get and set the ICU data directory --------------------------------------- */
856 static char *gDataDirectory
= NULL
;
858 static char *gCorrectedPOSIXLocale
= NULL
; /* Heap allocated */
861 static UBool U_CALLCONV
putil_cleanup(void)
863 if (gDataDirectory
&& *gDataDirectory
) {
864 uprv_free(gDataDirectory
);
866 gDataDirectory
= NULL
;
868 if (gCorrectedPOSIXLocale
) {
869 uprv_free(gCorrectedPOSIXLocale
);
870 gCorrectedPOSIXLocale
= NULL
;
877 * Set the data directory.
878 * Make a copy of the passed string, and set the global data dir to point to it.
879 * TODO: see bug #2849, regarding thread safety.
881 U_CAPI
void U_EXPORT2
882 u_setDataDirectory(const char *directory
) {
886 if(directory
==NULL
|| *directory
==0) {
887 /* A small optimization to prevent the malloc and copy when the
888 shared library is used, and this is a way to make sure that NULL
891 newDataDir
= (char *)"";
894 length
=(int32_t)uprv_strlen(directory
);
895 newDataDir
= (char *)uprv_malloc(length
+ 2);
896 /* Exit out if newDataDir could not be created. */
897 if (newDataDir
== NULL
) {
900 uprv_strcpy(newDataDir
, directory
);
902 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
905 while(p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) {
906 *p
= U_FILE_SEP_CHAR
;
913 if (gDataDirectory
&& *gDataDirectory
) {
914 uprv_free(gDataDirectory
);
916 gDataDirectory
= newDataDir
;
917 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
921 U_CAPI UBool U_EXPORT2
922 uprv_pathIsAbsolute(const char *path
)
924 if(!path
|| !*path
) {
928 if(*path
== U_FILE_SEP_CHAR
) {
932 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
933 if(*path
== U_FILE_ALT_SEP_CHAR
) {
938 #if defined(U_WINDOWS)
939 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
940 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
949 U_CAPI
const char * U_EXPORT2
950 u_getDataDirectory(void) {
951 const char *path
= NULL
;
952 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
953 const char *simulator_root
= NULL
;
954 char datadir_path_buffer
[PATH_MAX
];
957 /* if we have the directory, then return it immediately */
958 UMTX_CHECK(NULL
, gDataDirectory
, path
);
965 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
966 override ICU's data with the ICU_DATA environment variable. This prevents
967 problems where multiple custom copies of ICU's specific version of data
968 are installed on a system. Either the application must define the data
969 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
970 ICU, set the data with udata_setCommonData or trust that all of the
971 required data is contained in ICU's data library that contains
972 the entry point defined by U_ICUDATA_ENTRY_POINT.
974 There may also be some platforms where environment variables
977 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
978 /* First try to get the environment variable */
979 path
=getenv("ICU_DATA");
982 /* ICU_DATA_DIR may be set as a compile option */
984 if(path
==NULL
|| *path
==0) {
986 #if defined(U_DARWIN) && defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
987 simulator_root
=getenv("IPHONE_SIMULATOR_ROOT");
988 if (simulator_root
!= NULL
) {
989 (void) strlcpy(datadir_path_buffer
, simulator_root
, PATH_MAX
);
990 (void) strlcat(datadir_path_buffer
, path
, PATH_MAX
);
991 path
=datadir_path_buffer
;
998 /* It looks really bad, set it to something. */
1002 u_setDataDirectory(path
);
1003 return gDataDirectory
;
1010 /* Macintosh-specific locale information ------------------------------------ */
1017 int32_t date_region
;
1018 const char* posixID
;
1021 /* Todo: This will be updated with a newer version from www.unicode.org web
1022 page when it's available.*/
1023 #define MAC_LC_MAGIC_NUMBER -5
1024 #define MAC_LC_INIT_NUMBER -9
1026 static const mac_lc_rec mac_lc_recs
[] = {
1027 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 0, "en_US",
1029 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 1, "fr_FR",
1031 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 2, "en_GB",
1033 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 3, "de_DE",
1035 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 4, "it_IT",
1037 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 5, "nl_NL",
1039 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 6, "fr_BE",
1040 /* French for Belgium or Luxembourg*/
1041 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 7, "sv_SE",
1043 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 9, "da_DK",
1045 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 10, "pt_PT",
1047 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 11, "fr_CA",
1049 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 13, "is_IS",
1051 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 14, "ja_JP",
1053 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 15, "en_AU",
1055 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 16, "ar_AE",
1056 /* the Arabic world (?)*/
1057 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 17, "fi_FI",
1059 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 18, "fr_CH",
1060 /* French for Switzerland*/
1061 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 19, "de_CH",
1062 /* German for Switzerland*/
1063 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 20, "el_GR",
1065 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 21, "is_IS",
1067 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1069 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1071 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 24, "tr_TR",
1073 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 25, "sh_YU",
1074 /* Croatian system for Yugoslavia*/
1075 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1076 /* Hindi system for India*/
1077 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1079 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 41, "lt_LT",
1081 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 42, "pl_PL",
1083 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 43, "hu_HU",
1085 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 44, "et_EE",
1087 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 45, "lv_LV",
1089 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1090 /* Lapland [Ask Rich for the data. HS]*/
1091 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1093 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 48, "fa_IR",
1095 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 49, "ru_RU",
1097 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 50, "en_IE",
1099 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 51, "ko_KR",
1101 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 52, "zh_CN",
1102 /* People's Republic of China*/
1103 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 53, "zh_TW",
1105 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 54, "th_TH",
1108 /* fallback is en_US*/
1109 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
,
1110 MAC_LC_MAGIC_NUMBER
, "en_US"
1116 /* Return just the POSIX id, whatever happens to be in it */
1117 static const char *uprv_getPOSIXID(void)
1119 static const char* posixID
= NULL
;
1122 * On Solaris two different calls to setlocale can result in
1123 * different values. Only get this value once.
1125 * We must check this first because an application can set this.
1127 * LC_ALL can't be used because it's platform dependent. The LANG
1128 * environment variable seems to affect LC_CTYPE variable by default.
1129 * Here is what setlocale(LC_ALL, NULL) can return.
1130 * HPUX can return 'C C C C C C C'
1131 * Solaris can return /en_US/C/C/C/C/C on the second try.
1132 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1134 * The default codepage detection also needs to use LC_CTYPE.
1136 * Do not call setlocale(LC_*, "")! Using an empty string instead
1137 * of NULL, will modify the libc behavior.
1139 posixID
= setlocale(LC_CTYPE
, NULL
);
1141 || (uprv_strcmp("C", posixID
) == 0)
1142 || (uprv_strcmp("POSIX", posixID
) == 0))
1144 /* Maybe we got some garbage. Try something more reasonable */
1145 posixID
= getenv("LC_ALL");
1147 posixID
= getenv("LC_CTYPE");
1149 posixID
= getenv("LANG");
1155 || (uprv_strcmp("C", posixID
) == 0)
1156 || (uprv_strcmp("POSIX", posixID
) == 0))
1158 /* Nothing worked. Give it a nice POSIX default value. */
1159 posixID
= "en_US_POSIX";
1167 /* NOTE: The caller should handle thread safety */
1168 U_CAPI
const char* U_EXPORT2
1169 uprv_getDefaultLocaleID()
1173 Note that: (a '!' means the ID is improper somehow)
1174 LC_ALL ----> default_loc codepage
1175 --------------------------------------------------------
1180 ab_CD.EF@GH ab_CD_GH EF
1182 Some 'improper' ways to do the same as above:
1183 ! ab_CD@GH.EF ab_CD_GH EF
1184 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1185 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1190 The variant cannot have dots in it.
1191 The 'rightmost' variant (@xxx) wins.
1192 The leftmost codepage (.xxx) wins.
1194 char *correctedPOSIXLocale
= 0;
1195 const char* posixID
= uprv_getPOSIXID();
1200 /* Format: (no spaces)
1201 ll [ _CC ] [ . MM ] [ @ VV]
1203 l = lang, C = ctry, M = charmap, V = variant
1206 if (gCorrectedPOSIXLocale
!= NULL
) {
1207 return gCorrectedPOSIXLocale
;
1210 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1211 /* assume new locale can't be larger than old one? */
1212 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
)+1);
1213 /* Exit on memory allocation error. */
1214 if (correctedPOSIXLocale
== NULL
) {
1217 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1218 correctedPOSIXLocale
[p
-posixID
] = 0;
1220 /* do not copy after the @ */
1221 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1222 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1226 /* Note that we scan the *uncorrected* ID. */
1227 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1228 if (correctedPOSIXLocale
== NULL
) {
1229 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
)+1);
1230 /* Exit on memory allocation error. */
1231 if (correctedPOSIXLocale
== NULL
) {
1234 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1235 correctedPOSIXLocale
[p
-posixID
] = 0;
1239 /* Take care of any special cases here.. */
1240 if (!uprv_strcmp(p
, "nynorsk")) {
1242 /* Don't worry about no__NY. In practice, it won't appear. */
1245 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1246 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1249 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1252 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1253 /* How big will the resulting string be? */
1254 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1255 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1256 correctedPOSIXLocale
[len
] = 0;
1259 /* Anything following the @ sign */
1260 uprv_strcat(correctedPOSIXLocale
, p
);
1263 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1264 * How about 'russian' -> 'ru'?
1265 * Many of the other locales using ISO codes will be handled by the
1266 * canonicalization functions in uloc_getDefault.
1270 /* Was a correction made? */
1271 if (correctedPOSIXLocale
!= NULL
) {
1272 posixID
= correctedPOSIXLocale
;
1275 /* copy it, just in case the original pointer goes away. See j2395 */
1276 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1277 /* Exit on memory allocation error. */
1278 if (correctedPOSIXLocale
== NULL
) {
1281 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1284 if (gCorrectedPOSIXLocale
== NULL
) {
1285 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1286 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1287 correctedPOSIXLocale
= NULL
;
1290 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1291 uprv_free(correctedPOSIXLocale
);
1296 #elif defined(U_WINDOWS)
1297 UErrorCode status
= U_ZERO_ERROR
;
1298 LCID id
= GetThreadLocale();
1299 const char* locID
= uprv_convertToPosix(id
, &status
);
1301 if (U_FAILURE(status
)) {
1306 #elif defined(XP_MAC)
1307 int32_t script
= MAC_LC_INIT_NUMBER
;
1308 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1309 int32_t region
= MAC_LC_INIT_NUMBER
;
1310 /* = GetScriptManagerVariable(smRegionCode);*/
1311 int32_t lang
= MAC_LC_INIT_NUMBER
;
1312 /* = GetScriptManagerVariable(smScriptLang);*/
1313 int32_t date_region
= MAC_LC_INIT_NUMBER
;
1314 const char* posixID
= 0;
1315 int32_t count
= sizeof(mac_lc_recs
) / sizeof(mac_lc_rec
);
1319 ih
= (Intl1Hndl
) GetIntlResource(1);
1321 date_region
= ((uint16_t)(*ih
)->intl1Vers
) >> 8;
1323 for (i
= 0; i
< count
; i
++) {
1324 if ( ((mac_lc_recs
[i
].script
== MAC_LC_MAGIC_NUMBER
)
1325 || (mac_lc_recs
[i
].script
== script
))
1326 && ((mac_lc_recs
[i
].region
== MAC_LC_MAGIC_NUMBER
)
1327 || (mac_lc_recs
[i
].region
== region
))
1328 && ((mac_lc_recs
[i
].lang
== MAC_LC_MAGIC_NUMBER
)
1329 || (mac_lc_recs
[i
].lang
== lang
))
1330 && ((mac_lc_recs
[i
].date_region
== MAC_LC_MAGIC_NUMBER
)
1331 || (mac_lc_recs
[i
].date_region
== date_region
))
1334 posixID
= mac_lc_recs
[i
].posixID
;
1341 #elif defined(OS400)
1342 /* locales are process scoped and are by definition thread safe */
1343 static char correctedLocale
[64];
1344 const char *localeID
= getenv("LC_ALL");
1347 if (localeID
== NULL
)
1348 localeID
= getenv("LANG");
1349 if (localeID
== NULL
)
1350 localeID
= setlocale(LC_ALL
, NULL
);
1351 /* Make sure we have something... */
1352 if (localeID
== NULL
)
1353 return "en_US_POSIX";
1355 /* Extract the locale name from the path. */
1356 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1358 /* Increment p to start of locale name. */
1363 /* Copy to work location. */
1364 uprv_strcpy(correctedLocale
, localeID
);
1366 /* Strip off the '.locale' extension. */
1367 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1371 /* Upper case the locale name. */
1372 T_CString_toUpperCase(correctedLocale
);
1374 /* See if we are using the POSIX locale. Any of the
1375 * following are equivalent and use the same QLGPGCMA
1377 * QLGPGCMA2 means UCS2
1378 * QLGPGCMA_4 means UTF-32
1379 * QLGPGCMA_8 means UTF-8
1381 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1382 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1383 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1385 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1391 /* Lower case the lang portion. */
1392 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1394 *p
= uprv_tolower(*p
);
1397 /* Adjust for Euro. After '_E' add 'URO'. */
1398 LocaleLen
= uprv_strlen(correctedLocale
);
1399 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1400 correctedLocale
[LocaleLen
- 1] == 'E')
1402 uprv_strcat(correctedLocale
, "URO");
1405 /* If using Lotus-based locale then convert to
1406 * equivalent non Lotus.
1408 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1409 correctedLocale
[LocaleLen
- 1] == 'L')
1411 correctedLocale
[LocaleLen
- 2] = 0;
1414 /* There are separate simplified and traditional
1415 * locales called zh_HK_S and zh_HK_T.
1417 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1419 uprv_strcpy(correctedLocale
, "zh_HK");
1422 /* A special zh_CN_GBK locale...
1424 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1426 uprv_strcpy(correctedLocale
, "zh_CN");
1431 return correctedLocale
;
#if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale  POSIX locale name without codepage; NULL and "" are treated alike.
name    charset name as reported by the platform; may be NULL.
Returns the (possibly remapped) charset name, or NULL when name is NULL or
empty.  The result is either name itself or a string literal.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (locale != NULL && *locale == 0) {
        /* Make sure that an empty locale is handled the same way. */
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }
#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && *name == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#endif
    /* return NULL when "" is passed in */
    if (*name == 0) {
        name = NULL;
    }
    return name;
}
#endif
#if U_POSIX_LOCALE
/*
 * Extract the charset part of a POSIX locale ID of the form
 * "ll_CC.charset@variant".
 *
 * localeName    POSIX locale ID; may be NULL.
 * buffer        receives a NUL-terminated copy of the charset part (the text
 *               after the first '.', cut at the first '@').
 * buffCapacity  size of buffer in bytes.
 *
 * Returns the charset name after platform-specific remapping, or NULL when
 * localeName is NULL, has no '.', the charset part is empty, or no usable
 * buffer was supplied.  The result points into buffer or at a string literal.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *name = NULL;
    char *variant = NULL;

    if (buffer == NULL || buffCapacity <= 0) {
        /* Nowhere to store the name; buffer[buffCapacity-1] would be out of bounds. */
        return NULL;
    }

    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* Copy the part before the '.', truncated to fit localeBuf. */
        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
        uprv_strncpy(localeBuf, localeName, localeCapacity);
        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
        /* Copy the part after the '.', truncated to fit buffer. */
        name = uprv_strncpy(buffer, name+1, buffCapacity);
        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
        if ((variant = (uprv_strchr(name, '@'))) != NULL) {
            /* Cut off a trailing "@variant". */
            *variant = 0;
        }
        name = remapPlatformDependentCodepage(localeBuf, name);
    }
    return name;
}
#endif
/*
 * Determine the name of the platform's default codepage.
 *
 * Returns a pointer to a static buffer or a string literal; the caller must
 * not free it.  Not thread safe by itself: the static buffers are rewritten
 * on every call.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    /* Qualified job name "*" (blank padded to 26) and a blank 16-character
       internal job identifier. */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /*
     * Limit the codeset name with a precision ("%.*s"), not a field width:
     * a width only pads, so "%63s" produced 63+ characters before the option
     * string and overran the buffer.  The precision leaves exactly enough
     * room for the option string and the terminating NUL.
     */
    sprintf(codepage, "%.*s" UCNV_SWAP_LFNL_OPTION_STRING,
            (int)(sizeof(codepage) - sizeof(UCNV_SWAP_LFNL_OPTION_STRING)),
            nl_langinfo(CODESET));
    codepage[63] = 0; /* NULL terminate */
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1647 U_CAPI
const char* U_EXPORT2
1648 uprv_getDefaultCodepage()
1650 static char const *name
= NULL
;
1653 name
= int_getDefaultCodepage();
1660 /* end of platform-specific implementation -------------- */
1662 /* version handling --------------------------------------------------------- */
1664 U_CAPI
void U_EXPORT2
1665 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
1669 if(versionArray
==NULL
) {
1673 if(versionString
!=NULL
) {
1675 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
1676 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
1679 versionString
=end
+1;
1683 while(part
<U_MAX_VERSION_LENGTH
) {
1684 versionArray
[part
++]=0;
1688 U_CAPI
void U_EXPORT2
1689 u_versionToString(UVersionInfo versionArray
, char *versionString
) {
1690 uint16_t count
, part
;
1693 if(versionString
==NULL
) {
1697 if(versionArray
==NULL
) {
1702 /* count how many fields need to be written */
1703 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
1710 /* write the first part */
1711 /* write the decimal field value */
1712 field
=versionArray
[0];
1714 *versionString
++=(char)('0'+field
/100);
1718 *versionString
++=(char)('0'+field
/10);
1721 *versionString
++=(char)('0'+field
);
1723 /* write the following parts */
1724 for(part
=1; part
<count
; ++part
) {
1725 /* write a dot first */
1726 *versionString
++=U_VERSION_DELIMITER
;
1728 /* write the decimal field value */
1729 field
=versionArray
[part
];
1731 *versionString
++=(char)('0'+field
/100);
1735 *versionString
++=(char)('0'+field
/10);
1738 *versionString
++=(char)('0'+field
);
1745 U_CAPI
void U_EXPORT2
1746 u_getVersion(UVersionInfo versionArray
) {
1747 u_versionFromString(versionArray
, U_ICU_VERSION
);
1751 * Hey, Emacs, please set the following:
1754 * indent-tabs-mode: nil