2 ******************************************************************************
4 * Copyright (C) 1997-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
39 /* Define _XOPEN_SOURCE for Solaris and friends. */
40 /* NetBSD needs it to be >= 4 */
42 #if __STDC_VERSION__ >= 199901L
43 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 */
44 #define _XOPEN_SOURCE 600
46 #define _XOPEN_SOURCE 4
50 /* Make sure things like readlink and such functions work. */
51 #ifndef _XOPEN_SOURCE_EXTENDED
52 #define _XOPEN_SOURCE_EXTENDED 1
55 /* include ICU headers */
56 #include "unicode/utypes.h"
57 #include "unicode/putil.h"
58 #include "unicode/ustring.h"
67 /* Include standard headers. */
76 /* include system headers */
78 # define WIN32_LEAN_AND_MEAN
86 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
87 /* tzset isn't defined in strict ANSI on Cygwin. */
88 # undef __STRICT_ANSI__
91 # include <qusec.h> /* error code structure */
92 # include <qusrjobi.h>
93 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
96 # include <IntlResources.h>
99 # include <MacTypes.h>
100 # include <TextUtils.h>
101 # define ICU_NO_USER_DATA_OVERRIDE 1
103 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
104 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
108 #include <sys/neutrino.h>
112 #include <sys/time.h>
116 * Only include langinfo.h if we have a way to get the codeset. If we later
117 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
121 #if U_HAVE_NL_LANGINFO_CODESET
122 #include <langinfo.h>
125 /* Define the extension for data files, again... */
126 #define DATA_TYPE "dat"
128 /* Leave this copyright notice here! */
129 static const char copyright
[] = U_COPYRIGHT_STRING
;
131 /* floating point implementations ------------------------------------------- */
133 /* We return QNAN rather than SNAN*/
134 #define SIGN 0x80000000U
136 /* Make it easy to define certain types of constants */
138 int64_t i64
; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
140 } BitPatternConversion
;
141 static const BitPatternConversion gNan
= { (int64_t) INT64_C(0x7FF8000000000000) };
142 static const BitPatternConversion gInf
= { (int64_t) INT64_C(0x7FF0000000000000) };
144 /*---------------------------------------------------------------------------
146 Our general strategy is to assume we're on a POSIX platform. Platforms which
147 are non-POSIX must declare themselves so. The default POSIX implementation
148 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
150 ---------------------------------------------------------------------------*/
152 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
153 # undef U_POSIX_LOCALE
155 # define U_POSIX_LOCALE 1
159 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
160 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
164 u_topNBytesOfDouble(double* d
, int n
)
169 return (char*)(d
+ 1) - n
;
175 u_bottomNBytesOfDouble(double* d
, int n
)
178 return (char*)(d
+ 1) - n
;
184 #if defined(U_WINDOWS)
188 } FileTimeConversion
; /* This is like a ULARGE_INTEGER */
190 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
191 #define EPOCH_BIAS INT64_C(116444736000000000)
192 #define HECTONANOSECOND_PER_MILLISECOND 10000
196 /*---------------------------------------------------------------------------
197 Universal Implementations
198 These are designed to work on all platforms. Try these, and if they
199 don't work on your platform, then special case your platform with new
201 ---------------------------------------------------------------------------*/
203 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
204 U_CAPI UDate U_EXPORT2
211 uprv_memset( &tmrec
, 0, sizeof(tmrec
) );
215 t1
= mktime(&tmrec
); /* seconds of 1/1/1970*/
218 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
219 t2
= mktime(&tmrec
); /* seconds of current GMT*/
220 return (UDate
)(t2
- t1
) * U_MILLIS_PER_SECOND
; /* GMT (or UTC) in seconds since 1970*/
221 #elif defined(U_WINDOWS)
223 FileTimeConversion winTime
;
224 GetSystemTimeAsFileTime(&winTime
.fileTime
);
225 return (UDate
)((winTime
.int64
- EPOCH_BIAS
) / HECTONANOSECOND_PER_MILLISECOND
);
228 struct timeval posixTime;
229 gettimeofday(&posixTime, NULL);
230 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
234 return (UDate
)epochtime
* U_MILLIS_PER_SECOND
;
238 /*-----------------------------------------------------------------------------
240 These methods detect and return NaN and infinity values for doubles
241 conforming to IEEE 754. Platforms which support this standard include X86,
242 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
243 If this doesn't work on your platform, you have non-IEEE floating-point, and
244 will need to code your own versions. A naive implementation is to return 0.0
245 for getNaN and getInfinity, and false for isNaN and isInfinite.
246 ---------------------------------------------------------------------------*/
248 U_CAPI UBool U_EXPORT2
249 uprv_isNaN(double number
)
252 BitPatternConversion convertedNumber
;
253 convertedNumber
.d64
= number
;
254 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
255 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) > gInf
.i64
);
258 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
260 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
263 return ((highBits
& 0x7F080000L
) == 0x7F080000L
) &&
264 (lowBits
== 0x00000000L
);
267 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
268 /* you'll need to replace this default implementation with what's correct*/
269 /* for your platform.*/
270 return number
!= number
;
274 U_CAPI UBool U_EXPORT2
275 uprv_isInfinite(double number
)
278 BitPatternConversion convertedNumber
;
279 convertedNumber
.d64
= number
;
280 /* Infinity is exactly 0x7FF0000000000000U. */
281 return (UBool
)((convertedNumber
.i64
& U_INT64_MAX
) == gInf
.i64
);
283 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
285 uint32_t lowBits
= *(uint32_t*)u_bottomNBytesOfDouble(&number
,
288 return ((highBits
& ~SIGN
) == 0x70FF0000L
) && (lowBits
== 0x00000000L
);
291 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
292 /* value, you'll need to replace this default implementation with what's*/
293 /* correct for your platform.*/
294 return number
== (2.0 * number
);
298 U_CAPI UBool U_EXPORT2
299 uprv_isPositiveInfinity(double number
)
301 #if IEEE_754 || defined(OS390)
302 return (UBool
)(number
> 0 && uprv_isInfinite(number
));
304 return uprv_isInfinite(number
);
308 U_CAPI UBool U_EXPORT2
309 uprv_isNegativeInfinity(double number
)
311 #if IEEE_754 || defined(OS390)
312 return (UBool
)(number
< 0 && uprv_isInfinite(number
));
315 uint32_t highBits
= *(uint32_t*)u_topNBytesOfDouble(&number
,
317 return((highBits
& SIGN
) && uprv_isInfinite(number
));
322 U_CAPI
double U_EXPORT2
325 #if IEEE_754 || defined(OS390)
328 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
329 /* you'll need to replace this default implementation with what's correct*/
330 /* for your platform.*/
335 U_CAPI
double U_EXPORT2
338 #if IEEE_754 || defined(OS390)
341 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
342 /* value, you'll need to replace this default implementation with what's*/
343 /* correct for your platform.*/
348 U_CAPI
double U_EXPORT2
354 U_CAPI
double U_EXPORT2
360 U_CAPI
double U_EXPORT2
363 return uprv_floor(x
+ 0.5);
366 U_CAPI
double U_EXPORT2
372 U_CAPI
double U_EXPORT2
373 uprv_modf(double x
, double* y
)
378 U_CAPI
double U_EXPORT2
379 uprv_fmod(double x
, double y
)
384 U_CAPI
double U_EXPORT2
385 uprv_pow(double x
, double y
)
387 /* This is declared as "double pow(double x, double y)" */
391 U_CAPI
double U_EXPORT2
392 uprv_pow10(int32_t x
)
394 return pow(10.0, (double)x
);
/*
 * uprv_fmax: returns the larger of x and y.
 *
 * If either operand is a NaN according to uprv_isNaN(), the result is
 * uprv_getNaN(). When both operands compare equal to 0.0, a 32-bit word of
 * x is masked with SIGN to tell -0 from +0; the statement guarded by that
 * test is not visible in this view. Otherwise the result is x > y ? x : y.
 *
 * NOTE(review): the word is fetched with u_bottomNBytesOfDouble(). If that
 * helper yields the least-significant half of the double, the SIGN mask
 * cannot see the IEEE 754 sign bit, which lives in the most-significant
 * half -- confirm which word is intended.
 */
397 U_CAPI
double U_EXPORT2
398 uprv_fmax(double x
, double y
)
403 /* first handle NaN*/
404 if(uprv_isNaN(x
) || uprv_isNaN(y
))
405 return uprv_getNaN();
407 /* check for -0 and 0*/
408 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&x
, sizeof(uint32_t));
409 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
414 /* this should work for all flt point w/o NaN and Inf special cases */
415 return (x
> y
? x
: y
);
/*
 * uprv_fmin: returns the smaller of x and y.
 *
 * If either operand is a NaN according to uprv_isNaN(), the result is
 * uprv_getNaN(). When both operands compare equal to 0.0, a 32-bit word of
 * y is masked with SIGN to tell -0 from +0; the statement guarded by that
 * test is not visible in this view. Otherwise the result is x > y ? y : x.
 *
 * NOTE(review): the word is fetched with u_bottomNBytesOfDouble(). If that
 * helper yields the least-significant half of the double, the SIGN mask
 * cannot see the IEEE 754 sign bit, which lives in the most-significant
 * half -- confirm which word is intended.
 */
418 U_CAPI
double U_EXPORT2
419 uprv_fmin(double x
, double y
)
424 /* first handle NaN*/
425 if(uprv_isNaN(x
) || uprv_isNaN(y
))
426 return uprv_getNaN();
428 /* check for -0 and 0*/
429 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&y
, sizeof(uint32_t));
430 if(x
== 0.0 && y
== 0.0 && (lowBits
& SIGN
))
435 /* this should work for all flt point w/o NaN and Inf special cases */
436 return (x
> y
? y
: x
);
440 * Truncates the given double.
441 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
442 * This is different than calling floor() or ceil():
443 * floor(3.3) = 3, floor(-3.3) = -4
444 * ceil(3.3) = 4, ceil(-3.3) = -3
446 U_CAPI
double U_EXPORT2
452 /* handle error cases*/
454 return uprv_getNaN();
455 if(uprv_isInfinite(d
))
456 return uprv_getInfinity();
458 lowBits
= *(uint32_t*) u_bottomNBytesOfDouble(&d
, sizeof(uint32_t));
459 if( (d
== 0.0 && (lowBits
& SIGN
)) || d
< 0)
465 return d
>= 0 ? floor(d
) : ceil(d
);
471 * Return the largest positive number that can be represented by an integer
472 * type of arbitrary bit length.
474 U_CAPI
double U_EXPORT2
475 uprv_maxMantissa(void)
477 return pow(2.0, DBL_MANT_DIG
+ 1.0) - 1.0;
480 U_CAPI
double U_EXPORT2
487 /* This isn't used. If it's readded, readd putiltst.c tests */
488 U_CAPI
int32_t U_EXPORT2
489 uprv_digitsAfterDecimal(double x
)
492 int32_t numDigits
, bytesWritten
;
494 int32_t ptPos
, exponent
;
496 /* cheat and use the string-format routine to get a string representation*/
497 /* (it handles mathematical inaccuracy better than we can), then find out */
498 /* how many characters are to the right of the decimal point */
499 bytesWritten
= sprintf(buffer
, "%+.9g", x
);
500 while (isdigit(*(++p
))) {
503 ptPos
= (int32_t)(p
- buffer
);
504 numDigits
= (int32_t)(bytesWritten
- ptPos
- 1);
506 /* if the number's string representation is in scientific notation, find */
507 /* the exponent and take it into account*/
509 p
= uprv_strchr(buffer
, 'e');
511 int16_t expPos
= (int16_t)(p
- buffer
);
512 numDigits
-= bytesWritten
- expPos
;
513 exponent
= (int32_t)(atol(p
+ 1));
516 /* the string representation may still have spurious decimal digits in it, */
517 /* so we cut off at the ninth digit to the right of the decimal, and have */
518 /* to search backward from there to the first non-zero digit*/
521 while (numDigits
> 0 && buffer
[ptPos
+ numDigits
] == '0')
524 numDigits
-= exponent
;
532 /*---------------------------------------------------------------------------
533 Platform-specific Implementations
534 Try these, and if they don't work on your platform, then special case your
535 platform with new implementations.
536 ---------------------------------------------------------------------------*/
538 /* Generic time zone layer -------------------------------------------------- */
540 /* Time zone utilities */
541 U_CAPI
void U_EXPORT2
547 /* no initialization*/
551 U_CAPI
int32_t U_EXPORT2
563 uprv_memcpy( &tmrec
, localtime(&t
), sizeof(tmrec
) );
564 dst_checked
= (tmrec
.tm_isdst
!= 0); /* daylight savings time is checked*/
565 t1
= mktime(&tmrec
); /* local time in seconds*/
566 uprv_memcpy( &tmrec
, gmtime(&t
), sizeof(tmrec
) );
567 t2
= mktime(&tmrec
); /* GMT (or UTC) in seconds*/
569 /* imitate NT behaviour, which returns same timezone offset to GMT for
577 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
578 some platforms need to have it declared here. */
580 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
581 /* RS6000 and others reject char **tzname. */
582 extern U_IMPORT
char *U_TZNAME
[];
585 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
586 /* These platforms are likely to use Olson timezone IDs. */
587 #define CHECK_LOCALTIME_LINK 1
589 #define TZZONEINFO (TZDIR "/")
590 static char gTimeZoneBuffer
[PATH_MAX
];
591 static char *gTimeZoneBufferPtr
= NULL
;
595 #define isNonDigit(ch) (ch < '0' || '9' < ch)
596 static UBool
isValidOlsonID(const char *id
) {
599 /* Determine if this is something like Iceland (Olson ID)
600 or AST4ADT (non-Olson ID) */
601 while (id
[idx
] && isNonDigit(id
[idx
]) && id
[idx
] != ',') {
605 /* If we went through the whole string, then it might be okay.
606 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
607 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
608 The rest of the time it could be an Olson ID. George */
609 return (UBool
)(id
[idx
] == 0
610 || uprv_strcmp(id
, "PST8PDT") == 0
611 || uprv_strcmp(id
, "MST7MDT") == 0
612 || uprv_strcmp(id
, "CST6CDT") == 0
613 || uprv_strcmp(id
, "EST5EDT") == 0);
617 U_CAPI
const char* U_EXPORT2
621 const char *id
= uprv_detectWindowsTimeZone();
627 const char *tzenv
= NULL
;
629 /*#if defined(U_DARWIN)
632 tzenv = getenv("TZFILE");
638 tzenv
= getenv("TZ");
639 if (tzenv
!= NULL
&& isValidOlsonID(tzenv
))
641 /* This might be a good Olson ID. */
642 if (uprv_strncmp(tzenv
, "posix/", 6) == 0
643 || uprv_strncmp(tzenv
, "right/", 6) == 0)
645 /* Remove the posix/ or right/ prefix. */
650 /* else U_TZNAME will give a better result. */
652 #if defined(CHECK_LOCALTIME_LINK)
653 /* Caller must handle threading issues */
654 if (gTimeZoneBufferPtr
== NULL
) {
656 This is a trick to look at the name of the link to get the Olson ID
657 because the tzfile contents is underspecified.
658 This isn't guaranteed to work because it may not be a symlink.
660 int32_t ret
= (int32_t)readlink(TZDEFAULT
, gTimeZoneBuffer
, sizeof(gTimeZoneBuffer
));
662 int32_t tzZoneInfoLen
= uprv_strlen(TZZONEINFO
);
663 gTimeZoneBuffer
[ret
] = 0;
664 if (uprv_strncmp(gTimeZoneBuffer
, TZZONEINFO
, tzZoneInfoLen
) == 0
665 && isValidOlsonID(gTimeZoneBuffer
+ tzZoneInfoLen
))
667 return (gTimeZoneBufferPtr
= gTimeZoneBuffer
+ tzZoneInfoLen
);
672 return gTimeZoneBufferPtr
;
679 U_TZNAME is usually a non-unique abbreviation,
680 which isn't normally usable.
688 /* Get and set the ICU data directory --------------------------------------- */
690 static char *gDataDirectory
= NULL
;
692 static char *gCorrectedPOSIXLocale
= NULL
; /* Heap allocated */
/*
 * putil_cleanup: cleanup callback that this file passes to
 * ucln_common_registerCleanup() under UCLN_COMMON_PUTIL.
 *
 * Frees gDataDirectory only when it is non-NULL and non-empty; the empty
 * string is skipped because u_setDataDirectory() stores a string literal
 * in that case, which must not be freed. gDataDirectory is then reset to
 * NULL. gCorrectedPOSIXLocale, when set, is freed and reset to NULL.
 *
 * The return statement is not visible in this view.
 */
695 static UBool U_CALLCONV
putil_cleanup(void)
697 if (gDataDirectory
&& *gDataDirectory
) {
698 uprv_free(gDataDirectory
);
700 gDataDirectory
= NULL
;
702 if (gCorrectedPOSIXLocale
) {
703 uprv_free(gCorrectedPOSIXLocale
);
704 gCorrectedPOSIXLocale
= NULL
;
711 * Set the data directory.
712 * Make a copy of the passed string, and set the global data dir to point to it.
713 * TODO: see bug #2849, regarding thread safety.
/*
 * u_setDataDirectory: replaces the global data directory string.
 *
 * - directory NULL or empty: the new value is the string literal "", so
 *   no allocation or copy is made.
 * - otherwise: a buffer of uprv_strlen(directory) + 2 bytes is obtained
 *   from uprv_malloc() and the string is copied into it. Where
 *   U_FILE_SEP_CHAR differs from U_FILE_ALT_SEP_CHAR, every alternate
 *   separator in the copy is rewritten to U_FILE_SEP_CHAR.
 *
 * The previous value is freed only if it is non-NULL and non-empty (an
 * empty value is the literal above). The global is then updated and
 * putil_cleanup is registered under UCLN_COMMON_PUTIL.
 *
 * NOTE(review): the uprv_malloc() result is passed straight to
 * uprv_strcpy(); no NULL check is visible in this view -- confirm how
 * allocation failure is meant to be handled.
 * NOTE(review): the separator loop uses an assignment as its while
 * condition; this is intentional (loop until uprv_strchr finds nothing).
 * NOTE(review): any locking around the global update is not visible in
 * this view.
 */
715 U_CAPI
void U_EXPORT2
716 u_setDataDirectory(const char *directory
) {
720 if(directory
==NULL
|| *directory
==0) {
721 /* A small optimization to prevent the malloc and copy when the
722 shared library is used, and this is a way to make sure that NULL
725 newDataDir
= (char *)"";
728 length
=(int32_t)uprv_strlen(directory
);
729 newDataDir
= (char *)uprv_malloc(length
+ 2);
730 uprv_strcpy(newDataDir
, directory
);
732 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
735 while(p
= uprv_strchr(newDataDir
, U_FILE_ALT_SEP_CHAR
)) {
736 *p
= U_FILE_SEP_CHAR
;
743 if (gDataDirectory
&& *gDataDirectory
) {
744 uprv_free(gDataDirectory
);
746 gDataDirectory
= newDataDir
;
747 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
751 U_CAPI UBool U_EXPORT2
752 uprv_pathIsAbsolute(const char *path
)
754 if(!path
|| !*path
) {
758 if(*path
== U_FILE_SEP_CHAR
) {
762 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
763 if(*path
== U_FILE_ALT_SEP_CHAR
) {
768 #if defined(U_WINDOWS)
769 if( (((path
[0] >= 'A') && (path
[0] <= 'Z')) ||
770 ((path
[0] >= 'a') && (path
[0] <= 'z'))) &&
779 U_CAPI
const char * U_EXPORT2
780 u_getDataDirectory(void) {
781 const char *path
= NULL
;
783 /* if we have the directory, then return it immediately */
785 path
= gDataDirectory
;
793 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
794 override ICU's data with the ICU_DATA environment variable. This prevents
795 problems where multiple custom copies of ICU's specific version of data
796 are installed on a system. Either the application must define the data
797 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
798 ICU, set the data with udata_setCommonData or trust that all of the
799 required data is contained in ICU's data library that contains
800 the entry point defined by U_ICUDATA_ENTRY_POINT.
802 There may also be some platforms where environment variables
805 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
806 /* First try to get the environment variable */
807 path
=getenv("ICU_DATA");
810 /* ICU_DATA_DIR may be set as a compile option */
812 if(path
==NULL
|| *path
==0) {
818 /* It looks really bad, set it to something. */
822 u_setDataDirectory(path
);
823 return gDataDirectory
;
830 /* Macintosh-specific locale information ------------------------------------ */
841 /* Todo: This will be updated with a newer version from www.unicode.org web
842 page when it's available.*/
843 #define MAC_LC_MAGIC_NUMBER -5
844 #define MAC_LC_INIT_NUMBER -9
846 static const mac_lc_rec mac_lc_recs
[] = {
847 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 0, "en_US",
849 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 1, "fr_FR",
851 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 2, "en_GB",
853 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 3, "de_DE",
855 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 4, "it_IT",
857 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 5, "nl_NL",
859 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 6, "fr_BE",
860 /* French for Belgium or Luxembourg*/
861 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 7, "sv_SE",
863 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 9, "da_DK",
865 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 10, "pt_PT",
867 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 11, "fr_CA",
869 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 13, "is_IS",
871 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 14, "ja_JP",
873 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 15, "en_AU",
875 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 16, "ar_AE",
876 /* the Arabic world (?)*/
877 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 17, "fi_FI",
879 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 18, "fr_CH",
880 /* French for Switzerland*/
881 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 19, "de_CH",
882 /* German for Switzerland*/
883 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 20, "el_GR",
885 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 21, "is_IS",
887 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
889 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
891 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 24, "tr_TR",
893 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 25, "sh_YU",
894 /* Croatian system for Yugoslavia*/
895 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
896 /* Hindi system for India*/
897 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
899 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 41, "lt_LT",
901 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 42, "pl_PL",
903 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 43, "hu_HU",
905 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 44, "et_EE",
907 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 45, "lv_LV",
909 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
910 /* Lapland [Ask Rich for the data. HS]*/
911 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
913 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 48, "fa_IR",
915 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 49, "ru_RU",
917 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 50, "en_IE",
919 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 51, "ko_KR",
921 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 52, "zh_CN",
922 /* People's Republic of China*/
923 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 53, "zh_TW",
925 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, 54, "th_TH",
928 /* fallback is en_US*/
929 MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
, MAC_LC_MAGIC_NUMBER
,
930 MAC_LC_MAGIC_NUMBER
, "en_US"
936 /* Return just the POSIX id, whatever happens to be in it */
937 static const char *uprv_getPOSIXID(void)
939 static const char* posixID
= NULL
;
942 * On Solaris two different calls to setlocale can result in
943 * different values. Only get this value once.
945 * We must check this first because an application can set this.
947 * LC_ALL can't be used because it's platform dependent. The LANG
948 * environment variable seems to affect LC_CTYPE variable by default.
949 * Here is what setlocale(LC_ALL, NULL) can return.
950 * HPUX can return 'C C C C C C C'
951 * Solaris can return /en_US/C/C/C/C/C on the second try.
952 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
954 * The default codepage detection also needs to use LC_CTYPE.
956 * Do not call setlocale(LC_*, "")! Using an empty string instead
957 * of NULL, will modify the libc behavior.
959 posixID
= setlocale(LC_CTYPE
, NULL
);
961 || (uprv_strcmp("C", posixID
) == 0)
962 || (uprv_strcmp("POSIX", posixID
) == 0))
964 /* Maybe we got some garbage. Try something more reasonable */
965 posixID
= getenv("LC_ALL");
967 posixID
= getenv("LC_CTYPE");
969 posixID
= getenv("LANG");
975 || (uprv_strcmp("C", posixID
) == 0)
976 || (uprv_strcmp("POSIX", posixID
) == 0))
978 /* Nothing worked. Give it a nice POSIX default value. */
979 posixID
= "en_US_POSIX";
987 /* NOTE: The caller should handle thread safety */
/*
 * uprv_getDefaultLocaleID: returns the default locale ID for the platform.
 * The implementation is chosen by preprocessor branches. The directive that
 * opens the first branch is not visible in this view; the later branches
 * are U_WINDOWS, XP_MAC and OS400.
 *
 * First branch:
 *   - starts from uprv_getPOSIXID();
 *   - returns the cached gCorrectedPOSIXLocale if it is already set;
 *   - if the ID contains '.', copies the part before the first '.' and cuts
 *     that copy at any '@';
 *   - if the ID contains '@' (the last one is used), appends the variant
 *     after "_" when the copy already contains '_', or after "__" when it
 *     does not; "nynorsk" is special-cased; a variant is cut at a '.';
 *   - publishes the resulting buffer in gCorrectedPOSIXLocale if that is
 *     still NULL and registers putil_cleanup, otherwise frees the buffer.
 * U_WINDOWS: converts GetThreadLocale() with uprv_convertToPosix().
 * XP_MAC: scans mac_lc_recs for the first record whose script, region,
 *   lang and date_region fields each equal MAC_LC_MAGIC_NUMBER or the
 *   local value; date_region is taken from the high byte of intl1Vers.
 * OS400: uses LC_ALL, then LANG, then setlocale(LC_ALL, NULL); returns
 *   "en_US_POSIX" if all are NULL; otherwise normalizes the name in the
 *   static 64-byte buffer correctedLocale and returns that buffer.
 *
 * NOTE(review): probable heap overflow in the first branch. When the ID
 *   has '@' but no '.', the buffer is sized uprv_strlen(posixID)+1. If the
 *   ID also has no '_', the one-character '@' is replaced by the
 *   two-character "__" before the variant is appended, so an ID such as
 *   "sr@latin" appears to need one byte more than was allocated. The
 *   in-code remark "assume new locale can't be larger than old one?"
 *   raises the same doubt. Confirm, and size the buffer with +2.
 * NOTE(review): no uprv_malloc() result is checked for NULL in the lines
 *   visible here.
 * NOTE(review): OS400 branch copies localeID into correctedLocale[64] with
 *   uprv_strcpy() and no visible length check, and indexes
 *   correctedLocale[LocaleLen - 2] -- confirm the name is always shorter
 *   than 64 and at least two characters long.
 */
988 U_CAPI
const char* U_EXPORT2
989 uprv_getDefaultLocaleID()
993 Note that: (a '!' means the ID is improper somehow)
994 LC_ALL ----> default_loc codepage
995 --------------------------------------------------------
1000 ab_CD.EF@GH ab_CD_GH EF
1002 Some 'improper' ways to do the same as above:
1003 ! ab_CD@GH.EF ab_CD_GH EF
1004 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1005 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1010 The variant cannot have dots in it.
1011 The 'rightmost' variant (@xxx) wins.
1012 The leftmost codepage (.xxx) wins.
1014 char *correctedPOSIXLocale
= 0;
1015 const char* posixID
= uprv_getPOSIXID();
1020 /* Format: (no spaces)
1021 ll [ _CC ] [ . MM ] [ @ VV]
1023 l = lang, C = ctry, M = charmap, V = variant
1026 if (gCorrectedPOSIXLocale
!= NULL
) {
1027 return gCorrectedPOSIXLocale
;
1030 if ((p
= uprv_strchr(posixID
, '.')) != NULL
) {
1031 /* assume new locale can't be larger than old one? */
1032 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
)+1);
1033 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1034 correctedPOSIXLocale
[p
-posixID
] = 0;
1036 /* do not copy after the @ */
1037 if ((p
= uprv_strchr(correctedPOSIXLocale
, '@')) != NULL
) {
1038 correctedPOSIXLocale
[p
-correctedPOSIXLocale
] = 0;
1042 /* Note that we scan the *uncorrected* ID. */
1043 if ((p
= uprv_strrchr(posixID
, '@')) != NULL
) {
1044 if (correctedPOSIXLocale
== NULL
) {
1045 correctedPOSIXLocale
= uprv_malloc(uprv_strlen(posixID
)+1);
1046 uprv_strncpy(correctedPOSIXLocale
, posixID
, p
-posixID
);
1047 correctedPOSIXLocale
[p
-posixID
] = 0;
1051 /* Take care of any special cases here.. */
1052 if (!uprv_strcmp(p
, "nynorsk")) {
1054 /* Don't worry about no__NY. In practice, it won't appear. */
1057 if (uprv_strchr(correctedPOSIXLocale
,'_') == NULL
) {
1058 uprv_strcat(correctedPOSIXLocale
, "__"); /* aa@b -> aa__b */
1061 uprv_strcat(correctedPOSIXLocale
, "_"); /* aa_CC@b -> aa_CC_b */
1064 if ((q
= uprv_strchr(p
, '.')) != NULL
) {
1065 /* How big will the resulting string be? */
1066 len
= (int32_t)(uprv_strlen(correctedPOSIXLocale
) + (q
-p
));
1067 uprv_strncat(correctedPOSIXLocale
, p
, q
-p
);
1068 correctedPOSIXLocale
[len
] = 0;
1071 /* Anything following the @ sign */
1072 uprv_strcat(correctedPOSIXLocale
, p
);
1075 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1076 * How about 'russian' -> 'ru'?
1077 * Many of the other locales using ISO codes will be handled by the
1078 * canonicalization functions in uloc_getDefault.
1082 /* Was a correction made? */
1083 if (correctedPOSIXLocale
!= NULL
) {
1084 posixID
= correctedPOSIXLocale
;
1087 /* copy it, just in case the original pointer goes away. See j2395 */
1088 correctedPOSIXLocale
= (char *)uprv_malloc(uprv_strlen(posixID
) + 1);
1089 posixID
= uprv_strcpy(correctedPOSIXLocale
, posixID
);
1092 if (gCorrectedPOSIXLocale
== NULL
) {
1093 gCorrectedPOSIXLocale
= correctedPOSIXLocale
;
1094 ucln_common_registerCleanup(UCLN_COMMON_PUTIL
, putil_cleanup
);
1095 correctedPOSIXLocale
= NULL
;
1098 if (correctedPOSIXLocale
!= NULL
) { /* Was already set - clean up. */
1099 uprv_free(correctedPOSIXLocale
);
1104 #elif defined(U_WINDOWS)
1105 UErrorCode status
= U_ZERO_ERROR
;
1106 LCID id
= GetThreadLocale();
1107 const char* locID
= uprv_convertToPosix(id
, &status
);
1109 if (U_FAILURE(status
)) {
1114 #elif defined(XP_MAC)
1115 int32_t script
= MAC_LC_INIT_NUMBER
;
1116 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1117 int32_t region
= MAC_LC_INIT_NUMBER
;
1118 /* = GetScriptManagerVariable(smRegionCode);*/
1119 int32_t lang
= MAC_LC_INIT_NUMBER
;
1120 /* = GetScriptManagerVariable(smScriptLang);*/
1121 int32_t date_region
= MAC_LC_INIT_NUMBER
;
1122 const char* posixID
= 0;
1123 int32_t count
= sizeof(mac_lc_recs
) / sizeof(mac_lc_rec
);
1127 ih
= (Intl1Hndl
) GetIntlResource(1);
1129 date_region
= ((uint16_t)(*ih
)->intl1Vers
) >> 8;
1131 for (i
= 0; i
< count
; i
++) {
1132 if ( ((mac_lc_recs
[i
].script
== MAC_LC_MAGIC_NUMBER
)
1133 || (mac_lc_recs
[i
].script
== script
))
1134 && ((mac_lc_recs
[i
].region
== MAC_LC_MAGIC_NUMBER
)
1135 || (mac_lc_recs
[i
].region
== region
))
1136 && ((mac_lc_recs
[i
].lang
== MAC_LC_MAGIC_NUMBER
)
1137 || (mac_lc_recs
[i
].lang
== lang
))
1138 && ((mac_lc_recs
[i
].date_region
== MAC_LC_MAGIC_NUMBER
)
1139 || (mac_lc_recs
[i
].date_region
== date_region
))
1142 posixID
= mac_lc_recs
[i
].posixID
;
1149 #elif defined(OS400)
1150 /* locales are process scoped and are by definition thread safe */
1151 static char correctedLocale
[64];
1152 const char *localeID
= getenv("LC_ALL");
1155 if (localeID
== NULL
)
1156 localeID
= getenv("LANG");
1157 if (localeID
== NULL
)
1158 localeID
= setlocale(LC_ALL
, NULL
);
1159 /* Make sure we have something... */
1160 if (localeID
== NULL
)
1161 return "en_US_POSIX";
1163 /* Extract the locale name from the path. */
1164 if((p
= uprv_strrchr(localeID
, '/')) != NULL
)
1166 /* Increment p to start of locale name. */
1171 /* Copy to work location. */
1172 uprv_strcpy(correctedLocale
, localeID
);
1174 /* Strip off the '.locale' extension. */
1175 if((p
= uprv_strchr(correctedLocale
, '.')) != NULL
) {
1179 /* Upper case the locale name. */
1180 T_CString_toUpperCase(correctedLocale
);
1182 /* See if we are using the POSIX locale. Any of the
1183 * following are equivalent and use the same QLGPGCMA
1185 * QLGPGCMA2 means UCS2
1186 * QLGPGCMA_4 means UTF-32
1187 * QLGPGCMA_8 means UTF-8
1189 if ((uprv_strcmp("C", correctedLocale
) == 0) ||
1190 (uprv_strcmp("POSIX", correctedLocale
) == 0) ||
1191 (uprv_strncmp("QLGPGCMA", correctedLocale
, 8) == 0))
1193 uprv_strcpy(correctedLocale
, "en_US_POSIX");
1199 /* Lower case the lang portion. */
1200 for(p
= correctedLocale
; *p
!= 0 && *p
!= '_'; p
++)
1202 *p
= uprv_tolower(*p
);
1205 /* Adjust for Euro. After '_E' add 'URO'. */
1206 LocaleLen
= uprv_strlen(correctedLocale
);
1207 if (correctedLocale
[LocaleLen
- 2] == '_' &&
1208 correctedLocale
[LocaleLen
- 1] == 'E')
1210 uprv_strcat(correctedLocale
, "URO");
1213 /* If using Lotus-based locale then convert to
1214 * equivalent non Lotus.
1216 else if (correctedLocale
[LocaleLen
- 2] == '_' &&
1217 correctedLocale
[LocaleLen
- 1] == 'L')
1219 correctedLocale
[LocaleLen
- 2] = 0;
1222 /* There are separate simplified and traditional
1223 * locales called zh_HK_S and zh_HK_T.
1225 else if (uprv_strncmp(correctedLocale
, "zh_HK", 5) == 0)
1227 uprv_strcpy(correctedLocale
, "zh_HK");
1230 /* A special zh_CN_GBK locale...
1232 else if (uprv_strcmp(correctedLocale
, "zh_CN_GBK") == 0)
1234 uprv_strcpy(correctedLocale
, "zh_CN");
1239 return correctedLocale
;
1246 Due to various platform differences, one platform may specify a charset,
1247 when they really mean a different charset. Remap the names so that they are
1248 compatible with ICU.
1251 remapPlatformDependentCodepage(const char *locale
, const char *name
) {
1252 if (locale
!= NULL
&& *locale
== 0) {
1253 /* Make sure that an empty locale is handled the same way. */
1260 if (uprv_strcmp(name
, "IBM-943") == 0) {
1261 /* Use the ASCII compatible ibm-943 */
1264 else if (uprv_strcmp(name
, "IBM-1252") == 0) {
1265 /* Use the windows-1252 that contains the Euro */
1268 #elif defined(U_SOLARIS)
1269 if (locale
!= NULL
&& uprv_strcmp(name
, "EUC") == 0) {
1270 /* Solaris underspecifies the "EUC" name. */
1271 if (uprv_strcmp(locale
, "zh_CN") == 0) {
1274 else if (uprv_strcmp(locale
, "zh_TW") == 0) {
1277 else if (uprv_strcmp(locale
, "ko_KR") == 0) {
1281 #elif defined(U_DARWIN)
1282 if (locale
== NULL
&& *name
== 0) {
1284 No locale was specified, and an empty name was passed in.
1285 This usually indicates that nl_langinfo didn't return valid information.
1286 Mac OS X uses UTF-8 by default (especially the locale data and console).
1291 /* return NULL when "" is passed in */
1299 getCodepageFromPOSIXID(const char *localeName
, char * buffer
, int32_t buffCapacity
)
1301 char localeBuf
[100];
1302 const char *name
= NULL
;
1303 char *variant
= NULL
;
1305 if (localeName
!= NULL
&& (name
= (uprv_strchr(localeName
, '.'))) != NULL
) {
1306 size_t localeCapacity
= uprv_min(sizeof(localeBuf
), (name
-localeName
)+1);
1307 uprv_strncpy(localeBuf
, localeName
, localeCapacity
);
1308 localeBuf
[localeCapacity
-1] = 0; /* ensure NULL termination */
1309 name
= uprv_strncpy(buffer
, name
+1, buffCapacity
);
1310 buffer
[buffCapacity
-1] = 0; /* ensure NULL termination */
1311 if ((variant
= (uprv_strchr(name
, '@'))) != NULL
) {
1314 name
= remapPlatformDependentCodepage(localeBuf
, name
);
1321 int_getDefaultCodepage()
1324 uint32_t ccsid
= 37; /* Default to ibm-37 */
1325 static char codepage
[64];
1326 Qwc_JOBI0400_t jobinfo
;
1327 Qus_EC_t error
= { sizeof(Qus_EC_t
) }; /* SPI error code */
1329 EPT_CALL(QUSRJOBI
)(&jobinfo
, sizeof(jobinfo
), "JOBI0400",
1332 if (error
.Bytes_Available
== 0) {
1333 if (jobinfo
.Coded_Char_Set_ID
!= 0xFFFF) {
1334 ccsid
= (uint32_t)jobinfo
.Coded_Char_Set_ID
;
1336 else if (jobinfo
.Default_Coded_Char_Set_Id
!= 0xFFFF) {
1337 ccsid
= (uint32_t)jobinfo
.Default_Coded_Char_Set_Id
;
1339 /* else use the default */
1341 sprintf(codepage
,"ibm-%d", ccsid
);
1344 #elif defined(OS390)
1345 static char codepage
[64];
1346 sprintf(codepage
,"%s" UCNV_SWAP_LFNL_OPTION_STRING
, nl_langinfo(CODESET
));
1349 #elif defined(XP_MAC)
1350 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1352 #elif defined(U_WINDOWS)
1353 static char codepage
[64];
1354 sprintf(codepage
, "windows-%d", GetACP());
1357 #elif U_POSIX_LOCALE
1358 static char codesetName
[100];
1359 const char *localeName
= NULL
;
1360 const char *name
= NULL
;
1362 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1364 /* Use setlocale in a nice way, and then check some environment variables.
1365 Maybe the application used setlocale already.
1367 localeName
= uprv_getPOSIXID();
1368 name
= getCodepageFromPOSIXID(localeName
, codesetName
, sizeof(codesetName
));
1370 /* if we can find the codeset name from setlocale, return that. */
1373 /* else "C" was probably returned. That's underspecified. */
1375 #if U_HAVE_NL_LANGINFO_CODESET
1377 uprv_memset(codesetName
, 0, sizeof(codesetName
));
1379 /* When available, check nl_langinfo because it usually gives more
1380 useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
1381 nl_langinfo may use the same buffer as setlocale. */
1383 const char *codeset
= nl_langinfo(U_NL_LANGINFO_CODESET
);
1384 codeset
= remapPlatformDependentCodepage(NULL
, codeset
);
1385 if (codeset
!= NULL
) {
1386 uprv_strncpy(codesetName
, codeset
, sizeof(codesetName
));
1387 codesetName
[sizeof(codesetName
)-1] = 0;
1393 if (*codesetName
== 0)
1395 /* Everything failed. Return US ASCII (ISO 646). */
1396 uprv_strcpy(codesetName
, "US-ASCII");
1405 U_CAPI
const char* U_EXPORT2
1406 uprv_getDefaultCodepage()
1408 static char const *name
= NULL
;
1411 name
= int_getDefaultCodepage();
1418 /* end of platform-specific implementation -------------- */
1420 /* version handling --------------------------------------------------------- */
1422 U_CAPI
void U_EXPORT2
1423 u_versionFromString(UVersionInfo versionArray
, const char *versionString
) {
1427 if(versionArray
==NULL
) {
1431 if(versionString
!=NULL
) {
1433 versionArray
[part
]=(uint8_t)uprv_strtoul(versionString
, &end
, 10);
1434 if(end
==versionString
|| ++part
==U_MAX_VERSION_LENGTH
|| *end
!=U_VERSION_DELIMITER
) {
1437 versionString
=end
+1;
1441 while(part
<U_MAX_VERSION_LENGTH
) {
1442 versionArray
[part
++]=0;
1446 U_CAPI
void U_EXPORT2
1447 u_versionToString(UVersionInfo versionArray
, char *versionString
) {
1448 uint16_t count
, part
;
1451 if(versionString
==NULL
) {
1455 if(versionArray
==NULL
) {
1460 /* count how many fields need to be written */
1461 for(count
=4; count
>0 && versionArray
[count
-1]==0; --count
) {
1468 /* write the first part */
1469 /* write the decimal field value */
1470 field
=versionArray
[0];
1472 *versionString
++=(char)('0'+field
/100);
1476 *versionString
++=(char)('0'+field
/10);
1479 *versionString
++=(char)('0'+field
);
1481 /* write the following parts */
1482 for(part
=1; part
<count
; ++part
) {
1483 /* write a dot first */
1484 *versionString
++=U_VERSION_DELIMITER
;
1486 /* write the decimal field value */
1487 field
=versionArray
[part
];
1489 *versionString
++=(char)('0'+field
/100);
1493 *versionString
++=(char)('0'+field
/10);
1496 *versionString
++=(char)('0'+field
);
1503 U_CAPI
void U_EXPORT2
1504 u_getVersion(UVersionInfo versionArray
) {
1505 u_versionFromString(versionArray
, U_ICU_VERSION
);
1509 * Hey, Emacs, please set the following:
1512 * indent-tabs-mode: nil