]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/putil.c
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / putil.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
37 */
38
39 /* Define _XOPEN_SOURCE for Solaris and friends. */
40 /* NetBSD needs it to be >= 4 */
41 #ifndef _XOPEN_SOURCE
42 #if __STDC_VERSION__ >= 199901L
43 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 */
44 #define _XOPEN_SOURCE 600
45 #else
46 #define _XOPEN_SOURCE 4
47 #endif
48 #endif
49
50 /* Make sure things like readlink and such functions work. */
51 #ifndef _XOPEN_SOURCE_EXTENDED
52 #define _XOPEN_SOURCE_EXTENDED 1
53 #endif
54
55 /* include ICU headers */
56 #include "unicode/utypes.h"
57 #include "unicode/putil.h"
58 #include "unicode/ustring.h"
59 #include "putilimp.h"
60 #include "uassert.h"
61 #include "umutex.h"
62 #include "cmemory.h"
63 #include "cstring.h"
64 #include "locmap.h"
65 #include "ucln_cmn.h"
66
67 /* Include standard headers. */
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <math.h>
72 #include <locale.h>
73 #include <float.h>
74 #include <time.h>
75
76 /* include system headers */
77 #ifdef U_WINDOWS
78 # define WIN32_LEAN_AND_MEAN
79 # define VC_EXTRALEAN
80 # define NOUSER
81 # define NOSERVICE
82 # define NOIME
83 # define NOMCX
84 # include <windows.h>
85 # include "wintz.h"
86 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
87 /* tzset isn't defined in strict ANSI on Cygwin. */
88 # undef __STRICT_ANSI__
89 #elif defined(OS400)
90 # include <float.h>
91 # include <qusec.h> /* error code structure */
92 # include <qusrjobi.h>
93 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
94 #elif defined(XP_MAC)
95 # include <Files.h>
96 # include <IntlResources.h>
97 # include <Script.h>
98 # include <Folders.h>
99 # include <MacTypes.h>
100 # include <TextUtils.h>
101 # define ICU_NO_USER_DATA_OVERRIDE 1
102 #elif defined(OS390)
103 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
104 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
105 #include <limits.h>
106 #include <unistd.h>
107 #elif defined(U_QNX)
108 #include <sys/neutrino.h>
109 #endif
110
111 #ifndef U_WINDOWS
112 #include <sys/time.h>
113 #endif
114
115 /*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
118 *
119 */
120
121 #if U_HAVE_NL_LANGINFO_CODESET
122 #include <langinfo.h>
123 #endif
124
125 /* Define the extension for data files, again... */
126 #define DATA_TYPE "dat"
127
128 /* Leave this copyright notice here! */
129 static const char copyright[] = U_COPYRIGHT_STRING;
130
131 /* floating point implementations ------------------------------------------- */
132
133 /* We return QNAN rather than SNAN*/
134 #define SIGN 0x80000000U
135
136 /* Make it easy to define certain types of constants */
/*
 * Overlay of a 64-bit integer and a double. It is used to create doubles
 * from raw bit patterns and to look at the bits of an existing double.
 */
typedef union {
    /* Keep this member first: a C89 union initializer sets the first member. */
    int64_t i64;
    double  d64;
} BitPatternConversion;

/* Bit pattern returned as NaN (a quiet NaN, not a signaling one). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
143
144 /*---------------------------------------------------------------------------
145 Platform utilities
146 Our general strategy is to assume we're on a POSIX platform. Platforms which
147 are non-POSIX must declare themselves so. The default POSIX implementation
148 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
149 functions).
150 ---------------------------------------------------------------------------*/
151
152 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
153 # undef U_POSIX_LOCALE
154 #else
155 # define U_POSIX_LOCALE 1
156 #endif
157
158 /*
159 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
160 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
161 */
#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * Which end of the object that is depends on U_IS_BIG_ENDIAN.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    /* Big endian: the most significant bytes are stored first. */
    char *firstByte = (char*)d;
    return firstByte;
#else
    /* Little endian: the most significant bytes are the last n bytes. */
    char *pastLastByte = (char*)(d + 1);
    return pastLastByte - n;
#endif
}
#endif
173
/*
 * Returns a pointer to the n least significant bytes of *d.
 * Which end of the object that is depends on U_IS_BIG_ENDIAN.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
#if U_IS_BIG_ENDIAN
    /* Big endian: the least significant bytes are the last n bytes. */
    char *pastLastByte = (char*)(d + 1);
    return pastLastByte - n;
#else
    /* Little endian: the least significant bytes are stored first. */
    char *firstByte = (char*)d;
    return firstByte;
#endif
}
183
#if defined(U_WINDOWS)
/*
 * Lets a FILETIME be read as one 64-bit integer
 * (this is like a ULARGE_INTEGER).
 */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;

/* Number of 100-nanosecond units from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond */
#define HECTONANOSECOND_PER_MILLISECOND   10000

#endif
195
196 /*---------------------------------------------------------------------------
197 Universal Implementations
198 These are designed to work on all platforms. Try these, and if they
199 don't work on your platform, then special case your platform with new
200 implementations.
201 ---------------------------------------------------------------------------*/
202
203 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
204 U_CAPI UDate U_EXPORT2
205 uprv_getUTCtime()
206 {
207 #ifdef XP_MAC
208 time_t t, t1, t2;
209 struct tm tmrec;
210
211 uprv_memset( &tmrec, 0, sizeof(tmrec) );
212 tmrec.tm_year = 70;
213 tmrec.tm_mon = 0;
214 tmrec.tm_mday = 1;
215 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
216
217 time(&t);
218 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
219 t2 = mktime(&tmrec); /* seconds of current GMT*/
220 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
221 #elif defined(U_WINDOWS)
222
223 FileTimeConversion winTime;
224 GetSystemTimeAsFileTime(&winTime.fileTime);
225 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
226 #else
227 /*
228 struct timeval posixTime;
229 gettimeofday(&posixTime, NULL);
230 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
231 */
232 time_t epochtime;
233 time(&epochtime);
234 return (UDate)epochtime * U_MILLIS_PER_SECOND;
235 #endif
236 }
237
238 /*-----------------------------------------------------------------------------
239 IEEE 754
240 These methods detect and return NaN and infinity values for doubles
241 conforming to IEEE 754. Platforms which support this standard include X86,
242 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
243 If this doesn't work on your platform, you have non-IEEE floating-point, and
244 will need to code your own versions. A naive implementation is to return 0.0
245 for getNaN and getInfinity, and false for isNaN and isInfinite.
246 ---------------------------------------------------------------------------*/
247
248 U_CAPI UBool U_EXPORT2
249 uprv_isNaN(double number)
250 {
251 #if IEEE_754
252 BitPatternConversion convertedNumber;
253 convertedNumber.d64 = number;
254 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
255 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
256
257 #elif defined(OS390)
258 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
259 sizeof(uint32_t));
260 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
261 sizeof(uint32_t));
262
263 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
264 (lowBits == 0x00000000L);
265
266 #else
267 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
268 /* you'll need to replace this default implementation with what's correct*/
269 /* for your platform.*/
270 return number != number;
271 #endif
272 }
273
274 U_CAPI UBool U_EXPORT2
275 uprv_isInfinite(double number)
276 {
277 #if IEEE_754
278 BitPatternConversion convertedNumber;
279 convertedNumber.d64 = number;
280 /* Infinity is exactly 0x7FF0000000000000U. */
281 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
282 #elif defined(OS390)
283 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
284 sizeof(uint32_t));
285 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
286 sizeof(uint32_t));
287
288 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
289
290 #else
291 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
292 /* value, you'll need to replace this default implementation with what's*/
293 /* correct for your platform.*/
294 return number == (2.0 * number);
295 #endif
296 }
297
298 U_CAPI UBool U_EXPORT2
299 uprv_isPositiveInfinity(double number)
300 {
301 #if IEEE_754 || defined(OS390)
302 return (UBool)(number > 0 && uprv_isInfinite(number));
303 #else
304 return uprv_isInfinite(number);
305 #endif
306 }
307
308 U_CAPI UBool U_EXPORT2
309 uprv_isNegativeInfinity(double number)
310 {
311 #if IEEE_754 || defined(OS390)
312 return (UBool)(number < 0 && uprv_isInfinite(number));
313
314 #else
315 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
316 sizeof(uint32_t));
317 return((highBits & SIGN) && uprv_isInfinite(number));
318
319 #endif
320 }
321
322 U_CAPI double U_EXPORT2
323 uprv_getNaN()
324 {
325 #if IEEE_754 || defined(OS390)
326 return gNan.d64;
327 #else
328 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
329 /* you'll need to replace this default implementation with what's correct*/
330 /* for your platform.*/
331 return 0.0;
332 #endif
333 }
334
335 U_CAPI double U_EXPORT2
336 uprv_getInfinity()
337 {
338 #if IEEE_754 || defined(OS390)
339 return gInf.d64;
340 #else
341 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
342 /* value, you'll need to replace this default implementation with what's*/
343 /* correct for your platform.*/
344 return 0.0;
345 #endif
346 }
347
348 U_CAPI double U_EXPORT2
349 uprv_floor(double x)
350 {
351 return floor(x);
352 }
353
354 U_CAPI double U_EXPORT2
355 uprv_ceil(double x)
356 {
357 return ceil(x);
358 }
359
360 U_CAPI double U_EXPORT2
361 uprv_round(double x)
362 {
363 return uprv_floor(x + 0.5);
364 }
365
366 U_CAPI double U_EXPORT2
367 uprv_fabs(double x)
368 {
369 return fabs(x);
370 }
371
372 U_CAPI double U_EXPORT2
373 uprv_modf(double x, double* y)
374 {
375 return modf(x, y);
376 }
377
378 U_CAPI double U_EXPORT2
379 uprv_fmod(double x, double y)
380 {
381 return fmod(x, y);
382 }
383
384 U_CAPI double U_EXPORT2
385 uprv_pow(double x, double y)
386 {
387 /* This is declared as "double pow(double x, double y)" */
388 return pow(x, y);
389 }
390
391 U_CAPI double U_EXPORT2
392 uprv_pow10(int32_t x)
393 {
394 return pow(10.0, (double)x);
395 }
396
397 U_CAPI double U_EXPORT2
398 uprv_fmax(double x, double y)
399 {
400 #if IEEE_754
401 int32_t lowBits;
402
403 /* first handle NaN*/
404 if(uprv_isNaN(x) || uprv_isNaN(y))
405 return uprv_getNaN();
406
407 /* check for -0 and 0*/
408 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
409 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
410 return y;
411
412 #endif
413
414 /* this should work for all flt point w/o NaN and Infpecial cases */
415 return (x > y ? x : y);
416 }
417
418 U_CAPI double U_EXPORT2
419 uprv_fmin(double x, double y)
420 {
421 #if IEEE_754
422 int32_t lowBits;
423
424 /* first handle NaN*/
425 if(uprv_isNaN(x) || uprv_isNaN(y))
426 return uprv_getNaN();
427
428 /* check for -0 and 0*/
429 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
430 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
431 return y;
432
433 #endif
434
435 /* this should work for all flt point w/o NaN and Inf special cases */
436 return (x > y ? y : x);
437 }
438
439 /**
440 * Truncates the given double.
441 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
442 * This is different than calling floor() or ceil():
443 * floor(3.3) = 3, floor(-3.3) = -4
444 * ceil(3.3) = 4, ceil(-3.3) = -3
445 */
446 U_CAPI double U_EXPORT2
447 uprv_trunc(double d)
448 {
449 #if IEEE_754
450 int32_t lowBits;
451
452 /* handle error cases*/
453 if(uprv_isNaN(d))
454 return uprv_getNaN();
455 if(uprv_isInfinite(d))
456 return uprv_getInfinity();
457
458 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
459 if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
460 return ceil(d);
461 else
462 return floor(d);
463
464 #else
465 return d >= 0 ? floor(d) : ceil(d);
466
467 #endif
468 }
469
470 /**
471 * Return the largest positive number that can be represented by an integer
472 * type of arbitrary bit length.
473 */
474 U_CAPI double U_EXPORT2
475 uprv_maxMantissa(void)
476 {
477 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
478 }
479
480 U_CAPI double U_EXPORT2
481 uprv_log(double d)
482 {
483 return log(d);
484 }
485
#if 0
/* This isn't used. If it's re-added, re-add the putiltst.c tests as well. */
/*
 * Estimates the number of digits to the right of the decimal point that
 * are needed to represent x, based on its "%+.9g" string form.
 * Never returns a negative count.
 */
U_CAPI int32_t U_EXPORT2
uprv_digitsAfterDecimal(double x)
{
    char buffer[20];
    int32_t numDigits, bytesWritten;
    char *p = buffer;
    int32_t ptPos, exponent;

    /* cheat and use the string-format routine to get a string representation*/
    /* (it handles mathematical inaccuracy better than we can), then find out */
    /* how many characters are to the right of the decimal point */
    bytesWritten = sprintf(buffer, "%+.9g", x);

    /* buffer[0] is the sign written by "%+"; skip it and the digits after it. */
    /* A plain range test is used so no <ctype.h> declaration is needed and */
    /* no possibly negative char is passed to isdigit(). */
    do {
        ++p;
    } while ('0' <= *p && *p <= '9');

    ptPos = (int32_t)(p - buffer);
    numDigits = (int32_t)(bytesWritten - ptPos - 1);

    /* if the number's string representation is in scientific notation, find */
    /* the exponent and take it into account*/
    exponent = 0;
    p = uprv_strchr(buffer, 'e');
    if (p != 0) {
        int16_t expPos = (int16_t)(p - buffer);
        numDigits -= bytesWritten - expPos;
        exponent = (int32_t)(atol(p + 1));
    }

    /* the string representation may still have spurious decimal digits in it, */
    /* so we cut off at the ninth digit to the right of the decimal, and have */
    /* to search backward from there to the first non-zero digit*/
    if (numDigits > 9) {
        numDigits = 9;
        while (numDigits > 0 && buffer[ptPos + numDigits] == '0')
            --numDigits;
    }
    numDigits -= exponent;
    if (numDigits < 0) {
        return 0;
    }
    return numDigits;
}
#endif
531
532 /*---------------------------------------------------------------------------
533 Platform-specific Implementations
534 Try these, and if they don't work on your platform, then special case your
535 platform with new implementations.
536 ---------------------------------------------------------------------------*/
537
538 /* Generic time zone layer -------------------------------------------------- */
539
540 /* Time zone utilities */
541 U_CAPI void U_EXPORT2
542 uprv_tzset()
543 {
544 #ifdef U_TZSET
545 U_TZSET();
546 #else
547 /* no initialization*/
548 #endif
549 }
550
551 U_CAPI int32_t U_EXPORT2
552 uprv_timezone()
553 {
554 #ifdef U_TIMEZONE
555 return U_TIMEZONE;
556 #else
557 time_t t, t1, t2;
558 struct tm tmrec;
559 UBool dst_checked;
560 int32_t tdiff = 0;
561
562 time(&t);
563 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
564 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
565 t1 = mktime(&tmrec); /* local time in seconds*/
566 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
567 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
568 tdiff = t2 - t1;
569 /* imitate NT behaviour, which returns same timezone offset to GMT for
570 winter and summer*/
571 if (dst_checked)
572 tdiff += 3600;
573 return tdiff;
574 #endif
575 }
576
577 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
578 some platforms need to have it declared here. */
579
580 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
581 /* RS6000 and others reject char **tzname. */
582 extern U_IMPORT char *U_TZNAME[];
583 #endif
584
#if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
/* These platforms are likely to use Olson timezone IDs. */
#define CHECK_LOCALTIME_LINK 1
#include <tzfile.h>
/* Directory prefix that is compared against the target of the TZDEFAULT link. */
#define TZZONEINFO (TZDIR "/")
/* Storage for the link target read by uprv_tzname(). */
static char gTimeZoneBuffer[PATH_MAX];
/* Points into gTimeZoneBuffer once uprv_tzname() has found an ID; NULL until then. */
static char *gTimeZoneBufferPtr = NULL;
#endif
593
594 #ifndef U_WINDOWS
595 #define isNonDigit(ch) (ch < '0' || '9' < ch)
596 static UBool isValidOlsonID(const char *id) {
597 int32_t idx = 0;
598
599 /* Determine if this is something like Iceland (Olson ID)
600 or AST4ADT (non-Olson ID) */
601 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
602 idx++;
603 }
604
605 /* If we went through the whole string, then it might be okay.
606 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
607 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
608 The rest of the time it could be an Olson ID. George */
609 return (UBool)(id[idx] == 0
610 || uprv_strcmp(id, "PST8PDT") == 0
611 || uprv_strcmp(id, "MST7MDT") == 0
612 || uprv_strcmp(id, "CST6CDT") == 0
613 || uprv_strcmp(id, "EST5EDT") == 0);
614 }
615 #endif
616
617 U_CAPI const char* U_EXPORT2
618 uprv_tzname(int n)
619 {
620 #ifdef U_WINDOWS
621 const char *id = uprv_detectWindowsTimeZone();
622
623 if (id != NULL) {
624 return id;
625 }
626 #else
627 const char *tzenv = NULL;
628
629 /*#if defined(U_DARWIN)
630 int ret;
631
632 tzenv = getenv("TZFILE");
633 if (tzenv != NULL) {
634 return tzenv;
635 }
636 #endif*/
637
638 tzenv = getenv("TZ");
639 if (tzenv != NULL && isValidOlsonID(tzenv))
640 {
641 /* This might be a good Olson ID. */
642 if (uprv_strncmp(tzenv, "posix/", 6) == 0
643 || uprv_strncmp(tzenv, "right/", 6) == 0)
644 {
645 /* Remove the posix/ or right/ prefix. */
646 tzenv += 6;
647 }
648 return tzenv;
649 }
650 /* else U_TZNAME will give a better result. */
651
652 #if defined(CHECK_LOCALTIME_LINK)
653 /* Caller must handle threading issues */
654 if (gTimeZoneBufferPtr == NULL) {
655 /*
656 This is a trick to look at the name of the link to get the Olson ID
657 because the tzfile contents is underspecified.
658 This isn't guaranteed to work because it may not be a symlink.
659 */
660 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
661 if (0 < ret) {
662 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
663 gTimeZoneBuffer[ret] = 0;
664 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
665 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
666 {
667 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
668 }
669 }
670 }
671 else {
672 return gTimeZoneBufferPtr;
673 }
674 #endif
675 #endif
676
677 #ifdef U_TZNAME
678 /*
679 U_TZNAME is usually a non-unique abbreviation,
680 which isn't normally usable.
681 */
682 return U_TZNAME[n];
683 #else
684 return "";
685 #endif
686 }
687
688 /* Get and set the ICU data directory --------------------------------------- */
689
/*
 * Current ICU data directory: NULL until it is first set, otherwise either
 * the string literal "" or a heap copy made by u_setDataDirectory().
 */
static char *gDataDirectory = NULL;
#if U_POSIX_LOCALE
/* Heap allocated; released by putil_cleanup(). */
static char *gCorrectedPOSIXLocale = NULL;
#endif
694
695 static UBool U_CALLCONV putil_cleanup(void)
696 {
697 if (gDataDirectory && *gDataDirectory) {
698 uprv_free(gDataDirectory);
699 }
700 gDataDirectory = NULL;
701 #if U_POSIX_LOCALE
702 if (gCorrectedPOSIXLocale) {
703 uprv_free(gCorrectedPOSIXLocale);
704 gCorrectedPOSIXLocale = NULL;
705 }
706 #endif
707 return TRUE;
708 }
709
710 /*
711 * Set the data directory.
712 * Make a copy of the passed string, and set the global data dir to point to it.
713 * TODO: see bug #2849, regarding thread safety.
714 */
715 U_CAPI void U_EXPORT2
716 u_setDataDirectory(const char *directory) {
717 char *newDataDir;
718 int32_t length;
719
720 if(directory==NULL || *directory==0) {
721 /* A small optimization to prevent the malloc and copy when the
722 shared library is used, and this is a way to make sure that NULL
723 is never returned.
724 */
725 newDataDir = (char *)"";
726 }
727 else {
728 length=(int32_t)uprv_strlen(directory);
729 newDataDir = (char *)uprv_malloc(length + 2);
730 uprv_strcpy(newDataDir, directory);
731
732 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
733 {
734 char *p;
735 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
736 *p = U_FILE_SEP_CHAR;
737 }
738 }
739 #endif
740 }
741
742 umtx_lock(NULL);
743 if (gDataDirectory && *gDataDirectory) {
744 uprv_free(gDataDirectory);
745 }
746 gDataDirectory = newDataDir;
747 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
748 umtx_unlock(NULL);
749 }
750
751 U_CAPI UBool U_EXPORT2
752 uprv_pathIsAbsolute(const char *path)
753 {
754 if(!path || !*path) {
755 return FALSE;
756 }
757
758 if(*path == U_FILE_SEP_CHAR) {
759 return TRUE;
760 }
761
762 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
763 if(*path == U_FILE_ALT_SEP_CHAR) {
764 return TRUE;
765 }
766 #endif
767
768 #if defined(U_WINDOWS)
769 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
770 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
771 path[1] == ':' ) {
772 return TRUE;
773 }
774 #endif
775
776 return FALSE;
777 }
778
779 U_CAPI const char * U_EXPORT2
780 u_getDataDirectory(void) {
781 const char *path = NULL;
782
783 /* if we have the directory, then return it immediately */
784 umtx_lock(NULL);
785 path = gDataDirectory;
786 umtx_unlock(NULL);
787
788 if(path) {
789 return path;
790 }
791
792 /*
793 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
794 override ICU's data with the ICU_DATA environment variable. This prevents
795 problems where multiple custom copies of ICU's specific version of data
796 are installed on a system. Either the application must define the data
797 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
798 ICU, set the data with udata_setCommonData or trust that all of the
799 required data is contained in ICU's data library that contains
800 the entry point defined by U_ICUDATA_ENTRY_POINT.
801
802 There may also be some platforms where environment variables
803 are not allowed.
804 */
805 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
806 /* First try to get the environment variable */
807 path=getenv("ICU_DATA");
808 # endif
809
810 /* ICU_DATA_DIR may be set as a compile option */
811 # ifdef ICU_DATA_DIR
812 if(path==NULL || *path==0) {
813 path=ICU_DATA_DIR;
814 }
815 # endif
816
817 if(path==NULL) {
818 /* It looks really bad, set it to something. */
819 path = "";
820 }
821
822 u_setDataDirectory(path);
823 return gDataDirectory;
824 }
825
826
827
828
829
830 /* Macintosh-specific locale information ------------------------------------ */
831 #ifdef XP_MAC
832
/*
 * One row of the Macintosh locale table: four numeric codes and the POSIX
 * locale ID they map to.
 * NOTE(review): the precise meaning of each code is not visible in this
 * part of the file - confirm against the lookup code.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;   /* e.g. "en_US" */
} mac_lc_rec;
840
841 /* Todo: This will be updated with a newer version from www.unicode.org web
842 page when it's available.*/
843 #define MAC_LC_MAGIC_NUMBER -5
844 #define MAC_LC_INIT_NUMBER -9
845
846 static const mac_lc_rec mac_lc_recs[] = {
847 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
848 /* United States*/
849 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
850 /* France*/
851 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
852 /* Great Britain*/
853 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
854 /* Germany*/
855 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
856 /* Italy*/
857 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
858 /* Metherlands*/
859 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
860 /* French for Belgium or Lxembourg*/
861 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
862 /* Sweden*/
863 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
864 /* Denmark*/
865 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
866 /* Portugal*/
867 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
868 /* French Canada*/
869 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
870 /* Israel*/
871 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
872 /* Japan*/
873 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
874 /* Australia*/
875 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
876 /* the Arabic world (?)*/
877 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
878 /* Finland*/
879 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
880 /* French for Switzerland*/
881 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
882 /* German for Switzerland*/
883 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
884 /* Greece*/
885 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
886 /* Iceland ===*/
887 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
888 /* Malta ===*/
889 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
890 /* Cyprus ===*/
891 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
892 /* Turkey ===*/
893 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
894 /* Croatian system for Yugoslavia*/
895 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
896 /* Hindi system for India*/
897 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
898 /* Pakistan*/
899 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
900 /* Lithuania*/
901 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
902 /* Poland*/
903 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
904 /* Hungary*/
905 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
906 /* Estonia*/
907 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
908 /* Latvia*/
909 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
910 /* Lapland [Ask Rich for the data. HS]*/
911 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
912 /* Faeroe Islands*/
913 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
914 /* Iran*/
915 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
916 /* Russia*/
917 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
918 /* Ireland*/
919 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
920 /* Korea*/
921 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
922 /* People's Republic of China*/
923 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
924 /* Taiwan*/
925 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
926 /* Thailand*/
927
928 /* fallback is en_US*/
929 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
930 MAC_LC_MAGIC_NUMBER, "en_US"
931 };
932
933 #endif
934
#if U_POSIX_LOCALE
/*
 * Return just the POSIX id, whatever happens to be in it.
 * The value is worked out on the first call and kept in a function-static
 * pointer; later calls return that pointer.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;

    if (posixID != 0) {
        /* Already determined by an earlier call. */
        return posixID;
    }

    /*
     * Ask libc first, because an application can set the locale itself.
     * On Solaris two different calls to setlocale can result in different
     * values, so the value is only fetched once.
     *
     * LC_ALL can't be used because it's platform dependent. The LANG
     * environment variable seems to affect LC_CTYPE variable by default.
     * Here is what setlocale(LC_ALL, NULL) can return.
     *   HPUX can return 'C C C C C C C'
     *   Solaris can return /en_US/C/C/C/C/C on the second try.
     *   Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
     *
     * The default codepage detection also needs to use LC_CTYPE.
     *
     * Do not call setlocale(LC_*, "")! Using an empty string instead
     * of NULL, will modify the libc behavior.
     */
    posixID = setlocale(LC_CTYPE, NULL);

    if ((posixID == 0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Maybe we got some garbage. Try the environment, most specific first. */
        posixID = getenv("LC_ALL");
        if (posixID == 0) {
            posixID = getenv("LC_CTYPE");
        }
        if (posixID == 0) {
            posixID = getenv("LANG");
        }
    }

    if ((posixID == 0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked. Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
    }

    return posixID;
}
#endif
986
987 /* NOTE: The caller should handle thread safety */
988 U_CAPI const char* U_EXPORT2
989 uprv_getDefaultLocaleID()
990 {
991 #if U_POSIX_LOCALE
992 /*
993 Note that: (a '!' means the ID is improper somehow)
994 LC_ALL ----> default_loc codepage
995 --------------------------------------------------------
996 ab.CD ab CD
997 ab@CD ab__CD -
998 ab@CD.EF ab__CD EF
999
1000 ab_CD.EF@GH ab_CD_GH EF
1001
1002 Some 'improper' ways to do the same as above:
1003 ! ab_CD@GH.EF ab_CD_GH EF
1004 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1005 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1006
1007 _CD@GH _CD_GH -
1008 _CD.EF@GH _CD_GH EF
1009
1010 The variant cannot have dots in it.
1011 The 'rightmost' variant (@xxx) wins.
1012 The leftmost codepage (.xxx) wins.
1013 */
1014 char *correctedPOSIXLocale = 0;
1015 const char* posixID = uprv_getPOSIXID();
1016 const char *p;
1017 const char *q;
1018 int32_t len;
1019
1020 /* Format: (no spaces)
1021 ll [ _CC ] [ . MM ] [ @ VV]
1022
1023 l = lang, C = ctry, M = charmap, V = variant
1024 */
1025
1026 if (gCorrectedPOSIXLocale != NULL) {
1027 return gCorrectedPOSIXLocale;
1028 }
1029
1030 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1031 /* assume new locale can't be larger than old one? */
1032 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1033 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1034 correctedPOSIXLocale[p-posixID] = 0;
1035
1036 /* do not copy after the @ */
1037 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1038 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1039 }
1040 }
1041
1042 /* Note that we scan the *uncorrected* ID. */
1043 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1044 if (correctedPOSIXLocale == NULL) {
1045 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1046 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1047 correctedPOSIXLocale[p-posixID] = 0;
1048 }
1049 p++;
1050
1051 /* Take care of any special cases here.. */
1052 if (!uprv_strcmp(p, "nynorsk")) {
1053 p = "NY";
1054 /* Don't worry about no__NY. In practice, it won't appear. */
1055 }
1056
1057 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1058 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1059 }
1060 else {
1061 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1062 }
1063
1064 if ((q = uprv_strchr(p, '.')) != NULL) {
1065 /* How big will the resulting string be? */
1066 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1067 uprv_strncat(correctedPOSIXLocale, p, q-p);
1068 correctedPOSIXLocale[len] = 0;
1069 }
1070 else {
1071 /* Anything following the @ sign */
1072 uprv_strcat(correctedPOSIXLocale, p);
1073 }
1074
1075 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1076 * How about 'russian' -> 'ru'?
1077 * Many of the other locales using ISO codes will be handled by the
1078 * canonicalization functions in uloc_getDefault.
1079 */
1080 }
1081
1082 /* Was a correction made? */
1083 if (correctedPOSIXLocale != NULL) {
1084 posixID = correctedPOSIXLocale;
1085 }
1086 else {
1087 /* copy it, just in case the original pointer goes away. See j2395 */
1088 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1089 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1090 }
1091
1092 if (gCorrectedPOSIXLocale == NULL) {
1093 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1094 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1095 correctedPOSIXLocale = NULL;
1096 }
1097
1098 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1099 uprv_free(correctedPOSIXLocale);
1100 }
1101
1102 return posixID;
1103
1104 #elif defined(U_WINDOWS)
1105 UErrorCode status = U_ZERO_ERROR;
1106 LCID id = GetThreadLocale();
1107 const char* locID = uprv_convertToPosix(id, &status);
1108
1109 if (U_FAILURE(status)) {
1110 locID = "en_US";
1111 }
1112 return locID;
1113
1114 #elif defined(XP_MAC)
1115 int32_t script = MAC_LC_INIT_NUMBER;
1116 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1117 int32_t region = MAC_LC_INIT_NUMBER;
1118 /* = GetScriptManagerVariable(smRegionCode);*/
1119 int32_t lang = MAC_LC_INIT_NUMBER;
1120 /* = GetScriptManagerVariable(smScriptLang);*/
1121 int32_t date_region = MAC_LC_INIT_NUMBER;
1122 const char* posixID = 0;
1123 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1124 int32_t i;
1125 Intl1Hndl ih;
1126
1127 ih = (Intl1Hndl) GetIntlResource(1);
1128 if (ih)
1129 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1130
1131 for (i = 0; i < count; i++) {
1132 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1133 || (mac_lc_recs[i].script == script))
1134 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1135 || (mac_lc_recs[i].region == region))
1136 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1137 || (mac_lc_recs[i].lang == lang))
1138 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1139 || (mac_lc_recs[i].date_region == date_region))
1140 )
1141 {
1142 posixID = mac_lc_recs[i].posixID;
1143 break;
1144 }
1145 }
1146
1147 return posixID;
1148
1149 #elif defined(OS400)
1150 /* locales are process scoped and are by definition thread safe */
1151 static char correctedLocale[64];
1152 const char *localeID = getenv("LC_ALL");
1153 char *p;
1154
1155 if (localeID == NULL)
1156 localeID = getenv("LANG");
1157 if (localeID == NULL)
1158 localeID = setlocale(LC_ALL, NULL);
1159 /* Make sure we have something... */
1160 if (localeID == NULL)
1161 return "en_US_POSIX";
1162
1163 /* Extract the locale name from the path. */
1164 if((p = uprv_strrchr(localeID, '/')) != NULL)
1165 {
1166 /* Increment p to start of locale name. */
1167 p++;
1168 localeID = p;
1169 }
1170
1171 /* Copy to work location. */
1172 uprv_strcpy(correctedLocale, localeID);
1173
1174 /* Strip off the '.locale' extension. */
1175 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1176 *p = 0;
1177 }
1178
1179 /* Upper case the locale name. */
1180 T_CString_toUpperCase(correctedLocale);
1181
1182 /* See if we are using the POSIX locale. Any of the
1183 * following are equivalent and use the same QLGPGCMA
1184 * (POSIX) locale.
1185 * QLGPGCMA2 means UCS2
1186 * QLGPGCMA_4 means UTF-32
1187 * QLGPGCMA_8 means UTF-8
1188 */
1189 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1190 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1191 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1192 {
1193 uprv_strcpy(correctedLocale, "en_US_POSIX");
1194 }
1195 else
1196 {
1197 int16_t LocaleLen;
1198
1199 /* Lower case the lang portion. */
1200 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1201 {
1202 *p = uprv_tolower(*p);
1203 }
1204
1205 /* Adjust for Euro. After '_E' add 'URO'. */
1206 LocaleLen = uprv_strlen(correctedLocale);
1207 if (correctedLocale[LocaleLen - 2] == '_' &&
1208 correctedLocale[LocaleLen - 1] == 'E')
1209 {
1210 uprv_strcat(correctedLocale, "URO");
1211 }
1212
1213 /* If using Lotus-based locale then convert to
1214 * equivalent non Lotus.
1215 */
1216 else if (correctedLocale[LocaleLen - 2] == '_' &&
1217 correctedLocale[LocaleLen - 1] == 'L')
1218 {
1219 correctedLocale[LocaleLen - 2] = 0;
1220 }
1221
1222 /* There are separate simplified and traditional
1223 * locales called zh_HK_S and zh_HK_T.
1224 */
1225 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1226 {
1227 uprv_strcpy(correctedLocale, "zh_HK");
1228 }
1229
1230 /* A special zh_CN_GBK locale...
1231 */
1232 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1233 {
1234 uprv_strcpy(correctedLocale, "zh_CN");
1235 }
1236
1237 }
1238
1239 return correctedLocale;
1240 #endif
1241
1242 }
1243
1244 #if U_POSIX_LOCALE
/*
 * Some platforms report a charset name when they really mean a different
 * charset.  Translate such names into ones that are compatible with ICU.
 *
 * locale  locale the name belongs to; NULL and "" are treated alike
 * name    charset name as reported by the platform, may be NULL
 *
 * Returns the (possibly substituted) name, or NULL when name is NULL or
 * ends up empty.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* An empty locale string counts as "no locale". */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }

    if (name == NULL) {
        return NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    /* Solaris underspecifies the "EUC" name; pick the variant by locale. */
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
         * Neither a locale nor a charset name is available, which usually
         * means nl_langinfo didn't return valid information.
         * Mac OS X uses UTF-8 by default (especially the locale data and console).
         */
        name = "UTF-8";
    }
#endif

    /* An empty name is reported as NULL. */
    return (name[0] == 0) ? NULL : name;
}
1297
1298 static const char*
1299 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1300 {
1301 char localeBuf[100];
1302 const char *name = NULL;
1303 char *variant = NULL;
1304
1305 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1306 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1307 uprv_strncpy(localeBuf, localeName, localeCapacity);
1308 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1309 name = uprv_strncpy(buffer, name+1, buffCapacity);
1310 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1311 if ((variant = (uprv_strchr(name, '@'))) != NULL) {
1312 *variant = 0;
1313 }
1314 name = remapPlatformDependentCodepage(localeBuf, name);
1315 }
1316 return name;
1317 }
1318 #endif
1319
/*
 * Determines the name of the platform's default codepage.
 *
 * Every branch returns either a string literal or a pointer into a
 * function-local static buffer, so the result must not be freed and is
 * overwritten by the next call.  On platforms that match none of the
 * special cases the function returns "US-ASCII".
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    /*
     * QUSRJOBI takes fixed-width, blank-padded character fields: a CHAR(26)
     * qualified job name ('*' selects the current job) and a CHAR(16)
     * internal job identifier.  Build them explicitly so that the API never
     * reads past the end of a shorter string literal.
     */
    char qualifiedJobName[26];
    char internalJobId[16];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    uprv_memset(qualifiedJobName, ' ', sizeof(qualifiedJobName));
    qualifiedJobName[0] = '*';
    uprv_memset(internalJobId, ' ', sizeof(internalJobId));

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
                       qualifiedJobName, internalJobId, &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* The cast makes the argument type match the %d conversion. */
    sprintf(codepage,"ibm-%d", (int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* NOTE(review): unbounded %s - assumes nl_langinfo(CODESET) plus the
       option string fits into 64 bytes; confirm. */
    sprintf(codepage,"%s" UCNV_SWAP_LFNL_OPTION_STRING, nl_langinfo(CODESET));
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* The cast makes the argument type match the %d conversion. */
    sprintf(codepage, "windows-%d", (int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    /* Discard anything getCodepageFromPOSIXID left in the buffer. */
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1403
1404
1405 U_CAPI const char* U_EXPORT2
1406 uprv_getDefaultCodepage()
1407 {
1408 static char const *name = NULL;
1409 umtx_lock(NULL);
1410 if (name == NULL) {
1411 name = int_getDefaultCodepage();
1412 }
1413 umtx_unlock(NULL);
1414 return name;
1415 }
1416
1417
1418 /* end of platform-specific implementation -------------- */
1419
1420 /* version handling --------------------------------------------------------- */
1421
1422 U_CAPI void U_EXPORT2
1423 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1424 char *end;
1425 uint16_t part=0;
1426
1427 if(versionArray==NULL) {
1428 return;
1429 }
1430
1431 if(versionString!=NULL) {
1432 for(;;) {
1433 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1434 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1435 break;
1436 }
1437 versionString=end+1;
1438 }
1439 }
1440
1441 while(part<U_MAX_VERSION_LENGTH) {
1442 versionArray[part++]=0;
1443 }
1444 }
1445
1446 U_CAPI void U_EXPORT2
1447 u_versionToString(UVersionInfo versionArray, char *versionString) {
1448 uint16_t count, part;
1449 uint8_t field;
1450
1451 if(versionString==NULL) {
1452 return;
1453 }
1454
1455 if(versionArray==NULL) {
1456 versionString[0]=0;
1457 return;
1458 }
1459
1460 /* count how many fields need to be written */
1461 for(count=4; count>0 && versionArray[count-1]==0; --count) {
1462 }
1463
1464 if(count <= 1) {
1465 count = 2;
1466 }
1467
1468 /* write the first part */
1469 /* write the decimal field value */
1470 field=versionArray[0];
1471 if(field>=100) {
1472 *versionString++=(char)('0'+field/100);
1473 field%=100;
1474 }
1475 if(field>=10) {
1476 *versionString++=(char)('0'+field/10);
1477 field%=10;
1478 }
1479 *versionString++=(char)('0'+field);
1480
1481 /* write the following parts */
1482 for(part=1; part<count; ++part) {
1483 /* write a dot first */
1484 *versionString++=U_VERSION_DELIMITER;
1485
1486 /* write the decimal field value */
1487 field=versionArray[part];
1488 if(field>=100) {
1489 *versionString++=(char)('0'+field/100);
1490 field%=100;
1491 }
1492 if(field>=10) {
1493 *versionString++=(char)('0'+field/10);
1494 field%=10;
1495 }
1496 *versionString++=(char)('0'+field);
1497 }
1498
1499 /* NUL-terminate */
1500 *versionString=0;
1501 }
1502
1503 U_CAPI void U_EXPORT2
1504 u_getVersion(UVersionInfo versionArray) {
1505 u_versionFromString(versionArray, U_ICU_VERSION);
1506 }
1507
1508 /*
1509 * Hey, Emacs, please set the following:
1510 *
1511 * Local Variables:
1512 * indent-tabs-mode: nil
1513 * End:
1514 *
1515 */