]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/locid.cpp
eecebf47b1e4bd165d16538ed47d452fca4e4919
[apple/icu.git] / icuSources / common / locid.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File locid.cpp
10 *
11 * Created by: Richard Gillam
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17 * methods to get and set it.
18 * 04/02/97 aliu Made operator!= inline; fixed return value
19 * of getName().
20 * 04/15/97 aliu Cleanup for AIX/Win32.
21 * 04/24/97 aliu Numerous changes per code review.
22 * 08/18/98 stephen Changed getDisplayName()
23 * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24 * Added getISOCountries(), getISOLanguages(),
25 * getLanguagesForCountry()
26 * 03/16/99 bertrand rehaul.
27 * 07/21/99 stephen Added U_CFUNC setDefault
28 * 11/09/99 weiv Added const char * getName() const;
29 * 04/12/00 srl removing unicodestring api's and cached hash code
30 * 08/10/01 grhoten Change the static Locales to accessor functions
31 ******************************************************************************
32 */
33
34 #include <utility>
35
36 #include "unicode/bytestream.h"
37 #include "unicode/locid.h"
38 #include "unicode/strenum.h"
39 #include "unicode/stringpiece.h"
40 #include "unicode/uloc.h"
41
42 #include "bytesinkutil.h"
43 #include "charstr.h"
44 #include "cmemory.h"
45 #include "cstring.h"
46 #include "mutex.h"
47 #include "putilimp.h"
48 #include "uassert.h"
49 #include "ucln_cmn.h"
50 #include "uhash.h"
51 #include "ulocimp.h"
52 #include "umutex.h"
53 #include "ustr_imp.h"
54
55 U_CDECL_BEGIN
56 static UBool U_CALLCONV locale_cleanup(void);
57 U_CDECL_END
58
59 U_NAMESPACE_BEGIN
60
61 static Locale *gLocaleCache = NULL;
62 static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
63
64 // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
65 static UMutex *gDefaultLocaleMutex() {
66 static UMutex *m = STATIC_NEW(UMutex);
67 return m;
68 }
69 static UHashtable *gDefaultLocalesHashT = NULL;
70 static Locale *gDefaultLocale = NULL;
71
72 /**
73 * \def ULOC_STRING_LIMIT
74 * strings beyond this value crash in CharString
75 */
76 #define ULOC_STRING_LIMIT 357913941
77
78 U_NAMESPACE_END
79
// Slot numbers of the predefined Locale objects stored in gLocaleCache.
enum ELocalePos {
    // Language-only entries.
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    // Language + country entries.
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,      /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,
    eROOT,

    //eDEFAULT,
    eMAX_LOCALES  // number of slots; used as the cache array length
};
106
107 U_CFUNC int32_t locale_getKeywords(const char *localeID,
108 char prev,
109 char *keywords, int32_t keywordCapacity,
110 char *values, int32_t valuesCapacity, int32_t *valLen,
111 UBool valuesToo,
112 UErrorCode *status);
113
114 U_CDECL_BEGIN
115 //
116 // Deleter function for Locales owned by the default Locale hash table/
117 //
118 static void U_CALLCONV
119 deleteLocale(void *obj) {
120 delete (icu::Locale *) obj;
121 }
122
123 static UBool U_CALLCONV locale_cleanup(void)
124 {
125 U_NAMESPACE_USE
126
127 delete [] gLocaleCache;
128 gLocaleCache = NULL;
129 gLocaleCacheInitOnce.reset();
130
131 if (gDefaultLocalesHashT) {
132 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
133 gDefaultLocalesHashT = NULL;
134 }
135 gDefaultLocale = NULL;
136 return TRUE;
137 }
138
139
140 static void U_CALLCONV locale_init(UErrorCode &status) {
141 U_NAMESPACE_USE
142
143 U_ASSERT(gLocaleCache == NULL);
144 gLocaleCache = new Locale[(int)eMAX_LOCALES];
145 if (gLocaleCache == NULL) {
146 status = U_MEMORY_ALLOCATION_ERROR;
147 return;
148 }
149 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
150 gLocaleCache[eROOT] = Locale("");
151 gLocaleCache[eENGLISH] = Locale("en");
152 gLocaleCache[eFRENCH] = Locale("fr");
153 gLocaleCache[eGERMAN] = Locale("de");
154 gLocaleCache[eITALIAN] = Locale("it");
155 gLocaleCache[eJAPANESE] = Locale("ja");
156 gLocaleCache[eKOREAN] = Locale("ko");
157 gLocaleCache[eCHINESE] = Locale("zh");
158 gLocaleCache[eFRANCE] = Locale("fr", "FR");
159 gLocaleCache[eGERMANY] = Locale("de", "DE");
160 gLocaleCache[eITALY] = Locale("it", "IT");
161 gLocaleCache[eJAPAN] = Locale("ja", "JP");
162 gLocaleCache[eKOREA] = Locale("ko", "KR");
163 gLocaleCache[eCHINA] = Locale("zh", "CN");
164 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
165 gLocaleCache[eUK] = Locale("en", "GB");
166 gLocaleCache[eUS] = Locale("en", "US");
167 gLocaleCache[eCANADA] = Locale("en", "CA");
168 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
169 }
170
171 U_CDECL_END
172
173 U_NAMESPACE_BEGIN
174
175 Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
176 // Synchronize this entire function.
177 Mutex lock(gDefaultLocaleMutex());
178
179 UBool canonicalize = FALSE;
180
181 // If given a NULL string for the locale id, grab the default
182 // name from the system.
183 // (Different from most other locale APIs, where a null name means use
184 // the current ICU default locale.)
185 if (id == NULL) {
186 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
187 canonicalize = TRUE; // always canonicalize host ID
188 }
189
190 char localeNameBuf[512];
191
192 if (canonicalize) {
193 uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
194 } else {
195 uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
196 }
197 localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of
198 // a long name filling the buffer.
199 // (long names are truncated.)
200 //
201 if (U_FAILURE(status)) {
202 return gDefaultLocale;
203 }
204
205 if (gDefaultLocalesHashT == NULL) {
206 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
207 if (U_FAILURE(status)) {
208 return gDefaultLocale;
209 }
210 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
211 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
212 }
213
214 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
215 if (newDefault == NULL) {
216 newDefault = new Locale(Locale::eBOGUS);
217 if (newDefault == NULL) {
218 status = U_MEMORY_ALLOCATION_ERROR;
219 return gDefaultLocale;
220 }
221 newDefault->init(localeNameBuf, FALSE);
222 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
223 if (U_FAILURE(status)) {
224 return gDefaultLocale;
225 }
226 }
227 gDefaultLocale = newDefault;
228 return gDefaultLocale;
229 }
230
231 U_NAMESPACE_END
232
233 /* sfb 07/21/99 */
234 U_CFUNC void
235 locale_set_default(const char *id)
236 {
237 U_NAMESPACE_USE
238 UErrorCode status = U_ZERO_ERROR;
239 locale_set_default_internal(id, status);
240 }
241 /* end */
242
243 U_CFUNC const char *
244 locale_get_default(void)
245 {
246 U_NAMESPACE_USE
247 return Locale::getDefault().getName();
248 }
249
250
251 U_NAMESPACE_BEGIN
252
253 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
254
255 /*Character separating the posix id fields*/
256 // '_'
257 // In the platform codepage.
258 #define SEP_CHAR '_'
259
260 Locale::~Locale()
261 {
262 if (baseName != fullName) {
263 uprv_free(baseName);
264 }
265 baseName = NULL;
266 /*if fullName is on the heap, we free it*/
267 if (fullName != fullNameBuffer)
268 {
269 uprv_free(fullName);
270 fullName = NULL;
271 }
272 }
273
274 Locale::Locale()
275 : UObject(), fullName(fullNameBuffer), baseName(NULL)
276 {
277 init(NULL, FALSE);
278 }
279
280 /*
281 * Internal constructor to allow construction of a locale object with
282 * NO side effects. (Default constructor tries to get
283 * the default locale.)
284 */
285 Locale::Locale(Locale::ELocaleType)
286 : UObject(), fullName(fullNameBuffer), baseName(NULL)
287 {
288 setToBogus();
289 }
290
291
292 Locale::Locale( const char * newLanguage,
293 const char * newCountry,
294 const char * newVariant,
295 const char * newKeywords)
296 : UObject(), fullName(fullNameBuffer), baseName(NULL)
297 {
298 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
299 {
300 init(NULL, FALSE); /* shortcut */
301 }
302 else
303 {
304 UErrorCode status = U_ZERO_ERROR;
305 int32_t size = 0;
306 int32_t lsize = 0;
307 int32_t csize = 0;
308 int32_t vsize = 0;
309 int32_t ksize = 0;
310
311 // Calculate the size of the resulting string.
312
313 // Language
314 if ( newLanguage != NULL )
315 {
316 lsize = (int32_t)uprv_strlen(newLanguage);
317 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
318 setToBogus();
319 return;
320 }
321 size = lsize;
322 }
323
324 CharString togo(newLanguage, lsize, status); // start with newLanguage
325
326 // _Country
327 if ( newCountry != NULL )
328 {
329 csize = (int32_t)uprv_strlen(newCountry);
330 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
331 setToBogus();
332 return;
333 }
334 size += csize;
335 }
336
337 // _Variant
338 if ( newVariant != NULL )
339 {
340 // remove leading _'s
341 while(newVariant[0] == SEP_CHAR)
342 {
343 newVariant++;
344 }
345
346 // remove trailing _'s
347 vsize = (int32_t)uprv_strlen(newVariant);
348 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
349 setToBogus();
350 return;
351 }
352 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
353 {
354 vsize--;
355 }
356 }
357
358 if( vsize > 0 )
359 {
360 size += vsize;
361 }
362
363 // Separator rules:
364 if ( vsize > 0 )
365 {
366 size += 2; // at least: __v
367 }
368 else if ( csize > 0 )
369 {
370 size += 1; // at least: _v
371 }
372
373 if ( newKeywords != NULL)
374 {
375 ksize = (int32_t)uprv_strlen(newKeywords);
376 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
377 setToBogus();
378 return;
379 }
380 size += ksize + 1;
381 }
382
383 // NOW we have the full locale string..
384 // Now, copy it back.
385
386 // newLanguage is already copied
387
388 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
389 { // ^
390 togo.append(SEP_CHAR, status);
391 }
392
393 if ( csize != 0 )
394 {
395 togo.append(newCountry, status);
396 }
397
398 if ( vsize != 0)
399 {
400 togo.append(SEP_CHAR, status)
401 .append(newVariant, vsize, status);
402 }
403
404 if ( ksize != 0)
405 {
406 if (uprv_strchr(newKeywords, '=')) {
407 togo.append('@', status); /* keyword parsing */
408 }
409 else {
410 togo.append('_', status); /* Variant parsing with a script */
411 if ( vsize == 0) {
412 togo.append('_', status); /* No country found */
413 }
414 }
415 togo.append(newKeywords, status);
416 }
417
418 if (U_FAILURE(status)) {
419 // Something went wrong with appending, etc.
420 setToBogus();
421 return;
422 }
423 // Parse it, because for example 'language' might really be a complete
424 // string.
425 init(togo.data(), FALSE);
426 }
427 }
428
429 Locale::Locale(const Locale &other)
430 : UObject(other), fullName(fullNameBuffer), baseName(NULL)
431 {
432 *this = other;
433 }
434
435 Locale::Locale(Locale&& other) U_NOEXCEPT
436 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
437 *this = std::move(other);
438 }
439
440 Locale& Locale::operator=(const Locale& other) {
441 if (this == &other) {
442 return *this;
443 }
444
445 setToBogus();
446
447 if (other.fullName == other.fullNameBuffer) {
448 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
449 } else if (other.fullName == nullptr) {
450 fullName = nullptr;
451 } else {
452 fullName = uprv_strdup(other.fullName);
453 if (fullName == nullptr) return *this;
454 }
455
456 if (other.baseName == other.fullName) {
457 baseName = fullName;
458 } else if (other.baseName != nullptr) {
459 baseName = uprv_strdup(other.baseName);
460 if (baseName == nullptr) return *this;
461 }
462
463 uprv_strcpy(language, other.language);
464 uprv_strcpy(script, other.script);
465 uprv_strcpy(country, other.country);
466
467 variantBegin = other.variantBegin;
468 fIsBogus = other.fIsBogus;
469
470 return *this;
471 }
472
473 Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
474 if (baseName != fullName) uprv_free(baseName);
475 if (fullName != fullNameBuffer) uprv_free(fullName);
476
477 if (other.fullName == other.fullNameBuffer) {
478 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
479 fullName = fullNameBuffer;
480 } else {
481 fullName = other.fullName;
482 }
483
484 if (other.baseName == other.fullName) {
485 baseName = fullName;
486 } else {
487 baseName = other.baseName;
488 }
489
490 uprv_strcpy(language, other.language);
491 uprv_strcpy(script, other.script);
492 uprv_strcpy(country, other.country);
493
494 variantBegin = other.variantBegin;
495 fIsBogus = other.fIsBogus;
496
497 other.baseName = other.fullName = other.fullNameBuffer;
498
499 return *this;
500 }
501
502 Locale *
503 Locale::clone() const {
504 return new Locale(*this);
505 }
506
507 UBool
508 Locale::operator==( const Locale& other) const
509 {
510 return (uprv_strcmp(other.fullName, fullName) == 0);
511 }
512
513 #define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
514
515 /*This function initializes a Locale from a C locale ID*/
516 Locale& Locale::init(const char* localeID, UBool canonicalize)
517 {
518 fIsBogus = FALSE;
519 /* Free our current storage */
520 if (baseName != fullName) {
521 uprv_free(baseName);
522 }
523 baseName = NULL;
524 if(fullName != fullNameBuffer) {
525 uprv_free(fullName);
526 fullName = fullNameBuffer;
527 }
528
529 // not a loop:
530 // just an easy way to have a common error-exit
531 // without goto and without another function
532 do {
533 char *separator;
534 char *field[5] = {0};
535 int32_t fieldLen[5] = {0};
536 int32_t fieldIdx;
537 int32_t variantField;
538 int32_t length;
539 UErrorCode err;
540
541 if(localeID == NULL) {
542 // not an error, just set the default locale
543 return *this = getDefault();
544 }
545
546 /* preset all fields to empty */
547 language[0] = script[0] = country[0] = 0;
548
549 // "canonicalize" the locale ID to ICU/Java format
550 err = U_ZERO_ERROR;
551 length = canonicalize ?
552 uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
553 uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
554
555 if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
556 /*Go to heap for the fullName if necessary*/
557 fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
558 if(fullName == 0) {
559 fullName = fullNameBuffer;
560 break; // error: out of memory
561 }
562 err = U_ZERO_ERROR;
563 length = canonicalize ?
564 uloc_canonicalize(localeID, fullName, length+1, &err) :
565 uloc_getName(localeID, fullName, length+1, &err);
566 }
567 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
568 /* should never occur */
569 break;
570 }
571
572 variantBegin = length;
573
574 /* after uloc_getName/canonicalize() we know that only '_' are separators */
575 /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
576 separator = field[0] = fullName;
577 fieldIdx = 1;
578 char* at = uprv_strchr(fullName, '@');
579 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
580 fieldIdx < UPRV_LENGTHOF(field)-1 &&
581 (at == nullptr || separator < at)) {
582 field[fieldIdx] = separator + 1;
583 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
584 fieldIdx++;
585 }
586 // variant may contain @foo or .foo POSIX cruft; remove it
587 separator = uprv_strchr(field[fieldIdx-1], '@');
588 char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
589 if (separator!=NULL || sep2!=NULL) {
590 if (separator==NULL || (sep2!=NULL && separator > sep2)) {
591 separator = sep2;
592 }
593 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
594 } else {
595 fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
596 }
597
598 if (fieldLen[0] >= (int32_t)(sizeof(language)))
599 {
600 break; // error: the language field is too long
601 }
602
603 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
604 if (fieldLen[0] > 0) {
605 /* We have a language */
606 uprv_memcpy(language, fullName, fieldLen[0]);
607 language[fieldLen[0]] = 0;
608 }
609 if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
610 ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
611 ISASCIIALPHA(field[1][3])) {
612 /* We have at least a script */
613 uprv_memcpy(script, field[1], fieldLen[1]);
614 script[fieldLen[1]] = 0;
615 variantField++;
616 }
617
618 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
619 /* We have a country */
620 uprv_memcpy(country, field[variantField], fieldLen[variantField]);
621 country[fieldLen[variantField]] = 0;
622 variantField++;
623 } else if (fieldLen[variantField] == 0) {
624 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
625 }
626
627 if (fieldLen[variantField] > 0) {
628 /* We have a variant */
629 variantBegin = (int32_t)(field[variantField] - fullName);
630 }
631
632 err = U_ZERO_ERROR;
633 initBaseName(err);
634 if (U_FAILURE(err)) {
635 break;
636 }
637
638 // successful end of init()
639 return *this;
640 } while(0); /*loop doesn't iterate*/
641
642 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
643 setToBogus();
644
645 return *this;
646 }
647
648 /*
649 * Set up the base name.
650 * If there are no key words, it's exactly the full name.
651 * If key words exist, it's the full name truncated at the '@' character.
652 * Need to set up both at init() and after setting a keyword.
653 */
654 void
655 Locale::initBaseName(UErrorCode &status) {
656 if (U_FAILURE(status)) {
657 return;
658 }
659 U_ASSERT(baseName==NULL || baseName==fullName);
660 const char *atPtr = uprv_strchr(fullName, '@');
661 const char *eqPtr = uprv_strchr(fullName, '=');
662 if (atPtr && eqPtr && atPtr < eqPtr) {
663 // Key words exist.
664 int32_t baseNameLength = (int32_t)(atPtr - fullName);
665 baseName = (char *)uprv_malloc(baseNameLength + 1);
666 if (baseName == NULL) {
667 status = U_MEMORY_ALLOCATION_ERROR;
668 return;
669 }
670 uprv_strncpy(baseName, fullName, baseNameLength);
671 baseName[baseNameLength] = 0;
672
673 // The original computation of variantBegin leaves it equal to the length
674 // of fullName if there is no variant. It should instead be
675 // the length of the baseName.
676 if (variantBegin > baseNameLength) {
677 variantBegin = baseNameLength;
678 }
679 } else {
680 baseName = fullName;
681 }
682 }
683
684
685 int32_t
686 Locale::hashCode() const
687 {
688 return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
689 }
690
691 void
692 Locale::setToBogus() {
693 /* Free our current storage */
694 if(baseName != fullName) {
695 uprv_free(baseName);
696 }
697 baseName = NULL;
698 if(fullName != fullNameBuffer) {
699 uprv_free(fullName);
700 fullName = fullNameBuffer;
701 }
702 *fullNameBuffer = 0;
703 *language = 0;
704 *script = 0;
705 *country = 0;
706 fIsBogus = TRUE;
707 variantBegin = 0;
708 }
709
710 const Locale& U_EXPORT2
711 Locale::getDefault()
712 {
713 {
714 Mutex lock(gDefaultLocaleMutex());
715 if (gDefaultLocale != NULL) {
716 return *gDefaultLocale;
717 }
718 }
719 UErrorCode status = U_ZERO_ERROR;
720 return *locale_set_default_internal(NULL, status);
721 }
722
723
724
725 void U_EXPORT2
726 Locale::setDefault( const Locale& newLocale,
727 UErrorCode& status)
728 {
729 if (U_FAILURE(status)) {
730 return;
731 }
732
733 /* Set the default from the full name string of the supplied locale.
734 * This is a convenient way to access the default locale caching mechanisms.
735 */
736 const char *localeID = newLocale.getName();
737 locale_set_default_internal(localeID, status);
738 }
739
740 void
741 Locale::addLikelySubtags(UErrorCode& status) {
742 if (U_FAILURE(status)) {
743 return;
744 }
745
746 CharString maximizedLocaleID;
747 {
748 CharStringByteSink sink(&maximizedLocaleID);
749 ulocimp_addLikelySubtags(fullName, sink, &status);
750 }
751
752 if (U_FAILURE(status)) {
753 return;
754 }
755
756 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
757 if (isBogus()) {
758 status = U_ILLEGAL_ARGUMENT_ERROR;
759 }
760 }
761
762 void
763 Locale::minimizeSubtags(UErrorCode& status) {
764 if (U_FAILURE(status)) {
765 return;
766 }
767
768 CharString minimizedLocaleID;
769 {
770 CharStringByteSink sink(&minimizedLocaleID);
771 ulocimp_minimizeSubtags(fullName, sink, &status);
772 }
773
774 if (U_FAILURE(status)) {
775 return;
776 }
777
778 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
779 if (isBogus()) {
780 status = U_ILLEGAL_ARGUMENT_ERROR;
781 }
782 }
783
784 Locale U_EXPORT2
785 Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
786 {
787 Locale result(Locale::eBOGUS);
788
789 if (U_FAILURE(status)) {
790 return result;
791 }
792
793 // If a BCP-47 language tag is passed as the language parameter to the
794 // normal Locale constructor, it will actually fall back to invoking
795 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
796 // the string actually is BCP-47. This works well for things like strings
797 // using BCP-47 extensions, but it does not at all work for things like
798 // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
799 // interpret as ICU locale IDs and because of that won't trigger the BCP-47
800 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
801 // and then Locale::init(), instead of just calling the normal constructor.
802
803 CharString localeID;
804 int32_t parsedLength;
805 {
806 CharStringByteSink sink(&localeID);
807 ulocimp_forLanguageTag(
808 tag.data(),
809 tag.length(),
810 sink,
811 &parsedLength,
812 &status);
813 }
814
815 if (U_FAILURE(status)) {
816 return result;
817 }
818
819 if (parsedLength != tag.size()) {
820 status = U_ILLEGAL_ARGUMENT_ERROR;
821 return result;
822 }
823
824 result.init(localeID.data(), /*canonicalize=*/FALSE);
825 if (result.isBogus()) {
826 status = U_ILLEGAL_ARGUMENT_ERROR;
827 }
828 return result;
829 }
830
831 void
832 Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
833 {
834 if (U_FAILURE(status)) {
835 return;
836 }
837
838 if (fIsBogus) {
839 status = U_ILLEGAL_ARGUMENT_ERROR;
840 return;
841 }
842
843 ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
844 }
845
846 Locale U_EXPORT2
847 Locale::createFromName (const char *name)
848 {
849 if (name) {
850 Locale l("");
851 l.init(name, FALSE);
852 return l;
853 }
854 else {
855 return getDefault();
856 }
857 }
858
859 Locale U_EXPORT2
860 Locale::createCanonical(const char* name) {
861 Locale loc("");
862 loc.init(name, TRUE);
863 return loc;
864 }
865
866 const char *
867 Locale::getISO3Language() const
868 {
869 return uloc_getISO3Language(fullName);
870 }
871
872
873 const char *
874 Locale::getISO3Country() const
875 {
876 return uloc_getISO3Country(fullName);
877 }
878
879 /**
880 * Return the LCID value as specified in the "LocaleID" resource for this
881 * locale. The LocaleID must be expressed as a hexadecimal number, from
882 * one to four digits. If the LocaleID resource is not present, or is
883 * in an incorrect format, 0 is returned. The LocaleID is for use in
884 * Windows (it is an LCID), but is available on all platforms.
885 */
886 uint32_t
887 Locale::getLCID() const
888 {
889 return uloc_getLCID(fullName);
890 }
891
892 const char* const* U_EXPORT2 Locale::getISOCountries()
893 {
894 return uloc_getISOCountries();
895 }
896
897 const char* const* U_EXPORT2 Locale::getISOLanguages()
898 {
899 return uloc_getISOLanguages();
900 }
901
902 // Set the locale's data based on a posix id.
903 void Locale::setFromPOSIXID(const char *posixID)
904 {
905 init(posixID, TRUE);
906 }
907
908 const Locale & U_EXPORT2
909 Locale::getRoot(void)
910 {
911 return getLocale(eROOT);
912 }
913
914 const Locale & U_EXPORT2
915 Locale::getEnglish(void)
916 {
917 return getLocale(eENGLISH);
918 }
919
920 const Locale & U_EXPORT2
921 Locale::getFrench(void)
922 {
923 return getLocale(eFRENCH);
924 }
925
926 const Locale & U_EXPORT2
927 Locale::getGerman(void)
928 {
929 return getLocale(eGERMAN);
930 }
931
932 const Locale & U_EXPORT2
933 Locale::getItalian(void)
934 {
935 return getLocale(eITALIAN);
936 }
937
938 const Locale & U_EXPORT2
939 Locale::getJapanese(void)
940 {
941 return getLocale(eJAPANESE);
942 }
943
944 const Locale & U_EXPORT2
945 Locale::getKorean(void)
946 {
947 return getLocale(eKOREAN);
948 }
949
950 const Locale & U_EXPORT2
951 Locale::getChinese(void)
952 {
953 return getLocale(eCHINESE);
954 }
955
956 const Locale & U_EXPORT2
957 Locale::getSimplifiedChinese(void)
958 {
959 return getLocale(eCHINA);
960 }
961
962 const Locale & U_EXPORT2
963 Locale::getTraditionalChinese(void)
964 {
965 return getLocale(eTAIWAN);
966 }
967
968
969 const Locale & U_EXPORT2
970 Locale::getFrance(void)
971 {
972 return getLocale(eFRANCE);
973 }
974
975 const Locale & U_EXPORT2
976 Locale::getGermany(void)
977 {
978 return getLocale(eGERMANY);
979 }
980
981 const Locale & U_EXPORT2
982 Locale::getItaly(void)
983 {
984 return getLocale(eITALY);
985 }
986
987 const Locale & U_EXPORT2
988 Locale::getJapan(void)
989 {
990 return getLocale(eJAPAN);
991 }
992
993 const Locale & U_EXPORT2
994 Locale::getKorea(void)
995 {
996 return getLocale(eKOREA);
997 }
998
999 const Locale & U_EXPORT2
1000 Locale::getChina(void)
1001 {
1002 return getLocale(eCHINA);
1003 }
1004
1005 const Locale & U_EXPORT2
1006 Locale::getPRC(void)
1007 {
1008 return getLocale(eCHINA);
1009 }
1010
1011 const Locale & U_EXPORT2
1012 Locale::getTaiwan(void)
1013 {
1014 return getLocale(eTAIWAN);
1015 }
1016
1017 const Locale & U_EXPORT2
1018 Locale::getUK(void)
1019 {
1020 return getLocale(eUK);
1021 }
1022
1023 const Locale & U_EXPORT2
1024 Locale::getUS(void)
1025 {
1026 return getLocale(eUS);
1027 }
1028
1029 const Locale & U_EXPORT2
1030 Locale::getCanada(void)
1031 {
1032 return getLocale(eCANADA);
1033 }
1034
1035 const Locale & U_EXPORT2
1036 Locale::getCanadaFrench(void)
1037 {
1038 return getLocale(eCANADA_FRENCH);
1039 }
1040
1041 const Locale &
1042 Locale::getLocale(int locid)
1043 {
1044 Locale *localeCache = getLocaleCache();
1045 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
1046 if (localeCache == NULL) {
1047 // Failure allocating the locale cache.
1048 // The best we can do is return a NULL reference.
1049 locid = 0;
1050 }
1051 return localeCache[locid]; /*operating on NULL*/
1052 }
1053
1054 /*
1055 This function is defined this way in order to get around static
1056 initialization and static destruction.
1057 */
1058 Locale *
1059 Locale::getLocaleCache(void)
1060 {
1061 UErrorCode status = U_ZERO_ERROR;
1062 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
1063 return gLocaleCache;
1064 }
1065
1066 class KeywordEnumeration : public StringEnumeration {
1067 private:
1068 char *keywords;
1069 char *current;
1070 int32_t length;
1071 UnicodeString currUSKey;
1072 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
1073
1074 public:
1075 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
1076 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
1077 public:
1078 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
1079 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
1080 if(U_SUCCESS(status) && keywordLen != 0) {
1081 if(keys == NULL || keywordLen < 0) {
1082 status = U_ILLEGAL_ARGUMENT_ERROR;
1083 } else {
1084 keywords = (char *)uprv_malloc(keywordLen+1);
1085 if (keywords == NULL) {
1086 status = U_MEMORY_ALLOCATION_ERROR;
1087 }
1088 else {
1089 uprv_memcpy(keywords, keys, keywordLen);
1090 keywords[keywordLen] = 0;
1091 current = keywords + currentIndex;
1092 length = keywordLen;
1093 }
1094 }
1095 }
1096 }
1097
1098 virtual ~KeywordEnumeration();
1099
1100 virtual StringEnumeration * clone() const
1101 {
1102 UErrorCode status = U_ZERO_ERROR;
1103 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
1104 }
1105
1106 virtual int32_t count(UErrorCode &/*status*/) const {
1107 char *kw = keywords;
1108 int32_t result = 0;
1109 while(*kw) {
1110 result++;
1111 kw += uprv_strlen(kw)+1;
1112 }
1113 return result;
1114 }
1115
1116 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1117 const char* result;
1118 int32_t len;
1119 if(U_SUCCESS(status) && *current != 0) {
1120 result = current;
1121 len = (int32_t)uprv_strlen(current);
1122 current += len+1;
1123 if(resultLength != NULL) {
1124 *resultLength = len;
1125 }
1126 } else {
1127 if(resultLength != NULL) {
1128 *resultLength = 0;
1129 }
1130 result = NULL;
1131 }
1132 return result;
1133 }
1134
1135 virtual const UnicodeString* snext(UErrorCode& status) {
1136 int32_t resultLength = 0;
1137 const char *s = next(&resultLength, status);
1138 return setChars(s, resultLength, status);
1139 }
1140
1141 virtual void reset(UErrorCode& /*status*/) {
1142 current = keywords;
1143 }
1144 };
1145
1146 const char KeywordEnumeration::fgClassID = '\0';
1147
1148 KeywordEnumeration::~KeywordEnumeration() {
1149 uprv_free(keywords);
1150 }
1151
1152 // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
1153 // the next() method for each keyword before returning it.
1154 class UnicodeKeywordEnumeration : public KeywordEnumeration {
1155 public:
1156 using KeywordEnumeration::KeywordEnumeration;
1157 virtual ~UnicodeKeywordEnumeration();
1158
1159 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1160 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
1161 if (U_SUCCESS(status) && legacy_key != nullptr) {
1162 const char* key = uloc_toUnicodeLocaleKey(legacy_key);
1163 if (key == nullptr) {
1164 status = U_ILLEGAL_ARGUMENT_ERROR;
1165 } else {
1166 if (resultLength != nullptr) {
1167 *resultLength = static_cast<int32_t>(uprv_strlen(key));
1168 }
1169 return key;
1170 }
1171 }
1172 if (resultLength != nullptr) *resultLength = 0;
1173 return nullptr;
1174 }
1175 };
1176
1177 // Out-of-line virtual destructor to serve as the "key function".
1178 UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
1179
1180 StringEnumeration *
1181 Locale::createKeywords(UErrorCode &status) const
1182 {
1183 char keywords[256];
1184 int32_t keywordCapacity = sizeof keywords;
1185 StringEnumeration *result = NULL;
1186
1187 if (U_FAILURE(status)) {
1188 return result;
1189 }
1190
1191 const char* variantStart = uprv_strchr(fullName, '@');
1192 const char* assignment = uprv_strchr(fullName, '=');
1193 if(variantStart) {
1194 if(assignment > variantStart) {
1195 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1196 if(U_SUCCESS(status) && keyLen) {
1197 result = new KeywordEnumeration(keywords, keyLen, 0, status);
1198 if (!result) {
1199 status = U_MEMORY_ALLOCATION_ERROR;
1200 }
1201 }
1202 } else {
1203 status = U_INVALID_FORMAT_ERROR;
1204 }
1205 }
1206 return result;
1207 }
1208
1209 StringEnumeration *
1210 Locale::createUnicodeKeywords(UErrorCode &status) const
1211 {
1212 char keywords[256];
1213 int32_t keywordCapacity = sizeof keywords;
1214 StringEnumeration *result = NULL;
1215
1216 if (U_FAILURE(status)) {
1217 return result;
1218 }
1219
1220 const char* variantStart = uprv_strchr(fullName, '@');
1221 const char* assignment = uprv_strchr(fullName, '=');
1222 if(variantStart) {
1223 if(assignment > variantStart) {
1224 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1225 if(U_SUCCESS(status) && keyLen) {
1226 result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
1227 if (!result) {
1228 status = U_MEMORY_ALLOCATION_ERROR;
1229 }
1230 }
1231 } else {
1232 status = U_INVALID_FORMAT_ERROR;
1233 }
1234 }
1235 return result;
1236 }
1237
1238 int32_t
1239 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
1240 {
1241 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
1242 }
1243
1244 void
1245 Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
1246 if (U_FAILURE(status)) {
1247 return;
1248 }
1249
1250 if (fIsBogus) {
1251 status = U_ILLEGAL_ARGUMENT_ERROR;
1252 return;
1253 }
1254
1255 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1256 const CharString keywordName_nul(keywordName, status);
1257 if (U_FAILURE(status)) {
1258 return;
1259 }
1260
1261 LocalMemory<char> scratch;
1262 int32_t scratch_capacity = 16; // Arbitrarily chosen default size.
1263
1264 char* buffer;
1265 int32_t result_capacity, reslen;
1266
1267 for (;;) {
1268 if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
1269 status = U_MEMORY_ALLOCATION_ERROR;
1270 return;
1271 }
1272
1273 buffer = sink.GetAppendBuffer(
1274 /*min_capacity=*/scratch_capacity,
1275 /*desired_capacity_hint=*/scratch_capacity,
1276 scratch.getAlias(),
1277 scratch_capacity,
1278 &result_capacity);
1279
1280 reslen = uloc_getKeywordValue(
1281 fullName,
1282 keywordName_nul.data(),
1283 buffer,
1284 result_capacity,
1285 &status);
1286
1287 if (status != U_BUFFER_OVERFLOW_ERROR) {
1288 break;
1289 }
1290
1291 scratch_capacity = reslen;
1292 status = U_ZERO_ERROR;
1293 }
1294
1295 if (U_FAILURE(status)) {
1296 return;
1297 }
1298
1299 sink.Append(buffer, reslen);
1300 if (status == U_STRING_NOT_TERMINATED_WARNING) {
1301 status = U_ZERO_ERROR; // Terminators not used.
1302 }
1303 }
1304
1305 void
1306 Locale::getUnicodeKeywordValue(StringPiece keywordName,
1307 ByteSink& sink,
1308 UErrorCode& status) const {
1309 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1310 const CharString keywordName_nul(keywordName, status);
1311 if (U_FAILURE(status)) {
1312 return;
1313 }
1314
1315 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1316
1317 if (legacy_key == nullptr) {
1318 status = U_ILLEGAL_ARGUMENT_ERROR;
1319 return;
1320 }
1321
1322 CharString legacy_value;
1323 {
1324 CharStringByteSink sink(&legacy_value);
1325 getKeywordValue(legacy_key, sink, status);
1326 }
1327
1328 if (U_FAILURE(status)) {
1329 return;
1330 }
1331
1332 const char* unicode_value = uloc_toUnicodeLocaleType(
1333 keywordName_nul.data(), legacy_value.data());
1334
1335 if (unicode_value == nullptr) {
1336 status = U_ILLEGAL_ARGUMENT_ERROR;
1337 return;
1338 }
1339
1340 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
1341 }
1342
1343 void
1344 Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
1345 {
1346 uloc_setKeywordValue(keywordName, keywordValue, fullName, ULOC_FULLNAME_CAPACITY, &status);
1347 if (U_SUCCESS(status) && baseName == fullName) {
1348 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
1349 initBaseName(status);
1350 }
1351 }
1352
1353 void
1354 Locale::setKeywordValue(StringPiece keywordName,
1355 StringPiece keywordValue,
1356 UErrorCode& status) {
1357 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1358 const CharString keywordName_nul(keywordName, status);
1359 const CharString keywordValue_nul(keywordValue, status);
1360 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
1361 }
1362
1363 void
1364 Locale::setUnicodeKeywordValue(StringPiece keywordName,
1365 StringPiece keywordValue,
1366 UErrorCode& status) {
1367 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1368 const CharString keywordName_nul(keywordName, status);
1369 const CharString keywordValue_nul(keywordValue, status);
1370
1371 if (U_FAILURE(status)) {
1372 return;
1373 }
1374
1375 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1376
1377 if (legacy_key == nullptr) {
1378 status = U_ILLEGAL_ARGUMENT_ERROR;
1379 return;
1380 }
1381
1382 const char* legacy_value = nullptr;
1383
1384 if (!keywordValue_nul.isEmpty()) {
1385 legacy_value =
1386 uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
1387
1388 if (legacy_value == nullptr) {
1389 status = U_ILLEGAL_ARGUMENT_ERROR;
1390 return;
1391 }
1392 }
1393
1394 setKeywordValue(legacy_key, legacy_value, status);
1395 }
1396
1397 const char *
1398 Locale::getBaseName() const {
1399 return baseName;
1400 }
1401
1402 //eof
1403 U_NAMESPACE_END