]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc_tag.cpp
ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc_tag.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 2009-2015, International Business Machines
729e4ab9
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9
3d1f044b 10#include "unicode/bytestream.h"
729e4ab9
A
11#include "unicode/utypes.h"
12#include "unicode/ures.h"
3d1f044b 13#include "unicode/localpointer.h"
729e4ab9 14#include "unicode/putil.h"
3d1f044b 15#include "unicode/uenum.h"
729e4ab9
A
16#include "unicode/uloc.h"
17#include "ustr_imp.h"
3d1f044b 18#include "charstr.h"
729e4ab9
A
19#include "cmemory.h"
20#include "cstring.h"
21#include "putilimp.h"
22#include "uinvchar.h"
23#include "ulocimp.h"
51004dcb
A
24#include "uassert.h"
25
f3c0d7a5 26
729e4ab9
A
/* Node of a singly linked list of variant subtags. */
typedef struct VariantListEntry {
    const char *variant;            /* text of this variant subtag */
    struct VariantListEntry *next;  /* following node, or NULL at the end of the list */
} VariantListEntry;
32
4388f060 33/* struct holding a single attribute value */
3d1f044b 34struct AttributeListEntry : public icu::UMemory {
4388f060
A
35 const char *attribute;
36 struct AttributeListEntry *next;
3d1f044b 37};
4388f060 38
729e4ab9 39/* struct holding a single extension */
3d1f044b 40struct ExtensionListEntry : public icu::UMemory {
729e4ab9
A
41 const char *key;
42 const char *value;
43 struct ExtensionListEntry *next;
3d1f044b 44};
729e4ab9
A
45
46#define MAXEXTLANG 3
47typedef struct ULanguageTag {
48 char *buf; /* holding parsed subtags */
49 const char *language;
50 const char *extlang[MAXEXTLANG];
51 const char *script;
52 const char *region;
53 VariantListEntry *variants;
54 ExtensionListEntry *extensions;
55 const char *privateuse;
56 const char *grandfathered;
57} ULanguageTag;
58
/* Language tag syntax characters. */
#define MINLEN 2
#define SEP '-'
#define PRIVATEUSE 'x'
#define LDMLEXT 'u'

/* Separator characters used when reading and comparing locale IDs. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ASCII character classification. */
#define ISALPHA(c) uprv_isASCIILetter(c)
/* NOTE: expands its argument twice, so never pass an expression with side effects. */
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
71
51004dcb
A
/* Shared string constants used throughout this file. */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Character count of LANG_UND, without the terminating NUL. */
#define LANG_UND_LEN 3
83
3d1f044b
A
84/*
85 Updated on 2018-09-12 from
86 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
87
88 This table has 2 parts. The part for grandfathered tags is generated by the
89 following script from the IANA language tag registry.
90
91 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
92 egrep -A 7 'Type: grandfathered' | \
93 egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
94 awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
95 tr 'A-Z' 'a-z'
96
97
98 The 2nd part is made of five ICU-specific entries. They're kept for
99 the backward compatibility for now, even though there are no preferred
100 values. They may have to be removed for the strict BCP 47 compliance.
101
102*/
/* Flat list of pairs: a grandfathered tag followed by its preferred replacement. */
static const char* const GRANDFATHERED[] = {
    /* grandfathered    preferred */
    "art-lojban",       "jbo",
    "en-gb-oed",        "en-gb-oxendict",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",

    // The IANA registry gives no preferred value for the tags below.
    // The ICU-specific mappings are retained for backward compatibility.
    "cel-gaulish",      "xtg-x-cel-gaulish",
    "i-default",        "en-x-i-default",
    "i-enochian",       "und-x-i-enochian",
    "i-mingo",          "see-x-i-mingo",
    "zh-min",           "nan-x-zh-min",
};
136
137/*
138 Updated on 2018-09-12 from
139 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
140
141 The table lists redundant tags with preferred value in the IANA language tag registry.
142 It's generated with the following command:
143
144 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
145 grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
146 awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
147 tr 'A-Z' 'a-z'
148
149 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
150 a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
151*/
152
/* Flat list of pairs: a redundant tag followed by its preferred replacement. */
static const char* const REDUNDANT[] = {
    // redundant        preferred
    "sgn-br",           "bzs",
    "sgn-co",           "csn",
    "sgn-de",           "gsg",
    "sgn-dk",           "dsl",
    "sgn-es",           "ssp",
    "sgn-fr",           "fsl",
    "sgn-gb",           "bfi",
    "sgn-gr",           "gss",
    "sgn-ie",           "isg",
    "sgn-it",           "ise",
    "sgn-jp",           "jsl",
    "sgn-mx",           "mfs",
    "sgn-ni",           "ncs",
    "sgn-nl",           "dse",
    "sgn-no",           "nsl",
    "sgn-pt",           "psr",
    "sgn-se",           "swl",
    "sgn-us",           "ase",
    "sgn-za",           "sfs",
    "zh-cmn",           "cmn",
    "zh-cmn-hans",      "cmn-hans",
    "zh-cmn-hant",      "cmn-hant",
    "zh-gan",           "gan",
    "zh-wuu",           "wuu",
    "zh-yue",           "yue",

    // tag whose variant subtags have a preferred value
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
184
3d1f044b
A
185/*
186 Updated on 2018-09-12 from
187 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
188
189 grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
190 grep -B1 'Preferred' | grep -v '^--' | \
191 awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
192
193 Make sure that 2-letter language subtags come before 3-letter subtags.
194*/
/*
 * Flat list of pairs: a deprecated language subtag followed by its replacement.
 * All 2-letter deprecated subtags are listed ahead of the 3-letter ones.
 */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated, new -- 2-letter deprecated subtags */
    "in", "id",   "iw", "he",   "ji", "yi",   "jw", "jv",
    "mo", "ro",
    /* deprecated, new -- 3-letter deprecated subtags */
    "aam", "aas",   "adp", "dz",    "aue", "ktz",   "ayx", "nun",
    "bgm", "bcg",   "bjd", "drl",   "ccq", "rki",   "cjr", "mom",
    "cka", "cmr",   "cmk", "xch",   "coy", "pij",   "cqu", "quh",
    "drh", "khk",   "drw", "prs",   "gav", "dev",   "gfx", "vaj",
    "ggn", "gvr",   "gti", "nyc",   "guv", "duz",   "hrr", "jal",
    "ibi", "opa",   "ilw", "gal",   "jeg", "oyb",   "kgc", "tdf",
    "kgh", "kml",   "koj", "kwv",   "krm", "bmf",   "ktr", "dtp",
    "kvs", "gdj",   "kwq", "yam",   "kxe", "tvd",   "kzj", "dtp",
    "kzt", "dtp",   "lii", "raq",   "lmm", "rmx",   "meg", "cir",
    "mst", "mry",   "mwj", "vaj",   "myt", "mry",   "nad", "xny",
    "ncp", "kdz",   "nnx", "ngv",   "nts", "pij",   "oun", "vaj",
    "pcr", "adx",   "pmc", "huw",   "pmu", "phr",   "ppa", "bfy",
    "ppr", "lcq",   "pry", "prt",   "puz", "pub",   "sca", "hle",
    "skk", "oyb",   "tdu", "dtp",   "thc", "tpo",   "thx", "oyb",
    "tie", "ras",   "tkk", "twm",   "tlw", "weo",   "tmp", "tyj",
    "tne", "kak",   "tnf", "prs",   "tsf", "taj",   "uok", "ema",
    "xba", "cax",   "xia", "acn",   "xkh", "waw",   "xsj", "suj",
    "ybd", "rki",   "yma", "lrr",   "ymt", "mtm",   "yos", "zom",
    "yuu", "yug",
};
276
277/*
278 Updated on 2018-04-24 from
279
280 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
281 grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
282 grep -B1 'Preferred' | \
283 awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
284*/
/* Flat list of pairs: a deprecated region subtag followed by its replacement. */
static const char DEPRECATEDREGIONS[][3] = {
    /* deprecated, new */
    "BU", "MM",   "DD", "DE",   "FX", "FR",
    "TP", "TL",   "YD", "YE",   "ZR", "CD",
};
294
295/*
296* -------------------------------------------------
297*
298* These ultag_ functions may be exposed as APIs later
299*
300* -------------------------------------------------
301*/
302
303static ULanguageTag*
304ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
305
306static void
307ultag_close(ULanguageTag* langtag);
308
309static const char*
310ultag_getLanguage(const ULanguageTag* langtag);
311
312#if 0
313static const char*
314ultag_getJDKLanguage(const ULanguageTag* langtag);
315#endif
316
317static const char*
318ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
319
320static int32_t
321ultag_getExtlangSize(const ULanguageTag* langtag);
322
323static const char*
324ultag_getScript(const ULanguageTag* langtag);
325
326static const char*
327ultag_getRegion(const ULanguageTag* langtag);
328
329static const char*
330ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
331
332static int32_t
333ultag_getVariantsSize(const ULanguageTag* langtag);
334
335static const char*
336ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
337
338static const char*
339ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
340
341static int32_t
342ultag_getExtensionsSize(const ULanguageTag* langtag);
343
344static const char*
345ultag_getPrivateUse(const ULanguageTag* langtag);
346
347#if 0
348static const char*
349ultag_getGrandfathered(const ULanguageTag* langtag);
350#endif
351
3d1f044b
A
352U_NAMESPACE_BEGIN
353
354/**
355 * \class LocalULanguageTagPointer
356 * "Smart pointer" class, closes a ULanguageTag via ultag_close().
357 * For most methods see the LocalPointerBase base class.
358 *
359 * @see LocalPointerBase
360 * @see LocalPointer
361 * @internal
362 */
363U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close);
364
365U_NAMESPACE_END
366
729e4ab9
A
367/*
368* -------------------------------------------------
369*
370* Language subtag syntax validation functions
371*
372* -------------------------------------------------
373*/
374
375static UBool
376_isAlphaString(const char* s, int32_t len) {
377 int32_t i;
378 for (i = 0; i < len; i++) {
379 if (!ISALPHA(*(s + i))) {
380 return FALSE;
381 }
382 }
383 return TRUE;
384}
385
386static UBool
387_isNumericString(const char* s, int32_t len) {
388 int32_t i;
389 for (i = 0; i < len; i++) {
390 if (!ISNUMERIC(*(s + i))) {
391 return FALSE;
392 }
393 }
394 return TRUE;
395}
396
397static UBool
398_isAlphaNumericString(const char* s, int32_t len) {
399 int32_t i;
400 for (i = 0; i < len; i++) {
401 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
402 return FALSE;
403 }
404 }
405 return TRUE;
406}
407
408static UBool
3d1f044b
A
409_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) {
410 if (len < 0) {
411 len = (int32_t)uprv_strlen(s);
412 }
413 if (len >= min && len <= max && _isAlphaNumericString(s, len)) {
414 return TRUE;
415 }
416 return FALSE;
417}
418
419U_CFUNC UBool
420ultag_isLanguageSubtag(const char* s, int32_t len) {
729e4ab9 421 /*
3d1f044b
A
422 * unicode_language_subtag = alpha{2,3} | alpha{5,8};
423 * NOTE: Per ICUTC 2019/01/23- accepting alpha 4
424 * See ICU-20372
729e4ab9
A
425 */
426 if (len < 0) {
427 len = (int32_t)uprv_strlen(s);
428 }
429 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
430 return TRUE;
431 }
432 return FALSE;
433}
434
435static UBool
436_isExtlangSubtag(const char* s, int32_t len) {
437 /*
438 * extlang = 3ALPHA ; selected ISO 639 codes
439 * *2("-" 3ALPHA) ; permanently reserved
440 */
441 if (len < 0) {
442 len = (int32_t)uprv_strlen(s);
443 }
444 if (len == 3 && _isAlphaString(s, len)) {
445 return TRUE;
446 }
447 return FALSE;
448}
449
3d1f044b
A
450U_CFUNC UBool
451ultag_isScriptSubtag(const char* s, int32_t len) {
729e4ab9
A
452 /*
453 * script = 4ALPHA ; ISO 15924 code
454 */
455 if (len < 0) {
456 len = (int32_t)uprv_strlen(s);
457 }
458 if (len == 4 && _isAlphaString(s, len)) {
459 return TRUE;
460 }
461 return FALSE;
462}
463
3d1f044b
A
464U_CFUNC UBool
465ultag_isRegionSubtag(const char* s, int32_t len) {
729e4ab9
A
466 /*
467 * region = 2ALPHA ; ISO 3166-1 code
468 * / 3DIGIT ; UN M.49 code
469 */
470 if (len < 0) {
471 len = (int32_t)uprv_strlen(s);
472 }
473 if (len == 2 && _isAlphaString(s, len)) {
474 return TRUE;
475 }
476 if (len == 3 && _isNumericString(s, len)) {
477 return TRUE;
478 }
479 return FALSE;
480}
481
482static UBool
483_isVariantSubtag(const char* s, int32_t len) {
484 /*
485 * variant = 5*8alphanum ; registered variants
486 * / (DIGIT 3alphanum)
487 */
488 if (len < 0) {
489 len = (int32_t)uprv_strlen(s);
490 }
3d1f044b 491 if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) {
729e4ab9
A
492 return TRUE;
493 }
494 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
495 return TRUE;
496 }
497 return FALSE;
498}
499
3d1f044b
A
500static UBool
501_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) {
502 const char *p = s;
503 const char *pSubtag = NULL;
504
505 if (len < 0) {
506 len = (int32_t)uprv_strlen(s);
507 }
508
509 while ((p - s) < len) {
510 if (*p == SEP) {
511 if (pSubtag == NULL) {
512 return FALSE;
513 }
514 if (!test(pSubtag, (int32_t)(p - pSubtag))) {
515 return FALSE;
516 }
517 pSubtag = NULL;
518 } else if (pSubtag == NULL) {
519 pSubtag = p;
520 }
521 p++;
522 }
523 if (pSubtag == NULL) {
524 return FALSE;
525 }
526 return test(pSubtag, (int32_t)(p - pSubtag));
527}
528
529U_CFUNC UBool
530ultag_isVariantSubtags(const char* s, int32_t len) {
531 return _isSepListOf(&_isVariantSubtag, s, len);
532}
533
534// This is for the ICU-specific "lvariant" handling.
4388f060
A
535static UBool
536_isPrivateuseVariantSubtag(const char* s, int32_t len) {
537 /*
538 * variant = 1*8alphanum ; registered variants
539 * / (DIGIT 3alphanum)
540 */
3d1f044b 541 return _isAlphaNumericStringLimitedLength(s, len , 1, 8);
4388f060
A
542}
543
729e4ab9
A
544static UBool
545_isExtensionSingleton(const char* s, int32_t len) {
546 /*
547 * extension = singleton 1*("-" (2*8alphanum))
3d1f044b
A
548 *
549 * singleton = DIGIT ; 0 - 9
550 * / %x41-57 ; A - W
551 * / %x59-5A ; Y - Z
552 * / %x61-77 ; a - w
553 * / %x79-7A ; y - z
729e4ab9
A
554 */
555 if (len < 0) {
556 len = (int32_t)uprv_strlen(s);
557 }
3d1f044b 558 if (len == 1 && (ISALPHA(*s) || ISNUMERIC(*s)) && (uprv_tolower(*s) != PRIVATEUSE)) {
729e4ab9
A
559 return TRUE;
560 }
561 return FALSE;
562}
563
564static UBool
565_isExtensionSubtag(const char* s, int32_t len) {
566 /*
567 * extension = singleton 1*("-" (2*8alphanum))
568 */
3d1f044b
A
569 return _isAlphaNumericStringLimitedLength(s, len, 2, 8);
570}
571
572U_CFUNC UBool
573ultag_isExtensionSubtags(const char* s, int32_t len) {
574 return _isSepListOf(&_isExtensionSubtag, s, len);
575}
576
577static UBool
578_isPrivateuseValueSubtag(const char* s, int32_t len) {
579 /*
580 * privateuse = "x" 1*("-" (1*8alphanum))
581 */
582 return _isAlphaNumericStringLimitedLength(s, len, 1, 8);
583}
584
585U_CFUNC UBool
586ultag_isPrivateuseValueSubtags(const char* s, int32_t len) {
587 return _isSepListOf(&_isPrivateuseValueSubtag, s, len);
588}
589
590U_CFUNC UBool
591ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) {
592 /*
593 * attribute = alphanum{3,8} ;
594 */
595 return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
596}
597
598U_CFUNC UBool
599ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) {
600 return _isSepListOf(&ultag_isUnicodeLocaleAttribute, s, len);
601}
602
603U_CFUNC UBool
604ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
605 /*
606 * key = alphanum alpha ;
607 */
729e4ab9
A
608 if (len < 0) {
609 len = (int32_t)uprv_strlen(s);
610 }
3d1f044b 611 if (len == 2 && (ISALPHA(*s) || ISNUMERIC(*s)) && ISALPHA(s[1])) {
729e4ab9
A
612 return TRUE;
613 }
614 return FALSE;
615}
616
3d1f044b
A
617U_CFUNC UBool
618_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) {
619 /*
620 * alphanum{3,8}
621 */
622 return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
623}
729e4ab9 624
3d1f044b
A
625U_CFUNC UBool
626ultag_isUnicodeLocaleType(const char*s, int32_t len) {
627 /*
628 * type = alphanum{3,8} (sep alphanum{3,8})* ;
629 */
630 return _isSepListOf(&_isUnicodeLocaleTypeSubtag, s, len);
729e4ab9
A
631}
632
633static UBool
3d1f044b
A
634_isTKey(const char* s, int32_t len)
635{
729e4ab9 636 /*
3d1f044b 637 * tkey = alpha digit ;
729e4ab9
A
638 */
639 if (len < 0) {
640 len = (int32_t)uprv_strlen(s);
641 }
3d1f044b 642 if (len == 2 && ISALPHA(*s) && ISNUMERIC(*(s + 1))) {
729e4ab9
A
643 return TRUE;
644 }
645 return FALSE;
646}
647
648static UBool
3d1f044b
A
649_isTValue(const char* s, int32_t len)
650{
651 /*
652 * tvalue = (sep alphanum{3,8})+ ;
653 */
654 return _isAlphaNumericStringLimitedLength(s, len , 3, 8);
655}
729e4ab9 656
3d1f044b
A
657static UBool
658_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
659{
660 const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end
661 const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag,
662 // unicode_region_subtag, unicode_variant_subtag, tkey or end
663 const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag,
664 // unicode_variant_subtag, tkey, or end
665 const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag,
666 // tkey, or end.
667 const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag
668 // tkey or end.
669 const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
670 const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
671
672 switch (state) {
673 case kStart:
674 if (ultag_isLanguageSubtag(s, len)) {
675 state = kGotLanguage;
676 return TRUE;
677 }
678 if (_isTKey(s, len)) {
679 state = kGotTKey;
680 return TRUE;
729e4ab9 681 }
3d1f044b
A
682 return FALSE;
683 case kGotLanguage:
684 if (ultag_isScriptSubtag(s, len)) {
685 state = kGotScript;
686 return TRUE;
687 }
688 U_FALLTHROUGH;
689 case kGotScript:
690 if (ultag_isRegionSubtag(s, len)) {
691 state = kGotRegion;
692 return TRUE;
693 }
694 U_FALLTHROUGH;
695 case kGotRegion:
696 U_FALLTHROUGH;
697 case kGotVariant:
698 if (_isVariantSubtag(s, len)) {
699 state = kGotVariant;
700 return TRUE;
701 }
702 if (_isTKey(s, len)) {
703 state = kGotTKey;
704 return TRUE;
729e4ab9 705 }
3d1f044b
A
706 return FALSE;
707 case kGotTKey:
708 if (_isTValue(s, len)) {
709 state = kGotTValue;
710 return TRUE;
711 }
712 return FALSE;
713 case kGotTValue:
714 if (_isTKey(s, len)) {
715 state = kGotTKey;
716 return TRUE;
717 }
718 if (_isTValue(s, len)) {
719 return TRUE;
720 }
721 return FALSE;
729e4ab9 722 }
3d1f044b 723 return FALSE;
729e4ab9
A
724}
725
3d1f044b
A
726static UBool
727_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len)
728{
729 const int32_t kStart = 0; // Start, wait for a key or attribute or end
730 const int32_t kGotKey = 1; // Got a key, wait for type or key or end
731 const int32_t kGotType = 2; // Got a type, wait for key or end
732
733 switch (state) {
734 case kStart:
735 if (ultag_isUnicodeLocaleKey(s, len)) {
736 state = kGotKey;
737 return TRUE;
738 }
739 if (ultag_isUnicodeLocaleAttribute(s, len)) {
740 return TRUE;
741 }
742 return FALSE;
743 case kGotKey:
744 if (ultag_isUnicodeLocaleKey(s, len)) {
745 return TRUE;
746 }
747 if (_isUnicodeLocaleTypeSubtag(s, len)) {
748 state = kGotType;
749 return TRUE;
750 }
751 return FALSE;
752 case kGotType:
753 if (ultag_isUnicodeLocaleKey(s, len)) {
754 state = kGotKey;
755 return TRUE;
756 }
757 if (_isUnicodeLocaleTypeSubtag(s, len)) {
758 return TRUE;
759 }
760 return FALSE;
729e4ab9
A
761 }
762 return FALSE;
763}
764
3d1f044b
A
765static UBool
766_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len)
767{
768 int32_t state = 0;
b331163b 769 const char* p;
3d1f044b 770 const char* start = s;
b331163b
A
771 int32_t subtagLen = 0;
772
729e4ab9
A
773 if (len < 0) {
774 len = (int32_t)uprv_strlen(s);
775 }
b331163b
A
776
777 for (p = s; len > 0; p++, len--) {
778 if (*p == SEP) {
3d1f044b 779 if (!test(state, start, subtagLen)) {
b331163b
A
780 return FALSE;
781 }
782 subtagLen = 0;
3d1f044b 783 start = p + 1;
b331163b 784 } else {
3d1f044b 785 subtagLen++;
b331163b 786 }
729e4ab9 787 }
729e4ab9 788
3d1f044b
A
789 if (test(state, start, subtagLen) && state >= 0) {
790 return TRUE;
791 }
792 return FALSE;
793}
794
795U_CFUNC UBool
796ultag_isTransformedExtensionSubtags(const char* s, int32_t len)
797{
798 return _isStatefulSepListOf(&_isTransformedExtensionSubtag, s, len);
799}
800
801U_CFUNC UBool
802ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) {
803 return _isStatefulSepListOf(&_isUnicodeExtensionSubtag, s, len);
b331163b 804}
3d1f044b
A
805
806
729e4ab9
A
807/*
808* -------------------------------------------------
809*
810* Helper functions
811*
812* -------------------------------------------------
813*/
814
815static UBool
816_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
817 UBool bAdded = TRUE;
818
819 if (*first == NULL) {
820 var->next = NULL;
821 *first = var;
822 } else {
823 VariantListEntry *prev, *cur;
824 int32_t cmp;
825
4388f060 826 /* variants order should be preserved */
729e4ab9
A
827 prev = NULL;
828 cur = *first;
829 while (TRUE) {
830 if (cur == NULL) {
831 prev->next = var;
832 var->next = NULL;
833 break;
834 }
4388f060
A
835
836 /* Checking for duplicate variant */
729e4ab9 837 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
4388f060
A
838 if (cmp == 0) {
839 /* duplicated variant */
840 bAdded = FALSE;
841 break;
842 }
843 prev = cur;
844 cur = cur->next;
845 }
846 }
847
848 return bAdded;
849}
850
851static UBool
852_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
853 UBool bAdded = TRUE;
854
855 if (*first == NULL) {
856 attr->next = NULL;
857 *first = attr;
858 } else {
859 AttributeListEntry *prev, *cur;
860 int32_t cmp;
861
862 /* reorder variants in alphabetical order */
863 prev = NULL;
864 cur = *first;
865 while (TRUE) {
866 if (cur == NULL) {
867 prev->next = attr;
868 attr->next = NULL;
869 break;
870 }
871 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
729e4ab9
A
872 if (cmp < 0) {
873 if (prev == NULL) {
4388f060 874 *first = attr;
729e4ab9 875 } else {
4388f060 876 prev->next = attr;
729e4ab9 877 }
4388f060 878 attr->next = cur;
729e4ab9
A
879 break;
880 }
881 if (cmp == 0) {
882 /* duplicated variant */
883 bAdded = FALSE;
884 break;
885 }
886 prev = cur;
887 cur = cur->next;
888 }
889 }
890
891 return bAdded;
892}
893
894
895static UBool
896_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
897 UBool bAdded = TRUE;
898
899 if (*first == NULL) {
900 ext->next = NULL;
901 *first = ext;
902 } else {
903 ExtensionListEntry *prev, *cur;
904 int32_t cmp;
905
906 /* reorder variants in alphabetical order */
907 prev = NULL;
908 cur = *first;
909 while (TRUE) {
910 if (cur == NULL) {
911 prev->next = ext;
912 ext->next = NULL;
913 break;
914 }
915 if (localeToBCP) {
916 /* special handling for locale to bcp conversion */
917 int32_t len, curlen;
918
919 len = (int32_t)uprv_strlen(ext->key);
920 curlen = (int32_t)uprv_strlen(cur->key);
921
922 if (len == 1 && curlen == 1) {
923 if (*(ext->key) == *(cur->key)) {
924 cmp = 0;
925 } else if (*(ext->key) == PRIVATEUSE) {
926 cmp = 1;
927 } else if (*(cur->key) == PRIVATEUSE) {
928 cmp = -1;
929 } else {
930 cmp = *(ext->key) - *(cur->key);
931 }
932 } else if (len == 1) {
933 cmp = *(ext->key) - LDMLEXT;
934 } else if (curlen == 1) {
935 cmp = LDMLEXT - *(cur->key);
936 } else {
937 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
f3c0d7a5
A
938 /* Both are u extension keys - we need special handling for 'attribute' */
939 if (cmp != 0) {
940 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
941 cmp = 1;
942 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
943 cmp = -1;
944 }
945 }
729e4ab9
A
946 }
947 } else {
948 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
949 }
950 if (cmp < 0) {
951 if (prev == NULL) {
952 *first = ext;
953 } else {
954 prev->next = ext;
955 }
956 ext->next = cur;
957 break;
958 }
959 if (cmp == 0) {
960 /* duplicated extension key */
961 bAdded = FALSE;
962 break;
963 }
964 prev = cur;
965 cur = cur->next;
966 }
967 }
968
969 return bAdded;
970}
971
972static void
973_initializeULanguageTag(ULanguageTag* langtag) {
974 int32_t i;
975
976 langtag->buf = NULL;
977
978 langtag->language = EMPTY;
979 for (i = 0; i < MAXEXTLANG; i++) {
980 langtag->extlang[i] = NULL;
981 }
982
983 langtag->script = EMPTY;
984 langtag->region = EMPTY;
985
986 langtag->variants = NULL;
987 langtag->extensions = NULL;
988
989 langtag->grandfathered = EMPTY;
990 langtag->privateuse = EMPTY;
991}
992
3d1f044b
A
993static void
994_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
729e4ab9
A
995 char buf[ULOC_LANG_CAPACITY];
996 UErrorCode tmpStatus = U_ZERO_ERROR;
997 int32_t len, i;
729e4ab9
A
998
999 if (U_FAILURE(*status)) {
3d1f044b 1000 return;
729e4ab9
A
1001 }
1002
1003 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1004 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1005 if (strict) {
1006 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1007 return;
729e4ab9
A
1008 }
1009 len = 0;
1010 }
1011
1012 /* Note: returned language code is in lower case letters */
1013
1014 if (len == 0) {
3d1f044b
A
1015 sink.Append(LANG_UND, LANG_UND_LEN);
1016 } else if (!ultag_isLanguageSubtag(buf, len)) {
729e4ab9
A
1017 /* invalid language code */
1018 if (strict) {
1019 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1020 return;
729e4ab9 1021 }
3d1f044b 1022 sink.Append(LANG_UND, LANG_UND_LEN);
729e4ab9
A
1023 } else {
1024 /* resolve deprecated */
b331163b 1025 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
3d1f044b
A
1026 // 2-letter deprecated subtags are listede before 3-letter
1027 // ones in DEPRECATEDLANGS[]. Get out of loop on coming
1028 // across the 1st 3-letter subtag, if the input is a 2-letter code.
1029 // to avoid continuing to try when there's no match.
1030 if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
729e4ab9
A
1031 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1032 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1033 len = (int32_t)uprv_strlen(buf);
1034 break;
1035 }
1036 }
3d1f044b 1037 sink.Append(buf, len);
729e4ab9 1038 }
729e4ab9
A
1039}
1040
3d1f044b
A
1041static void
1042_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
729e4ab9
A
1043 char buf[ULOC_SCRIPT_CAPACITY];
1044 UErrorCode tmpStatus = U_ZERO_ERROR;
1045 int32_t len;
729e4ab9
A
1046
1047 if (U_FAILURE(*status)) {
3d1f044b 1048 return;
729e4ab9
A
1049 }
1050
1051 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1052 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1053 if (strict) {
1054 *status = U_ILLEGAL_ARGUMENT_ERROR;
1055 }
3d1f044b 1056 return;
729e4ab9
A
1057 }
1058
1059 if (len > 0) {
3d1f044b 1060 if (!ultag_isScriptSubtag(buf, len)) {
729e4ab9
A
1061 /* invalid script code */
1062 if (strict) {
1063 *status = U_ILLEGAL_ARGUMENT_ERROR;
1064 }
3d1f044b 1065 return;
729e4ab9 1066 } else {
3d1f044b
A
1067 sink.Append("-", 1);
1068 sink.Append(buf, len);
729e4ab9
A
1069 }
1070 }
729e4ab9
A
1071}
1072
3d1f044b
A
1073static void
1074_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) {
729e4ab9
A
1075 char buf[ULOC_COUNTRY_CAPACITY];
1076 UErrorCode tmpStatus = U_ZERO_ERROR;
1077 int32_t len;
729e4ab9
A
1078
1079 if (U_FAILURE(*status)) {
3d1f044b 1080 return;
729e4ab9
A
1081 }
1082
1083 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1084 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1085 if (strict) {
1086 *status = U_ILLEGAL_ARGUMENT_ERROR;
1087 }
3d1f044b 1088 return;
729e4ab9
A
1089 }
1090
1091 if (len > 0) {
3d1f044b 1092 if (!ultag_isRegionSubtag(buf, len)) {
729e4ab9
A
1093 /* invalid region code */
1094 if (strict) {
1095 *status = U_ILLEGAL_ARGUMENT_ERROR;
1096 }
3d1f044b 1097 return;
729e4ab9 1098 } else {
3d1f044b
A
1099 sink.Append("-", 1);
1100 /* resolve deprecated */
1101 for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
1102 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
1103 uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
1104 len = (int32_t)uprv_strlen(buf);
1105 break;
1106 }
729e4ab9 1107 }
3d1f044b 1108 sink.Append(buf, len);
729e4ab9
A
1109 }
1110 }
729e4ab9
A
1111}
1112
3d1f044b
A
1113static void
1114_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) {
729e4ab9
A
1115 char buf[ULOC_FULLNAME_CAPACITY];
1116 UErrorCode tmpStatus = U_ZERO_ERROR;
1117 int32_t len, i;
729e4ab9
A
1118
1119 if (U_FAILURE(*status)) {
3d1f044b 1120 return;
729e4ab9
A
1121 }
1122
1123 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1124 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1125 if (strict) {
1126 *status = U_ILLEGAL_ARGUMENT_ERROR;
1127 }
3d1f044b 1128 return;
729e4ab9
A
1129 }
1130
1131 if (len > 0) {
1132 char *p, *pVar;
1133 UBool bNext = TRUE;
1134 VariantListEntry *var;
1135 VariantListEntry *varFirst = NULL;
1136
1137 pVar = NULL;
1138 p = buf;
1139 while (bNext) {
1140 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1141 if (*p == 0) {
1142 bNext = FALSE;
1143 } else {
1144 *p = 0; /* terminate */
1145 }
1146 if (pVar == NULL) {
1147 if (strict) {
1148 *status = U_ILLEGAL_ARGUMENT_ERROR;
1149 break;
1150 }
1151 /* ignore empty variant */
1152 } else {
1153 /* ICU uses upper case letters for variants, but
1154 the canonical format is lowercase in BCP47 */
1155 for (i = 0; *(pVar + i) != 0; i++) {
1156 *(pVar + i) = uprv_tolower(*(pVar + i));
1157 }
1158
1159 /* validate */
1160 if (_isVariantSubtag(pVar, -1)) {
f3c0d7a5 1161 if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
729e4ab9 1162 /* emit the variant to the list */
51004dcb 1163 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
729e4ab9
A
1164 if (var == NULL) {
1165 *status = U_MEMORY_ALLOCATION_ERROR;
1166 break;
1167 }
1168 var->variant = pVar;
1169 if (!_addVariantToList(&varFirst, var)) {
1170 /* duplicated variant */
1171 uprv_free(var);
1172 if (strict) {
1173 *status = U_ILLEGAL_ARGUMENT_ERROR;
1174 break;
1175 }
1176 }
1177 } else {
1178 /* Special handling for POSIX variant, need to remember that we had it and then */
1179 /* treat it like an extension later. */
1180 *hadPosix = TRUE;
1181 }
1182 } else if (strict) {
1183 *status = U_ILLEGAL_ARGUMENT_ERROR;
1184 break;
4388f060
A
1185 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1186 /* Handle private use subtags separately */
1187 break;
729e4ab9
A
1188 }
1189 }
1190 /* reset variant starting position */
1191 pVar = NULL;
1192 } else if (pVar == NULL) {
1193 pVar = p;
1194 }
1195 p++;
1196 }
1197
1198 if (U_SUCCESS(*status)) {
1199 if (varFirst != NULL) {
1200 int32_t varLen;
1201
4388f060 1202 /* write out validated/normalized variants to the target */
729e4ab9
A
1203 var = varFirst;
1204 while (var != NULL) {
3d1f044b 1205 sink.Append("-", 1);
729e4ab9 1206 varLen = (int32_t)uprv_strlen(var->variant);
3d1f044b 1207 sink.Append(var->variant, varLen);
729e4ab9
A
1208 var = var->next;
1209 }
1210 }
1211 }
1212
1213 /* clean up */
1214 var = varFirst;
1215 while (var != NULL) {
1216 VariantListEntry *tmpVar = var->next;
1217 uprv_free(var);
1218 var = tmpVar;
1219 }
1220
1221 if (U_FAILURE(*status)) {
3d1f044b 1222 return;
729e4ab9
A
1223 }
1224 }
729e4ab9
A
1225}
1226
3d1f044b
A
1227static void
1228_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
4388f060
A
1229 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1230 int32_t attrBufLength = 0;
729e4ab9 1231
3d1f044b
A
1232 icu::MemoryPool<AttributeListEntry> attrPool;
1233 icu::MemoryPool<ExtensionListEntry> extPool;
1234 icu::MemoryPool<icu::CharString> strPool;
1235
1236 icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status));
729e4ab9 1237 if (U_FAILURE(*status) && !hadPosix) {
3d1f044b 1238 return;
729e4ab9 1239 }
3d1f044b 1240 if (keywordEnum.isValid() || hadPosix) {
729e4ab9
A
1241 /* reorder extensions */
1242 int32_t len;
1243 const char *key;
1244 ExtensionListEntry *firstExt = NULL;
1245 ExtensionListEntry *ext;
4388f060
A
1246 AttributeListEntry *firstAttr = NULL;
1247 AttributeListEntry *attr;
3d1f044b 1248 icu::MemoryPool<icu::CharString> extBufPool;
f3c0d7a5 1249 const char *bcpKey=nullptr, *bcpValue=nullptr;
729e4ab9
A
1250 UErrorCode tmpStatus = U_ZERO_ERROR;
1251 int32_t keylen;
b331163b 1252 UBool isBcpUExt;
729e4ab9
A
1253
1254 while (TRUE) {
3d1f044b
A
1255 icu::CharString buf;
1256 key = uenum_next(keywordEnum.getAlias(), NULL, status);
729e4ab9
A
1257 if (key == NULL) {
1258 break;
1259 }
3d1f044b
A
1260 char* buffer;
1261 int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
1262
1263 for (;;) {
1264 buffer = buf.getAppendBuffer(
1265 /*minCapacity=*/resultCapacity,
1266 /*desiredCapacityHint=*/resultCapacity,
1267 resultCapacity,
1268 tmpStatus);
1269
1270 if (U_FAILURE(tmpStatus)) {
1271 break;
1272 }
1273
1274 len = uloc_getKeywordValue(
1275 localeID, key, buffer, resultCapacity, &tmpStatus);
1276
1277 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
1278 break;
1279 }
1280
1281 resultCapacity = len;
1282 tmpStatus = U_ZERO_ERROR;
1283 }
1284
1285 if (U_FAILURE(tmpStatus)) {
1286 if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
1287 *status = U_MEMORY_ALLOCATION_ERROR;
1288 break;
1289 }
729e4ab9
A
1290 if (strict) {
1291 *status = U_ILLEGAL_ARGUMENT_ERROR;
1292 break;
1293 }
1294 /* ignore this keyword */
1295 tmpStatus = U_ZERO_ERROR;
1296 continue;
1297 }
1298
3d1f044b
A
1299 buf.append(buffer, len, tmpStatus);
1300 if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1301 tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
1302 }
1303
729e4ab9 1304 keylen = (int32_t)uprv_strlen(key);
b331163b 1305 isBcpUExt = (keylen > 1);
729e4ab9 1306
4388f060
A
1307 /* special keyword used for representing Unicode locale attributes */
1308 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
4388f060
A
1309 if (len > 0) {
1310 int32_t i = 0;
1311 while (TRUE) {
1312 attrBufLength = 0;
1313 for (; i < len; i++) {
1314 if (buf[i] != '-') {
1315 attrBuf[attrBufLength++] = buf[i];
1316 } else {
1317 i++;
1318 break;
1319 }
1320 }
1321 if (attrBufLength > 0) {
1322 attrBuf[attrBufLength] = 0;
1323
1324 } else if (i >= len){
1325 break;
1326 }
1327
1328 /* create AttributeListEntry */
3d1f044b 1329 attr = attrPool.create();
4388f060
A
1330 if (attr == NULL) {
1331 *status = U_MEMORY_ALLOCATION_ERROR;
1332 break;
1333 }
3d1f044b
A
1334 icu::CharString* attrValue =
1335 strPool.create(attrBuf, attrBufLength, *status);
4388f060
A
1336 if (attrValue == NULL) {
1337 *status = U_MEMORY_ALLOCATION_ERROR;
1338 break;
1339 }
3d1f044b
A
1340 if (U_FAILURE(*status)) {
1341 break;
1342 }
1343 attr->attribute = attrValue->data();
4388f060
A
1344
1345 if (!_addAttributeToList(&firstAttr, attr)) {
4388f060
A
1346 if (strict) {
1347 *status = U_ILLEGAL_ARGUMENT_ERROR;
1348 break;
1349 }
1350 }
1351 }
f3c0d7a5
A
1352 /* for a place holder ExtensionListEntry */
1353 bcpKey = LOCALE_ATTRIBUTE_KEY;
1354 bcpValue = NULL;
4388f060 1355 }
b331163b
A
1356 } else if (isBcpUExt) {
1357 bcpKey = uloc_toUnicodeLocaleKey(key);
1358 if (bcpKey == NULL) {
729e4ab9
A
1359 if (strict) {
1360 *status = U_ILLEGAL_ARGUMENT_ERROR;
1361 break;
1362 }
729e4ab9
A
1363 continue;
1364 }
1365
b331163b 1366 /* we've checked buf is null-terminated above */
3d1f044b 1367 bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
b331163b 1368 if (bcpValue == NULL) {
729e4ab9
A
1369 if (strict) {
1370 *status = U_ILLEGAL_ARGUMENT_ERROR;
1371 break;
1372 }
729e4ab9
A
1373 continue;
1374 }
3d1f044b
A
1375 if (bcpValue == buf.data()) {
1376 /*
b331163b
A
1377 When uloc_toUnicodeLocaleType(key, buf) returns the
1378 input value as is, the value is well-formed, but has
1379 no known mapping. This implementation normalizes the
3d1f044b 1380 value to lower case
b331163b 1381 */
3d1f044b
A
1382 icu::CharString* extBuf = extBufPool.create();
1383 if (extBuf == nullptr) {
1384 *status = U_MEMORY_ALLOCATION_ERROR;
1385 break;
1386 }
0f5d89e8 1387 int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
3d1f044b
A
1388 int32_t resultCapacity;
1389 char* pExtBuf = extBuf->getAppendBuffer(
1390 /*minCapacity=*/bcpValueLen,
1391 /*desiredCapacityHint=*/bcpValueLen,
1392 resultCapacity,
1393 tmpStatus);
1394 if (U_FAILURE(tmpStatus)) {
1395 *status = tmpStatus;
1396 break;
1397 }
b331163b 1398
3d1f044b
A
1399 uprv_strcpy(pExtBuf, bcpValue);
1400 T_CString_toLowerCase(pExtBuf);
b331163b 1401
3d1f044b
A
1402 extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
1403 if (U_FAILURE(tmpStatus)) {
1404 *status = tmpStatus;
1405 break;
b331163b 1406 }
3d1f044b
A
1407
1408 bcpValue = extBuf->data();
b331163b 1409 }
729e4ab9
A
1410 } else {
1411 if (*key == PRIVATEUSE) {
3d1f044b 1412 if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) {
729e4ab9
A
1413 if (strict) {
1414 *status = U_ILLEGAL_ARGUMENT_ERROR;
1415 break;
1416 }
1417 continue;
1418 }
1419 } else {
3d1f044b 1420 if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) {
729e4ab9
A
1421 if (strict) {
1422 *status = U_ILLEGAL_ARGUMENT_ERROR;
1423 break;
1424 }
1425 continue;
1426 }
1427 }
1428 bcpKey = key;
3d1f044b
A
1429 icu::CharString* extBuf =
1430 extBufPool.create(buf.data(), len, tmpStatus);
1431 if (extBuf == nullptr) {
1432 *status = U_MEMORY_ALLOCATION_ERROR;
729e4ab9
A
1433 break;
1434 }
3d1f044b
A
1435 if (U_FAILURE(tmpStatus)) {
1436 *status = tmpStatus;
1437 break;
1438 }
1439 bcpValue = extBuf->data();
729e4ab9
A
1440 }
1441
f3c0d7a5 1442 /* create ExtensionListEntry */
3d1f044b 1443 ext = extPool.create();
f3c0d7a5
A
1444 if (ext == NULL) {
1445 *status = U_MEMORY_ALLOCATION_ERROR;
1446 break;
1447 }
1448 ext->key = bcpKey;
1449 ext->value = bcpValue;
4388f060 1450
f3c0d7a5 1451 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
f3c0d7a5
A
1452 if (strict) {
1453 *status = U_ILLEGAL_ARGUMENT_ERROR;
1454 break;
4388f060 1455 }
729e4ab9
A
1456 }
1457 }
1458
1459 /* Special handling for POSIX variant - add the keywords for POSIX */
1460 if (hadPosix) {
1461 /* create ExtensionListEntry for POSIX */
3d1f044b 1462 ext = extPool.create();
729e4ab9
A
1463 if (ext == NULL) {
1464 *status = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 1465 return;
729e4ab9
A
1466 }
1467 ext->key = POSIX_KEY;
1468 ext->value = POSIX_VALUE;
1469
1470 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
3d1f044b 1471 // Silently ignore errors.
729e4ab9
A
1472 }
1473 }
1474
4388f060 1475 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
729e4ab9 1476 UBool startLDMLExtension = FALSE;
f3c0d7a5
A
1477 for (ext = firstExt; ext; ext = ext->next) {
1478 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1479 /* first LDML u singlton extension */
3d1f044b 1480 sink.Append("-u", 2);
4388f060
A
1481 startLDMLExtension = TRUE;
1482 }
1483
1484 /* write out the sorted BCP47 attributes, extensions and private use */
f3c0d7a5
A
1485 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1486 /* write the value for the attributes */
1487 for (attr = firstAttr; attr; attr = attr->next) {
3d1f044b
A
1488 sink.Append("-", 1);
1489 sink.Append(
1490 attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute)));
f3c0d7a5
A
1491 }
1492 } else {
3d1f044b
A
1493 sink.Append("-", 1);
1494 sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
1495 sink.Append("-", 1);
1496 sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
4388f060 1497 }
f3c0d7a5 1498 }
729e4ab9 1499 }
729e4ab9 1500 }
729e4ab9
A
1501}
1502
1503/**
1504 * Append keywords parsed from LDML extension value
1505 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1506 * Note: char* buf is used for storing keywords
1507 */
1508static void
3d1f044b 1509_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) {
51004dcb
A
1510 const char *pTag; /* beginning of current subtag */
1511 const char *pKwds; /* beginning of key-type pairs */
1512 UBool variantExists = *posixVariant;
1513
1514 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
729e4ab9 1515 ExtensionListEntry *kwd, *nextKwd;
51004dcb 1516
51004dcb 1517 int32_t len;
4388f060
A
1518
1519 /* Reset the posixVariant value */
1520 *posixVariant = FALSE;
729e4ab9 1521
51004dcb
A
1522 pTag = ldmlext;
1523 pKwds = NULL;
729e4ab9 1524
3d1f044b
A
1525 {
1526 AttributeListEntry *attrFirst = NULL; /* first attribute */
1527 AttributeListEntry *attr, *nextAttr;
51004dcb 1528
3d1f044b
A
1529 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1530 int32_t attrBufIdx = 0;
51004dcb 1531
3d1f044b 1532 icu::MemoryPool<AttributeListEntry> attrPool;
729e4ab9 1533
3d1f044b
A
1534 /* Iterate through u extension attributes */
1535 while (*pTag) {
1536 /* locate next separator char */
1537 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
729e4ab9 1538
3d1f044b
A
1539 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1540 pKwds = pTag;
1541 break;
1542 }
729e4ab9 1543
3d1f044b
A
1544 /* add this attribute to the list */
1545 attr = attrPool.create();
1546 if (attr == NULL) {
1547 *status = U_MEMORY_ALLOCATION_ERROR;
1548 return;
1549 }
51004dcb 1550
3d1f044b
A
1551 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1552 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1553 attrBuf[attrBufIdx + len] = 0;
1554 attr->attribute = &attrBuf[attrBufIdx];
1555 attrBufIdx += (len + 1);
1556 } else {
1557 *status = U_ILLEGAL_ARGUMENT_ERROR;
1558 return;
1559 }
51004dcb 1560
3d1f044b
A
1561 if (!_addAttributeToList(&attrFirst, attr)) {
1562 *status = U_ILLEGAL_ARGUMENT_ERROR;
1563 return;
1564 }
51004dcb 1565
3d1f044b
A
1566 /* next tag */
1567 pTag += len;
1568 if (*pTag) {
1569 /* next to the separator */
1570 pTag++;
1571 }
51004dcb
A
1572 }
1573
3d1f044b
A
1574 if (attrFirst) {
1575 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
51004dcb 1576
3d1f044b
A
1577 kwd = extPool.create();
1578 if (kwd == NULL) {
1579 *status = U_MEMORY_ALLOCATION_ERROR;
1580 return;
729e4ab9 1581 }
51004dcb 1582
3d1f044b
A
1583 icu::CharString* value = kwdBuf.create();
1584 if (value == NULL) {
1585 *status = U_MEMORY_ALLOCATION_ERROR;
1586 return;
1587 }
729e4ab9 1588
3d1f044b
A
1589 /* attribute subtags sorted in alphabetical order as type */
1590 attr = attrFirst;
1591 while (attr != NULL) {
1592 nextAttr = attr->next;
1593 if (attr != attrFirst) {
1594 value->append('-', *status);
1595 }
1596 value->append(attr->attribute, *status);
1597 attr = nextAttr;
1598 }
1599 if (U_FAILURE(*status)) {
1600 return;
1601 }
51004dcb 1602
3d1f044b
A
1603 kwd->key = LOCALE_ATTRIBUTE_KEY;
1604 kwd->value = value->data();
51004dcb 1605
3d1f044b
A
1606 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1607 *status = U_ILLEGAL_ARGUMENT_ERROR;
1608 return;
1609 }
51004dcb 1610 }
51004dcb
A
1611 }
1612
1613 if (pKwds) {
1614 const char *pBcpKey = NULL; /* u extenstion key subtag */
1615 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1616 int32_t bcpKeyLen = 0;
1617 int32_t bcpTypeLen = 0;
1618 UBool isDone = FALSE;
1619
1620 pTag = pKwds;
1621 /* BCP47 representation of LDML key/type pairs */
1622 while (!isDone) {
1623 const char *pNextBcpKey = NULL;
b331163b 1624 int32_t nextBcpKeyLen = 0;
51004dcb
A
1625 UBool emitKeyword = FALSE;
1626
1627 if (*pTag) {
1628 /* locate next separator char */
1629 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1630
b331163b 1631 if (ultag_isUnicodeLocaleKey(pTag, len)) {
51004dcb
A
1632 if (pBcpKey) {
1633 emitKeyword = TRUE;
1634 pNextBcpKey = pTag;
1635 nextBcpKeyLen = len;
1636 } else {
1637 pBcpKey = pTag;
1638 bcpKeyLen = len;
1639 }
1640 } else {
1641 U_ASSERT(pBcpKey != NULL);
1642 /* within LDML type subtags */
1643 if (pBcpType) {
1644 bcpTypeLen += (len + 1);
1645 } else {
1646 pBcpType = pTag;
1647 bcpTypeLen = len;
1648 }
1649 }
729e4ab9 1650
51004dcb
A
1651 /* next tag */
1652 pTag += len;
1653 if (*pTag) {
1654 /* next to the separator */
1655 pTag++;
1656 }
729e4ab9 1657 } else {
51004dcb
A
1658 /* processing last one */
1659 emitKeyword = TRUE;
1660 isDone = TRUE;
1661 }
1662
1663 if (emitKeyword) {
1664 const char *pKey = NULL; /* LDML key */
1665 const char *pType = NULL; /* LDML type */
1666
b331163b
A
1667 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1668
51004dcb
A
1669 U_ASSERT(pBcpKey != NULL);
1670
f3c0d7a5 1671 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
b331163b
A
1672 /* the BCP key is invalid */
1673 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1674 return;
b331163b
A
1675 }
1676
1677 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1678 bcpKeyBuf[bcpKeyLen] = 0;
1679
51004dcb 1680 /* u extension key to LDML key */
b331163b
A
1681 pKey = uloc_toLegacyKey(bcpKeyBuf);
1682 if (pKey == NULL) {
1683 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1684 return;
729e4ab9 1685 }
b331163b
A
1686 if (pKey == bcpKeyBuf) {
1687 /*
1688 The key returned by toLegacyKey points to the input buffer.
1689 We normalize the result key to lower case.
1690 */
1691 T_CString_toLowerCase(bcpKeyBuf);
3d1f044b
A
1692 icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status);
1693 if (key == NULL) {
1694 *status = U_MEMORY_ALLOCATION_ERROR;
1695 return;
b331163b 1696 }
3d1f044b
A
1697 if (U_FAILURE(*status)) {
1698 return;
1699 }
1700 pKey = key->data();
b331163b 1701 }
51004dcb
A
1702
1703 if (pBcpType) {
b331163b 1704 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
f3c0d7a5 1705 if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
b331163b
A
1706 /* the BCP type is too long */
1707 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1708 return;
b331163b
A
1709 }
1710
1711 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1712 bcpTypeBuf[bcpTypeLen] = 0;
1713
51004dcb 1714 /* BCP type to locale type */
b331163b
A
1715 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1716 if (pType == NULL) {
1717 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b 1718 return;
51004dcb 1719 }
b331163b
A
1720 if (pType == bcpTypeBuf) {
1721 /*
1722 The type returned by toLegacyType points to the input buffer.
1723 We normalize the result type to lower case.
1724 */
1725 /* normalize to lower case */
1726 T_CString_toLowerCase(bcpTypeBuf);
3d1f044b
A
1727 icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status);
1728 if (type == NULL) {
1729 *status = U_MEMORY_ALLOCATION_ERROR;
1730 return;
1731 }
1732 if (U_FAILURE(*status)) {
1733 return;
b331163b 1734 }
3d1f044b 1735 pType = type->data();
b331163b 1736 }
51004dcb
A
1737 } else {
1738 /* typeless - default type value is "yes" */
1739 pType = LOCALE_TYPE_YES;
1740 }
729e4ab9 1741
51004dcb
A
1742 /* Special handling for u-va-posix, since we want to treat this as a variant,
1743 not as a keyword */
1744 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1745 *posixVariant = TRUE;
1746 } else {
1747 /* create an ExtensionListEntry for this keyword */
3d1f044b 1748 kwd = extPool.create();
51004dcb
A
1749 if (kwd == NULL) {
1750 *status = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 1751 return;
51004dcb 1752 }
729e4ab9 1753
51004dcb
A
1754 kwd->key = pKey;
1755 kwd->value = pType;
1756
1757 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
3d1f044b
A
1758 // duplicate keyword is allowed, Only the first
1759 // is honored.
51004dcb 1760 }
729e4ab9 1761 }
729e4ab9 1762
51004dcb
A
1763 pBcpKey = pNextBcpKey;
1764 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1765 pBcpType = NULL;
1766 bcpTypeLen = 0;
1767 }
729e4ab9
A
1768 }
1769 }
1770
729e4ab9
A
1771 kwd = kwdFirst;
1772 while (kwd != NULL) {
1773 nextKwd = kwd->next;
1774 _addExtensionToList(appendTo, kwd, FALSE);
1775 kwd = nextKwd;
1776 }
729e4ab9
A
1777}
1778
1779
3d1f044b
A
1780static void
1781_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
729e4ab9
A
1782 int32_t i, n;
1783 int32_t len;
1784 ExtensionListEntry *kwdFirst = NULL;
1785 ExtensionListEntry *kwd;
1786 const char *key, *type;
3d1f044b
A
1787 icu::MemoryPool<ExtensionListEntry> extPool;
1788 icu::MemoryPool<icu::CharString> kwdBuf;
729e4ab9
A
1789 UBool posixVariant = FALSE;
1790
1791 if (U_FAILURE(*status)) {
3d1f044b 1792 return;
4388f060
A
1793 }
1794
1795 /* Determine if variants already exists */
1796 if (ultag_getVariantsSize(langtag)) {
1797 posixVariant = TRUE;
1798 }
1799
729e4ab9
A
1800 n = ultag_getExtensionsSize(langtag);
1801
1802 /* resolve locale keywords and reordering keys */
1803 for (i = 0; i < n; i++) {
1804 key = ultag_getExtensionKey(langtag, i);
1805 type = ultag_getExtensionValue(langtag, i);
1806 if (*key == LDMLEXT) {
3d1f044b 1807 _appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
729e4ab9
A
1808 if (U_FAILURE(*status)) {
1809 break;
1810 }
1811 } else {
3d1f044b 1812 kwd = extPool.create();
729e4ab9
A
1813 if (kwd == NULL) {
1814 *status = U_MEMORY_ALLOCATION_ERROR;
1815 break;
1816 }
1817 kwd->key = key;
1818 kwd->value = type;
1819 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
729e4ab9
A
1820 *status = U_ILLEGAL_ARGUMENT_ERROR;
1821 break;
1822 }
1823 }
1824 }
1825
1826 if (U_SUCCESS(*status)) {
1827 type = ultag_getPrivateUse(langtag);
1828 if ((int32_t)uprv_strlen(type) > 0) {
1829 /* add private use as a keyword */
3d1f044b 1830 kwd = extPool.create();
729e4ab9
A
1831 if (kwd == NULL) {
1832 *status = U_MEMORY_ALLOCATION_ERROR;
1833 } else {
1834 kwd->key = PRIVATEUSE_KEY;
1835 kwd->value = type;
1836 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
729e4ab9
A
1837 *status = U_ILLEGAL_ARGUMENT_ERROR;
1838 }
1839 }
1840 }
1841 }
1842
1843 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1844
1845 if (U_SUCCESS(*status) && posixVariant) {
1846 len = (int32_t) uprv_strlen(_POSIX);
3d1f044b 1847 sink.Append(_POSIX, len);
729e4ab9
A
1848 }
1849
51004dcb 1850 if (U_SUCCESS(*status) && kwdFirst != NULL) {
729e4ab9 1851 /* write out the sorted keywords */
4388f060 1852 UBool firstValue = TRUE;
729e4ab9 1853 kwd = kwdFirst;
4388f060 1854 do {
3d1f044b
A
1855 if (firstValue) {
1856 sink.Append("@", 1);
1857 firstValue = FALSE;
1858 } else {
1859 sink.Append(";", 1);
729e4ab9 1860 }
729e4ab9 1861
51004dcb
A
1862 /* key */
1863 len = (int32_t)uprv_strlen(kwd->key);
3d1f044b
A
1864 sink.Append(kwd->key, len);
1865 sink.Append("=", 1);
4388f060 1866
51004dcb
A
1867 /* type */
1868 len = (int32_t)uprv_strlen(kwd->value);
3d1f044b 1869 sink.Append(kwd->value, len);
51004dcb
A
1870
1871 kwd = kwd->next;
1872 } while (kwd);
729e4ab9 1873 }
729e4ab9
A
1874}
1875
3d1f044b
A
1876static void
1877_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
f3c0d7a5 1878 (void)hadPosix;
4388f060
A
1879 char buf[ULOC_FULLNAME_CAPACITY];
1880 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1881 UErrorCode tmpStatus = U_ZERO_ERROR;
1882 int32_t len, i;
1883 int32_t reslen = 0;
3d1f044b 1884 int32_t capacity = sizeof tmpAppend;
4388f060
A
1885
1886 if (U_FAILURE(*status)) {
3d1f044b 1887 return;
4388f060
A
1888 }
1889
1890 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1891 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1892 if (strict) {
1893 *status = U_ILLEGAL_ARGUMENT_ERROR;
1894 }
3d1f044b 1895 return;
4388f060
A
1896 }
1897
1898 if (len > 0) {
1899 char *p, *pPriv;
1900 UBool bNext = TRUE;
1901 UBool firstValue = TRUE;
1902 UBool writeValue;
1903
1904 pPriv = NULL;
1905 p = buf;
1906 while (bNext) {
1907 writeValue = FALSE;
1908 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1909 if (*p == 0) {
1910 bNext = FALSE;
1911 } else {
1912 *p = 0; /* terminate */
1913 }
1914 if (pPriv != NULL) {
1915 /* Private use in the canonical format is lowercase in BCP47 */
1916 for (i = 0; *(pPriv + i) != 0; i++) {
1917 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1918 }
1919
1920 /* validate */
1921 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1922 if (firstValue) {
1923 if (!_isVariantSubtag(pPriv, -1)) {
1924 writeValue = TRUE;
1925 }
1926 } else {
1927 writeValue = TRUE;
1928 }
1929 } else if (strict) {
1930 *status = U_ILLEGAL_ARGUMENT_ERROR;
1931 break;
1932 } else {
1933 break;
1934 }
1935
1936 if (writeValue) {
1937 if (reslen < capacity) {
1938 tmpAppend[reslen++] = SEP;
1939 }
1940
1941 if (firstValue) {
1942 if (reslen < capacity) {
1943 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1944 }
1945
1946 if (reslen < capacity) {
1947 tmpAppend[reslen++] = SEP;
1948 }
1949
1950 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1951 if (reslen < capacity) {
1952 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1953 }
1954 reslen += len;
1955
1956 if (reslen < capacity) {
1957 tmpAppend[reslen++] = SEP;
1958 }
1959
1960 firstValue = FALSE;
1961 }
1962
1963 len = (int32_t)uprv_strlen(pPriv);
1964 if (reslen < capacity) {
1965 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1966 }
1967 reslen += len;
1968 }
1969 }
1970 /* reset private use starting position */
1971 pPriv = NULL;
1972 } else if (pPriv == NULL) {
1973 pPriv = p;
1974 }
1975 p++;
1976 }
1977
1978 if (U_FAILURE(*status)) {
3d1f044b 1979 return;
4388f060
A
1980 }
1981 }
1982
1983 if (U_SUCCESS(*status)) {
1984 len = reslen;
3d1f044b 1985 sink.Append(tmpAppend, len);
4388f060 1986 }
4388f060
A
1987}
1988
729e4ab9
A
1989/*
1990* -------------------------------------------------
1991*
1992* ultag_ functions
1993*
1994* -------------------------------------------------
1995*/
1996
/*
 * Bit flags used by the parser.
 * ultag_parse keeps a mask of these in `next`; each set bit names a kind of
 * subtag that is acceptable at the current position.
 */
#define LANG (1 << 0)   /* language subtag */
#define EXTL (1 << 1)   /* extlang subtag */
#define SCRT (1 << 2)   /* script subtag */
#define REGN (1 << 3)   /* region subtag */
#define VART (1 << 4)   /* variant subtag */
#define EXTS (1 << 5)   /* extension singleton */
#define EXTV (1 << 6)   /* extension value subtag */
#define PRIV (1 << 7)   /* private use singleton */
2006
f3c0d7a5
A
2007/**
2008 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
2009 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
2010 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
2011 */
2012#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2013#pragma optimize( "", off )
2014#endif
2015
729e4ab9
A
2016static ULanguageTag*
2017ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
729e4ab9
A
2018 char *tagBuf;
2019 int16_t next;
2020 char *pSubtag, *pNext, *pLastGoodPosition;
2021 int32_t subtagLen;
2022 int32_t extlangIdx;
2023 ExtensionListEntry *pExtension;
2024 char *pExtValueSubtag, *pExtValueSubtagEnd;
2025 int32_t i;
51004dcb
A
2026 UBool privateuseVar = FALSE;
2027 int32_t grandfatheredLen = 0;
729e4ab9
A
2028
2029 if (parsedLen != NULL) {
2030 *parsedLen = 0;
2031 }
2032
2033 if (U_FAILURE(*status)) {
2034 return NULL;
2035 }
2036
2037 if (tagLen < 0) {
2038 tagLen = (int32_t)uprv_strlen(tag);
2039 }
2040
2041 /* copy the entire string */
2042 tagBuf = (char*)uprv_malloc(tagLen + 1);
2043 if (tagBuf == NULL) {
2044 *status = U_MEMORY_ALLOCATION_ERROR;
2045 return NULL;
2046 }
2047 uprv_memcpy(tagBuf, tag, tagLen);
2048 *(tagBuf + tagLen) = 0;
2049
2050 /* create a ULanguageTag */
3d1f044b
A
2051 icu::LocalULanguageTagPointer t(
2052 (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)));
2053 if (t.isNull()) {
729e4ab9
A
2054 uprv_free(tagBuf);
2055 *status = U_MEMORY_ALLOCATION_ERROR;
2056 return NULL;
2057 }
3d1f044b 2058 _initializeULanguageTag(t.getAlias());
4388f060 2059 t->buf = tagBuf;
729e4ab9
A
2060
2061 if (tagLen < MINLEN) {
2062 /* the input tag is too short - return empty ULanguageTag */
3d1f044b 2063 return t.orphan();
729e4ab9
A
2064 }
2065
3d1f044b
A
2066 size_t parsedLenDelta = 0;
2067 // Grandfathered tag will be consider together. Grandfathered tag with intervening
2068 // script and region such as art-DE-lojban or art-Latn-lojban won't be
2069 // matched.
729e4ab9 2070 /* check if the tag is grandfathered */
3d1f044b
A
2071 for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
2072 int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
2073 if (tagLen < checkGrandfatheredLen) {
2074 continue;
2075 }
2076 if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
2077 // make sure next char is '-'.
2078 continue;
2079 }
2080 if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
51004dcb
A
2081 int32_t newTagLength;
2082
3d1f044b
A
2083 grandfatheredLen = checkGrandfatheredLen; /* back up for output parsedLen */
2084 int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
2085 newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
4388f060
A
2086 if (tagLen < newTagLength) {
2087 uprv_free(tagBuf);
2088 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2089 if (tagBuf == NULL) {
2090 *status = U_MEMORY_ALLOCATION_ERROR;
2091 return NULL;
2092 }
2093 t->buf = tagBuf;
2094 tagLen = newTagLength;
729e4ab9 2095 }
3d1f044b 2096 parsedLenDelta = checkGrandfatheredLen - replacementLen;
4388f060 2097 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
3d1f044b
A
2098 if (checkGrandfatheredLen != tagLen) {
2099 uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
2100 }
4388f060 2101 break;
729e4ab9
A
2102 }
2103 }
2104
3d1f044b
A
2105 if (grandfatheredLen == 0) {
2106 for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
2107 const char* redundantTag = REDUNDANT[i];
2108 size_t redundantTagLen = uprv_strlen(redundantTag);
2109 // The preferred tag for a redundant tag is always shorter than redundant
2110 // tag. A redundant tag may or may not be followed by other subtags.
2111 // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
2112 if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
2113 const char* redundantTagEnd = tagBuf + redundantTagLen;
2114 if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
2115 const char* preferredTag = REDUNDANT[i + 1];
2116 size_t preferredTagLen = uprv_strlen(preferredTag);
2117 uprv_strncpy(t->buf, preferredTag, preferredTagLen);
2118 if (*redundantTagEnd == SEP) {
2119 uprv_memmove(tagBuf + preferredTagLen,
2120 redundantTagEnd,
2121 tagLen - redundantTagLen + 1);
2122 } else {
2123 tagBuf[preferredTagLen] = '\0';
2124 }
2125 // parsedLen should be the length of the input
2126 // before redundantTag is replaced by preferredTag.
2127 // Save the delta to add it back later.
2128 parsedLenDelta = redundantTagLen - preferredTagLen;
2129 break;
2130 }
2131 }
2132 }
2133 }
2134
729e4ab9
A
2135 /*
2136 * langtag = language
2137 * ["-" script]
2138 * ["-" region]
2139 * *("-" variant)
2140 * *("-" extension)
2141 * ["-" privateuse]
2142 */
2143
2144 next = LANG | PRIV;
2145 pNext = pLastGoodPosition = tagBuf;
2146 extlangIdx = 0;
2147 pExtension = NULL;
2148 pExtValueSubtag = NULL;
2149 pExtValueSubtagEnd = NULL;
729e4ab9
A
2150
2151 while (pNext) {
2152 char *pSep;
2153
2154 pSubtag = pNext;
2155
2156 /* locate next separator char */
2157 pSep = pSubtag;
2158 while (*pSep) {
2159 if (*pSep == SEP) {
2160 break;
2161 }
2162 pSep++;
2163 }
2164 if (*pSep == 0) {
2165 /* last subtag */
2166 pNext = NULL;
2167 } else {
2168 pNext = pSep + 1;
2169 }
2170 subtagLen = (int32_t)(pSep - pSubtag);
2171
2172 if (next & LANG) {
3d1f044b 2173 if (ultag_isLanguageSubtag(pSubtag, subtagLen)) {
729e4ab9 2174 *pSep = 0; /* terminate */
3d1f044b 2175 // TODO: move deprecated language code handling here.
729e4ab9
A
2176 t->language = T_CString_toLowerCase(pSubtag);
2177
2178 pLastGoodPosition = pSep;
3d1f044b
A
2179 next = SCRT | REGN | VART | EXTS | PRIV;
2180 if (subtagLen <= 3)
2181 next |= EXTL;
729e4ab9
A
2182 continue;
2183 }
2184 }
2185 if (next & EXTL) {
2186 if (_isExtlangSubtag(pSubtag, subtagLen)) {
2187 *pSep = 0;
2188 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2189
2190 pLastGoodPosition = pSep;
2191 if (extlangIdx < 3) {
2192 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2193 } else {
2194 next = SCRT | REGN | VART | EXTS | PRIV;
2195 }
2196 continue;
2197 }
2198 }
2199 if (next & SCRT) {
3d1f044b 2200 if (ultag_isScriptSubtag(pSubtag, subtagLen)) {
729e4ab9
A
2201 char *p = pSubtag;
2202
2203 *pSep = 0;
2204
2205 /* to title case */
2206 *p = uprv_toupper(*p);
2207 p++;
2208 for (; *p; p++) {
2209 *p = uprv_tolower(*p);
2210 }
2211
2212 t->script = pSubtag;
2213
2214 pLastGoodPosition = pSep;
2215 next = REGN | VART | EXTS | PRIV;
2216 continue;
2217 }
2218 }
2219 if (next & REGN) {
3d1f044b 2220 if (ultag_isRegionSubtag(pSubtag, subtagLen)) {
729e4ab9 2221 *pSep = 0;
3d1f044b 2222 // TODO: move deprecated region code handling here.
729e4ab9
A
2223 t->region = T_CString_toUpperCase(pSubtag);
2224
2225 pLastGoodPosition = pSep;
2226 next = VART | EXTS | PRIV;
2227 continue;
2228 }
2229 }
2230 if (next & VART) {
4388f060
A
2231 if (_isVariantSubtag(pSubtag, subtagLen) ||
2232 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
729e4ab9
A
2233 VariantListEntry *var;
2234 UBool isAdded;
2235
2236 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2237 if (var == NULL) {
2238 *status = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 2239 return NULL;
729e4ab9
A
2240 }
2241 *pSep = 0;
2242 var->variant = T_CString_toUpperCase(pSubtag);
2243 isAdded = _addVariantToList(&(t->variants), var);
2244 if (!isAdded) {
2245 /* duplicated variant entry */
2246 uprv_free(var);
2247 break;
2248 }
2249 pLastGoodPosition = pSep;
2250 next = VART | EXTS | PRIV;
2251 continue;
2252 }
2253 }
2254 if (next & EXTS) {
2255 if (_isExtensionSingleton(pSubtag, subtagLen)) {
2256 if (pExtension != NULL) {
2257 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2258 /* the previous extension is incomplete */
2259 uprv_free(pExtension);
2260 pExtension = NULL;
2261 break;
2262 }
2263
2264 /* terminate the previous extension value */
2265 *pExtValueSubtagEnd = 0;
2266 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2267
2268 /* insert the extension to the list */
2269 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2270 pLastGoodPosition = pExtValueSubtagEnd;
2271 } else {
2272 /* stop parsing here */
2273 uprv_free(pExtension);
2274 pExtension = NULL;
2275 break;
2276 }
729e4ab9
A
2277 }
2278
729e4ab9 2279 /* create a new extension */
51004dcb 2280 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
729e4ab9
A
2281 if (pExtension == NULL) {
2282 *status = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 2283 return NULL;
729e4ab9
A
2284 }
2285 *pSep = 0;
2286 pExtension->key = T_CString_toLowerCase(pSubtag);
2287 pExtension->value = NULL; /* will be set later */
2288
2289 /*
2290 * reset the start and the end location of extension value
2291 * subtags for this extension
2292 */
2293 pExtValueSubtag = NULL;
2294 pExtValueSubtagEnd = NULL;
2295
2296 next = EXTV;
2297 continue;
2298 }
2299 }
2300 if (next & EXTV) {
2301 if (_isExtensionSubtag(pSubtag, subtagLen)) {
51004dcb
A
2302 if (pExtValueSubtag == NULL) {
2303 /* if the start postion of this extension's value is not yet,
2304 this one is the first value subtag */
2305 pExtValueSubtag = pSubtag;
729e4ab9
A
2306 }
2307
51004dcb
A
2308 /* Mark the end of this subtag */
2309 pExtValueSubtagEnd = pSep;
2310 next = EXTS | EXTV | PRIV;
4388f060 2311
51004dcb 2312 continue;
729e4ab9
A
2313 }
2314 }
2315 if (next & PRIV) {
3d1f044b 2316 if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
729e4ab9
A
2317 char *pPrivuseVal;
2318
2319 if (pExtension != NULL) {
2320 /* Process the last extension */
2321 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2322 /* the previous extension is incomplete */
2323 uprv_free(pExtension);
2324 pExtension = NULL;
2325 break;
2326 } else {
2327 /* terminate the previous extension value */
2328 *pExtValueSubtagEnd = 0;
2329 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2330
2331 /* insert the extension to the list */
2332 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2333 pLastGoodPosition = pExtValueSubtagEnd;
2334 pExtension = NULL;
2335 } else {
2336 /* stop parsing here */
2337 uprv_free(pExtension);
2338 pExtension = NULL;
2339 break;
2340 }
2341 }
2342 }
2343
2344 /* The rest of part will be private use value subtags */
2345 if (pNext == NULL) {
2346 /* empty private use subtag */
2347 break;
2348 }
2349 /* back up the private use value start position */
2350 pPrivuseVal = pNext;
2351
2352 /* validate private use value subtags */
2353 while (pNext) {
2354 pSubtag = pNext;
2355 pSep = pSubtag;
2356 while (*pSep) {
2357 if (*pSep == SEP) {
2358 break;
2359 }
2360 pSep++;
2361 }
2362 if (*pSep == 0) {
2363 /* last subtag */
2364 pNext = NULL;
2365 } else {
2366 pNext = pSep + 1;
2367 }
2368 subtagLen = (int32_t)(pSep - pSubtag);
2369
4388f060
A
2370 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2371 *pSep = 0;
2372 next = VART;
2373 privateuseVar = TRUE;
2374 break;
2375 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
729e4ab9
A
2376 pLastGoodPosition = pSep;
2377 } else {
2378 break;
2379 }
2380 }
4388f060
A
2381
2382 if (next == VART) {
2383 continue;
2384 }
2385
729e4ab9
A
2386 if (pLastGoodPosition - pPrivuseVal > 0) {
2387 *pLastGoodPosition = 0;
2388 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2389 }
2390 /* No more subtags, exiting the parse loop */
2391 break;
2392 }
2393 break;
2394 }
4388f060 2395
729e4ab9
A
2396 /* If we fell through here, it means this subtag is illegal - quit parsing */
2397 break;
2398 }
2399
2400 if (pExtension != NULL) {
2401 /* Process the last extension */
2402 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2403 /* the previous extension is incomplete */
2404 uprv_free(pExtension);
2405 } else {
2406 /* terminate the previous extension value */
2407 *pExtValueSubtagEnd = 0;
2408 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2409 /* insert the extension to the list */
2410 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2411 pLastGoodPosition = pExtValueSubtagEnd;
2412 } else {
2413 uprv_free(pExtension);
2414 }
2415 }
2416 }
2417
2418 if (parsedLen != NULL) {
3d1f044b 2419 *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
729e4ab9
A
2420 }
2421
3d1f044b 2422 return t.orphan();
729e4ab9
A
2423}
2424
f3c0d7a5
A
2425/**
2426* Ticket #12705 - Turn optimization back on.
2427*/
2428#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2429#pragma optimize( "", on )
2430#endif
2431
729e4ab9
A
2432static void
2433ultag_close(ULanguageTag* langtag) {
2434
2435 if (langtag == NULL) {
2436 return;
2437 }
2438
2439 uprv_free(langtag->buf);
2440
2441 if (langtag->variants) {
2442 VariantListEntry *curVar = langtag->variants;
2443 while (curVar) {
2444 VariantListEntry *nextVar = curVar->next;
2445 uprv_free(curVar);
2446 curVar = nextVar;
2447 }
2448 }
2449
2450 if (langtag->extensions) {
2451 ExtensionListEntry *curExt = langtag->extensions;
2452 while (curExt) {
2453 ExtensionListEntry *nextExt = curExt->next;
2454 uprv_free(curExt);
2455 curExt = nextExt;
2456 }
2457 }
2458
2459 uprv_free(langtag);
2460}
2461
2462static const char*
2463ultag_getLanguage(const ULanguageTag* langtag) {
2464 return langtag->language;
2465}
2466
#if 0
/*
 * Compiled out. Scans DEPRECATEDLANGS two entries at a time (the table is
 * terminated by a NULL at an even index): when the even-indexed entry
 * compares equal to the tag's language, the following odd-indexed entry
 * is returned. Without a match the tag's own language is returned.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char* const lang = langtag->language;
    for (int32_t pairIdx = 0; DEPRECATEDLANGS[pairIdx] != NULL; pairIdx += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], lang) != 0) {
            continue;
        }
        return DEPRECATEDLANGS[pairIdx + 1];
    }
    return lang;
}
#endif
2479
2480static const char*
2481ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2482 if (idx >= 0 && idx < MAXEXTLANG) {
2483 return langtag->extlang[idx];
2484 }
2485 return NULL;
2486}
2487
2488static int32_t
2489ultag_getExtlangSize(const ULanguageTag* langtag) {
2490 int32_t size = 0;
2491 int32_t i;
2492 for (i = 0; i < MAXEXTLANG; i++) {
2493 if (langtag->extlang[i]) {
2494 size++;
2495 }
2496 }
2497 return size;
2498}
2499
2500static const char*
2501ultag_getScript(const ULanguageTag* langtag) {
2502 return langtag->script;
2503}
2504
2505static const char*
2506ultag_getRegion(const ULanguageTag* langtag) {
2507 return langtag->region;
2508}
2509
2510static const char*
2511ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2512 const char *var = NULL;
2513 VariantListEntry *cur = langtag->variants;
2514 int32_t i = 0;
2515 while (cur) {
2516 if (i == idx) {
2517 var = cur->variant;
2518 break;
2519 }
2520 cur = cur->next;
2521 i++;
2522 }
2523 return var;
2524}
2525
2526static int32_t
2527ultag_getVariantsSize(const ULanguageTag* langtag) {
2528 int32_t size = 0;
2529 VariantListEntry *cur = langtag->variants;
2530 while (TRUE) {
2531 if (cur == NULL) {
2532 break;
2533 }
2534 size++;
2535 cur = cur->next;
2536 }
2537 return size;
2538}
2539
2540static const char*
2541ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2542 const char *key = NULL;
2543 ExtensionListEntry *cur = langtag->extensions;
2544 int32_t i = 0;
2545 while (cur) {
2546 if (i == idx) {
2547 key = cur->key;
2548 break;
2549 }
2550 cur = cur->next;
2551 i++;
2552 }
2553 return key;
2554}
2555
2556static const char*
2557ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2558 const char *val = NULL;
2559 ExtensionListEntry *cur = langtag->extensions;
2560 int32_t i = 0;
2561 while (cur) {
2562 if (i == idx) {
2563 val = cur->value;
2564 break;
2565 }
2566 cur = cur->next;
2567 i++;
2568 }
2569 return val;
2570}
2571
2572static int32_t
2573ultag_getExtensionsSize(const ULanguageTag* langtag) {
2574 int32_t size = 0;
2575 ExtensionListEntry *cur = langtag->extensions;
2576 while (TRUE) {
2577 if (cur == NULL) {
2578 break;
2579 }
2580 size++;
2581 cur = cur->next;
2582 }
2583 return size;
2584}
2585
2586static const char*
2587ultag_getPrivateUse(const ULanguageTag* langtag) {
2588 return langtag->privateuse;
2589}
2590
#if 0
/* Compiled out. Returns the grandfathered field stored in the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char* const grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2597
2598
2599/*
2600* -------------------------------------------------
2601*
2602* Locale/BCP47 conversion APIs, exposed as uloc_*
2603*
2604* -------------------------------------------------
2605*/
51004dcb 2606U_CAPI int32_t U_EXPORT2
729e4ab9
A
2607uloc_toLanguageTag(const char* localeID,
2608 char* langtag,
2609 int32_t langtagCapacity,
2610 UBool strict,
2611 UErrorCode* status) {
3d1f044b
A
2612 if (U_FAILURE(*status)) {
2613 return 0;
2614 }
2615
2616 icu::CheckedArrayByteSink sink(langtag, langtagCapacity);
2617 ulocimp_toLanguageTag(localeID, sink, strict, status);
2618
2619 int32_t reslen = sink.NumberOfBytesAppended();
2620
2621 if (U_FAILURE(*status)) {
2622 return reslen;
2623 }
2624
2625 if (sink.Overflowed()) {
2626 *status = U_BUFFER_OVERFLOW_ERROR;
2627 } else {
2628 u_terminateChars(langtag, langtagCapacity, reslen, status);
2629 }
2630
2631 return reslen;
2632}
2633
2634
2635U_CAPI void U_EXPORT2
2636ulocimp_toLanguageTag(const char* localeID,
2637 icu::ByteSink& sink,
2638 UBool strict,
2639 UErrorCode* status) {
2640 icu::CharString canonical;
2641 int32_t reslen;
729e4ab9
A
2642 UErrorCode tmpStatus = U_ZERO_ERROR;
2643 UBool hadPosix = FALSE;
2644 const char* pKeywordStart;
2645
2646 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
3d1f044b
A
2647 int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
2648 if (resultCapacity > 0) {
2649 char* buffer;
2650
2651 for (;;) {
2652 buffer = canonical.getAppendBuffer(
2653 /*minCapacity=*/resultCapacity,
2654 /*desiredCapacityHint=*/resultCapacity,
2655 resultCapacity,
2656 tmpStatus);
2657
2658 if (U_FAILURE(tmpStatus)) {
2659 *status = tmpStatus;
2660 return;
2661 }
2662
2663 reslen =
2664 uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
2665
2666 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
2667 break;
2668 }
2669
2670 resultCapacity = reslen;
2671 tmpStatus = U_ZERO_ERROR;
2672 }
2673
2674 if (U_FAILURE(tmpStatus)) {
729e4ab9 2675 *status = U_ILLEGAL_ARGUMENT_ERROR;
3d1f044b
A
2676 return;
2677 }
2678
2679 canonical.append(buffer, reslen, tmpStatus);
2680 if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
2681 tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
2682 }
2683
2684 if (U_FAILURE(tmpStatus)) {
2685 *status = tmpStatus;
2686 return;
729e4ab9
A
2687 }
2688 }
2689
2690 /* For handling special case - private use only tag */
3d1f044b
A
2691 pKeywordStart = locale_getKeywordsStart(canonical.data());
2692 if (pKeywordStart == canonical.data()) {
729e4ab9
A
2693 int kwdCnt = 0;
2694 UBool done = FALSE;
2695
3d1f044b
A
2696 icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus));
2697 if (U_SUCCESS(tmpStatus)) {
2698 kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus);
729e4ab9
A
2699 if (kwdCnt == 1) {
2700 const char *key;
2701 int32_t len = 0;
2702
3d1f044b 2703 key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus);
729e4ab9
A
2704 if (len == 1 && *key == PRIVATEUSE) {
2705 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2706 buf[0] = PRIVATEUSE;
2707 buf[1] = SEP;
2708 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2709 if (U_SUCCESS(tmpStatus)) {
3d1f044b 2710 if (ultag_isPrivateuseValueSubtags(&buf[2], len)) {
729e4ab9 2711 /* return private use only tag */
3d1f044b 2712 sink.Append(buf, len + 2);
729e4ab9
A
2713 done = TRUE;
2714 } else if (strict) {
2715 *status = U_ILLEGAL_ARGUMENT_ERROR;
2716 done = TRUE;
2717 }
2718 /* if not strict mode, then "und" will be returned */
2719 } else {
2720 *status = U_ILLEGAL_ARGUMENT_ERROR;
2721 done = TRUE;
2722 }
2723 }
2724 }
729e4ab9 2725 if (done) {
3d1f044b 2726 return;
729e4ab9
A
2727 }
2728 }
2729 }
2730
3d1f044b
A
2731 _appendLanguageToLanguageTag(canonical.data(), sink, strict, status);
2732 _appendScriptToLanguageTag(canonical.data(), sink, strict, status);
2733 _appendRegionToLanguageTag(canonical.data(), sink, strict, status);
2734 _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status);
2735 _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
2736 _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status);
729e4ab9
A
2737}
2738
2739
51004dcb 2740U_CAPI int32_t U_EXPORT2
729e4ab9
A
2741uloc_forLanguageTag(const char* langtag,
2742 char* localeID,
2743 int32_t localeIDCapacity,
2744 int32_t* parsedLength,
2745 UErrorCode* status) {
3d1f044b
A
2746 if (U_FAILURE(*status)) {
2747 return 0;
2748 }
2749
2750 icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
2751 ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
2752
2753 int32_t reslen = sink.NumberOfBytesAppended();
2754
2755 if (U_FAILURE(*status)) {
2756 return reslen;
2757 }
2758
2759 if (sink.Overflowed()) {
2760 *status = U_BUFFER_OVERFLOW_ERROR;
2761 } else {
2762 u_terminateChars(localeID, localeIDCapacity, reslen, status);
2763 }
2764
2765 return reslen;
2766}
2767
2768
2769U_CAPI void U_EXPORT2
2770ulocimp_forLanguageTag(const char* langtag,
2771 int32_t tagLen,
2772 icu::ByteSink& sink,
2773 int32_t* parsedLength,
2774 UErrorCode* status) {
2775 UBool isEmpty = TRUE;
729e4ab9
A
2776 const char *subtag, *p;
2777 int32_t len;
51004dcb 2778 int32_t i, n;
729e4ab9
A
2779 UBool noRegion = TRUE;
2780
3d1f044b 2781 icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status));
729e4ab9 2782 if (U_FAILURE(*status)) {
3d1f044b 2783 return;
729e4ab9
A
2784 }
2785
2786 /* language */
3d1f044b 2787 subtag = ultag_getExtlangSize(lt.getAlias()) > 0 ? ultag_getExtlang(lt.getAlias(), 0) : ultag_getLanguage(lt.getAlias());
729e4ab9
A
2788 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2789 len = (int32_t)uprv_strlen(subtag);
2790 if (len > 0) {
3d1f044b
A
2791 sink.Append(subtag, len);
2792 isEmpty = FALSE;
729e4ab9
A
2793 }
2794 }
2795
2796 /* script */
3d1f044b 2797 subtag = ultag_getScript(lt.getAlias());
729e4ab9
A
2798 len = (int32_t)uprv_strlen(subtag);
2799 if (len > 0) {
3d1f044b
A
2800 sink.Append("_", 1);
2801 isEmpty = FALSE;
729e4ab9
A
2802
2803 /* write out the script in title case */
3d1f044b
A
2804 char c = uprv_toupper(*subtag);
2805 sink.Append(&c, 1);
2806 sink.Append(subtag + 1, len - 1);
729e4ab9
A
2807 }
2808
2809 /* region */
3d1f044b 2810 subtag = ultag_getRegion(lt.getAlias());
729e4ab9
A
2811 len = (int32_t)uprv_strlen(subtag);
2812 if (len > 0) {
3d1f044b
A
2813 sink.Append("_", 1);
2814 isEmpty = FALSE;
2815
2816 /* write out the region in upper case */
729e4ab9
A
2817 p = subtag;
2818 while (*p) {
3d1f044b
A
2819 char c = uprv_toupper(*p);
2820 sink.Append(&c, 1);
729e4ab9
A
2821 p++;
2822 }
2823 noRegion = FALSE;
2824 }
2825
2826 /* variants */
3d1f044b 2827 n = ultag_getVariantsSize(lt.getAlias());
729e4ab9
A
2828 if (n > 0) {
2829 if (noRegion) {
3d1f044b
A
2830 sink.Append("_", 1);
2831 isEmpty = FALSE;
729e4ab9
A
2832 }
2833
2834 for (i = 0; i < n; i++) {
3d1f044b
A
2835 subtag = ultag_getVariant(lt.getAlias(), i);
2836 sink.Append("_", 1);
2837
729e4ab9
A
2838 /* write out the variant in upper case */
2839 p = subtag;
2840 while (*p) {
3d1f044b
A
2841 char c = uprv_toupper(*p);
2842 sink.Append(&c, 1);
729e4ab9
A
2843 p++;
2844 }
2845 }
2846 }
2847
2848 /* keywords */
3d1f044b
A
2849 n = ultag_getExtensionsSize(lt.getAlias());
2850 subtag = ultag_getPrivateUse(lt.getAlias());
51004dcb 2851 if (n > 0 || uprv_strlen(subtag) > 0) {
3d1f044b 2852 if (isEmpty && n > 0) {
729e4ab9 2853 /* need a language */
3d1f044b 2854 sink.Append(LANG_UND, LANG_UND_LEN);
729e4ab9 2855 }
3d1f044b 2856 _appendKeywords(lt.getAlias(), sink, status);
729e4ab9 2857 }
729e4ab9 2858}