]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc_tag.cpp
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc_tag.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 2009-2015, International Business Machines
729e4ab9
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11#include "unicode/ures.h"
12#include "unicode/putil.h"
13#include "unicode/uloc.h"
14#include "ustr_imp.h"
15#include "cmemory.h"
16#include "cstring.h"
17#include "putilimp.h"
18#include "uinvchar.h"
19#include "ulocimp.h"
51004dcb
A
20#include "uassert.h"
21
f3c0d7a5 22
729e4ab9
A
/* Node of a singly linked list of variant subtags. */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text (not owned by the node) */
    struct VariantListEntry *next;      /* following node, or NULL at the tail */
} VariantListEntry;
28
4388f060
A
/* Node of a singly linked list of attribute values. */
typedef struct AttributeListEntry {
    const char                *attribute;   /* attribute text */
    struct AttributeListEntry *next;        /* following node, or NULL at the tail */
} AttributeListEntry;
34
729e4ab9
A
/* Node of a singly linked list of extensions (key/value pairs). */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key */
    const char                *value;   /* extension value; may be NULL for a placeholder entry */
    struct ExtensionListEntry *next;    /* following node, or NULL at the tail */
} ExtensionListEntry;
41
42#define MAXEXTLANG 3
43typedef struct ULanguageTag {
44 char *buf; /* holding parsed subtags */
45 const char *language;
46 const char *extlang[MAXEXTLANG];
47 const char *script;
48 const char *region;
49 VariantListEntry *variants;
50 ExtensionListEntry *extensions;
51 const char *privateuse;
52 const char *grandfathered;
53} ULanguageTag;
54
/* Language tag syntax characters. */
#define MINLEN 2
#define SEP '-'             /* subtag separator */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT 'u'         /* LDML (Unicode locale) extension singleton */

/* ICU locale ID syntax characters. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ASCII character classification. ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
67
51004dcb
A
/* Fixed strings shared by the conversion routines in this file. */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";                       /* stand-in language subtag */
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";                       /* extension key paired with POSIX_VALUE */
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";     /* keyword carrying Unicode locale attributes */
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3
79
51004dcb 80static const char* const GRANDFATHERED[] = {
729e4ab9
A
81/* grandfathered preferred */
82 "art-lojban", "jbo",
4388f060
A
83 "cel-gaulish", "xtg-x-cel-gaulish",
84 "en-GB-oed", "en-GB-x-oed",
729e4ab9
A
85 "i-ami", "ami",
86 "i-bnn", "bnn",
4388f060
A
87 "i-default", "en-x-i-default",
88 "i-enochian", "und-x-i-enochian",
729e4ab9
A
89 "i-hak", "hak",
90 "i-klingon", "tlh",
91 "i-lux", "lb",
4388f060 92 "i-mingo", "see-x-i-mingo",
729e4ab9
A
93 "i-navajo", "nv",
94 "i-pwn", "pwn",
95 "i-tao", "tao",
96 "i-tay", "tay",
97 "i-tsu", "tsu",
98 "no-bok", "nb",
99 "no-nyn", "nn",
100 "sgn-be-fr", "sfb",
101 "sgn-be-nl", "vgt",
102 "sgn-ch-de", "sgg",
103 "zh-guoyu", "cmn",
104 "zh-hakka", "hak",
4388f060 105 "zh-min", "nan-x-zh-min",
729e4ab9
A
106 "zh-min-nan", "nan",
107 "zh-xiang", "hsn",
108 NULL, NULL
109};
110
/*
 * Deprecated language codes and their replacements, stored as
 * consecutive (deprecated, new) pairs. There is no terminator entry;
 * iterate with UPRV_LENGTHOF in steps of two.
 */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated  new */
    "iw",          "he",
    "ji",          "yi",
    "in",          "id"
};
117
118/*
119* -------------------------------------------------
120*
121* These ultag_ functions may be exposed as APIs later
122*
123* -------------------------------------------------
124*/
125
126static ULanguageTag*
127ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
128
129static void
130ultag_close(ULanguageTag* langtag);
131
132static const char*
133ultag_getLanguage(const ULanguageTag* langtag);
134
135#if 0
136static const char*
137ultag_getJDKLanguage(const ULanguageTag* langtag);
138#endif
139
140static const char*
141ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
142
143static int32_t
144ultag_getExtlangSize(const ULanguageTag* langtag);
145
146static const char*
147ultag_getScript(const ULanguageTag* langtag);
148
149static const char*
150ultag_getRegion(const ULanguageTag* langtag);
151
152static const char*
153ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
154
155static int32_t
156ultag_getVariantsSize(const ULanguageTag* langtag);
157
158static const char*
159ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
160
161static const char*
162ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
163
164static int32_t
165ultag_getExtensionsSize(const ULanguageTag* langtag);
166
167static const char*
168ultag_getPrivateUse(const ULanguageTag* langtag);
169
170#if 0
171static const char*
172ultag_getGrandfathered(const ULanguageTag* langtag);
173#endif
174
175/*
176* -------------------------------------------------
177*
178* Language subtag syntax validation functions
179*
180* -------------------------------------------------
181*/
182
183static UBool
184_isAlphaString(const char* s, int32_t len) {
185 int32_t i;
186 for (i = 0; i < len; i++) {
187 if (!ISALPHA(*(s + i))) {
188 return FALSE;
189 }
190 }
191 return TRUE;
192}
193
194static UBool
195_isNumericString(const char* s, int32_t len) {
196 int32_t i;
197 for (i = 0; i < len; i++) {
198 if (!ISNUMERIC(*(s + i))) {
199 return FALSE;
200 }
201 }
202 return TRUE;
203}
204
205static UBool
206_isAlphaNumericString(const char* s, int32_t len) {
207 int32_t i;
208 for (i = 0; i < len; i++) {
209 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
210 return FALSE;
211 }
212 }
213 return TRUE;
214}
215
216static UBool
217_isLanguageSubtag(const char* s, int32_t len) {
218 /*
219 * language = 2*3ALPHA ; shortest ISO 639 code
220 * ["-" extlang] ; sometimes followed by
221 * ; extended language subtags
222 * / 4ALPHA ; or reserved for future use
223 * / 5*8ALPHA ; or registered language subtag
224 */
225 if (len < 0) {
226 len = (int32_t)uprv_strlen(s);
227 }
228 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
229 return TRUE;
230 }
231 return FALSE;
232}
233
234static UBool
235_isExtlangSubtag(const char* s, int32_t len) {
236 /*
237 * extlang = 3ALPHA ; selected ISO 639 codes
238 * *2("-" 3ALPHA) ; permanently reserved
239 */
240 if (len < 0) {
241 len = (int32_t)uprv_strlen(s);
242 }
243 if (len == 3 && _isAlphaString(s, len)) {
244 return TRUE;
245 }
246 return FALSE;
247}
248
249static UBool
250_isScriptSubtag(const char* s, int32_t len) {
251 /*
252 * script = 4ALPHA ; ISO 15924 code
253 */
254 if (len < 0) {
255 len = (int32_t)uprv_strlen(s);
256 }
257 if (len == 4 && _isAlphaString(s, len)) {
258 return TRUE;
259 }
260 return FALSE;
261}
262
263static UBool
264_isRegionSubtag(const char* s, int32_t len) {
265 /*
266 * region = 2ALPHA ; ISO 3166-1 code
267 * / 3DIGIT ; UN M.49 code
268 */
269 if (len < 0) {
270 len = (int32_t)uprv_strlen(s);
271 }
272 if (len == 2 && _isAlphaString(s, len)) {
273 return TRUE;
274 }
275 if (len == 3 && _isNumericString(s, len)) {
276 return TRUE;
277 }
278 return FALSE;
279}
280
281static UBool
282_isVariantSubtag(const char* s, int32_t len) {
283 /*
284 * variant = 5*8alphanum ; registered variants
285 * / (DIGIT 3alphanum)
286 */
287 if (len < 0) {
288 len = (int32_t)uprv_strlen(s);
289 }
4388f060 290 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
729e4ab9
A
291 return TRUE;
292 }
293 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
294 return TRUE;
295 }
296 return FALSE;
297}
298
4388f060
A
299static UBool
300_isPrivateuseVariantSubtag(const char* s, int32_t len) {
301 /*
302 * variant = 1*8alphanum ; registered variants
303 * / (DIGIT 3alphanum)
304 */
305 if (len < 0) {
306 len = (int32_t)uprv_strlen(s);
307 }
308 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
309 return TRUE;
310 }
311 return FALSE;
312}
313
729e4ab9
A
314static UBool
315_isExtensionSingleton(const char* s, int32_t len) {
316 /*
317 * extension = singleton 1*("-" (2*8alphanum))
318 */
319 if (len < 0) {
320 len = (int32_t)uprv_strlen(s);
321 }
322 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
323 return TRUE;
324 }
325 return FALSE;
326}
327
328static UBool
329_isExtensionSubtag(const char* s, int32_t len) {
330 /*
331 * extension = singleton 1*("-" (2*8alphanum))
332 */
333 if (len < 0) {
334 len = (int32_t)uprv_strlen(s);
335 }
336 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
337 return TRUE;
338 }
339 return FALSE;
340}
341
342static UBool
343_isExtensionSubtags(const char* s, int32_t len) {
344 const char *p = s;
345 const char *pSubtag = NULL;
346
347 if (len < 0) {
348 len = (int32_t)uprv_strlen(s);
349 }
350
351 while ((p - s) < len) {
352 if (*p == SEP) {
353 if (pSubtag == NULL) {
354 return FALSE;
355 }
356 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
357 return FALSE;
358 }
359 pSubtag = NULL;
360 } else if (pSubtag == NULL) {
361 pSubtag = p;
362 }
363 p++;
364 }
365 if (pSubtag == NULL) {
366 return FALSE;
367 }
368 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
369}
370
371static UBool
372_isPrivateuseValueSubtag(const char* s, int32_t len) {
373 /*
374 * privateuse = "x" 1*("-" (1*8alphanum))
375 */
376 if (len < 0) {
377 len = (int32_t)uprv_strlen(s);
378 }
379 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
380 return TRUE;
381 }
382 return FALSE;
383}
384
385static UBool
386_isPrivateuseValueSubtags(const char* s, int32_t len) {
387 const char *p = s;
388 const char *pSubtag = NULL;
389
390 if (len < 0) {
391 len = (int32_t)uprv_strlen(s);
392 }
393
394 while ((p - s) < len) {
395 if (*p == SEP) {
396 if (pSubtag == NULL) {
397 return FALSE;
398 }
399 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
400 return FALSE;
401 }
402 pSubtag = NULL;
403 } else if (pSubtag == NULL) {
404 pSubtag = p;
405 }
406 p++;
407 }
408 if (pSubtag == NULL) {
409 return FALSE;
410 }
411 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
412}
413
b331163b
A
414U_CFUNC UBool
415ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
729e4ab9
A
416 if (len < 0) {
417 len = (int32_t)uprv_strlen(s);
418 }
419 if (len == 2 && _isAlphaNumericString(s, len)) {
420 return TRUE;
421 }
422 return FALSE;
423}
424
b331163b
A
425U_CFUNC UBool
426ultag_isUnicodeLocaleType(const char*s, int32_t len) {
427 const char* p;
428 int32_t subtagLen = 0;
429
729e4ab9
A
430 if (len < 0) {
431 len = (int32_t)uprv_strlen(s);
432 }
b331163b
A
433
434 for (p = s; len > 0; p++, len--) {
435 if (*p == SEP) {
436 if (subtagLen < 3) {
437 return FALSE;
438 }
439 subtagLen = 0;
440 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
441 subtagLen++;
442 if (subtagLen > 8) {
443 return FALSE;
444 }
445 } else {
446 return FALSE;
447 }
729e4ab9 448 }
729e4ab9 449
b331163b
A
450 return (subtagLen >= 3);
451}
729e4ab9
A
452/*
453* -------------------------------------------------
454*
455* Helper functions
456*
457* -------------------------------------------------
458*/
459
460static UBool
461_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
462 UBool bAdded = TRUE;
463
464 if (*first == NULL) {
465 var->next = NULL;
466 *first = var;
467 } else {
468 VariantListEntry *prev, *cur;
469 int32_t cmp;
470
4388f060 471 /* variants order should be preserved */
729e4ab9
A
472 prev = NULL;
473 cur = *first;
474 while (TRUE) {
475 if (cur == NULL) {
476 prev->next = var;
477 var->next = NULL;
478 break;
479 }
4388f060
A
480
481 /* Checking for duplicate variant */
729e4ab9 482 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
4388f060
A
483 if (cmp == 0) {
484 /* duplicated variant */
485 bAdded = FALSE;
486 break;
487 }
488 prev = cur;
489 cur = cur->next;
490 }
491 }
492
493 return bAdded;
494}
495
496static UBool
497_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
498 UBool bAdded = TRUE;
499
500 if (*first == NULL) {
501 attr->next = NULL;
502 *first = attr;
503 } else {
504 AttributeListEntry *prev, *cur;
505 int32_t cmp;
506
507 /* reorder variants in alphabetical order */
508 prev = NULL;
509 cur = *first;
510 while (TRUE) {
511 if (cur == NULL) {
512 prev->next = attr;
513 attr->next = NULL;
514 break;
515 }
516 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
729e4ab9
A
517 if (cmp < 0) {
518 if (prev == NULL) {
4388f060 519 *first = attr;
729e4ab9 520 } else {
4388f060 521 prev->next = attr;
729e4ab9 522 }
4388f060 523 attr->next = cur;
729e4ab9
A
524 break;
525 }
526 if (cmp == 0) {
527 /* duplicated variant */
528 bAdded = FALSE;
529 break;
530 }
531 prev = cur;
532 cur = cur->next;
533 }
534 }
535
536 return bAdded;
537}
538
539
540static UBool
541_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
542 UBool bAdded = TRUE;
543
544 if (*first == NULL) {
545 ext->next = NULL;
546 *first = ext;
547 } else {
548 ExtensionListEntry *prev, *cur;
549 int32_t cmp;
550
551 /* reorder variants in alphabetical order */
552 prev = NULL;
553 cur = *first;
554 while (TRUE) {
555 if (cur == NULL) {
556 prev->next = ext;
557 ext->next = NULL;
558 break;
559 }
560 if (localeToBCP) {
561 /* special handling for locale to bcp conversion */
562 int32_t len, curlen;
563
564 len = (int32_t)uprv_strlen(ext->key);
565 curlen = (int32_t)uprv_strlen(cur->key);
566
567 if (len == 1 && curlen == 1) {
568 if (*(ext->key) == *(cur->key)) {
569 cmp = 0;
570 } else if (*(ext->key) == PRIVATEUSE) {
571 cmp = 1;
572 } else if (*(cur->key) == PRIVATEUSE) {
573 cmp = -1;
574 } else {
575 cmp = *(ext->key) - *(cur->key);
576 }
577 } else if (len == 1) {
578 cmp = *(ext->key) - LDMLEXT;
579 } else if (curlen == 1) {
580 cmp = LDMLEXT - *(cur->key);
581 } else {
582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
f3c0d7a5
A
583 /* Both are u extension keys - we need special handling for 'attribute' */
584 if (cmp != 0) {
585 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
586 cmp = 1;
587 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
588 cmp = -1;
589 }
590 }
729e4ab9
A
591 }
592 } else {
593 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
594 }
595 if (cmp < 0) {
596 if (prev == NULL) {
597 *first = ext;
598 } else {
599 prev->next = ext;
600 }
601 ext->next = cur;
602 break;
603 }
604 if (cmp == 0) {
605 /* duplicated extension key */
606 bAdded = FALSE;
607 break;
608 }
609 prev = cur;
610 cur = cur->next;
611 }
612 }
613
614 return bAdded;
615}
616
617static void
618_initializeULanguageTag(ULanguageTag* langtag) {
619 int32_t i;
620
621 langtag->buf = NULL;
622
623 langtag->language = EMPTY;
624 for (i = 0; i < MAXEXTLANG; i++) {
625 langtag->extlang[i] = NULL;
626 }
627
628 langtag->script = EMPTY;
629 langtag->region = EMPTY;
630
631 langtag->variants = NULL;
632 langtag->extensions = NULL;
633
634 langtag->grandfathered = EMPTY;
635 langtag->privateuse = EMPTY;
636}
637
729e4ab9
A
638static int32_t
639_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
640 char buf[ULOC_LANG_CAPACITY];
641 UErrorCode tmpStatus = U_ZERO_ERROR;
642 int32_t len, i;
643 int32_t reslen = 0;
644
645 if (U_FAILURE(*status)) {
646 return 0;
647 }
648
649 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
650 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
651 if (strict) {
652 *status = U_ILLEGAL_ARGUMENT_ERROR;
653 return 0;
654 }
655 len = 0;
656 }
657
658 /* Note: returned language code is in lower case letters */
659
660 if (len == 0) {
661 if (reslen < capacity) {
662 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
663 }
664 reslen += LANG_UND_LEN;
665 } else if (!_isLanguageSubtag(buf, len)) {
666 /* invalid language code */
667 if (strict) {
668 *status = U_ILLEGAL_ARGUMENT_ERROR;
669 return 0;
670 }
671 if (reslen < capacity) {
672 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
673 }
674 reslen += LANG_UND_LEN;
675 } else {
676 /* resolve deprecated */
b331163b 677 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
729e4ab9
A
678 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
679 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
680 len = (int32_t)uprv_strlen(buf);
681 break;
682 }
683 }
684 if (reslen < capacity) {
685 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
686 }
687 reslen += len;
688 }
689 u_terminateChars(appendAt, capacity, reslen, status);
690 return reslen;
691}
692
693static int32_t
694_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
695 char buf[ULOC_SCRIPT_CAPACITY];
696 UErrorCode tmpStatus = U_ZERO_ERROR;
697 int32_t len;
698 int32_t reslen = 0;
699
700 if (U_FAILURE(*status)) {
701 return 0;
702 }
703
704 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
705 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
706 if (strict) {
707 *status = U_ILLEGAL_ARGUMENT_ERROR;
708 }
709 return 0;
710 }
711
712 if (len > 0) {
713 if (!_isScriptSubtag(buf, len)) {
714 /* invalid script code */
715 if (strict) {
716 *status = U_ILLEGAL_ARGUMENT_ERROR;
717 }
718 return 0;
719 } else {
720 if (reslen < capacity) {
721 *(appendAt + reslen) = SEP;
722 }
723 reslen++;
724
725 if (reslen < capacity) {
726 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
727 }
728 reslen += len;
729 }
730 }
731 u_terminateChars(appendAt, capacity, reslen, status);
732 return reslen;
733}
734
735static int32_t
736_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
737 char buf[ULOC_COUNTRY_CAPACITY];
738 UErrorCode tmpStatus = U_ZERO_ERROR;
739 int32_t len;
740 int32_t reslen = 0;
741
742 if (U_FAILURE(*status)) {
743 return 0;
744 }
745
746 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
747 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
748 if (strict) {
749 *status = U_ILLEGAL_ARGUMENT_ERROR;
750 }
751 return 0;
752 }
753
754 if (len > 0) {
755 if (!_isRegionSubtag(buf, len)) {
756 /* invalid region code */
757 if (strict) {
758 *status = U_ILLEGAL_ARGUMENT_ERROR;
759 }
760 return 0;
761 } else {
762 if (reslen < capacity) {
763 *(appendAt + reslen) = SEP;
764 }
765 reslen++;
766
767 if (reslen < capacity) {
768 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
769 }
770 reslen += len;
771 }
772 }
773 u_terminateChars(appendAt, capacity, reslen, status);
774 return reslen;
775}
776
777static int32_t
778_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
779 char buf[ULOC_FULLNAME_CAPACITY];
780 UErrorCode tmpStatus = U_ZERO_ERROR;
781 int32_t len, i;
782 int32_t reslen = 0;
783
784 if (U_FAILURE(*status)) {
785 return 0;
786 }
787
788 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
789 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
790 if (strict) {
791 *status = U_ILLEGAL_ARGUMENT_ERROR;
792 }
793 return 0;
794 }
795
796 if (len > 0) {
797 char *p, *pVar;
798 UBool bNext = TRUE;
799 VariantListEntry *var;
800 VariantListEntry *varFirst = NULL;
801
802 pVar = NULL;
803 p = buf;
804 while (bNext) {
805 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
806 if (*p == 0) {
807 bNext = FALSE;
808 } else {
809 *p = 0; /* terminate */
810 }
811 if (pVar == NULL) {
812 if (strict) {
813 *status = U_ILLEGAL_ARGUMENT_ERROR;
814 break;
815 }
816 /* ignore empty variant */
817 } else {
818 /* ICU uses upper case letters for variants, but
819 the canonical format is lowercase in BCP47 */
820 for (i = 0; *(pVar + i) != 0; i++) {
821 *(pVar + i) = uprv_tolower(*(pVar + i));
822 }
823
824 /* validate */
825 if (_isVariantSubtag(pVar, -1)) {
f3c0d7a5 826 if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
729e4ab9 827 /* emit the variant to the list */
51004dcb 828 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
729e4ab9
A
829 if (var == NULL) {
830 *status = U_MEMORY_ALLOCATION_ERROR;
831 break;
832 }
833 var->variant = pVar;
834 if (!_addVariantToList(&varFirst, var)) {
835 /* duplicated variant */
836 uprv_free(var);
837 if (strict) {
838 *status = U_ILLEGAL_ARGUMENT_ERROR;
839 break;
840 }
841 }
842 } else {
843 /* Special handling for POSIX variant, need to remember that we had it and then */
844 /* treat it like an extension later. */
845 *hadPosix = TRUE;
846 }
847 } else if (strict) {
848 *status = U_ILLEGAL_ARGUMENT_ERROR;
849 break;
4388f060
A
850 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
851 /* Handle private use subtags separately */
852 break;
729e4ab9
A
853 }
854 }
855 /* reset variant starting position */
856 pVar = NULL;
857 } else if (pVar == NULL) {
858 pVar = p;
859 }
860 p++;
861 }
862
863 if (U_SUCCESS(*status)) {
864 if (varFirst != NULL) {
865 int32_t varLen;
866
4388f060 867 /* write out validated/normalized variants to the target */
729e4ab9
A
868 var = varFirst;
869 while (var != NULL) {
870 if (reslen < capacity) {
871 *(appendAt + reslen) = SEP;
872 }
873 reslen++;
874 varLen = (int32_t)uprv_strlen(var->variant);
875 if (reslen < capacity) {
876 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
877 }
878 reslen += varLen;
879 var = var->next;
880 }
881 }
882 }
883
884 /* clean up */
885 var = varFirst;
886 while (var != NULL) {
887 VariantListEntry *tmpVar = var->next;
888 uprv_free(var);
889 var = tmpVar;
890 }
891
892 if (U_FAILURE(*status)) {
893 return 0;
894 }
895 }
896
897 u_terminateChars(appendAt, capacity, reslen, status);
898 return reslen;
899}
900
901static int32_t
902_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
903 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
4388f060
A
904 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
905 int32_t attrBufLength = 0;
729e4ab9
A
906 UEnumeration *keywordEnum = NULL;
907 int32_t reslen = 0;
908
909 keywordEnum = uloc_openKeywords(localeID, status);
910 if (U_FAILURE(*status) && !hadPosix) {
911 uenum_close(keywordEnum);
912 return 0;
913 }
914 if (keywordEnum != NULL || hadPosix) {
915 /* reorder extensions */
916 int32_t len;
917 const char *key;
918 ExtensionListEntry *firstExt = NULL;
919 ExtensionListEntry *ext;
4388f060
A
920 AttributeListEntry *firstAttr = NULL;
921 AttributeListEntry *attr;
922 char *attrValue;
729e4ab9
A
923 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
924 char *pExtBuf = extBuf;
925 int32_t extBufCapacity = sizeof(extBuf);
f3c0d7a5 926 const char *bcpKey=nullptr, *bcpValue=nullptr;
729e4ab9
A
927 UErrorCode tmpStatus = U_ZERO_ERROR;
928 int32_t keylen;
b331163b 929 UBool isBcpUExt;
729e4ab9
A
930
931 while (TRUE) {
932 key = uenum_next(keywordEnum, NULL, status);
933 if (key == NULL) {
934 break;
935 }
936 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
b331163b
A
937 /* buf must be null-terminated */
938 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
729e4ab9
A
939 if (strict) {
940 *status = U_ILLEGAL_ARGUMENT_ERROR;
941 break;
942 }
943 /* ignore this keyword */
944 tmpStatus = U_ZERO_ERROR;
945 continue;
946 }
947
948 keylen = (int32_t)uprv_strlen(key);
b331163b 949 isBcpUExt = (keylen > 1);
729e4ab9 950
4388f060
A
951 /* special keyword used for representing Unicode locale attributes */
952 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
4388f060
A
953 if (len > 0) {
954 int32_t i = 0;
955 while (TRUE) {
956 attrBufLength = 0;
957 for (; i < len; i++) {
958 if (buf[i] != '-') {
959 attrBuf[attrBufLength++] = buf[i];
960 } else {
961 i++;
962 break;
963 }
964 }
965 if (attrBufLength > 0) {
966 attrBuf[attrBufLength] = 0;
967
968 } else if (i >= len){
969 break;
970 }
971
972 /* create AttributeListEntry */
51004dcb 973 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
4388f060
A
974 if (attr == NULL) {
975 *status = U_MEMORY_ALLOCATION_ERROR;
976 break;
977 }
51004dcb 978 attrValue = (char*)uprv_malloc(attrBufLength + 1);
4388f060
A
979 if (attrValue == NULL) {
980 *status = U_MEMORY_ALLOCATION_ERROR;
981 break;
982 }
983 uprv_strcpy(attrValue, attrBuf);
984 attr->attribute = attrValue;
985
986 if (!_addAttributeToList(&firstAttr, attr)) {
987 uprv_free(attr);
988 uprv_free(attrValue);
989 if (strict) {
990 *status = U_ILLEGAL_ARGUMENT_ERROR;
991 break;
992 }
993 }
994 }
f3c0d7a5
A
995 /* for a place holder ExtensionListEntry */
996 bcpKey = LOCALE_ATTRIBUTE_KEY;
997 bcpValue = NULL;
4388f060 998 }
b331163b
A
999 } else if (isBcpUExt) {
1000 bcpKey = uloc_toUnicodeLocaleKey(key);
1001 if (bcpKey == NULL) {
729e4ab9
A
1002 if (strict) {
1003 *status = U_ILLEGAL_ARGUMENT_ERROR;
1004 break;
1005 }
729e4ab9
A
1006 continue;
1007 }
1008
b331163b
A
1009 /* we've checked buf is null-terminated above */
1010 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1011 if (bcpValue == NULL) {
729e4ab9
A
1012 if (strict) {
1013 *status = U_ILLEGAL_ARGUMENT_ERROR;
1014 break;
1015 }
729e4ab9
A
1016 continue;
1017 }
b331163b
A
1018 if (bcpValue == buf) {
1019 /*
1020 When uloc_toUnicodeLocaleType(key, buf) returns the
1021 input value as is, the value is well-formed, but has
1022 no known mapping. This implementation normalizes the
1023 the value to lower case
1024 */
1025 int32_t bcpValueLen = uprv_strlen(bcpValue);
1026 if (bcpValueLen < extBufCapacity) {
1027 uprv_strcpy(pExtBuf, bcpValue);
1028 T_CString_toLowerCase(pExtBuf);
1029
1030 bcpValue = pExtBuf;
1031
1032 pExtBuf += (bcpValueLen + 1);
1033 extBufCapacity -= (bcpValueLen + 1);
1034 } else {
1035 if (strict) {
1036 *status = U_ILLEGAL_ARGUMENT_ERROR;
1037 break;
1038 }
1039 continue;
1040 }
1041 }
729e4ab9
A
1042 } else {
1043 if (*key == PRIVATEUSE) {
1044 if (!_isPrivateuseValueSubtags(buf, len)) {
1045 if (strict) {
1046 *status = U_ILLEGAL_ARGUMENT_ERROR;
1047 break;
1048 }
1049 continue;
1050 }
1051 } else {
1052 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1053 if (strict) {
1054 *status = U_ILLEGAL_ARGUMENT_ERROR;
1055 break;
1056 }
1057 continue;
1058 }
1059 }
1060 bcpKey = key;
1061 if ((len + 1) < extBufCapacity) {
1062 uprv_memcpy(pExtBuf, buf, len);
1063 bcpValue = pExtBuf;
1064
1065 pExtBuf += len;
1066
1067 *pExtBuf = 0;
1068 pExtBuf++;
1069
1070 extBufCapacity -= (len + 1);
1071 } else {
1072 *status = U_ILLEGAL_ARGUMENT_ERROR;
1073 break;
1074 }
1075 }
1076
f3c0d7a5
A
1077 /* create ExtensionListEntry */
1078 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1079 if (ext == NULL) {
1080 *status = U_MEMORY_ALLOCATION_ERROR;
1081 break;
1082 }
1083 ext->key = bcpKey;
1084 ext->value = bcpValue;
4388f060 1085
f3c0d7a5
A
1086 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1087 uprv_free(ext);
1088 if (strict) {
1089 *status = U_ILLEGAL_ARGUMENT_ERROR;
1090 break;
4388f060 1091 }
729e4ab9
A
1092 }
1093 }
1094
1095 /* Special handling for POSIX variant - add the keywords for POSIX */
1096 if (hadPosix) {
1097 /* create ExtensionListEntry for POSIX */
51004dcb 1098 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
729e4ab9
A
1099 if (ext == NULL) {
1100 *status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1101 goto cleanup;
729e4ab9
A
1102 }
1103 ext->key = POSIX_KEY;
1104 ext->value = POSIX_VALUE;
1105
1106 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1107 uprv_free(ext);
1108 }
1109 }
1110
4388f060 1111 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
729e4ab9 1112 UBool startLDMLExtension = FALSE;
f3c0d7a5
A
1113 for (ext = firstExt; ext; ext = ext->next) {
1114 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1115 /* first LDML u singlton extension */
4388f060
A
1116 if (reslen < capacity) {
1117 *(appendAt + reslen) = SEP;
1118 }
1119 reslen++;
1120 if (reslen < capacity) {
1121 *(appendAt + reslen) = LDMLEXT;
1122 }
1123 reslen++;
1124
1125 startLDMLExtension = TRUE;
1126 }
1127
1128 /* write out the sorted BCP47 attributes, extensions and private use */
f3c0d7a5
A
1129 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1130 /* write the value for the attributes */
1131 for (attr = firstAttr; attr; attr = attr->next) {
1132 if (reslen < capacity) {
1133 *(appendAt + reslen) = SEP;
1134 }
1135 reslen++;
1136 len = (int32_t)uprv_strlen(attr->attribute);
1137 if (reslen < capacity) {
1138 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1139 }
1140 reslen += len;
1141 }
1142 } else {
729e4ab9
A
1143 if (reslen < capacity) {
1144 *(appendAt + reslen) = SEP;
1145 }
1146 reslen++;
4388f060 1147 len = (int32_t)uprv_strlen(ext->key);
729e4ab9 1148 if (reslen < capacity) {
4388f060
A
1149 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1150 }
1151 reslen += len;
1152 if (reslen < capacity) {
1153 *(appendAt + reslen) = SEP;
729e4ab9
A
1154 }
1155 reslen++;
4388f060
A
1156 len = (int32_t)uprv_strlen(ext->value);
1157 if (reslen < capacity) {
1158 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1159 }
1160 reslen += len;
4388f060 1161 }
f3c0d7a5 1162 }
729e4ab9 1163 }
4388f060 1164cleanup:
729e4ab9
A
1165 /* clean up */
1166 ext = firstExt;
1167 while (ext != NULL) {
1168 ExtensionListEntry *tmpExt = ext->next;
1169 uprv_free(ext);
1170 ext = tmpExt;
1171 }
1172
4388f060
A
1173 attr = firstAttr;
1174 while (attr != NULL) {
1175 AttributeListEntry *tmpAttr = attr->next;
1176 char *pValue = (char *)attr->attribute;
1177 uprv_free(pValue);
1178 uprv_free(attr);
1179 attr = tmpAttr;
1180 }
1181
729e4ab9
A
1182 uenum_close(keywordEnum);
1183
1184 if (U_FAILURE(*status)) {
1185 return 0;
1186 }
1187 }
1188
1189 return u_terminateChars(appendAt, capacity, reslen, status);
1190}
1191
1192/**
1193 * Append keywords parsed from LDML extension value
1194 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1195 * Note: char* buf is used for storing keywords
1196 */
1197static void
1198_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
51004dcb
A
1199 const char *pTag; /* beginning of current subtag */
1200 const char *pKwds; /* beginning of key-type pairs */
1201 UBool variantExists = *posixVariant;
1202
1203 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
729e4ab9 1204 ExtensionListEntry *kwd, *nextKwd;
51004dcb
A
1205
1206 AttributeListEntry *attrFirst = NULL; /* first attribute */
1207 AttributeListEntry *attr, *nextAttr;
1208
1209 int32_t len;
729e4ab9 1210 int32_t bufIdx = 0;
51004dcb
A
1211
1212 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1213 int32_t attrBufIdx = 0;
4388f060
A
1214
1215 /* Reset the posixVariant value */
1216 *posixVariant = FALSE;
729e4ab9 1217
51004dcb
A
1218 pTag = ldmlext;
1219 pKwds = NULL;
729e4ab9 1220
51004dcb
A
1221 /* Iterate through u extension attributes */
1222 while (*pTag) {
729e4ab9 1223 /* locate next separator char */
51004dcb
A
1224 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1225
b331163b 1226 if (ultag_isUnicodeLocaleKey(pTag, len)) {
51004dcb
A
1227 pKwds = pTag;
1228 break;
729e4ab9 1229 }
51004dcb
A
1230
1231 /* add this attribute to the list */
1232 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1233 if (attr == NULL) {
1234 *status = U_MEMORY_ALLOCATION_ERROR;
1235 goto cleanup;
729e4ab9
A
1236 }
1237
51004dcb
A
1238 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1239 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1240 attrBuf[attrBufIdx + len] = 0;
1241 attr->attribute = &attrBuf[attrBufIdx];
1242 attrBufIdx += (len + 1);
729e4ab9 1243 } else {
51004dcb
A
1244 *status = U_ILLEGAL_ARGUMENT_ERROR;
1245 goto cleanup;
1246 }
729e4ab9 1247
51004dcb
A
1248 if (!_addAttributeToList(&attrFirst, attr)) {
1249 *status = U_ILLEGAL_ARGUMENT_ERROR;
1250 uprv_free(attr);
1251 goto cleanup;
1252 }
729e4ab9 1253
51004dcb
A
1254 /* next tag */
1255 pTag += len;
1256 if (*pTag) {
1257 /* next to the separator */
1258 pTag++;
1259 }
1260 }
1261
1262 if (attrFirst) {
1263 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1264
1265 if (attrBufIdx > bufSize) {
1266 /* attrBufIdx == <total length of attribute subtag> + 1 */
1267 *status = U_ILLEGAL_ARGUMENT_ERROR;
1268 goto cleanup;
1269 }
1270
1271 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1272 if (kwd == NULL) {
1273 *status = U_MEMORY_ALLOCATION_ERROR;
1274 goto cleanup;
1275 }
1276
1277 kwd->key = LOCALE_ATTRIBUTE_KEY;
1278 kwd->value = buf;
1279
1280 /* attribute subtags sorted in alphabetical order as type */
1281 attr = attrFirst;
1282 while (attr != NULL) {
1283 nextAttr = attr->next;
1284
1285 /* buffer size check is done above */
1286 if (attr != attrFirst) {
1287 *(buf + bufIdx) = SEP;
1288 bufIdx++;
729e4ab9 1289 }
51004dcb
A
1290
1291 len = uprv_strlen(attr->attribute);
1292 uprv_memcpy(buf + bufIdx, attr->attribute, len);
729e4ab9 1293 bufIdx += len;
729e4ab9 1294
51004dcb
A
1295 attr = nextAttr;
1296 }
1297 *(buf + bufIdx) = 0;
1298 bufIdx++;
1299
1300 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1301 *status = U_ILLEGAL_ARGUMENT_ERROR;
1302 uprv_free(kwd);
1303 goto cleanup;
1304 }
1305
1306 /* once keyword entry is created, delete the attribute list */
1307 attr = attrFirst;
1308 while (attr != NULL) {
1309 nextAttr = attr->next;
1310 uprv_free(attr);
1311 attr = nextAttr;
1312 }
1313 attrFirst = NULL;
1314 }
1315
1316 if (pKwds) {
1317 const char *pBcpKey = NULL; /* u extenstion key subtag */
1318 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1319 int32_t bcpKeyLen = 0;
1320 int32_t bcpTypeLen = 0;
1321 UBool isDone = FALSE;
1322
1323 pTag = pKwds;
1324 /* BCP47 representation of LDML key/type pairs */
1325 while (!isDone) {
1326 const char *pNextBcpKey = NULL;
b331163b 1327 int32_t nextBcpKeyLen = 0;
51004dcb
A
1328 UBool emitKeyword = FALSE;
1329
1330 if (*pTag) {
1331 /* locate next separator char */
1332 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1333
b331163b 1334 if (ultag_isUnicodeLocaleKey(pTag, len)) {
51004dcb
A
1335 if (pBcpKey) {
1336 emitKeyword = TRUE;
1337 pNextBcpKey = pTag;
1338 nextBcpKeyLen = len;
1339 } else {
1340 pBcpKey = pTag;
1341 bcpKeyLen = len;
1342 }
1343 } else {
1344 U_ASSERT(pBcpKey != NULL);
1345 /* within LDML type subtags */
1346 if (pBcpType) {
1347 bcpTypeLen += (len + 1);
1348 } else {
1349 pBcpType = pTag;
1350 bcpTypeLen = len;
1351 }
1352 }
729e4ab9 1353
51004dcb
A
1354 /* next tag */
1355 pTag += len;
1356 if (*pTag) {
1357 /* next to the separator */
1358 pTag++;
1359 }
729e4ab9 1360 } else {
51004dcb
A
1361 /* processing last one */
1362 emitKeyword = TRUE;
1363 isDone = TRUE;
1364 }
1365
1366 if (emitKeyword) {
1367 const char *pKey = NULL; /* LDML key */
1368 const char *pType = NULL; /* LDML type */
1369
b331163b
A
1370 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1371
51004dcb
A
1372 U_ASSERT(pBcpKey != NULL);
1373
f3c0d7a5 1374 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
b331163b
A
1375 /* the BCP key is invalid */
1376 *status = U_ILLEGAL_ARGUMENT_ERROR;
1377 goto cleanup;
1378 }
1379
1380 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1381 bcpKeyBuf[bcpKeyLen] = 0;
1382
51004dcb 1383 /* u extension key to LDML key */
b331163b
A
1384 pKey = uloc_toLegacyKey(bcpKeyBuf);
1385 if (pKey == NULL) {
1386 *status = U_ILLEGAL_ARGUMENT_ERROR;
729e4ab9
A
1387 goto cleanup;
1388 }
b331163b
A
1389 if (pKey == bcpKeyBuf) {
1390 /*
1391 The key returned by toLegacyKey points to the input buffer.
1392 We normalize the result key to lower case.
1393 */
1394 T_CString_toLowerCase(bcpKeyBuf);
1395 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1396 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1397 pKey = buf + bufIdx;
1398 bufIdx += bcpKeyLen;
1399 *(buf + bufIdx) = 0;
1400 bufIdx++;
1401 } else {
1402 *status = U_BUFFER_OVERFLOW_ERROR;
1403 goto cleanup;
1404 }
1405 }
51004dcb
A
1406
1407 if (pBcpType) {
b331163b 1408 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
f3c0d7a5 1409 if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
b331163b
A
1410 /* the BCP type is too long */
1411 *status = U_ILLEGAL_ARGUMENT_ERROR;
1412 goto cleanup;
1413 }
1414
1415 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1416 bcpTypeBuf[bcpTypeLen] = 0;
1417
51004dcb 1418 /* BCP type to locale type */
b331163b
A
1419 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1420 if (pType == NULL) {
1421 *status = U_ILLEGAL_ARGUMENT_ERROR;
51004dcb
A
1422 goto cleanup;
1423 }
b331163b
A
1424 if (pType == bcpTypeBuf) {
1425 /*
1426 The type returned by toLegacyType points to the input buffer.
1427 We normalize the result type to lower case.
1428 */
1429 /* normalize to lower case */
1430 T_CString_toLowerCase(bcpTypeBuf);
1431 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1432 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1433 pType = buf + bufIdx;
1434 bufIdx += bcpTypeLen;
1435 *(buf + bufIdx) = 0;
1436 bufIdx++;
1437 } else {
1438 *status = U_BUFFER_OVERFLOW_ERROR;
1439 goto cleanup;
1440 }
1441 }
51004dcb
A
1442 } else {
1443 /* typeless - default type value is "yes" */
1444 pType = LOCALE_TYPE_YES;
1445 }
729e4ab9 1446
51004dcb
A
1447 /* Special handling for u-va-posix, since we want to treat this as a variant,
1448 not as a keyword */
1449 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1450 *posixVariant = TRUE;
1451 } else {
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1454 if (kwd == NULL) {
1455 *status = U_MEMORY_ALLOCATION_ERROR;
1456 goto cleanup;
1457 }
729e4ab9 1458
51004dcb
A
1459 kwd->key = pKey;
1460 kwd->value = pType;
1461
1462 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1463 *status = U_ILLEGAL_ARGUMENT_ERROR;
1464 uprv_free(kwd);
1465 goto cleanup;
1466 }
729e4ab9 1467 }
729e4ab9 1468
51004dcb
A
1469 pBcpKey = pNextBcpKey;
1470 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1471 pBcpType = NULL;
1472 bcpTypeLen = 0;
1473 }
729e4ab9
A
1474 }
1475 }
1476
729e4ab9
A
1477 kwd = kwdFirst;
1478 while (kwd != NULL) {
1479 nextKwd = kwd->next;
1480 _addExtensionToList(appendTo, kwd, FALSE);
1481 kwd = nextKwd;
1482 }
1483
1484 return;
1485
1486cleanup:
51004dcb
A
1487 attr = attrFirst;
1488 while (attr != NULL) {
1489 nextAttr = attr->next;
1490 uprv_free(attr);
1491 attr = nextAttr;
1492 }
1493
729e4ab9
A
1494 kwd = kwdFirst;
1495 while (kwd != NULL) {
1496 nextKwd = kwd->next;
1497 uprv_free(kwd);
1498 kwd = nextKwd;
1499 }
1500}
1501
1502
1503static int32_t
1504_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1505 int32_t reslen = 0;
1506 int32_t i, n;
1507 int32_t len;
1508 ExtensionListEntry *kwdFirst = NULL;
1509 ExtensionListEntry *kwd;
1510 const char *key, *type;
4388f060
A
1511 char *kwdBuf = NULL;
1512 int32_t kwdBufLength = capacity;
729e4ab9
A
1513 UBool posixVariant = FALSE;
1514
1515 if (U_FAILURE(*status)) {
1516 return 0;
1517 }
1518
51004dcb 1519 kwdBuf = (char*)uprv_malloc(kwdBufLength);
4388f060
A
1520 if (kwdBuf == NULL) {
1521 *status = U_MEMORY_ALLOCATION_ERROR;
1522 return 0;
1523 }
1524
1525 /* Determine if variants already exists */
1526 if (ultag_getVariantsSize(langtag)) {
1527 posixVariant = TRUE;
1528 }
1529
729e4ab9
A
1530 n = ultag_getExtensionsSize(langtag);
1531
1532 /* resolve locale keywords and reordering keys */
1533 for (i = 0; i < n; i++) {
1534 key = ultag_getExtensionKey(langtag, i);
1535 type = ultag_getExtensionValue(langtag, i);
1536 if (*key == LDMLEXT) {
4388f060 1537 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
729e4ab9
A
1538 if (U_FAILURE(*status)) {
1539 break;
1540 }
1541 } else {
51004dcb 1542 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
729e4ab9
A
1543 if (kwd == NULL) {
1544 *status = U_MEMORY_ALLOCATION_ERROR;
1545 break;
1546 }
1547 kwd->key = key;
1548 kwd->value = type;
1549 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1550 uprv_free(kwd);
1551 *status = U_ILLEGAL_ARGUMENT_ERROR;
1552 break;
1553 }
1554 }
1555 }
1556
1557 if (U_SUCCESS(*status)) {
1558 type = ultag_getPrivateUse(langtag);
1559 if ((int32_t)uprv_strlen(type) > 0) {
1560 /* add private use as a keyword */
51004dcb 1561 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
729e4ab9
A
1562 if (kwd == NULL) {
1563 *status = U_MEMORY_ALLOCATION_ERROR;
1564 } else {
1565 kwd->key = PRIVATEUSE_KEY;
1566 kwd->value = type;
1567 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1568 uprv_free(kwd);
1569 *status = U_ILLEGAL_ARGUMENT_ERROR;
1570 }
1571 }
1572 }
1573 }
1574
1575 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1576
1577 if (U_SUCCESS(*status) && posixVariant) {
1578 len = (int32_t) uprv_strlen(_POSIX);
1579 if (reslen < capacity) {
1580 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1581 }
1582 reslen += len;
1583 }
1584
51004dcb 1585 if (U_SUCCESS(*status) && kwdFirst != NULL) {
729e4ab9 1586 /* write out the sorted keywords */
4388f060 1587 UBool firstValue = TRUE;
729e4ab9 1588 kwd = kwdFirst;
4388f060 1589 do {
729e4ab9 1590 if (reslen < capacity) {
4388f060 1591 if (firstValue) {
729e4ab9
A
1592 /* '@' */
1593 *(appendAt + reslen) = LOCALE_EXT_SEP;
4388f060 1594 firstValue = FALSE;
51004dcb 1595 } else {
729e4ab9
A
1596 /* ';' */
1597 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1598 }
1599 }
1600 reslen++;
1601
51004dcb
A
1602 /* key */
1603 len = (int32_t)uprv_strlen(kwd->key);
1604 if (reslen < capacity) {
1605 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1606 }
1607 reslen += len;
4388f060 1608
51004dcb
A
1609 /* '=' */
1610 if (reslen < capacity) {
1611 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1612 }
1613 reslen++;
4388f060 1614
51004dcb
A
1615 /* type */
1616 len = (int32_t)uprv_strlen(kwd->value);
1617 if (reslen < capacity) {
1618 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
4388f060 1619 }
51004dcb
A
1620 reslen += len;
1621
1622 kwd = kwd->next;
1623 } while (kwd);
729e4ab9
A
1624 }
1625
1626 /* clean up */
1627 kwd = kwdFirst;
1628 while (kwd != NULL) {
1629 ExtensionListEntry *tmpKwd = kwd->next;
1630 uprv_free(kwd);
1631 kwd = tmpKwd;
1632 }
1633
4388f060
A
1634 uprv_free(kwdBuf);
1635
729e4ab9
A
1636 if (U_FAILURE(*status)) {
1637 return 0;
1638 }
1639
1640 return u_terminateChars(appendAt, capacity, reslen, status);
1641}
1642
4388f060
A
1643static int32_t
1644_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
f3c0d7a5 1645 (void)hadPosix;
4388f060
A
1646 char buf[ULOC_FULLNAME_CAPACITY];
1647 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1648 UErrorCode tmpStatus = U_ZERO_ERROR;
1649 int32_t len, i;
1650 int32_t reslen = 0;
1651
1652 if (U_FAILURE(*status)) {
1653 return 0;
1654 }
1655
1656 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1657 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1658 if (strict) {
1659 *status = U_ILLEGAL_ARGUMENT_ERROR;
1660 }
1661 return 0;
1662 }
1663
1664 if (len > 0) {
1665 char *p, *pPriv;
1666 UBool bNext = TRUE;
1667 UBool firstValue = TRUE;
1668 UBool writeValue;
1669
1670 pPriv = NULL;
1671 p = buf;
1672 while (bNext) {
1673 writeValue = FALSE;
1674 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1675 if (*p == 0) {
1676 bNext = FALSE;
1677 } else {
1678 *p = 0; /* terminate */
1679 }
1680 if (pPriv != NULL) {
1681 /* Private use in the canonical format is lowercase in BCP47 */
1682 for (i = 0; *(pPriv + i) != 0; i++) {
1683 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1684 }
1685
1686 /* validate */
1687 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1688 if (firstValue) {
1689 if (!_isVariantSubtag(pPriv, -1)) {
1690 writeValue = TRUE;
1691 }
1692 } else {
1693 writeValue = TRUE;
1694 }
1695 } else if (strict) {
1696 *status = U_ILLEGAL_ARGUMENT_ERROR;
1697 break;
1698 } else {
1699 break;
1700 }
1701
1702 if (writeValue) {
1703 if (reslen < capacity) {
1704 tmpAppend[reslen++] = SEP;
1705 }
1706
1707 if (firstValue) {
1708 if (reslen < capacity) {
1709 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1710 }
1711
1712 if (reslen < capacity) {
1713 tmpAppend[reslen++] = SEP;
1714 }
1715
1716 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1717 if (reslen < capacity) {
1718 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1719 }
1720 reslen += len;
1721
1722 if (reslen < capacity) {
1723 tmpAppend[reslen++] = SEP;
1724 }
1725
1726 firstValue = FALSE;
1727 }
1728
1729 len = (int32_t)uprv_strlen(pPriv);
1730 if (reslen < capacity) {
1731 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1732 }
1733 reslen += len;
1734 }
1735 }
1736 /* reset private use starting position */
1737 pPriv = NULL;
1738 } else if (pPriv == NULL) {
1739 pPriv = p;
1740 }
1741 p++;
1742 }
1743
1744 if (U_FAILURE(*status)) {
1745 return 0;
1746 }
1747 }
1748
1749 if (U_SUCCESS(*status)) {
1750 len = reslen;
1751 if (reslen < capacity) {
1752 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1753 }
1754 }
1755
1756 u_terminateChars(appendAt, capacity, reslen, status);
1757
1758 return reslen;
1759}
1760
729e4ab9
A
1761/*
1762* -------------------------------------------------
1763*
1764* ultag_ functions
1765*
1766* -------------------------------------------------
1767*/
1768
/*
 * Bit flags used by the parser.
 * ultag_parse() keeps a mask of these in 'next' to record which kinds of
 * subtag are acceptable at the current position.
 */
#define LANG (1 << 0)
#define EXTL (1 << 1)
#define SCRT (1 << 2)
#define REGN (1 << 3)
#define VART (1 << 4)
#define EXTS (1 << 5)
#define EXTV (1 << 6)
#define PRIV (1 << 7)
1778
f3c0d7a5
A
1779/**
1780 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
1781 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
1782 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
1783 */
1784#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
1785#pragma optimize( "", off )
1786#endif
1787
729e4ab9
A
1788static ULanguageTag*
1789ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1790 ULanguageTag *t;
1791 char *tagBuf;
1792 int16_t next;
1793 char *pSubtag, *pNext, *pLastGoodPosition;
1794 int32_t subtagLen;
1795 int32_t extlangIdx;
1796 ExtensionListEntry *pExtension;
1797 char *pExtValueSubtag, *pExtValueSubtagEnd;
1798 int32_t i;
51004dcb
A
1799 UBool privateuseVar = FALSE;
1800 int32_t grandfatheredLen = 0;
729e4ab9
A
1801
1802 if (parsedLen != NULL) {
1803 *parsedLen = 0;
1804 }
1805
1806 if (U_FAILURE(*status)) {
1807 return NULL;
1808 }
1809
1810 if (tagLen < 0) {
1811 tagLen = (int32_t)uprv_strlen(tag);
1812 }
1813
1814 /* copy the entire string */
1815 tagBuf = (char*)uprv_malloc(tagLen + 1);
1816 if (tagBuf == NULL) {
1817 *status = U_MEMORY_ALLOCATION_ERROR;
1818 return NULL;
1819 }
1820 uprv_memcpy(tagBuf, tag, tagLen);
1821 *(tagBuf + tagLen) = 0;
1822
1823 /* create a ULanguageTag */
1824 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
729e4ab9
A
1825 if (t == NULL) {
1826 uprv_free(tagBuf);
1827 *status = U_MEMORY_ALLOCATION_ERROR;
1828 return NULL;
1829 }
4388f060
A
1830 _initializeULanguageTag(t);
1831 t->buf = tagBuf;
729e4ab9
A
1832
1833 if (tagLen < MINLEN) {
1834 /* the input tag is too short - return empty ULanguageTag */
1835 return t;
1836 }
1837
1838 /* check if the tag is grandfathered */
1839 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
4388f060 1840 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
51004dcb
A
1841 int32_t newTagLength;
1842
1843 grandfatheredLen = tagLen; /* back up for output parsedLen */
1844 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
4388f060
A
1845 if (tagLen < newTagLength) {
1846 uprv_free(tagBuf);
1847 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1848 if (tagBuf == NULL) {
1849 *status = U_MEMORY_ALLOCATION_ERROR;
2ca993e8 1850 ultag_close(t);
4388f060
A
1851 return NULL;
1852 }
1853 t->buf = tagBuf;
1854 tagLen = newTagLength;
729e4ab9 1855 }
4388f060 1856 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
4388f060 1857 break;
729e4ab9
A
1858 }
1859 }
1860
1861 /*
1862 * langtag = language
1863 * ["-" script]
1864 * ["-" region]
1865 * *("-" variant)
1866 * *("-" extension)
1867 * ["-" privateuse]
1868 */
1869
1870 next = LANG | PRIV;
1871 pNext = pLastGoodPosition = tagBuf;
1872 extlangIdx = 0;
1873 pExtension = NULL;
1874 pExtValueSubtag = NULL;
1875 pExtValueSubtagEnd = NULL;
729e4ab9
A
1876
1877 while (pNext) {
1878 char *pSep;
1879
1880 pSubtag = pNext;
1881
1882 /* locate next separator char */
1883 pSep = pSubtag;
1884 while (*pSep) {
1885 if (*pSep == SEP) {
1886 break;
1887 }
1888 pSep++;
1889 }
1890 if (*pSep == 0) {
1891 /* last subtag */
1892 pNext = NULL;
1893 } else {
1894 pNext = pSep + 1;
1895 }
1896 subtagLen = (int32_t)(pSep - pSubtag);
1897
1898 if (next & LANG) {
1899 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1900 *pSep = 0; /* terminate */
1901 t->language = T_CString_toLowerCase(pSubtag);
1902
1903 pLastGoodPosition = pSep;
1904 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1905 continue;
1906 }
1907 }
1908 if (next & EXTL) {
1909 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1910 *pSep = 0;
1911 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1912
1913 pLastGoodPosition = pSep;
1914 if (extlangIdx < 3) {
1915 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1916 } else {
1917 next = SCRT | REGN | VART | EXTS | PRIV;
1918 }
1919 continue;
1920 }
1921 }
1922 if (next & SCRT) {
1923 if (_isScriptSubtag(pSubtag, subtagLen)) {
1924 char *p = pSubtag;
1925
1926 *pSep = 0;
1927
1928 /* to title case */
1929 *p = uprv_toupper(*p);
1930 p++;
1931 for (; *p; p++) {
1932 *p = uprv_tolower(*p);
1933 }
1934
1935 t->script = pSubtag;
1936
1937 pLastGoodPosition = pSep;
1938 next = REGN | VART | EXTS | PRIV;
1939 continue;
1940 }
1941 }
1942 if (next & REGN) {
1943 if (_isRegionSubtag(pSubtag, subtagLen)) {
1944 *pSep = 0;
1945 t->region = T_CString_toUpperCase(pSubtag);
1946
1947 pLastGoodPosition = pSep;
1948 next = VART | EXTS | PRIV;
1949 continue;
1950 }
1951 }
1952 if (next & VART) {
4388f060
A
1953 if (_isVariantSubtag(pSubtag, subtagLen) ||
1954 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
729e4ab9
A
1955 VariantListEntry *var;
1956 UBool isAdded;
1957
1958 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1959 if (var == NULL) {
1960 *status = U_MEMORY_ALLOCATION_ERROR;
1961 goto error;
1962 }
1963 *pSep = 0;
1964 var->variant = T_CString_toUpperCase(pSubtag);
1965 isAdded = _addVariantToList(&(t->variants), var);
1966 if (!isAdded) {
1967 /* duplicated variant entry */
1968 uprv_free(var);
1969 break;
1970 }
1971 pLastGoodPosition = pSep;
1972 next = VART | EXTS | PRIV;
1973 continue;
1974 }
1975 }
1976 if (next & EXTS) {
1977 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1978 if (pExtension != NULL) {
1979 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1980 /* the previous extension is incomplete */
1981 uprv_free(pExtension);
1982 pExtension = NULL;
1983 break;
1984 }
1985
1986 /* terminate the previous extension value */
1987 *pExtValueSubtagEnd = 0;
1988 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1989
1990 /* insert the extension to the list */
1991 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1992 pLastGoodPosition = pExtValueSubtagEnd;
1993 } else {
1994 /* stop parsing here */
1995 uprv_free(pExtension);
1996 pExtension = NULL;
1997 break;
1998 }
729e4ab9
A
1999 }
2000
729e4ab9 2001 /* create a new extension */
51004dcb 2002 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
729e4ab9
A
2003 if (pExtension == NULL) {
2004 *status = U_MEMORY_ALLOCATION_ERROR;
2005 goto error;
2006 }
2007 *pSep = 0;
2008 pExtension->key = T_CString_toLowerCase(pSubtag);
2009 pExtension->value = NULL; /* will be set later */
2010
2011 /*
2012 * reset the start and the end location of extension value
2013 * subtags for this extension
2014 */
2015 pExtValueSubtag = NULL;
2016 pExtValueSubtagEnd = NULL;
2017
2018 next = EXTV;
2019 continue;
2020 }
2021 }
2022 if (next & EXTV) {
2023 if (_isExtensionSubtag(pSubtag, subtagLen)) {
51004dcb
A
2024 if (pExtValueSubtag == NULL) {
2025 /* if the start postion of this extension's value is not yet,
2026 this one is the first value subtag */
2027 pExtValueSubtag = pSubtag;
729e4ab9
A
2028 }
2029
51004dcb
A
2030 /* Mark the end of this subtag */
2031 pExtValueSubtagEnd = pSep;
2032 next = EXTS | EXTV | PRIV;
4388f060 2033
51004dcb 2034 continue;
729e4ab9
A
2035 }
2036 }
2037 if (next & PRIV) {
2038 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2039 char *pPrivuseVal;
2040
2041 if (pExtension != NULL) {
2042 /* Process the last extension */
2043 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2044 /* the previous extension is incomplete */
2045 uprv_free(pExtension);
2046 pExtension = NULL;
2047 break;
2048 } else {
2049 /* terminate the previous extension value */
2050 *pExtValueSubtagEnd = 0;
2051 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2052
2053 /* insert the extension to the list */
2054 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2055 pLastGoodPosition = pExtValueSubtagEnd;
2056 pExtension = NULL;
2057 } else {
2058 /* stop parsing here */
2059 uprv_free(pExtension);
2060 pExtension = NULL;
2061 break;
2062 }
2063 }
2064 }
2065
2066 /* The rest of part will be private use value subtags */
2067 if (pNext == NULL) {
2068 /* empty private use subtag */
2069 break;
2070 }
2071 /* back up the private use value start position */
2072 pPrivuseVal = pNext;
2073
2074 /* validate private use value subtags */
2075 while (pNext) {
2076 pSubtag = pNext;
2077 pSep = pSubtag;
2078 while (*pSep) {
2079 if (*pSep == SEP) {
2080 break;
2081 }
2082 pSep++;
2083 }
2084 if (*pSep == 0) {
2085 /* last subtag */
2086 pNext = NULL;
2087 } else {
2088 pNext = pSep + 1;
2089 }
2090 subtagLen = (int32_t)(pSep - pSubtag);
2091
4388f060
A
2092 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2093 *pSep = 0;
2094 next = VART;
2095 privateuseVar = TRUE;
2096 break;
2097 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
729e4ab9
A
2098 pLastGoodPosition = pSep;
2099 } else {
2100 break;
2101 }
2102 }
4388f060
A
2103
2104 if (next == VART) {
2105 continue;
2106 }
2107
729e4ab9
A
2108 if (pLastGoodPosition - pPrivuseVal > 0) {
2109 *pLastGoodPosition = 0;
2110 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2111 }
2112 /* No more subtags, exiting the parse loop */
2113 break;
2114 }
2115 break;
2116 }
4388f060 2117
729e4ab9
A
2118 /* If we fell through here, it means this subtag is illegal - quit parsing */
2119 break;
2120 }
2121
2122 if (pExtension != NULL) {
2123 /* Process the last extension */
2124 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2125 /* the previous extension is incomplete */
2126 uprv_free(pExtension);
2127 } else {
2128 /* terminate the previous extension value */
2129 *pExtValueSubtagEnd = 0;
2130 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2131 /* insert the extension to the list */
2132 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2133 pLastGoodPosition = pExtValueSubtagEnd;
2134 } else {
2135 uprv_free(pExtension);
2136 }
2137 }
2138 }
2139
2140 if (parsedLen != NULL) {
51004dcb 2141 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
729e4ab9
A
2142 }
2143
2144 return t;
2145
2146error:
2ca993e8 2147 ultag_close(t);
729e4ab9
A
2148 return NULL;
2149}
2150
f3c0d7a5
A
2151/**
2152* Ticket #12705 - Turn optimization back on.
2153*/
2154#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2155#pragma optimize( "", on )
2156#endif
2157
729e4ab9
A
2158static void
2159ultag_close(ULanguageTag* langtag) {
2160
2161 if (langtag == NULL) {
2162 return;
2163 }
2164
2165 uprv_free(langtag->buf);
2166
2167 if (langtag->variants) {
2168 VariantListEntry *curVar = langtag->variants;
2169 while (curVar) {
2170 VariantListEntry *nextVar = curVar->next;
2171 uprv_free(curVar);
2172 curVar = nextVar;
2173 }
2174 }
2175
2176 if (langtag->extensions) {
2177 ExtensionListEntry *curExt = langtag->extensions;
2178 while (curExt) {
2179 ExtensionListEntry *nextExt = curExt->next;
2180 uprv_free(curExt);
2181 curExt = nextExt;
2182 }
2183 }
2184
2185 uprv_free(langtag);
2186}
2187
2188static const char*
2189ultag_getLanguage(const ULanguageTag* langtag) {
2190 return langtag->language;
2191}
2192
#if 0
/*
 * Returns the replacement paired with the tag's language in DEPRECATEDLANGS
 * (a NULL-terminated array of {from, to} pairs), or the language itself when
 * there is no such pair. Currently compiled out.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char *lang = langtag->language;
    int32_t pair = 0;

    while (DEPRECATEDLANGS[pair] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pair], lang) == 0) {
            return DEPRECATEDLANGS[pair + 1];
        }
        pair += 2;
    }
    return lang;
}
#endif
2205
2206static const char*
2207ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2208 if (idx >= 0 && idx < MAXEXTLANG) {
2209 return langtag->extlang[idx];
2210 }
2211 return NULL;
2212}
2213
2214static int32_t
2215ultag_getExtlangSize(const ULanguageTag* langtag) {
2216 int32_t size = 0;
2217 int32_t i;
2218 for (i = 0; i < MAXEXTLANG; i++) {
2219 if (langtag->extlang[i]) {
2220 size++;
2221 }
2222 }
2223 return size;
2224}
2225
2226static const char*
2227ultag_getScript(const ULanguageTag* langtag) {
2228 return langtag->script;
2229}
2230
2231static const char*
2232ultag_getRegion(const ULanguageTag* langtag) {
2233 return langtag->region;
2234}
2235
2236static const char*
2237ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2238 const char *var = NULL;
2239 VariantListEntry *cur = langtag->variants;
2240 int32_t i = 0;
2241 while (cur) {
2242 if (i == idx) {
2243 var = cur->variant;
2244 break;
2245 }
2246 cur = cur->next;
2247 i++;
2248 }
2249 return var;
2250}
2251
2252static int32_t
2253ultag_getVariantsSize(const ULanguageTag* langtag) {
2254 int32_t size = 0;
2255 VariantListEntry *cur = langtag->variants;
2256 while (TRUE) {
2257 if (cur == NULL) {
2258 break;
2259 }
2260 size++;
2261 cur = cur->next;
2262 }
2263 return size;
2264}
2265
2266static const char*
2267ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2268 const char *key = NULL;
2269 ExtensionListEntry *cur = langtag->extensions;
2270 int32_t i = 0;
2271 while (cur) {
2272 if (i == idx) {
2273 key = cur->key;
2274 break;
2275 }
2276 cur = cur->next;
2277 i++;
2278 }
2279 return key;
2280}
2281
2282static const char*
2283ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2284 const char *val = NULL;
2285 ExtensionListEntry *cur = langtag->extensions;
2286 int32_t i = 0;
2287 while (cur) {
2288 if (i == idx) {
2289 val = cur->value;
2290 break;
2291 }
2292 cur = cur->next;
2293 i++;
2294 }
2295 return val;
2296}
2297
2298static int32_t
2299ultag_getExtensionsSize(const ULanguageTag* langtag) {
2300 int32_t size = 0;
2301 ExtensionListEntry *cur = langtag->extensions;
2302 while (TRUE) {
2303 if (cur == NULL) {
2304 break;
2305 }
2306 size++;
2307 cur = cur->next;
2308 }
2309 return size;
2310}
2311
2312static const char*
2313ultag_getPrivateUse(const ULanguageTag* langtag) {
2314 return langtag->privateuse;
2315}
2316
#if 0
/* Returns the grandfathered field of langtag. Currently compiled out. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag)
{
    return langtag->grandfathered;
}
#endif
2323
2324
2325/*
2326* -------------------------------------------------
2327*
2328* Locale/BCP47 conversion APIs, exposed as uloc_*
2329*
2330* -------------------------------------------------
2331*/
51004dcb 2332U_CAPI int32_t U_EXPORT2
729e4ab9
A
2333uloc_toLanguageTag(const char* localeID,
2334 char* langtag,
2335 int32_t langtagCapacity,
2336 UBool strict,
2337 UErrorCode* status) {
2338 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2339 char canonical[256];
2340 int32_t reslen = 0;
2341 UErrorCode tmpStatus = U_ZERO_ERROR;
2342 UBool hadPosix = FALSE;
2343 const char* pKeywordStart;
2344
2345 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2346 canonical[0] = 0;
2347 if (uprv_strlen(localeID) > 0) {
2348 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2349 if (tmpStatus != U_ZERO_ERROR) {
2350 *status = U_ILLEGAL_ARGUMENT_ERROR;
2351 return 0;
2352 }
2353 }
2354
2355 /* For handling special case - private use only tag */
2356 pKeywordStart = locale_getKeywordsStart(canonical);
2357 if (pKeywordStart == canonical) {
2358 UEnumeration *kwdEnum;
2359 int kwdCnt = 0;
2360 UBool done = FALSE;
2361
2362 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2363 if (kwdEnum != NULL) {
2364 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2365 if (kwdCnt == 1) {
2366 const char *key;
2367 int32_t len = 0;
2368
2369 key = uenum_next(kwdEnum, &len, &tmpStatus);
2370 if (len == 1 && *key == PRIVATEUSE) {
2371 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2372 buf[0] = PRIVATEUSE;
2373 buf[1] = SEP;
2374 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2375 if (U_SUCCESS(tmpStatus)) {
2376 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2377 /* return private use only tag */
2378 reslen = len + 2;
2379 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2380 u_terminateChars(langtag, langtagCapacity, reslen, status);
2381 done = TRUE;
2382 } else if (strict) {
2383 *status = U_ILLEGAL_ARGUMENT_ERROR;
2384 done = TRUE;
2385 }
2386 /* if not strict mode, then "und" will be returned */
2387 } else {
2388 *status = U_ILLEGAL_ARGUMENT_ERROR;
2389 done = TRUE;
2390 }
2391 }
2392 }
2393 uenum_close(kwdEnum);
2394 if (done) {
2395 return reslen;
2396 }
2397 }
2398 }
2399
2400 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2401 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2402 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2403 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2404 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
4388f060 2405 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
729e4ab9
A
2406
2407 return reslen;
2408}
2409
2410
51004dcb 2411U_CAPI int32_t U_EXPORT2
729e4ab9
A
2412uloc_forLanguageTag(const char* langtag,
2413 char* localeID,
2414 int32_t localeIDCapacity,
2415 int32_t* parsedLength,
2416 UErrorCode* status) {
2417 ULanguageTag *lt;
2418 int32_t reslen = 0;
2419 const char *subtag, *p;
2420 int32_t len;
51004dcb 2421 int32_t i, n;
729e4ab9
A
2422 UBool noRegion = TRUE;
2423
2424 lt = ultag_parse(langtag, -1, parsedLength, status);
2425 if (U_FAILURE(*status)) {
2426 return 0;
2427 }
2428
2429 /* language */
2430 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2431 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2432 len = (int32_t)uprv_strlen(subtag);
2433 if (len > 0) {
2434 if (reslen < localeIDCapacity) {
2435 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2436 }
2437 reslen += len;
2438 }
2439 }
2440
2441 /* script */
2442 subtag = ultag_getScript(lt);
2443 len = (int32_t)uprv_strlen(subtag);
2444 if (len > 0) {
2445 if (reslen < localeIDCapacity) {
2446 *(localeID + reslen) = LOCALE_SEP;
2447 }
2448 reslen++;
2449
2450 /* write out the script in title case */
2451 p = subtag;
2452 while (*p) {
2453 if (reslen < localeIDCapacity) {
2454 if (p == subtag) {
2455 *(localeID + reslen) = uprv_toupper(*p);
2456 } else {
2457 *(localeID + reslen) = *p;
2458 }
2459 }
2460 reslen++;
2461 p++;
2462 }
2463 }
2464
2465 /* region */
2466 subtag = ultag_getRegion(lt);
2467 len = (int32_t)uprv_strlen(subtag);
2468 if (len > 0) {
2469 if (reslen < localeIDCapacity) {
2470 *(localeID + reslen) = LOCALE_SEP;
2471 }
2472 reslen++;
2473 /* write out the retion in upper case */
2474 p = subtag;
2475 while (*p) {
2476 if (reslen < localeIDCapacity) {
2477 *(localeID + reslen) = uprv_toupper(*p);
2478 }
2479 reslen++;
2480 p++;
2481 }
2482 noRegion = FALSE;
2483 }
2484
2485 /* variants */
2486 n = ultag_getVariantsSize(lt);
2487 if (n > 0) {
2488 if (noRegion) {
2489 if (reslen < localeIDCapacity) {
2490 *(localeID + reslen) = LOCALE_SEP;
2491 }
2492 reslen++;
2493 }
2494
2495 for (i = 0; i < n; i++) {
2496 subtag = ultag_getVariant(lt, i);
2497 if (reslen < localeIDCapacity) {
2498 *(localeID + reslen) = LOCALE_SEP;
2499 }
2500 reslen++;
2501 /* write out the variant in upper case */
2502 p = subtag;
2503 while (*p) {
2504 if (reslen < localeIDCapacity) {
2505 *(localeID + reslen) = uprv_toupper(*p);
2506 }
2507 reslen++;
2508 p++;
2509 }
2510 }
2511 }
2512
2513 /* keywords */
2514 n = ultag_getExtensionsSize(lt);
2515 subtag = ultag_getPrivateUse(lt);
51004dcb
A
2516 if (n > 0 || uprv_strlen(subtag) > 0) {
2517 if (reslen == 0 && n > 0) {
729e4ab9
A
2518 /* need a language */
2519 if (reslen < localeIDCapacity) {
2520 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2521 }
2522 reslen += LANG_UND_LEN;
2523 }
2524 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2525 reslen += len;
2526 }
2527
2528 ultag_close(lt);
2529 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2530}
2531
2532