]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc_tag.c
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / common / uloc_tag.c
1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18
/*
 * Node of a singly linked list of variant subtags.
 * The node only points at the variant text; it does not own a copy.
 */
typedef struct VariantListEntry VariantListEntry;
struct VariantListEntry {
    const char          *variant;   /* variant subtag text */
    VariantListEntry    *next;      /* following node, NULL at the tail */
};
24
/*
 * Node of a singly linked list of attribute values.
 * The node only points at the attribute text; it does not own a copy.
 */
typedef struct AttributeListEntry AttributeListEntry;
struct AttributeListEntry {
    const char          *attribute; /* attribute value text */
    AttributeListEntry  *next;      /* following node, NULL at the tail */
};
30
/*
 * Node of a singly linked list of extensions, each a key/value pair.
 * The node only points at the key and value text; it does not own copies.
 */
typedef struct ExtensionListEntry ExtensionListEntry;
struct ExtensionListEntry {
    const char          *key;       /* extension key */
    const char          *value;     /* extension value */
    ExtensionListEntry  *next;      /* following node, NULL at the tail */
};
37
38 #define MAXEXTLANG 3
39 typedef struct ULanguageTag {
40 char *buf; /* holding parsed subtags */
41 const char *language;
42 const char *extlang[MAXEXTLANG];
43 const char *script;
44 const char *region;
45 VariantListEntry *variants;
46 ExtensionListEntry *extensions;
47 AttributeListEntry *attributes;
48 const char *privateuse;
49 const char *grandfathered;
50 } ULanguageTag;
51
/* ---- BCP47 side ---- */
#define MINLEN      2
#define SEP         '-'     /* subtag separator */
#define PRIVATEUSE  'x'     /* private use singleton */
#define LDMLEXT     'u'     /* singleton of the LDML (Unicode locale) extension */

/* ---- ICU locale ID side ---- */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ---- character classification ---- */
/* NOTE: ISNUMERIC evaluates its argument twice; do not pass expressions with side effects. */
#define ISALPHA(c)   uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* ---- shared string constants ---- */
static const char* EMPTY                  = "";
static const char* LANG_UND               = "und";
static const char* PRIVATEUSE_KEY         = "x";
static const char* _POSIX                 = "_POSIX";
static const char* POSIX_KEY              = "va";
static const char* POSIX_VALUE            = "posix";
static const char* LOCALE_ATTRIBUTE_KEY   = "attribute";
static const char* PRIVUSE_VARIANT_PREFIX = "lvariant";

/* Length of LANG_UND, not counting the terminating NUL. */
#define LANG_UND_LEN 3
75
/*
 * Grandfathered tags and their preferred replacements, stored as a flat
 * array of (grandfathered, preferred) pairs: even index = grandfathered tag,
 * odd index = preferred tag. The table ends with a (NULL, NULL) pair.
 */
static const char* GRANDFATHERED[] = {
    /* grandfathered    preferred */
    "art-lojban",       "jbo",
    "cel-gaulish",      "xtg-x-cel-gaulish",
    "en-GB-oed",        "en-GB-x-oed",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-default",        "en-x-i-default",
    "i-enochian",       "und-x-i-enochian",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-mingo",          "see-x-i-mingo",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min",           "nan-x-zh-min",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",
    NULL,               NULL
};
106
/*
 * Deprecated language codes and their replacements, stored as a flat array
 * of (deprecated, new) pairs terminated by a (NULL, NULL) pair.
 */
static const char* DEPRECATEDLANGS[] = {
    /* deprecated   new */
    "iw",           "he",
    "ji",           "yi",
    "in",           "id",
    NULL,           NULL
};
114
115 /*
116 * -------------------------------------------------
117 *
118 * These ultag_ functions may be exposed as APIs later
119 *
120 * -------------------------------------------------
121 */
122
123 static ULanguageTag*
124 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
125
126 static void
127 ultag_close(ULanguageTag* langtag);
128
129 static const char*
130 ultag_getLanguage(const ULanguageTag* langtag);
131
132 #if 0
133 static const char*
134 ultag_getJDKLanguage(const ULanguageTag* langtag);
135 #endif
136
137 static const char*
138 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
139
140 static int32_t
141 ultag_getExtlangSize(const ULanguageTag* langtag);
142
143 static const char*
144 ultag_getScript(const ULanguageTag* langtag);
145
146 static const char*
147 ultag_getRegion(const ULanguageTag* langtag);
148
149 static const char*
150 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
151
152 static int32_t
153 ultag_getVariantsSize(const ULanguageTag* langtag);
154
155 #if 0
156 /* Currently not being used. */
157 static const char*
158 ultag_getAttribute(const ULanguageTag* langtag, int32_t idx);
159 #endif
160
161 static int32_t
162 ultag_getAttributesSize(const ULanguageTag* langtag);
163
164 static const char*
165 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
166
167 static const char*
168 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
169
170 static int32_t
171 ultag_getExtensionsSize(const ULanguageTag* langtag);
172
173 static const char*
174 ultag_getPrivateUse(const ULanguageTag* langtag);
175
176 #if 0
177 static const char*
178 ultag_getGrandfathered(const ULanguageTag* langtag);
179 #endif
180
181 /*
182 * -------------------------------------------------
183 *
184 * Language subtag syntax validation functions
185 *
186 * -------------------------------------------------
187 */
188
189 static UBool
190 _isAlphaString(const char* s, int32_t len) {
191 int32_t i;
192 for (i = 0; i < len; i++) {
193 if (!ISALPHA(*(s + i))) {
194 return FALSE;
195 }
196 }
197 return TRUE;
198 }
199
200 static UBool
201 _isNumericString(const char* s, int32_t len) {
202 int32_t i;
203 for (i = 0; i < len; i++) {
204 if (!ISNUMERIC(*(s + i))) {
205 return FALSE;
206 }
207 }
208 return TRUE;
209 }
210
211 static UBool
212 _isAlphaNumericString(const char* s, int32_t len) {
213 int32_t i;
214 for (i = 0; i < len; i++) {
215 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
216 return FALSE;
217 }
218 }
219 return TRUE;
220 }
221
222 static UBool
223 _isLanguageSubtag(const char* s, int32_t len) {
224 /*
225 * language = 2*3ALPHA ; shortest ISO 639 code
226 * ["-" extlang] ; sometimes followed by
227 * ; extended language subtags
228 * / 4ALPHA ; or reserved for future use
229 * / 5*8ALPHA ; or registered language subtag
230 */
231 if (len < 0) {
232 len = (int32_t)uprv_strlen(s);
233 }
234 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
235 return TRUE;
236 }
237 return FALSE;
238 }
239
240 static UBool
241 _isExtlangSubtag(const char* s, int32_t len) {
242 /*
243 * extlang = 3ALPHA ; selected ISO 639 codes
244 * *2("-" 3ALPHA) ; permanently reserved
245 */
246 if (len < 0) {
247 len = (int32_t)uprv_strlen(s);
248 }
249 if (len == 3 && _isAlphaString(s, len)) {
250 return TRUE;
251 }
252 return FALSE;
253 }
254
255 static UBool
256 _isScriptSubtag(const char* s, int32_t len) {
257 /*
258 * script = 4ALPHA ; ISO 15924 code
259 */
260 if (len < 0) {
261 len = (int32_t)uprv_strlen(s);
262 }
263 if (len == 4 && _isAlphaString(s, len)) {
264 return TRUE;
265 }
266 return FALSE;
267 }
268
269 static UBool
270 _isRegionSubtag(const char* s, int32_t len) {
271 /*
272 * region = 2ALPHA ; ISO 3166-1 code
273 * / 3DIGIT ; UN M.49 code
274 */
275 if (len < 0) {
276 len = (int32_t)uprv_strlen(s);
277 }
278 if (len == 2 && _isAlphaString(s, len)) {
279 return TRUE;
280 }
281 if (len == 3 && _isNumericString(s, len)) {
282 return TRUE;
283 }
284 return FALSE;
285 }
286
287 static UBool
288 _isVariantSubtag(const char* s, int32_t len) {
289 /*
290 * variant = 5*8alphanum ; registered variants
291 * / (DIGIT 3alphanum)
292 */
293 if (len < 0) {
294 len = (int32_t)uprv_strlen(s);
295 }
296 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
297 return TRUE;
298 }
299 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
300 return TRUE;
301 }
302 return FALSE;
303 }
304
305 static UBool
306 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
307 /*
308 * variant = 1*8alphanum ; registered variants
309 * / (DIGIT 3alphanum)
310 */
311 if (len < 0) {
312 len = (int32_t)uprv_strlen(s);
313 }
314 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
315 return TRUE;
316 }
317 return FALSE;
318 }
319
320 static UBool
321 _isAttributeSubtag(const char* s, int32_t len) {
322 /*
323 * attribute = 3*8alphanum
324 */
325 if (len < 0) {
326 len = (int32_t)uprv_strlen(s);
327 }
328 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
329 return TRUE;
330 }
331 return FALSE;
332 }
333
334 static UBool
335 _isExtensionSingleton(const char* s, int32_t len) {
336 /*
337 * extension = singleton 1*("-" (2*8alphanum))
338 */
339 if (len < 0) {
340 len = (int32_t)uprv_strlen(s);
341 }
342 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
343 return TRUE;
344 }
345 return FALSE;
346 }
347
348 static UBool
349 _isExtensionSubtag(const char* s, int32_t len) {
350 /*
351 * extension = singleton 1*("-" (2*8alphanum))
352 */
353 if (len < 0) {
354 len = (int32_t)uprv_strlen(s);
355 }
356 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
357 return TRUE;
358 }
359 return FALSE;
360 }
361
362 static UBool
363 _isExtensionSubtags(const char* s, int32_t len) {
364 const char *p = s;
365 const char *pSubtag = NULL;
366
367 if (len < 0) {
368 len = (int32_t)uprv_strlen(s);
369 }
370
371 while ((p - s) < len) {
372 if (*p == SEP) {
373 if (pSubtag == NULL) {
374 return FALSE;
375 }
376 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
377 return FALSE;
378 }
379 pSubtag = NULL;
380 } else if (pSubtag == NULL) {
381 pSubtag = p;
382 }
383 p++;
384 }
385 if (pSubtag == NULL) {
386 return FALSE;
387 }
388 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
389 }
390
391 static UBool
392 _isPrivateuseValueSubtag(const char* s, int32_t len) {
393 /*
394 * privateuse = "x" 1*("-" (1*8alphanum))
395 */
396 if (len < 0) {
397 len = (int32_t)uprv_strlen(s);
398 }
399 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
400 return TRUE;
401 }
402 return FALSE;
403 }
404
405 static UBool
406 _isPrivateuseValueSubtags(const char* s, int32_t len) {
407 const char *p = s;
408 const char *pSubtag = NULL;
409
410 if (len < 0) {
411 len = (int32_t)uprv_strlen(s);
412 }
413
414 while ((p - s) < len) {
415 if (*p == SEP) {
416 if (pSubtag == NULL) {
417 return FALSE;
418 }
419 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
420 return FALSE;
421 }
422 pSubtag = NULL;
423 } else if (pSubtag == NULL) {
424 pSubtag = p;
425 }
426 p++;
427 }
428 if (pSubtag == NULL) {
429 return FALSE;
430 }
431 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
432 }
433
434 static UBool
435 _isLDMLKey(const char* s, int32_t len) {
436 if (len < 0) {
437 len = (int32_t)uprv_strlen(s);
438 }
439 if (len == 2 && _isAlphaNumericString(s, len)) {
440 return TRUE;
441 }
442 return FALSE;
443 }
444
445 static UBool
446 _isLDMLType(const char* s, int32_t len) {
447 if (len < 0) {
448 len = (int32_t)uprv_strlen(s);
449 }
450 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
451 return TRUE;
452 }
453 return FALSE;
454 }
455
456 /*
457 * -------------------------------------------------
458 *
459 * Helper functions
460 *
461 * -------------------------------------------------
462 */
463
464 static UBool
465 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
466 UBool bAdded = TRUE;
467
468 if (*first == NULL) {
469 var->next = NULL;
470 *first = var;
471 } else {
472 VariantListEntry *prev, *cur;
473 int32_t cmp;
474
475 /* variants order should be preserved */
476 prev = NULL;
477 cur = *first;
478 while (TRUE) {
479 if (cur == NULL) {
480 prev->next = var;
481 var->next = NULL;
482 break;
483 }
484
485 /* Checking for duplicate variant */
486 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
487 if (cmp == 0) {
488 /* duplicated variant */
489 bAdded = FALSE;
490 break;
491 }
492 prev = cur;
493 cur = cur->next;
494 }
495 }
496
497 return bAdded;
498 }
499
500 static UBool
501 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
502 UBool bAdded = TRUE;
503
504 if (*first == NULL) {
505 attr->next = NULL;
506 *first = attr;
507 } else {
508 AttributeListEntry *prev, *cur;
509 int32_t cmp;
510
511 /* reorder variants in alphabetical order */
512 prev = NULL;
513 cur = *first;
514 while (TRUE) {
515 if (cur == NULL) {
516 prev->next = attr;
517 attr->next = NULL;
518 break;
519 }
520 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
521 if (cmp < 0) {
522 if (prev == NULL) {
523 *first = attr;
524 } else {
525 prev->next = attr;
526 }
527 attr->next = cur;
528 break;
529 }
530 if (cmp == 0) {
531 /* duplicated variant */
532 bAdded = FALSE;
533 break;
534 }
535 prev = cur;
536 cur = cur->next;
537 }
538 }
539
540 return bAdded;
541 }
542
543
544 static UBool
545 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
546 UBool bAdded = TRUE;
547
548 if (*first == NULL) {
549 ext->next = NULL;
550 *first = ext;
551 } else {
552 ExtensionListEntry *prev, *cur;
553 int32_t cmp;
554
555 /* reorder variants in alphabetical order */
556 prev = NULL;
557 cur = *first;
558 while (TRUE) {
559 if (cur == NULL) {
560 prev->next = ext;
561 ext->next = NULL;
562 break;
563 }
564 if (localeToBCP) {
565 /* special handling for locale to bcp conversion */
566 int32_t len, curlen;
567
568 len = (int32_t)uprv_strlen(ext->key);
569 curlen = (int32_t)uprv_strlen(cur->key);
570
571 if (len == 1 && curlen == 1) {
572 if (*(ext->key) == *(cur->key)) {
573 cmp = 0;
574 } else if (*(ext->key) == PRIVATEUSE) {
575 cmp = 1;
576 } else if (*(cur->key) == PRIVATEUSE) {
577 cmp = -1;
578 } else {
579 cmp = *(ext->key) - *(cur->key);
580 }
581 } else if (len == 1) {
582 cmp = *(ext->key) - LDMLEXT;
583 } else if (curlen == 1) {
584 cmp = LDMLEXT - *(cur->key);
585 } else {
586 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
587 }
588 } else {
589 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
590 }
591 if (cmp < 0) {
592 if (prev == NULL) {
593 *first = ext;
594 } else {
595 prev->next = ext;
596 }
597 ext->next = cur;
598 break;
599 }
600 if (cmp == 0) {
601 /* duplicated extension key */
602 bAdded = FALSE;
603 break;
604 }
605 prev = cur;
606 cur = cur->next;
607 }
608 }
609
610 return bAdded;
611 }
612
613 static void
614 _initializeULanguageTag(ULanguageTag* langtag) {
615 int32_t i;
616
617 langtag->buf = NULL;
618
619 langtag->language = EMPTY;
620 for (i = 0; i < MAXEXTLANG; i++) {
621 langtag->extlang[i] = NULL;
622 }
623
624 langtag->script = EMPTY;
625 langtag->region = EMPTY;
626
627 langtag->variants = NULL;
628 langtag->extensions = NULL;
629
630 langtag->attributes = NULL;
631
632 langtag->grandfathered = EMPTY;
633 langtag->privateuse = EMPTY;
634 }
635
/* Name of the resource bundle holding the key/type mappings, and of its tables. */
#define KEYTYPEDATA "keyTypeData"
#define KEYMAP      "keyMap"
#define TYPEMAP     "typeMap"
#define TYPEALIAS   "typeAlias"

/* Sizes of the scratch buffers used by the key/type conversion helpers. */
#define MAX_BCP47_SUBTAG_LEN 9  /* including null terminator */
#define MAX_LDML_KEY_LEN     22
#define MAX_LDML_TYPE_LEN    32
643
644 static int32_t
645 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
646 char* bcpKey, int32_t bcpKeyCapacity,
647 UErrorCode *status) {
648 UResourceBundle *rb;
649 char keyBuf[MAX_LDML_KEY_LEN];
650 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
651 int32_t resultLen = 0;
652 int32_t i;
653 UErrorCode tmpStatus = U_ZERO_ERROR;
654 const UChar *uBcpKey;
655 int32_t bcpKeyLen;
656
657 if (keyLen < 0) {
658 keyLen = (int32_t)uprv_strlen(key);
659 }
660
661 if (keyLen >= sizeof(keyBuf)) {
662 /* no known valid LDML key exceeding 21 */
663 *status = U_ILLEGAL_ARGUMENT_ERROR;
664 return 0;
665 }
666
667 uprv_memcpy(keyBuf, key, keyLen);
668 keyBuf[keyLen] = 0;
669
670 /* to lower case */
671 for (i = 0; i < keyLen; i++) {
672 keyBuf[i] = uprv_tolower(keyBuf[i]);
673 }
674
675 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
676 ures_getByKey(rb, KEYMAP, rb, status);
677
678 if (U_FAILURE(*status)) {
679 ures_close(rb);
680 return 0;
681 }
682
683 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
684 if (U_SUCCESS(tmpStatus)) {
685 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
686 bcpKeyBuf[bcpKeyLen] = 0;
687 resultLen = bcpKeyLen;
688 } else {
689 if (_isLDMLKey(key, keyLen)) {
690 uprv_memcpy(bcpKeyBuf, key, keyLen);
691 bcpKeyBuf[keyLen] = 0;
692 resultLen = keyLen;
693 } else {
694 /* mapping not availabe */
695 *status = U_ILLEGAL_ARGUMENT_ERROR;
696 }
697 }
698 ures_close(rb);
699
700 if (U_FAILURE(*status)) {
701 return 0;
702 }
703
704 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
705 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
706 }
707
708 static int32_t
709 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
710 char* key, int32_t keyCapacity,
711 UErrorCode *status) {
712 UResourceBundle *rb;
713 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
714 int32_t resultLen = 0;
715 int32_t i;
716 const char *resKey = NULL;
717 UResourceBundle *mapData;
718
719 if (bcpKeyLen < 0) {
720 bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
721 }
722
723 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
724 *status = U_ILLEGAL_ARGUMENT_ERROR;
725 return 0;
726 }
727
728 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
729 bcpKeyBuf[bcpKeyLen] = 0;
730
731 /* to lower case */
732 for (i = 0; i < bcpKeyLen; i++) {
733 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
734 }
735
736 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
737 ures_getByKey(rb, KEYMAP, rb, status);
738 if (U_FAILURE(*status)) {
739 ures_close(rb);
740 return 0;
741 }
742
743 mapData = ures_getNextResource(rb, NULL, status);
744 while (U_SUCCESS(*status)) {
745 const UChar *uBcpKey;
746 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
747 int32_t tmpBcpKeyLen;
748
749 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
750 if (U_FAILURE(*status)) {
751 break;
752 }
753 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
754 tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
755 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
756 /* found a matching BCP47 key */
757 resKey = ures_getKey(mapData);
758 resultLen = (int32_t)uprv_strlen(resKey);
759 break;
760 }
761 if (!ures_hasNext(rb)) {
762 break;
763 }
764 ures_getNextResource(rb, mapData, status);
765 }
766 ures_close(mapData);
767 ures_close(rb);
768
769 if (U_FAILURE(*status)) {
770 return 0;
771 }
772
773 if (resKey == NULL) {
774 resKey = bcpKeyBuf;
775 resultLen = bcpKeyLen;
776 }
777
778 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
779 return u_terminateChars(key, keyCapacity, resultLen, status);
780 }
781
782 static int32_t
783 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
784 const char* type, int32_t typeLen,
785 char* bcpType, int32_t bcpTypeCapacity,
786 UErrorCode *status) {
787 UResourceBundle *rb, *keyTypeData, *typeMapForKey;
788 char keyBuf[MAX_LDML_KEY_LEN];
789 char typeBuf[MAX_LDML_TYPE_LEN];
790 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
791 int32_t resultLen = 0;
792 int32_t i;
793 UErrorCode tmpStatus = U_ZERO_ERROR;
794 const UChar *uBcpType, *uCanonicalType;
795 int32_t bcpTypeLen, canonicalTypeLen;
796 UBool isTimezone = FALSE;
797
798 if (keyLen < 0) {
799 keyLen = (int32_t)uprv_strlen(key);
800 }
801 if (keyLen >= sizeof(keyBuf)) {
802 /* no known valid LDML key exceeding 21 */
803 *status = U_ILLEGAL_ARGUMENT_ERROR;
804 return 0;
805 }
806 uprv_memcpy(keyBuf, key, keyLen);
807 keyBuf[keyLen] = 0;
808
809 /* to lower case */
810 for (i = 0; i < keyLen; i++) {
811 keyBuf[i] = uprv_tolower(keyBuf[i]);
812 }
813 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
814 isTimezone = TRUE;
815 }
816
817 if (typeLen < 0) {
818 typeLen = (int32_t)uprv_strlen(type);
819 }
820 if (typeLen >= sizeof(typeBuf)) {
821 *status = U_ILLEGAL_ARGUMENT_ERROR;
822 return 0;
823 }
824
825 if (isTimezone) {
826 /* replace '/' with ':' */
827 for (i = 0; i < typeLen; i++) {
828 if (*(type + i) == '/') {
829 typeBuf[i] = ':';
830 } else {
831 typeBuf[i] = *(type + i);
832 }
833 }
834 typeBuf[typeLen] = 0;
835 type = &typeBuf[0];
836 }
837
838 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
839 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
840 if (U_FAILURE(*status)) {
841 ures_close(rb);
842 ures_close(keyTypeData);
843 return 0;
844 }
845
846 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
847 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
848 if (U_SUCCESS(tmpStatus)) {
849 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
850 resultLen = bcpTypeLen;
851 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
852 /* is this type alias? */
853 tmpStatus = U_ZERO_ERROR;
854 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
855 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
856 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
857 if (U_SUCCESS(tmpStatus)) {
858 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
859 if (isTimezone) {
860 /* replace '/' with ':' */
861 for (i = 0; i < canonicalTypeLen; i++) {
862 if (typeBuf[i] == '/') {
863 typeBuf[i] = ':';
864 }
865 }
866 }
867 typeBuf[canonicalTypeLen] = 0;
868
869 /* look up the canonical type */
870 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
871 if (U_SUCCESS(tmpStatus)) {
872 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
873 resultLen = bcpTypeLen;
874 }
875 }
876 if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
877 if (_isLDMLType(type, typeLen)) {
878 uprv_memcpy(bcpTypeBuf, type, typeLen);
879 resultLen = typeLen;
880 } else {
881 /* mapping not availabe */
882 *status = U_ILLEGAL_ARGUMENT_ERROR;
883 }
884 }
885 } else {
886 *status = tmpStatus;
887 }
888 ures_close(rb);
889 ures_close(typeMapForKey);
890 ures_close(keyTypeData);
891
892 if (U_FAILURE(*status)) {
893 return 0;
894 }
895
896 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
897 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
898 }
899
900 static int32_t
901 _bcp47ToLDMLType(const char* key, int32_t keyLen,
902 const char* bcpType, int32_t bcpTypeLen,
903 char* type, int32_t typeCapacity,
904 UErrorCode *status) {
905 UResourceBundle *rb;
906 char keyBuf[MAX_LDML_KEY_LEN];
907 char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
908 int32_t resultLen = 0;
909 int32_t i, typeSize;
910 const char *resType = NULL;
911 UResourceBundle *mapData;
912 UErrorCode tmpStatus = U_ZERO_ERROR;
913 int32_t copyLen;
914
915 if (keyLen < 0) {
916 keyLen = (int32_t)uprv_strlen(key);
917 }
918
919 if (keyLen >= sizeof(keyBuf)) {
920 /* no known valid LDML key exceeding 21 */
921 *status = U_ILLEGAL_ARGUMENT_ERROR;
922 return 0;
923 }
924 uprv_memcpy(keyBuf, key, keyLen);
925 keyBuf[keyLen] = 0;
926
927 /* to lower case */
928 for (i = 0; i < keyLen; i++) {
929 keyBuf[i] = uprv_tolower(keyBuf[i]);
930 }
931
932
933 if (bcpTypeLen < 0) {
934 bcpTypeLen = (int32_t)uprv_strlen(bcpType);
935 }
936
937 typeSize = 0;
938 for (i = 0; i < bcpTypeLen; i++) {
939 if (bcpType[i] == SEP) {
940 if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
941 *status = U_ILLEGAL_ARGUMENT_ERROR;
942 return 0;
943 }
944 typeSize = 0;
945 } else {
946 typeSize++;
947 }
948 }
949
950 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
951 bcpTypeBuf[bcpTypeLen] = 0;
952
953 /* to lower case */
954 for (i = 0; i < bcpTypeLen; i++) {
955 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
956 }
957
958 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
959 ures_getByKey(rb, TYPEMAP, rb, status);
960 if (U_FAILURE(*status)) {
961 ures_close(rb);
962 return 0;
963 }
964
965 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
966 mapData = ures_getNextResource(rb, NULL, &tmpStatus);
967 while (U_SUCCESS(tmpStatus)) {
968 const UChar *uBcpType;
969 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
970 int32_t tmpBcpTypeLen;
971
972 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
973 if (U_FAILURE(tmpStatus)) {
974 break;
975 }
976 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
977 tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
978 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
979 /* found a matching BCP47 type */
980 resType = ures_getKey(mapData);
981 resultLen = (int32_t)uprv_strlen(resType);
982 break;
983 }
984 if (!ures_hasNext(rb)) {
985 break;
986 }
987 ures_getNextResource(rb, mapData, &tmpStatus);
988 }
989 ures_close(mapData);
990 ures_close(rb);
991
992 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
993 *status = tmpStatus;
994 return 0;
995 }
996
997 if (resType == NULL) {
998 resType = bcpTypeBuf;
999 resultLen = bcpTypeLen;
1000 }
1001
1002 copyLen = uprv_min(resultLen, typeCapacity);
1003 uprv_memcpy(type, resType, copyLen);
1004
1005 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
1006 for (i = 0; i < copyLen; i++) {
1007 if (*(type + i) == ':') {
1008 *(type + i) = '/';
1009 }
1010 }
1011 }
1012
1013 return u_terminateChars(type, typeCapacity, resultLen, status);
1014 }
1015
1016 static int32_t
1017 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1018 char buf[ULOC_LANG_CAPACITY];
1019 UErrorCode tmpStatus = U_ZERO_ERROR;
1020 int32_t len, i;
1021 int32_t reslen = 0;
1022
1023 if (U_FAILURE(*status)) {
1024 return 0;
1025 }
1026
1027 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1028 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1029 if (strict) {
1030 *status = U_ILLEGAL_ARGUMENT_ERROR;
1031 return 0;
1032 }
1033 len = 0;
1034 }
1035
1036 /* Note: returned language code is in lower case letters */
1037
1038 if (len == 0) {
1039 if (reslen < capacity) {
1040 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1041 }
1042 reslen += LANG_UND_LEN;
1043 } else if (!_isLanguageSubtag(buf, len)) {
1044 /* invalid language code */
1045 if (strict) {
1046 *status = U_ILLEGAL_ARGUMENT_ERROR;
1047 return 0;
1048 }
1049 if (reslen < capacity) {
1050 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1051 }
1052 reslen += LANG_UND_LEN;
1053 } else {
1054 /* resolve deprecated */
1055 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
1056 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1057 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1058 len = (int32_t)uprv_strlen(buf);
1059 break;
1060 }
1061 }
1062 if (reslen < capacity) {
1063 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1064 }
1065 reslen += len;
1066 }
1067 u_terminateChars(appendAt, capacity, reslen, status);
1068 return reslen;
1069 }
1070
1071 static int32_t
1072 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1073 char buf[ULOC_SCRIPT_CAPACITY];
1074 UErrorCode tmpStatus = U_ZERO_ERROR;
1075 int32_t len;
1076 int32_t reslen = 0;
1077
1078 if (U_FAILURE(*status)) {
1079 return 0;
1080 }
1081
1082 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1083 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1084 if (strict) {
1085 *status = U_ILLEGAL_ARGUMENT_ERROR;
1086 }
1087 return 0;
1088 }
1089
1090 if (len > 0) {
1091 if (!_isScriptSubtag(buf, len)) {
1092 /* invalid script code */
1093 if (strict) {
1094 *status = U_ILLEGAL_ARGUMENT_ERROR;
1095 }
1096 return 0;
1097 } else {
1098 if (reslen < capacity) {
1099 *(appendAt + reslen) = SEP;
1100 }
1101 reslen++;
1102
1103 if (reslen < capacity) {
1104 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1105 }
1106 reslen += len;
1107 }
1108 }
1109 u_terminateChars(appendAt, capacity, reslen, status);
1110 return reslen;
1111 }
1112
1113 static int32_t
1114 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1115 char buf[ULOC_COUNTRY_CAPACITY];
1116 UErrorCode tmpStatus = U_ZERO_ERROR;
1117 int32_t len;
1118 int32_t reslen = 0;
1119
1120 if (U_FAILURE(*status)) {
1121 return 0;
1122 }
1123
1124 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1125 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1126 if (strict) {
1127 *status = U_ILLEGAL_ARGUMENT_ERROR;
1128 }
1129 return 0;
1130 }
1131
1132 if (len > 0) {
1133 if (!_isRegionSubtag(buf, len)) {
1134 /* invalid region code */
1135 if (strict) {
1136 *status = U_ILLEGAL_ARGUMENT_ERROR;
1137 }
1138 return 0;
1139 } else {
1140 if (reslen < capacity) {
1141 *(appendAt + reslen) = SEP;
1142 }
1143 reslen++;
1144
1145 if (reslen < capacity) {
1146 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1147 }
1148 reslen += len;
1149 }
1150 }
1151 u_terminateChars(appendAt, capacity, reslen, status);
1152 return reslen;
1153 }
1154
1155 static int32_t
1156 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1157 char buf[ULOC_FULLNAME_CAPACITY];
1158 UErrorCode tmpStatus = U_ZERO_ERROR;
1159 int32_t len, i;
1160 int32_t reslen = 0;
1161
1162 if (U_FAILURE(*status)) {
1163 return 0;
1164 }
1165
1166 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1167 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1168 if (strict) {
1169 *status = U_ILLEGAL_ARGUMENT_ERROR;
1170 }
1171 return 0;
1172 }
1173
1174 if (len > 0) {
1175 char *p, *pVar;
1176 UBool bNext = TRUE;
1177 VariantListEntry *var;
1178 VariantListEntry *varFirst = NULL;
1179
1180 pVar = NULL;
1181 p = buf;
1182 while (bNext) {
1183 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1184 if (*p == 0) {
1185 bNext = FALSE;
1186 } else {
1187 *p = 0; /* terminate */
1188 }
1189 if (pVar == NULL) {
1190 if (strict) {
1191 *status = U_ILLEGAL_ARGUMENT_ERROR;
1192 break;
1193 }
1194 /* ignore empty variant */
1195 } else {
1196 /* ICU uses upper case letters for variants, but
1197 the canonical format is lowercase in BCP47 */
1198 for (i = 0; *(pVar + i) != 0; i++) {
1199 *(pVar + i) = uprv_tolower(*(pVar + i));
1200 }
1201
1202 /* validate */
1203 if (_isVariantSubtag(pVar, -1)) {
1204 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
1205 /* emit the variant to the list */
1206 var = uprv_malloc(sizeof(VariantListEntry));
1207 if (var == NULL) {
1208 *status = U_MEMORY_ALLOCATION_ERROR;
1209 break;
1210 }
1211 var->variant = pVar;
1212 if (!_addVariantToList(&varFirst, var)) {
1213 /* duplicated variant */
1214 uprv_free(var);
1215 if (strict) {
1216 *status = U_ILLEGAL_ARGUMENT_ERROR;
1217 break;
1218 }
1219 }
1220 } else {
1221 /* Special handling for POSIX variant, need to remember that we had it and then */
1222 /* treat it like an extension later. */
1223 *hadPosix = TRUE;
1224 }
1225 } else if (strict) {
1226 *status = U_ILLEGAL_ARGUMENT_ERROR;
1227 break;
1228 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1229 /* Handle private use subtags separately */
1230 break;
1231 }
1232 }
1233 /* reset variant starting position */
1234 pVar = NULL;
1235 } else if (pVar == NULL) {
1236 pVar = p;
1237 }
1238 p++;
1239 }
1240
1241 if (U_SUCCESS(*status)) {
1242 if (varFirst != NULL) {
1243 int32_t varLen;
1244
1245 /* write out validated/normalized variants to the target */
1246 var = varFirst;
1247 while (var != NULL) {
1248 if (reslen < capacity) {
1249 *(appendAt + reslen) = SEP;
1250 }
1251 reslen++;
1252 varLen = (int32_t)uprv_strlen(var->variant);
1253 if (reslen < capacity) {
1254 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1255 }
1256 reslen += varLen;
1257 var = var->next;
1258 }
1259 }
1260 }
1261
1262 /* clean up */
1263 var = varFirst;
1264 while (var != NULL) {
1265 VariantListEntry *tmpVar = var->next;
1266 uprv_free(var);
1267 var = tmpVar;
1268 }
1269
1270 if (U_FAILURE(*status)) {
1271 return 0;
1272 }
1273 }
1274
1275 u_terminateChars(appendAt, capacity, reslen, status);
1276 return reslen;
1277 }
1278
1279 static int32_t
1280 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1281 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1282 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1283 int32_t attrBufLength = 0;
1284 UBool isAttribute = FALSE;
1285 UEnumeration *keywordEnum = NULL;
1286 int32_t reslen = 0;
1287
1288 keywordEnum = uloc_openKeywords(localeID, status);
1289 if (U_FAILURE(*status) && !hadPosix) {
1290 uenum_close(keywordEnum);
1291 return 0;
1292 }
1293 if (keywordEnum != NULL || hadPosix) {
1294 /* reorder extensions */
1295 int32_t len;
1296 const char *key;
1297 ExtensionListEntry *firstExt = NULL;
1298 ExtensionListEntry *ext;
1299 AttributeListEntry *firstAttr = NULL;
1300 AttributeListEntry *attr;
1301 char *attrValue;
1302 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1303 char *pExtBuf = extBuf;
1304 int32_t extBufCapacity = sizeof(extBuf);
1305 const char *bcpKey, *bcpValue;
1306 UErrorCode tmpStatus = U_ZERO_ERROR;
1307 int32_t keylen;
1308 UBool isLDMLKeyword;
1309
1310 while (TRUE) {
1311 isAttribute = FALSE;
1312 key = uenum_next(keywordEnum, NULL, status);
1313 if (key == NULL) {
1314 break;
1315 }
1316 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1317 if (U_FAILURE(tmpStatus)) {
1318 if (strict) {
1319 *status = U_ILLEGAL_ARGUMENT_ERROR;
1320 break;
1321 }
1322 /* ignore this keyword */
1323 tmpStatus = U_ZERO_ERROR;
1324 continue;
1325 }
1326
1327 keylen = (int32_t)uprv_strlen(key);
1328 isLDMLKeyword = (keylen > 1);
1329
1330 /* special keyword used for representing Unicode locale attributes */
1331 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1332 isAttribute = TRUE;
1333 if (len > 0) {
1334 int32_t i = 0;
1335 while (TRUE) {
1336 attrBufLength = 0;
1337 for (; i < len; i++) {
1338 if (buf[i] != '-') {
1339 attrBuf[attrBufLength++] = buf[i];
1340 } else {
1341 i++;
1342 break;
1343 }
1344 }
1345 if (attrBufLength > 0) {
1346 attrBuf[attrBufLength] = 0;
1347
1348 } else if (i >= len){
1349 break;
1350 }
1351
1352 /* create AttributeListEntry */
1353 attr = uprv_malloc(sizeof(AttributeListEntry));
1354 if (attr == NULL) {
1355 *status = U_MEMORY_ALLOCATION_ERROR;
1356 break;
1357 }
1358 attrValue = uprv_malloc(attrBufLength + 1);
1359 if (attrValue == NULL) {
1360 *status = U_MEMORY_ALLOCATION_ERROR;
1361 break;
1362 }
1363 uprv_strcpy(attrValue, attrBuf);
1364 attr->attribute = attrValue;
1365
1366 if (!_addAttributeToList(&firstAttr, attr)) {
1367 uprv_free(attr);
1368 uprv_free(attrValue);
1369 if (strict) {
1370 *status = U_ILLEGAL_ARGUMENT_ERROR;
1371 break;
1372 }
1373 }
1374 }
1375 }
1376 } else if (isLDMLKeyword) {
1377 int32_t modKeyLen;
1378
1379 /* transform key and value to bcp47 style */
1380 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1381 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1382 if (strict) {
1383 *status = U_ILLEGAL_ARGUMENT_ERROR;
1384 break;
1385 }
1386 tmpStatus = U_ZERO_ERROR;
1387 continue;
1388 }
1389
1390 bcpKey = pExtBuf;
1391 pExtBuf += (modKeyLen + 1);
1392 extBufCapacity -= (modKeyLen + 1);
1393
1394 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1395 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1396 if (strict) {
1397 *status = U_ILLEGAL_ARGUMENT_ERROR;
1398 break;
1399 }
1400 tmpStatus = U_ZERO_ERROR;
1401 continue;
1402 }
1403 bcpValue = pExtBuf;
1404 pExtBuf += (len + 1);
1405 extBufCapacity -= (len + 1);
1406 } else {
1407 if (*key == PRIVATEUSE) {
1408 if (!_isPrivateuseValueSubtags(buf, len)) {
1409 if (strict) {
1410 *status = U_ILLEGAL_ARGUMENT_ERROR;
1411 break;
1412 }
1413 continue;
1414 }
1415 } else {
1416 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1417 if (strict) {
1418 *status = U_ILLEGAL_ARGUMENT_ERROR;
1419 break;
1420 }
1421 continue;
1422 }
1423 }
1424 bcpKey = key;
1425 if ((len + 1) < extBufCapacity) {
1426 uprv_memcpy(pExtBuf, buf, len);
1427 bcpValue = pExtBuf;
1428
1429 pExtBuf += len;
1430
1431 *pExtBuf = 0;
1432 pExtBuf++;
1433
1434 extBufCapacity -= (len + 1);
1435 } else {
1436 *status = U_ILLEGAL_ARGUMENT_ERROR;
1437 break;
1438 }
1439 }
1440
1441 if (!isAttribute) {
1442 /* create ExtensionListEntry */
1443 ext = uprv_malloc(sizeof(ExtensionListEntry));
1444 if (ext == NULL) {
1445 *status = U_MEMORY_ALLOCATION_ERROR;
1446 break;
1447 }
1448 ext->key = bcpKey;
1449 ext->value = bcpValue;
1450
1451 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1452 uprv_free(ext);
1453 if (strict) {
1454 *status = U_ILLEGAL_ARGUMENT_ERROR;
1455 break;
1456 }
1457 }
1458 }
1459 }
1460
1461 /* Special handling for POSIX variant - add the keywords for POSIX */
1462 if (hadPosix) {
1463 /* create ExtensionListEntry for POSIX */
1464 ext = uprv_malloc(sizeof(ExtensionListEntry));
1465 if (ext == NULL) {
1466 *status = U_MEMORY_ALLOCATION_ERROR;
1467 goto cleanup;
1468 }
1469 ext->key = POSIX_KEY;
1470 ext->value = POSIX_VALUE;
1471
1472 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1473 uprv_free(ext);
1474 }
1475 }
1476
1477 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1478 UBool startLDMLExtension = FALSE;
1479
1480 attr = firstAttr;
1481 ext = firstExt;
1482 do {
1483 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1484 /* write LDML singleton extension */
1485 if (reslen < capacity) {
1486 *(appendAt + reslen) = SEP;
1487 }
1488 reslen++;
1489 if (reslen < capacity) {
1490 *(appendAt + reslen) = LDMLEXT;
1491 }
1492 reslen++;
1493
1494 startLDMLExtension = TRUE;
1495 }
1496
1497 /* write out the sorted BCP47 attributes, extensions and private use */
1498 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1499 if (reslen < capacity) {
1500 *(appendAt + reslen) = SEP;
1501 }
1502 reslen++;
1503 len = (int32_t)uprv_strlen(ext->key);
1504 if (reslen < capacity) {
1505 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1506 }
1507 reslen += len;
1508 if (reslen < capacity) {
1509 *(appendAt + reslen) = SEP;
1510 }
1511 reslen++;
1512 len = (int32_t)uprv_strlen(ext->value);
1513 if (reslen < capacity) {
1514 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1515 }
1516 reslen += len;
1517
1518 ext = ext->next;
1519 } else if (attr) {
1520 /* write the value for the attributes */
1521 if (reslen < capacity) {
1522 *(appendAt + reslen) = SEP;
1523 }
1524 reslen++;
1525 len = (int32_t)uprv_strlen(attr->attribute);
1526 if (reslen < capacity) {
1527 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1528 }
1529 reslen += len;
1530
1531 attr = attr->next;
1532 }
1533 } while (attr != NULL || ext != NULL);
1534 }
1535 cleanup:
1536 /* clean up */
1537 ext = firstExt;
1538 while (ext != NULL) {
1539 ExtensionListEntry *tmpExt = ext->next;
1540 uprv_free(ext);
1541 ext = tmpExt;
1542 }
1543
1544 attr = firstAttr;
1545 while (attr != NULL) {
1546 AttributeListEntry *tmpAttr = attr->next;
1547 char *pValue = (char *)attr->attribute;
1548 uprv_free(pValue);
1549 uprv_free(attr);
1550 attr = tmpAttr;
1551 }
1552
1553 uenum_close(keywordEnum);
1554
1555 if (U_FAILURE(*status)) {
1556 return 0;
1557 }
1558 }
1559
1560 return u_terminateChars(appendAt, capacity, reslen, status);
1561 }
1562
1563 /**
1564 * Append keywords parsed from LDML extension value
1565 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1566 * Note: char* buf is used for storing keywords
1567 */
1568 static void
1569 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1570 const char *p, *pNext, *pSep, *pTmp, *pTmpStart;
1571 const char *pBcpKey, *pBcpType;
1572 const char *pKey, *pType;
1573 int32_t bcpKeyLen = 0, bcpTypeLen;
1574 ExtensionListEntry *kwd, *nextKwd;
1575 ExtensionListEntry *kwdFirst = NULL;
1576 int32_t bufIdx = 0;
1577 int32_t len;
1578 UBool variantExists = *posixVariant;
1579 UBool searchFurther;
1580
1581 /* Reset the posixVariant value */
1582 *posixVariant = FALSE;
1583
1584 pNext = ldmlext;
1585 pBcpKey = pBcpType = NULL;
1586 while (pNext) {
1587 p = pSep = pNext;
1588
1589 /* locate next separator char */
1590 while (*pSep) {
1591 if (*pSep == SEP) {
1592 searchFurther = FALSE;
1593 if (pBcpKey != NULL) {
1594 pTmpStart = (pSep + 1);
1595 pTmp = pTmpStart;
1596 /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */
1597 while (*pTmp) {
1598 if (*pTmp == SEP || *(pTmp + 1) == 0) {
1599 if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) {
1600 searchFurther = TRUE;
1601 }
1602 break;
1603 }
1604 pTmp++;
1605 }
1606 }
1607 if (searchFurther) {
1608 pSep++;
1609 continue;
1610 } else {
1611 break;
1612 }
1613 }
1614 pSep++;
1615 }
1616 if (*pSep == 0) {
1617 /* last subtag */
1618 pNext = NULL;
1619 } else {
1620 pNext = pSep + 1;
1621 }
1622
1623 if (pBcpKey == NULL) {
1624 pBcpKey = p;
1625 bcpKeyLen = (int32_t)(pSep - p);
1626 } else {
1627 pBcpType = p;
1628 bcpTypeLen = (int32_t)(pSep - p);
1629
1630 /* BCP key to locale key */
1631 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1632 if (U_FAILURE(*status)) {
1633 goto cleanup;
1634 }
1635 pKey = buf + bufIdx;
1636 bufIdx += len;
1637 *(buf + bufIdx) = 0;
1638 bufIdx++;
1639
1640 /* BCP type to locale type */
1641 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1642 if (U_FAILURE(*status)) {
1643 goto cleanup;
1644 }
1645 pType = buf + bufIdx;
1646 bufIdx += len;
1647 *(buf + bufIdx) = 0;
1648 bufIdx++;
1649
1650 /* Special handling for u-va-posix, since we want to treat this as a variant, not */
1651 /* as a keyword. */
1652
1653 if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
1654 *posixVariant = TRUE;
1655 } else {
1656 /* create an ExtensionListEntry for this keyword */
1657 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1658 if (kwd == NULL) {
1659 *status = U_MEMORY_ALLOCATION_ERROR;
1660 goto cleanup;
1661 }
1662
1663 kwd->key = pKey;
1664 kwd->value = pType;
1665
1666 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1667 *status = U_ILLEGAL_ARGUMENT_ERROR;
1668 uprv_free(kwd);
1669 goto cleanup;
1670 }
1671 }
1672
1673 /* for next pair */
1674 pBcpKey = NULL;
1675 pBcpType = NULL;
1676 }
1677 }
1678
1679 if (pBcpKey != NULL) {
1680 *status = U_ILLEGAL_ARGUMENT_ERROR;
1681 goto cleanup;
1682 }
1683
1684 kwd = kwdFirst;
1685 while (kwd != NULL) {
1686 nextKwd = kwd->next;
1687 _addExtensionToList(appendTo, kwd, FALSE);
1688 kwd = nextKwd;
1689 }
1690
1691 return;
1692
1693 cleanup:
1694 kwd = kwdFirst;
1695 while (kwd != NULL) {
1696 nextKwd = kwd->next;
1697 uprv_free(kwd);
1698 kwd = nextKwd;
1699 }
1700 }
1701
1702
1703 static int32_t
1704 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1705 int32_t reslen = 0;
1706 int32_t i, n;
1707 int32_t len;
1708 ExtensionListEntry *kwdFirst = NULL;
1709 ExtensionListEntry *kwd;
1710 AttributeListEntry *attrFirst = NULL;
1711 AttributeListEntry *attr;
1712 const char *key, *type;
1713 char *kwdBuf = NULL;
1714 int32_t kwdBufLength = capacity;
1715 UBool posixVariant = FALSE;
1716
1717 if (U_FAILURE(*status)) {
1718 return 0;
1719 }
1720
1721 kwdBuf = (char *)uprv_malloc(kwdBufLength);
1722 if (kwdBuf == NULL) {
1723 *status = U_MEMORY_ALLOCATION_ERROR;
1724 return 0;
1725 }
1726
1727 /* Determine if variants already exists */
1728 if (ultag_getVariantsSize(langtag)) {
1729 posixVariant = TRUE;
1730 }
1731
1732 n = ultag_getExtensionsSize(langtag);
1733
1734 /* resolve locale keywords and reordering keys */
1735 for (i = 0; i < n; i++) {
1736 key = ultag_getExtensionKey(langtag, i);
1737 type = ultag_getExtensionValue(langtag, i);
1738 if (*key == LDMLEXT) {
1739 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1740 if (U_FAILURE(*status)) {
1741 break;
1742 }
1743 } else {
1744 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1745 if (kwd == NULL) {
1746 *status = U_MEMORY_ALLOCATION_ERROR;
1747 break;
1748 }
1749 kwd->key = key;
1750 kwd->value = type;
1751 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1752 uprv_free(kwd);
1753 *status = U_ILLEGAL_ARGUMENT_ERROR;
1754 break;
1755 }
1756 }
1757 }
1758
1759 if (U_SUCCESS(*status)) {
1760 type = ultag_getPrivateUse(langtag);
1761 if ((int32_t)uprv_strlen(type) > 0) {
1762 /* add private use as a keyword */
1763 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1764 if (kwd == NULL) {
1765 *status = U_MEMORY_ALLOCATION_ERROR;
1766 } else {
1767 kwd->key = PRIVATEUSE_KEY;
1768 kwd->value = type;
1769 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1770 uprv_free(kwd);
1771 *status = U_ILLEGAL_ARGUMENT_ERROR;
1772 }
1773 }
1774 }
1775 }
1776
1777 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1778
1779 if (U_SUCCESS(*status) && posixVariant) {
1780 len = (int32_t) uprv_strlen(_POSIX);
1781 if (reslen < capacity) {
1782 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1783 }
1784 reslen += len;
1785 }
1786
1787 attrFirst = langtag->attributes;
1788 if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) {
1789 /* write out the sorted keywords */
1790 UBool firstValue = TRUE;
1791 UBool firstAttr = TRUE;
1792 kwd = kwdFirst;
1793 attr = attrFirst;
1794 do {
1795 if (reslen < capacity) {
1796 if (firstValue) {
1797 /* '@' */
1798 *(appendAt + reslen) = LOCALE_EXT_SEP;
1799 firstValue = FALSE;
1800 } else if (attr) {
1801 /* '-' */
1802 *(appendAt + reslen) = SEP;
1803 }else {
1804 /* ';' */
1805 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1806 }
1807 }
1808 reslen++;
1809
1810 if (attr) {
1811 if (firstAttr) {
1812 len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY);
1813 if (reslen < capacity) {
1814 uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen));
1815 }
1816 reslen += len;
1817
1818 /* '=' */
1819 if (reslen < capacity) {
1820 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1821 }
1822
1823 reslen++;
1824
1825 firstAttr = FALSE;
1826 }
1827
1828 len = (int32_t)uprv_strlen(attr->attribute);
1829 if (reslen < capacity) {
1830 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1831 }
1832 reslen += len;
1833
1834 attr = attr->next;
1835 } else if (kwd) {
1836 /* key */
1837 len = (int32_t)uprv_strlen(kwd->key);
1838 if (reslen < capacity) {
1839 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1840 }
1841 reslen += len;
1842
1843 /* '=' */
1844 if (reslen < capacity) {
1845 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1846 }
1847 reslen++;
1848
1849 /* type */
1850 len = (int32_t)uprv_strlen(kwd->value);
1851 if (reslen < capacity) {
1852 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1853 }
1854 reslen += len;
1855
1856 kwd = kwd->next;
1857 }
1858 } while (kwd || attr);
1859 }
1860
1861 /* clean up */
1862 kwd = kwdFirst;
1863 while (kwd != NULL) {
1864 ExtensionListEntry *tmpKwd = kwd->next;
1865 uprv_free(kwd);
1866 kwd = tmpKwd;
1867 }
1868
1869 uprv_free(kwdBuf);
1870
1871 if (U_FAILURE(*status)) {
1872 return 0;
1873 }
1874
1875 return u_terminateChars(appendAt, capacity, reslen, status);
1876 }
1877
1878 static int32_t
1879 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1880 char buf[ULOC_FULLNAME_CAPACITY];
1881 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1882 UErrorCode tmpStatus = U_ZERO_ERROR;
1883 int32_t len, i;
1884 int32_t reslen = 0;
1885
1886 if (U_FAILURE(*status)) {
1887 return 0;
1888 }
1889
1890 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1891 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1892 if (strict) {
1893 *status = U_ILLEGAL_ARGUMENT_ERROR;
1894 }
1895 return 0;
1896 }
1897
1898 if (len > 0) {
1899 char *p, *pPriv;
1900 UBool bNext = TRUE;
1901 UBool firstValue = TRUE;
1902 UBool writeValue;
1903
1904 pPriv = NULL;
1905 p = buf;
1906 while (bNext) {
1907 writeValue = FALSE;
1908 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1909 if (*p == 0) {
1910 bNext = FALSE;
1911 } else {
1912 *p = 0; /* terminate */
1913 }
1914 if (pPriv != NULL) {
1915 /* Private use in the canonical format is lowercase in BCP47 */
1916 for (i = 0; *(pPriv + i) != 0; i++) {
1917 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1918 }
1919
1920 /* validate */
1921 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1922 if (firstValue) {
1923 if (!_isVariantSubtag(pPriv, -1)) {
1924 writeValue = TRUE;
1925 }
1926 } else {
1927 writeValue = TRUE;
1928 }
1929 } else if (strict) {
1930 *status = U_ILLEGAL_ARGUMENT_ERROR;
1931 break;
1932 } else {
1933 break;
1934 }
1935
1936 if (writeValue) {
1937 if (reslen < capacity) {
1938 tmpAppend[reslen++] = SEP;
1939 }
1940
1941 if (firstValue) {
1942 if (reslen < capacity) {
1943 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1944 }
1945
1946 if (reslen < capacity) {
1947 tmpAppend[reslen++] = SEP;
1948 }
1949
1950 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1951 if (reslen < capacity) {
1952 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1953 }
1954 reslen += len;
1955
1956 if (reslen < capacity) {
1957 tmpAppend[reslen++] = SEP;
1958 }
1959
1960 firstValue = FALSE;
1961 }
1962
1963 len = (int32_t)uprv_strlen(pPriv);
1964 if (reslen < capacity) {
1965 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1966 }
1967 reslen += len;
1968 }
1969 }
1970 /* reset private use starting position */
1971 pPriv = NULL;
1972 } else if (pPriv == NULL) {
1973 pPriv = p;
1974 }
1975 p++;
1976 }
1977
1978 if (U_FAILURE(*status)) {
1979 return 0;
1980 }
1981 }
1982
1983 if (U_SUCCESS(*status)) {
1984 len = reslen;
1985 if (reslen < capacity) {
1986 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1987 }
1988 }
1989
1990 u_terminateChars(appendAt, capacity, reslen, status);
1991
1992 return reslen;
1993 }
1994
1995 /*
1996 * -------------------------------------------------
1997 *
1998 * ultag_ functions
1999 *
2000 * -------------------------------------------------
2001 */
2002
/*
 * Parser state flags. The parser keeps a bit set of the subtag kinds that
 * may legally come next; each flag below names one kind.
 */
#define LANG (1 << 0)   /* language */
#define EXTL (1 << 1)   /* extlang */
#define SCRT (1 << 2)   /* script */
#define REGN (1 << 3)   /* region */
#define VART (1 << 4)   /* variant */
#define EXTS (1 << 5)   /* extension singleton */
#define EXTV (1 << 6)   /* extension value subtag */
#define PRIV (1 << 7)   /* private use */
#define ATTR (1 << 8)   /* attribute */
2013
2014 static ULanguageTag*
2015 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2016 ULanguageTag *t;
2017 char *tagBuf;
2018 int16_t next;
2019 char *pSubtag, *pNext, *pLastGoodPosition;
2020 int32_t subtagLen;
2021 int32_t extlangIdx;
2022 ExtensionListEntry *pExtension;
2023 AttributeListEntry *pAttribute;
2024 char *pExtValueSubtag, *pExtValueSubtagEnd;
2025 int32_t i;
2026 UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE;
2027
2028 if (parsedLen != NULL) {
2029 *parsedLen = 0;
2030 }
2031
2032 if (U_FAILURE(*status)) {
2033 return NULL;
2034 }
2035
2036 if (tagLen < 0) {
2037 tagLen = (int32_t)uprv_strlen(tag);
2038 }
2039
2040 /* copy the entire string */
2041 tagBuf = (char*)uprv_malloc(tagLen + 1);
2042 if (tagBuf == NULL) {
2043 *status = U_MEMORY_ALLOCATION_ERROR;
2044 return NULL;
2045 }
2046 uprv_memcpy(tagBuf, tag, tagLen);
2047 *(tagBuf + tagLen) = 0;
2048
2049 /* create a ULanguageTag */
2050 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2051 if (t == NULL) {
2052 uprv_free(tagBuf);
2053 *status = U_MEMORY_ALLOCATION_ERROR;
2054 return NULL;
2055 }
2056 _initializeULanguageTag(t);
2057 t->buf = tagBuf;
2058
2059 if (tagLen < MINLEN) {
2060 /* the input tag is too short - return empty ULanguageTag */
2061 return t;
2062 }
2063
2064 /* check if the tag is grandfathered */
2065 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
2066 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2067 /* a grandfathered tag is always longer than its preferred mapping */
2068 int32_t newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
2069 if (tagLen < newTagLength) {
2070 uprv_free(tagBuf);
2071 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2072 if (tagBuf == NULL) {
2073 *status = U_MEMORY_ALLOCATION_ERROR;
2074 return NULL;
2075 }
2076 t->buf = tagBuf;
2077 tagLen = newTagLength;
2078 }
2079 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2080
2081 break;
2082 }
2083 }
2084
2085 /*
2086 * langtag = language
2087 * ["-" script]
2088 * ["-" region]
2089 * *("-" variant)
2090 * *("-" extension)
2091 * ["-" privateuse]
2092 */
2093
2094 next = LANG | PRIV;
2095 pNext = pLastGoodPosition = tagBuf;
2096 extlangIdx = 0;
2097 pExtension = NULL;
2098 pExtValueSubtag = NULL;
2099 pExtValueSubtagEnd = NULL;
2100 pAttribute = NULL;
2101 isLDMLExtension = FALSE;
2102 reqLDMLType = FALSE;
2103
2104 while (pNext) {
2105 char *pSep;
2106
2107 pSubtag = pNext;
2108
2109 /* locate next separator char */
2110 pSep = pSubtag;
2111 while (*pSep) {
2112 if (*pSep == SEP) {
2113 break;
2114 }
2115 pSep++;
2116 }
2117 if (*pSep == 0) {
2118 /* last subtag */
2119 pNext = NULL;
2120 } else {
2121 pNext = pSep + 1;
2122 }
2123 subtagLen = (int32_t)(pSep - pSubtag);
2124
2125 if (next & LANG) {
2126 if (_isLanguageSubtag(pSubtag, subtagLen)) {
2127 *pSep = 0; /* terminate */
2128 t->language = T_CString_toLowerCase(pSubtag);
2129
2130 pLastGoodPosition = pSep;
2131 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2132 continue;
2133 }
2134 }
2135 if (next & EXTL) {
2136 if (_isExtlangSubtag(pSubtag, subtagLen)) {
2137 *pSep = 0;
2138 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2139
2140 pLastGoodPosition = pSep;
2141 if (extlangIdx < 3) {
2142 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2143 } else {
2144 next = SCRT | REGN | VART | EXTS | PRIV;
2145 }
2146 continue;
2147 }
2148 }
2149 if (next & SCRT) {
2150 if (_isScriptSubtag(pSubtag, subtagLen)) {
2151 char *p = pSubtag;
2152
2153 *pSep = 0;
2154
2155 /* to title case */
2156 *p = uprv_toupper(*p);
2157 p++;
2158 for (; *p; p++) {
2159 *p = uprv_tolower(*p);
2160 }
2161
2162 t->script = pSubtag;
2163
2164 pLastGoodPosition = pSep;
2165 next = REGN | VART | EXTS | PRIV;
2166 continue;
2167 }
2168 }
2169 if (next & REGN) {
2170 if (_isRegionSubtag(pSubtag, subtagLen)) {
2171 *pSep = 0;
2172 t->region = T_CString_toUpperCase(pSubtag);
2173
2174 pLastGoodPosition = pSep;
2175 next = VART | EXTS | PRIV;
2176 continue;
2177 }
2178 }
2179 if (next & VART) {
2180 if (_isVariantSubtag(pSubtag, subtagLen) ||
2181 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2182 VariantListEntry *var;
2183 UBool isAdded;
2184
2185 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2186 if (var == NULL) {
2187 *status = U_MEMORY_ALLOCATION_ERROR;
2188 goto error;
2189 }
2190 *pSep = 0;
2191 var->variant = T_CString_toUpperCase(pSubtag);
2192 isAdded = _addVariantToList(&(t->variants), var);
2193 if (!isAdded) {
2194 /* duplicated variant entry */
2195 uprv_free(var);
2196 break;
2197 }
2198 pLastGoodPosition = pSep;
2199 next = VART | EXTS | PRIV;
2200 continue;
2201 }
2202 }
2203 if (next & EXTS) {
2204 if (_isExtensionSingleton(pSubtag, subtagLen)) {
2205 if (pExtension != NULL) {
2206 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2207 /* the previous extension is incomplete */
2208 uprv_free(pExtension);
2209 pExtension = NULL;
2210 break;
2211 }
2212
2213 /* terminate the previous extension value */
2214 *pExtValueSubtagEnd = 0;
2215 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2216
2217 /* insert the extension to the list */
2218 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2219 pLastGoodPosition = pExtValueSubtagEnd;
2220 } else {
2221 /* stop parsing here */
2222 uprv_free(pExtension);
2223 pExtension = NULL;
2224 break;
2225 }
2226
2227 if (isLDMLExtension && reqLDMLType) {
2228 /* incomplete LDML extension key and type pair */
2229 pExtension = NULL;
2230 break;
2231 }
2232 }
2233
2234 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
2235
2236 /* create a new extension */
2237 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
2238 if (pExtension == NULL) {
2239 *status = U_MEMORY_ALLOCATION_ERROR;
2240 goto error;
2241 }
2242 *pSep = 0;
2243 pExtension->key = T_CString_toLowerCase(pSubtag);
2244 pExtension->value = NULL; /* will be set later */
2245
2246 /*
2247 * reset the start and the end location of extension value
2248 * subtags for this extension
2249 */
2250 pExtValueSubtag = NULL;
2251 pExtValueSubtagEnd = NULL;
2252
2253 next = EXTV;
2254 continue;
2255 }
2256 }
2257 if (next & EXTV) {
2258 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2259 if (isLDMLExtension) {
2260 if (reqLDMLType) {
2261 /* already saw an LDML key */
2262 if (!_isLDMLType(pSubtag, subtagLen)) {
2263 /* stop parsing here and let the valid LDML extension key/type
2264 pairs processed by the code out of this while loop */
2265 break;
2266 }
2267 pExtValueSubtagEnd = pSep;
2268 reqLDMLType = FALSE;
2269 next = EXTS | EXTV | PRIV;
2270 } else {
2271 /* LDML key */
2272 if (!_isLDMLKey(pSubtag, subtagLen)) {
2273 /* May be part of incomplete type */
2274 if (pExtValueSubtag != NULL) {
2275 if (_isLDMLType(pSubtag, subtagLen)) {
2276 pExtValueSubtagEnd = pSep;
2277 reqLDMLType = FALSE;
2278 next = EXTS | EXTV | PRIV;
2279 }
2280 } else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) {
2281 /* Get attribute */
2282 next = ATTR;
2283 } else {
2284 /* stop parsing here and let the valid LDML extension key/type
2285 pairs processed by the code out of this while loop */
2286 break;
2287 }
2288 } else {
2289 reqLDMLType = TRUE;
2290 next = EXTV;
2291 }
2292 }
2293 } else {
2294 /* Mark the end of this subtag */
2295 pExtValueSubtagEnd = pSep;
2296 next = EXTS | EXTV | PRIV;
2297 }
2298
2299 if (next != ATTR) {
2300 if (pExtValueSubtag == NULL) {
2301 /* if the start postion of this extension's value is not yet,
2302 this one is the first value subtag */
2303 pExtValueSubtag = pSubtag;
2304 }
2305
2306 continue;
2307 }
2308 }
2309 }
2310 if (next & PRIV) {
2311 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2312 char *pPrivuseVal;
2313
2314 if (pExtension != NULL) {
2315 /* Process the last extension */
2316 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2317 /* the previous extension is incomplete */
2318 uprv_free(pExtension);
2319 pExtension = NULL;
2320 break;
2321 } else {
2322 /* terminate the previous extension value */
2323 *pExtValueSubtagEnd = 0;
2324 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2325
2326 /* insert the extension to the list */
2327 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2328 pLastGoodPosition = pExtValueSubtagEnd;
2329 pExtension = NULL;
2330 } else {
2331 /* stop parsing here */
2332 uprv_free(pExtension);
2333 pExtension = NULL;
2334 break;
2335 }
2336 }
2337 }
2338
2339 /* The rest of part will be private use value subtags */
2340 if (pNext == NULL) {
2341 /* empty private use subtag */
2342 break;
2343 }
2344 /* back up the private use value start position */
2345 pPrivuseVal = pNext;
2346
2347 /* validate private use value subtags */
2348 while (pNext) {
2349 pSubtag = pNext;
2350 pSep = pSubtag;
2351 while (*pSep) {
2352 if (*pSep == SEP) {
2353 break;
2354 }
2355 pSep++;
2356 }
2357 if (*pSep == 0) {
2358 /* last subtag */
2359 pNext = NULL;
2360 } else {
2361 pNext = pSep + 1;
2362 }
2363 subtagLen = (int32_t)(pSep - pSubtag);
2364
2365 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2366 *pSep = 0;
2367 next = VART;
2368 privateuseVar = TRUE;
2369 break;
2370 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2371 pLastGoodPosition = pSep;
2372 } else {
2373 break;
2374 }
2375 }
2376
2377 if (next == VART) {
2378 continue;
2379 }
2380
2381 if (pLastGoodPosition - pPrivuseVal > 0) {
2382 *pLastGoodPosition = 0;
2383 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2384 }
2385 /* No more subtags, exiting the parse loop */
2386 break;
2387 }
2388 break;
2389 }
2390
2391 if (next & ATTR) {
2392 /* create a new attribute */
2393 pAttribute = uprv_malloc(sizeof(AttributeListEntry));
2394 if (pAttribute == NULL) {
2395 *status = U_MEMORY_ALLOCATION_ERROR;
2396 goto error;
2397 }
2398
2399 *pSep = 0;
2400 pAttribute->attribute =T_CString_toLowerCase(pSubtag);
2401
2402 if (!_addAttributeToList(&(t->attributes), pAttribute)) {
2403 uprv_free(pAttribute);
2404 }
2405
2406 next = EXTS | EXTV | PRIV;
2407 continue;
2408 }
2409 /* If we fell through here, it means this subtag is illegal - quit parsing */
2410 break;
2411 }
2412
2413 if (pExtension != NULL) {
2414 /* Process the last extension */
2415 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2416 /* the previous extension is incomplete */
2417 uprv_free(pExtension);
2418 } else {
2419 /* terminate the previous extension value */
2420 *pExtValueSubtagEnd = 0;
2421 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2422 /* insert the extension to the list */
2423 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2424 pLastGoodPosition = pExtValueSubtagEnd;
2425 } else {
2426 uprv_free(pExtension);
2427 }
2428 }
2429 }
2430
2431 if (parsedLen != NULL) {
2432 *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
2433 }
2434
2435 return t;
2436
2437 error:
2438 uprv_free(t);
2439 return NULL;
2440 }
2441
2442 static void
2443 ultag_close(ULanguageTag* langtag) {
2444
2445 if (langtag == NULL) {
2446 return;
2447 }
2448
2449 uprv_free(langtag->buf);
2450
2451 if (langtag->variants) {
2452 VariantListEntry *curVar = langtag->variants;
2453 while (curVar) {
2454 VariantListEntry *nextVar = curVar->next;
2455 uprv_free(curVar);
2456 curVar = nextVar;
2457 }
2458 }
2459
2460 if (langtag->extensions) {
2461 ExtensionListEntry *curExt = langtag->extensions;
2462 while (curExt) {
2463 ExtensionListEntry *nextExt = curExt->next;
2464 uprv_free(curExt);
2465 curExt = nextExt;
2466 }
2467 }
2468
2469 if (langtag->attributes) {
2470 AttributeListEntry *curAttr = langtag->attributes;
2471 while (curAttr) {
2472 AttributeListEntry *nextAttr = curAttr->next;
2473 uprv_free(curAttr);
2474 curAttr = nextAttr;
2475 }
2476 }
2477
2478 uprv_free(langtag);
2479 }
2480
2481 static const char*
2482 ultag_getLanguage(const ULanguageTag* langtag) {
2483 return langtag->language;
2484 }
2485
#if 0
/*
 * Looks the tag's language up in the DEPRECATEDLANGS pair table and returns
 * the paired entry; returns the language itself when it is not listed.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t idx = 0;
    while (DEPRECATEDLANGS[idx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[idx], langtag->language) == 0) {
            return DEPRECATEDLANGS[idx + 1];
        }
        idx += 2;
    }
    return langtag->language;
}
#endif
2498
2499 static const char*
2500 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2501 if (idx >= 0 && idx < MAXEXTLANG) {
2502 return langtag->extlang[idx];
2503 }
2504 return NULL;
2505 }
2506
2507 static int32_t
2508 ultag_getExtlangSize(const ULanguageTag* langtag) {
2509 int32_t size = 0;
2510 int32_t i;
2511 for (i = 0; i < MAXEXTLANG; i++) {
2512 if (langtag->extlang[i]) {
2513 size++;
2514 }
2515 }
2516 return size;
2517 }
2518
2519 static const char*
2520 ultag_getScript(const ULanguageTag* langtag) {
2521 return langtag->script;
2522 }
2523
2524 static const char*
2525 ultag_getRegion(const ULanguageTag* langtag) {
2526 return langtag->region;
2527 }
2528
2529 static const char*
2530 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2531 const char *var = NULL;
2532 VariantListEntry *cur = langtag->variants;
2533 int32_t i = 0;
2534 while (cur) {
2535 if (i == idx) {
2536 var = cur->variant;
2537 break;
2538 }
2539 cur = cur->next;
2540 i++;
2541 }
2542 return var;
2543 }
2544
2545 static int32_t
2546 ultag_getVariantsSize(const ULanguageTag* langtag) {
2547 int32_t size = 0;
2548 VariantListEntry *cur = langtag->variants;
2549 while (TRUE) {
2550 if (cur == NULL) {
2551 break;
2552 }
2553 size++;
2554 cur = cur->next;
2555 }
2556 return size;
2557 }
2558
#if 0
/* Currently not being used. */
/*
 * Returns the attribute string of the idx-th entry (0-based) in the tag's
 * attribute list, or NULL when the list has no entry at that position.
 */
static const char*
ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) {
    const AttributeListEntry *node;
    int32_t pos;

    for (node = langtag->attributes, pos = 0; node != NULL; node = node->next, pos++) {
        if (pos == idx) {
            return node->attribute;
        }
    }
    return NULL;
}
#endif
2577
2578 static int32_t
2579 ultag_getAttributesSize(const ULanguageTag* langtag) {
2580 int32_t size = 0;
2581 AttributeListEntry *cur = langtag->attributes;
2582 while (TRUE) {
2583 if (cur == NULL) {
2584 break;
2585 }
2586 size++;
2587 cur = cur->next;
2588 }
2589 return size;
2590 }
2591
2592 static const char*
2593 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2594 const char *key = NULL;
2595 ExtensionListEntry *cur = langtag->extensions;
2596 int32_t i = 0;
2597 while (cur) {
2598 if (i == idx) {
2599 key = cur->key;
2600 break;
2601 }
2602 cur = cur->next;
2603 i++;
2604 }
2605 return key;
2606 }
2607
2608 static const char*
2609 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2610 const char *val = NULL;
2611 ExtensionListEntry *cur = langtag->extensions;
2612 int32_t i = 0;
2613 while (cur) {
2614 if (i == idx) {
2615 val = cur->value;
2616 break;
2617 }
2618 cur = cur->next;
2619 i++;
2620 }
2621 return val;
2622 }
2623
2624 static int32_t
2625 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2626 int32_t size = 0;
2627 ExtensionListEntry *cur = langtag->extensions;
2628 while (TRUE) {
2629 if (cur == NULL) {
2630 break;
2631 }
2632 size++;
2633 cur = cur->next;
2634 }
2635 return size;
2636 }
2637
2638 static const char*
2639 ultag_getPrivateUse(const ULanguageTag* langtag) {
2640 return langtag->privateuse;
2641 }
2642
#if 0
/* Disabled code.  Accessor: hands back the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2649
2650
2651 /*
2652 * -------------------------------------------------
2653 *
2654 * Locale/BCP47 conversion APIs, exposed as uloc_*
2655 *
2656 * -------------------------------------------------
2657 */
2658 U_DRAFT int32_t U_EXPORT2
2659 uloc_toLanguageTag(const char* localeID,
2660 char* langtag,
2661 int32_t langtagCapacity,
2662 UBool strict,
2663 UErrorCode* status) {
2664 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2665 char canonical[256];
2666 int32_t reslen = 0;
2667 UErrorCode tmpStatus = U_ZERO_ERROR;
2668 UBool hadPosix = FALSE;
2669 const char* pKeywordStart;
2670
2671 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2672 canonical[0] = 0;
2673 if (uprv_strlen(localeID) > 0) {
2674 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2675 if (tmpStatus != U_ZERO_ERROR) {
2676 *status = U_ILLEGAL_ARGUMENT_ERROR;
2677 return 0;
2678 }
2679 }
2680
2681 /* For handling special case - private use only tag */
2682 pKeywordStart = locale_getKeywordsStart(canonical);
2683 if (pKeywordStart == canonical) {
2684 UEnumeration *kwdEnum;
2685 int kwdCnt = 0;
2686 UBool done = FALSE;
2687
2688 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2689 if (kwdEnum != NULL) {
2690 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2691 if (kwdCnt == 1) {
2692 const char *key;
2693 int32_t len = 0;
2694
2695 key = uenum_next(kwdEnum, &len, &tmpStatus);
2696 if (len == 1 && *key == PRIVATEUSE) {
2697 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2698 buf[0] = PRIVATEUSE;
2699 buf[1] = SEP;
2700 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2701 if (U_SUCCESS(tmpStatus)) {
2702 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2703 /* return private use only tag */
2704 reslen = len + 2;
2705 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2706 u_terminateChars(langtag, langtagCapacity, reslen, status);
2707 done = TRUE;
2708 } else if (strict) {
2709 *status = U_ILLEGAL_ARGUMENT_ERROR;
2710 done = TRUE;
2711 }
2712 /* if not strict mode, then "und" will be returned */
2713 } else {
2714 *status = U_ILLEGAL_ARGUMENT_ERROR;
2715 done = TRUE;
2716 }
2717 }
2718 }
2719 uenum_close(kwdEnum);
2720 if (done) {
2721 return reslen;
2722 }
2723 }
2724 }
2725
2726 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2727 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2728 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2729 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2730 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2731 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2732
2733 return reslen;
2734 }
2735
2736
2737 U_DRAFT int32_t U_EXPORT2
2738 uloc_forLanguageTag(const char* langtag,
2739 char* localeID,
2740 int32_t localeIDCapacity,
2741 int32_t* parsedLength,
2742 UErrorCode* status) {
2743 ULanguageTag *lt;
2744 int32_t reslen = 0;
2745 const char *subtag, *p;
2746 int32_t len;
2747 int32_t i, n, m;
2748 UBool noRegion = TRUE;
2749
2750 lt = ultag_parse(langtag, -1, parsedLength, status);
2751 if (U_FAILURE(*status)) {
2752 return 0;
2753 }
2754
2755 /* language */
2756 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2757 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2758 len = (int32_t)uprv_strlen(subtag);
2759 if (len > 0) {
2760 if (reslen < localeIDCapacity) {
2761 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2762 }
2763 reslen += len;
2764 }
2765 }
2766
2767 /* script */
2768 subtag = ultag_getScript(lt);
2769 len = (int32_t)uprv_strlen(subtag);
2770 if (len > 0) {
2771 if (reslen < localeIDCapacity) {
2772 *(localeID + reslen) = LOCALE_SEP;
2773 }
2774 reslen++;
2775
2776 /* write out the script in title case */
2777 p = subtag;
2778 while (*p) {
2779 if (reslen < localeIDCapacity) {
2780 if (p == subtag) {
2781 *(localeID + reslen) = uprv_toupper(*p);
2782 } else {
2783 *(localeID + reslen) = *p;
2784 }
2785 }
2786 reslen++;
2787 p++;
2788 }
2789 }
2790
2791 /* region */
2792 subtag = ultag_getRegion(lt);
2793 len = (int32_t)uprv_strlen(subtag);
2794 if (len > 0) {
2795 if (reslen < localeIDCapacity) {
2796 *(localeID + reslen) = LOCALE_SEP;
2797 }
2798 reslen++;
2799 /* write out the retion in upper case */
2800 p = subtag;
2801 while (*p) {
2802 if (reslen < localeIDCapacity) {
2803 *(localeID + reslen) = uprv_toupper(*p);
2804 }
2805 reslen++;
2806 p++;
2807 }
2808 noRegion = FALSE;
2809 }
2810
2811 /* variants */
2812 n = ultag_getVariantsSize(lt);
2813 if (n > 0) {
2814 if (noRegion) {
2815 if (reslen < localeIDCapacity) {
2816 *(localeID + reslen) = LOCALE_SEP;
2817 }
2818 reslen++;
2819 }
2820
2821 for (i = 0; i < n; i++) {
2822 subtag = ultag_getVariant(lt, i);
2823 if (reslen < localeIDCapacity) {
2824 *(localeID + reslen) = LOCALE_SEP;
2825 }
2826 reslen++;
2827 /* write out the variant in upper case */
2828 p = subtag;
2829 while (*p) {
2830 if (reslen < localeIDCapacity) {
2831 *(localeID + reslen) = uprv_toupper(*p);
2832 }
2833 reslen++;
2834 p++;
2835 }
2836 }
2837 }
2838
2839 /* keywords */
2840 n = ultag_getExtensionsSize(lt);
2841 m = ultag_getAttributesSize(lt);
2842 subtag = ultag_getPrivateUse(lt);
2843 if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) {
2844 if (reslen == 0 && (n > 0 || m > 0)) {
2845 /* need a language */
2846 if (reslen < localeIDCapacity) {
2847 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2848 }
2849 reslen += LANG_UND_LEN;
2850 }
2851 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2852 reslen += len;
2853 }
2854
2855 ultag_close(lt);
2856 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2857 }
2858
2859