]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc_tag.c
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / common / uloc_tag.c
1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18 #include "uassert.h"
19
/* Number of elements in a real array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof(*(array)))
21
/* One node of a singly linked list of variant subtags. */
typedef struct VariantListEntry {
    const char              *variant;   /* the variant string for this node */
    struct VariantListEntry *next;      /* following node, NULL at the tail */
} VariantListEntry;
27
/* One node of a singly linked list of attribute values. */
typedef struct AttributeListEntry {
    const char                *attribute;   /* the attribute string for this node */
    struct AttributeListEntry *next;        /* following node, NULL at the tail */
} AttributeListEntry;
33
/* One node of a singly linked list of extensions (key/value pairs). */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key */
    const char                *value;   /* value associated with the key */
    struct ExtensionListEntry *next;    /* following node, NULL at the tail */
} ExtensionListEntry;
40
41 #define MAXEXTLANG 3
42 typedef struct ULanguageTag {
43 char *buf; /* holding parsed subtags */
44 const char *language;
45 const char *extlang[MAXEXTLANG];
46 const char *script;
47 const char *region;
48 VariantListEntry *variants;
49 ExtensionListEntry *extensions;
50 const char *privateuse;
51 const char *grandfathered;
52 } ULanguageTag;
53
#define MINLEN 2

/* Characters with a fixed meaning inside a language tag */
#define SEP         '-'     /* separates subtags */
#define PRIVATEUSE  'x'     /* private use singleton */
#define LDMLEXT     'u'     /* singleton of the LDML (Unicode locale) extension */

/* Separator characters of the locale ID syntax */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification; note that ISNUMERIC evaluates its argument twice */
#define ISALPHA(c)      uprv_isASCIILetter(c)
#define ISNUMERIC(c)    ('0' <= (c) && (c) <= '9')
66
/* Shared string constants */
static const char EMPTY[]                  = "";
static const char LANG_UND[]               = "und";         /* written when no usable language is available */
static const char PRIVATEUSE_KEY[]         = "x";
static const char _POSIX[]                 = "_POSIX";
static const char POSIX_KEY[]              = "va";
static const char POSIX_VALUE[]            = "posix";       /* lower-cased POSIX variant */
static const char LOCALE_ATTRIBUTE_KEY[]   = "attribute";   /* keyword carrying Unicode locale attributes */
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[]        = "yes";

/* strlen(LANG_UND) */
#define LANG_UND_LEN 3
78
/*
 * Grandfathered tags paired with their preferred replacements.
 * Even index: grandfathered tag; odd index: preferred value.
 * The table ends with a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
109
/*
 * Deprecated language codes paired with their replacements.
 * Even index: deprecated code; odd index: the code to use instead.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
116
117 /*
118 * -------------------------------------------------
119 *
120 * These ultag_ functions may be exposed as APIs later
121 *
122 * -------------------------------------------------
123 */
124
125 static ULanguageTag*
126 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
127
128 static void
129 ultag_close(ULanguageTag* langtag);
130
131 static const char*
132 ultag_getLanguage(const ULanguageTag* langtag);
133
134 #if 0
135 static const char*
136 ultag_getJDKLanguage(const ULanguageTag* langtag);
137 #endif
138
139 static const char*
140 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
141
142 static int32_t
143 ultag_getExtlangSize(const ULanguageTag* langtag);
144
145 static const char*
146 ultag_getScript(const ULanguageTag* langtag);
147
148 static const char*
149 ultag_getRegion(const ULanguageTag* langtag);
150
151 static const char*
152 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
153
154 static int32_t
155 ultag_getVariantsSize(const ULanguageTag* langtag);
156
157 static const char*
158 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
159
160 static const char*
161 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
162
163 static int32_t
164 ultag_getExtensionsSize(const ULanguageTag* langtag);
165
166 static const char*
167 ultag_getPrivateUse(const ULanguageTag* langtag);
168
169 #if 0
170 static const char*
171 ultag_getGrandfathered(const ULanguageTag* langtag);
172 #endif
173
174 /*
175 * -------------------------------------------------
176 *
177 * Language subtag syntax validation functions
178 *
179 * -------------------------------------------------
180 */
181
182 static UBool
183 _isAlphaString(const char* s, int32_t len) {
184 int32_t i;
185 for (i = 0; i < len; i++) {
186 if (!ISALPHA(*(s + i))) {
187 return FALSE;
188 }
189 }
190 return TRUE;
191 }
192
193 static UBool
194 _isNumericString(const char* s, int32_t len) {
195 int32_t i;
196 for (i = 0; i < len; i++) {
197 if (!ISNUMERIC(*(s + i))) {
198 return FALSE;
199 }
200 }
201 return TRUE;
202 }
203
204 static UBool
205 _isAlphaNumericString(const char* s, int32_t len) {
206 int32_t i;
207 for (i = 0; i < len; i++) {
208 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
209 return FALSE;
210 }
211 }
212 return TRUE;
213 }
214
215 static UBool
216 _isLanguageSubtag(const char* s, int32_t len) {
217 /*
218 * language = 2*3ALPHA ; shortest ISO 639 code
219 * ["-" extlang] ; sometimes followed by
220 * ; extended language subtags
221 * / 4ALPHA ; or reserved for future use
222 * / 5*8ALPHA ; or registered language subtag
223 */
224 if (len < 0) {
225 len = (int32_t)uprv_strlen(s);
226 }
227 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
228 return TRUE;
229 }
230 return FALSE;
231 }
232
233 static UBool
234 _isExtlangSubtag(const char* s, int32_t len) {
235 /*
236 * extlang = 3ALPHA ; selected ISO 639 codes
237 * *2("-" 3ALPHA) ; permanently reserved
238 */
239 if (len < 0) {
240 len = (int32_t)uprv_strlen(s);
241 }
242 if (len == 3 && _isAlphaString(s, len)) {
243 return TRUE;
244 }
245 return FALSE;
246 }
247
248 static UBool
249 _isScriptSubtag(const char* s, int32_t len) {
250 /*
251 * script = 4ALPHA ; ISO 15924 code
252 */
253 if (len < 0) {
254 len = (int32_t)uprv_strlen(s);
255 }
256 if (len == 4 && _isAlphaString(s, len)) {
257 return TRUE;
258 }
259 return FALSE;
260 }
261
262 static UBool
263 _isRegionSubtag(const char* s, int32_t len) {
264 /*
265 * region = 2ALPHA ; ISO 3166-1 code
266 * / 3DIGIT ; UN M.49 code
267 */
268 if (len < 0) {
269 len = (int32_t)uprv_strlen(s);
270 }
271 if (len == 2 && _isAlphaString(s, len)) {
272 return TRUE;
273 }
274 if (len == 3 && _isNumericString(s, len)) {
275 return TRUE;
276 }
277 return FALSE;
278 }
279
280 static UBool
281 _isVariantSubtag(const char* s, int32_t len) {
282 /*
283 * variant = 5*8alphanum ; registered variants
284 * / (DIGIT 3alphanum)
285 */
286 if (len < 0) {
287 len = (int32_t)uprv_strlen(s);
288 }
289 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
290 return TRUE;
291 }
292 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
293 return TRUE;
294 }
295 return FALSE;
296 }
297
298 static UBool
299 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
300 /*
301 * variant = 1*8alphanum ; registered variants
302 * / (DIGIT 3alphanum)
303 */
304 if (len < 0) {
305 len = (int32_t)uprv_strlen(s);
306 }
307 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
308 return TRUE;
309 }
310 return FALSE;
311 }
312
313 static UBool
314 _isExtensionSingleton(const char* s, int32_t len) {
315 /*
316 * extension = singleton 1*("-" (2*8alphanum))
317 */
318 if (len < 0) {
319 len = (int32_t)uprv_strlen(s);
320 }
321 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
322 return TRUE;
323 }
324 return FALSE;
325 }
326
327 static UBool
328 _isExtensionSubtag(const char* s, int32_t len) {
329 /*
330 * extension = singleton 1*("-" (2*8alphanum))
331 */
332 if (len < 0) {
333 len = (int32_t)uprv_strlen(s);
334 }
335 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
336 return TRUE;
337 }
338 return FALSE;
339 }
340
341 static UBool
342 _isExtensionSubtags(const char* s, int32_t len) {
343 const char *p = s;
344 const char *pSubtag = NULL;
345
346 if (len < 0) {
347 len = (int32_t)uprv_strlen(s);
348 }
349
350 while ((p - s) < len) {
351 if (*p == SEP) {
352 if (pSubtag == NULL) {
353 return FALSE;
354 }
355 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
356 return FALSE;
357 }
358 pSubtag = NULL;
359 } else if (pSubtag == NULL) {
360 pSubtag = p;
361 }
362 p++;
363 }
364 if (pSubtag == NULL) {
365 return FALSE;
366 }
367 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
368 }
369
370 static UBool
371 _isPrivateuseValueSubtag(const char* s, int32_t len) {
372 /*
373 * privateuse = "x" 1*("-" (1*8alphanum))
374 */
375 if (len < 0) {
376 len = (int32_t)uprv_strlen(s);
377 }
378 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
379 return TRUE;
380 }
381 return FALSE;
382 }
383
384 static UBool
385 _isPrivateuseValueSubtags(const char* s, int32_t len) {
386 const char *p = s;
387 const char *pSubtag = NULL;
388
389 if (len < 0) {
390 len = (int32_t)uprv_strlen(s);
391 }
392
393 while ((p - s) < len) {
394 if (*p == SEP) {
395 if (pSubtag == NULL) {
396 return FALSE;
397 }
398 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
399 return FALSE;
400 }
401 pSubtag = NULL;
402 } else if (pSubtag == NULL) {
403 pSubtag = p;
404 }
405 p++;
406 }
407 if (pSubtag == NULL) {
408 return FALSE;
409 }
410 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
411 }
412
413 static UBool
414 _isLDMLKey(const char* s, int32_t len) {
415 if (len < 0) {
416 len = (int32_t)uprv_strlen(s);
417 }
418 if (len == 2 && _isAlphaNumericString(s, len)) {
419 return TRUE;
420 }
421 return FALSE;
422 }
423
424 static UBool
425 _isLDMLType(const char* s, int32_t len) {
426 if (len < 0) {
427 len = (int32_t)uprv_strlen(s);
428 }
429 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
430 return TRUE;
431 }
432 return FALSE;
433 }
434
435 /*
436 * -------------------------------------------------
437 *
438 * Helper functions
439 *
440 * -------------------------------------------------
441 */
442
443 static UBool
444 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
445 UBool bAdded = TRUE;
446
447 if (*first == NULL) {
448 var->next = NULL;
449 *first = var;
450 } else {
451 VariantListEntry *prev, *cur;
452 int32_t cmp;
453
454 /* variants order should be preserved */
455 prev = NULL;
456 cur = *first;
457 while (TRUE) {
458 if (cur == NULL) {
459 prev->next = var;
460 var->next = NULL;
461 break;
462 }
463
464 /* Checking for duplicate variant */
465 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
466 if (cmp == 0) {
467 /* duplicated variant */
468 bAdded = FALSE;
469 break;
470 }
471 prev = cur;
472 cur = cur->next;
473 }
474 }
475
476 return bAdded;
477 }
478
479 static UBool
480 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
481 UBool bAdded = TRUE;
482
483 if (*first == NULL) {
484 attr->next = NULL;
485 *first = attr;
486 } else {
487 AttributeListEntry *prev, *cur;
488 int32_t cmp;
489
490 /* reorder variants in alphabetical order */
491 prev = NULL;
492 cur = *first;
493 while (TRUE) {
494 if (cur == NULL) {
495 prev->next = attr;
496 attr->next = NULL;
497 break;
498 }
499 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
500 if (cmp < 0) {
501 if (prev == NULL) {
502 *first = attr;
503 } else {
504 prev->next = attr;
505 }
506 attr->next = cur;
507 break;
508 }
509 if (cmp == 0) {
510 /* duplicated variant */
511 bAdded = FALSE;
512 break;
513 }
514 prev = cur;
515 cur = cur->next;
516 }
517 }
518
519 return bAdded;
520 }
521
522
523 static UBool
524 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
525 UBool bAdded = TRUE;
526
527 if (*first == NULL) {
528 ext->next = NULL;
529 *first = ext;
530 } else {
531 ExtensionListEntry *prev, *cur;
532 int32_t cmp;
533
534 /* reorder variants in alphabetical order */
535 prev = NULL;
536 cur = *first;
537 while (TRUE) {
538 if (cur == NULL) {
539 prev->next = ext;
540 ext->next = NULL;
541 break;
542 }
543 if (localeToBCP) {
544 /* special handling for locale to bcp conversion */
545 int32_t len, curlen;
546
547 len = (int32_t)uprv_strlen(ext->key);
548 curlen = (int32_t)uprv_strlen(cur->key);
549
550 if (len == 1 && curlen == 1) {
551 if (*(ext->key) == *(cur->key)) {
552 cmp = 0;
553 } else if (*(ext->key) == PRIVATEUSE) {
554 cmp = 1;
555 } else if (*(cur->key) == PRIVATEUSE) {
556 cmp = -1;
557 } else {
558 cmp = *(ext->key) - *(cur->key);
559 }
560 } else if (len == 1) {
561 cmp = *(ext->key) - LDMLEXT;
562 } else if (curlen == 1) {
563 cmp = LDMLEXT - *(cur->key);
564 } else {
565 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
566 }
567 } else {
568 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
569 }
570 if (cmp < 0) {
571 if (prev == NULL) {
572 *first = ext;
573 } else {
574 prev->next = ext;
575 }
576 ext->next = cur;
577 break;
578 }
579 if (cmp == 0) {
580 /* duplicated extension key */
581 bAdded = FALSE;
582 break;
583 }
584 prev = cur;
585 cur = cur->next;
586 }
587 }
588
589 return bAdded;
590 }
591
592 static void
593 _initializeULanguageTag(ULanguageTag* langtag) {
594 int32_t i;
595
596 langtag->buf = NULL;
597
598 langtag->language = EMPTY;
599 for (i = 0; i < MAXEXTLANG; i++) {
600 langtag->extlang[i] = NULL;
601 }
602
603 langtag->script = EMPTY;
604 langtag->region = EMPTY;
605
606 langtag->variants = NULL;
607 langtag->extensions = NULL;
608
609 langtag->grandfathered = EMPTY;
610 langtag->privateuse = EMPTY;
611 }
612
/* Name of the resource bundle holding the key/type data, and tables in it */
#define KEYTYPEDATA             "keyTypeData"
#define KEYMAP                  "keyMap"
#define TYPEMAP                 "typeMap"
#define TYPEALIAS               "typeAlias"

/* Sizes of the local conversion buffers */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22
#define MAX_LDML_TYPE_LEN       32
620
621 static int32_t
622 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
623 char* bcpKey, int32_t bcpKeyCapacity,
624 UErrorCode *status) {
625 UResourceBundle *rb;
626 char keyBuf[MAX_LDML_KEY_LEN];
627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
628 int32_t resultLen = 0;
629 int32_t i;
630 UErrorCode tmpStatus = U_ZERO_ERROR;
631 const UChar *uBcpKey;
632 int32_t bcpKeyLen;
633
634 if (keyLen < 0) {
635 keyLen = (int32_t)uprv_strlen(key);
636 }
637
638 if (keyLen >= sizeof(keyBuf)) {
639 /* no known valid LDML key exceeding 21 */
640 *status = U_ILLEGAL_ARGUMENT_ERROR;
641 return 0;
642 }
643
644 uprv_memcpy(keyBuf, key, keyLen);
645 keyBuf[keyLen] = 0;
646
647 /* to lower case */
648 for (i = 0; i < keyLen; i++) {
649 keyBuf[i] = uprv_tolower(keyBuf[i]);
650 }
651
652 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
653 ures_getByKey(rb, KEYMAP, rb, status);
654
655 if (U_FAILURE(*status)) {
656 ures_close(rb);
657 return 0;
658 }
659
660 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
661 if (U_SUCCESS(tmpStatus)) {
662 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
663 bcpKeyBuf[bcpKeyLen] = 0;
664 resultLen = bcpKeyLen;
665 } else {
666 if (_isLDMLKey(key, keyLen)) {
667 uprv_memcpy(bcpKeyBuf, key, keyLen);
668 bcpKeyBuf[keyLen] = 0;
669 resultLen = keyLen;
670 } else {
671 /* mapping not availabe */
672 *status = U_ILLEGAL_ARGUMENT_ERROR;
673 }
674 }
675 ures_close(rb);
676
677 if (U_FAILURE(*status)) {
678 return 0;
679 }
680
681 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
682 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
683 }
684
685 static int32_t
686 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
687 char* key, int32_t keyCapacity,
688 UErrorCode *status) {
689 UResourceBundle *rb;
690 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
691 int32_t resultLen = 0;
692 int32_t i;
693 const char *resKey = NULL;
694 UResourceBundle *mapData;
695
696 if (bcpKeyLen < 0) {
697 bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
698 }
699
700 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
701 *status = U_ILLEGAL_ARGUMENT_ERROR;
702 return 0;
703 }
704
705 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
706 bcpKeyBuf[bcpKeyLen] = 0;
707
708 /* to lower case */
709 for (i = 0; i < bcpKeyLen; i++) {
710 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
711 }
712
713 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
714 ures_getByKey(rb, KEYMAP, rb, status);
715 if (U_FAILURE(*status)) {
716 ures_close(rb);
717 return 0;
718 }
719
720 mapData = ures_getNextResource(rb, NULL, status);
721 while (U_SUCCESS(*status)) {
722 const UChar *uBcpKey;
723 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
724 int32_t tmpBcpKeyLen;
725
726 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
727 if (U_FAILURE(*status)) {
728 break;
729 }
730 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
731 tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
732 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
733 /* found a matching BCP47 key */
734 resKey = ures_getKey(mapData);
735 resultLen = (int32_t)uprv_strlen(resKey);
736 break;
737 }
738 if (!ures_hasNext(rb)) {
739 break;
740 }
741 ures_getNextResource(rb, mapData, status);
742 }
743 ures_close(mapData);
744 ures_close(rb);
745
746 if (U_FAILURE(*status)) {
747 return 0;
748 }
749
750 if (resKey == NULL) {
751 resKey = bcpKeyBuf;
752 resultLen = bcpKeyLen;
753 }
754
755 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
756 return u_terminateChars(key, keyCapacity, resultLen, status);
757 }
758
759 static int32_t
760 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
761 const char* type, int32_t typeLen,
762 char* bcpType, int32_t bcpTypeCapacity,
763 UErrorCode *status) {
764 UResourceBundle *rb, *keyTypeData, *typeMapForKey;
765 char keyBuf[MAX_LDML_KEY_LEN];
766 char typeBuf[MAX_LDML_TYPE_LEN];
767 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
768 int32_t resultLen = 0;
769 int32_t i;
770 UErrorCode tmpStatus = U_ZERO_ERROR;
771 const UChar *uBcpType, *uCanonicalType;
772 int32_t bcpTypeLen, canonicalTypeLen;
773 UBool isTimezone = FALSE;
774
775 if (keyLen < 0) {
776 keyLen = (int32_t)uprv_strlen(key);
777 }
778 if (keyLen >= sizeof(keyBuf)) {
779 /* no known valid LDML key exceeding 21 */
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
781 return 0;
782 }
783 uprv_memcpy(keyBuf, key, keyLen);
784 keyBuf[keyLen] = 0;
785
786 /* to lower case */
787 for (i = 0; i < keyLen; i++) {
788 keyBuf[i] = uprv_tolower(keyBuf[i]);
789 }
790 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
791 isTimezone = TRUE;
792 }
793
794 if (typeLen < 0) {
795 typeLen = (int32_t)uprv_strlen(type);
796 }
797 if (typeLen >= sizeof(typeBuf)) {
798 *status = U_ILLEGAL_ARGUMENT_ERROR;
799 return 0;
800 }
801
802 if (isTimezone) {
803 /* replace '/' with ':' */
804 for (i = 0; i < typeLen; i++) {
805 if (*(type + i) == '/') {
806 typeBuf[i] = ':';
807 } else {
808 typeBuf[i] = *(type + i);
809 }
810 }
811 typeBuf[typeLen] = 0;
812 type = &typeBuf[0];
813 }
814
815 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
816 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
817 if (U_FAILURE(*status)) {
818 ures_close(rb);
819 ures_close(keyTypeData);
820 return 0;
821 }
822
823 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
824 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
825 if (U_SUCCESS(tmpStatus)) {
826 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
827 resultLen = bcpTypeLen;
828 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
829 /* is this type alias? */
830 tmpStatus = U_ZERO_ERROR;
831 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
832 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
833 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
834 if (U_SUCCESS(tmpStatus)) {
835 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
836 if (isTimezone) {
837 /* replace '/' with ':' */
838 for (i = 0; i < canonicalTypeLen; i++) {
839 if (typeBuf[i] == '/') {
840 typeBuf[i] = ':';
841 }
842 }
843 }
844 typeBuf[canonicalTypeLen] = 0;
845
846 /* look up the canonical type */
847 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
848 if (U_SUCCESS(tmpStatus)) {
849 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
850 resultLen = bcpTypeLen;
851 }
852 }
853 if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
854 if (_isLDMLType(type, typeLen)) {
855 uprv_memcpy(bcpTypeBuf, type, typeLen);
856 resultLen = typeLen;
857 } else {
858 /* mapping not availabe */
859 *status = U_ILLEGAL_ARGUMENT_ERROR;
860 }
861 }
862 } else {
863 *status = tmpStatus;
864 }
865 ures_close(rb);
866 ures_close(typeMapForKey);
867 ures_close(keyTypeData);
868
869 if (U_FAILURE(*status)) {
870 return 0;
871 }
872
873 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
874 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
875 }
876
877 static int32_t
878 _bcp47ToLDMLType(const char* key, int32_t keyLen,
879 const char* bcpType, int32_t bcpTypeLen,
880 char* type, int32_t typeCapacity,
881 UErrorCode *status) {
882 UResourceBundle *rb;
883 char keyBuf[MAX_LDML_KEY_LEN];
884 char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
885 int32_t resultLen = 0;
886 int32_t i, typeSize;
887 const char *resType = NULL;
888 UResourceBundle *mapData;
889 UErrorCode tmpStatus = U_ZERO_ERROR;
890 int32_t copyLen;
891
892 if (keyLen < 0) {
893 keyLen = (int32_t)uprv_strlen(key);
894 }
895
896 if (keyLen >= sizeof(keyBuf)) {
897 /* no known valid LDML key exceeding 21 */
898 *status = U_ILLEGAL_ARGUMENT_ERROR;
899 return 0;
900 }
901 uprv_memcpy(keyBuf, key, keyLen);
902 keyBuf[keyLen] = 0;
903
904 /* to lower case */
905 for (i = 0; i < keyLen; i++) {
906 keyBuf[i] = uprv_tolower(keyBuf[i]);
907 }
908
909
910 if (bcpTypeLen < 0) {
911 bcpTypeLen = (int32_t)uprv_strlen(bcpType);
912 }
913
914 typeSize = 0;
915 for (i = 0; i < bcpTypeLen; i++) {
916 if (bcpType[i] == SEP) {
917 if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
918 *status = U_ILLEGAL_ARGUMENT_ERROR;
919 return 0;
920 }
921 typeSize = 0;
922 } else {
923 typeSize++;
924 }
925 }
926
927 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
928 bcpTypeBuf[bcpTypeLen] = 0;
929
930 /* to lower case */
931 for (i = 0; i < bcpTypeLen; i++) {
932 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
933 }
934
935 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
936 ures_getByKey(rb, TYPEMAP, rb, status);
937 if (U_FAILURE(*status)) {
938 ures_close(rb);
939 return 0;
940 }
941
942 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
943 mapData = ures_getNextResource(rb, NULL, &tmpStatus);
944 while (U_SUCCESS(tmpStatus)) {
945 const UChar *uBcpType;
946 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
947 int32_t tmpBcpTypeLen;
948
949 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
950 if (U_FAILURE(tmpStatus)) {
951 break;
952 }
953 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
954 tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
955 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
956 /* found a matching BCP47 type */
957 resType = ures_getKey(mapData);
958 resultLen = (int32_t)uprv_strlen(resType);
959 break;
960 }
961 if (!ures_hasNext(rb)) {
962 break;
963 }
964 ures_getNextResource(rb, mapData, &tmpStatus);
965 }
966 ures_close(mapData);
967 ures_close(rb);
968
969 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
970 *status = tmpStatus;
971 return 0;
972 }
973
974 if (resType == NULL) {
975 resType = bcpTypeBuf;
976 resultLen = bcpTypeLen;
977 }
978
979 copyLen = uprv_min(resultLen, typeCapacity);
980 uprv_memcpy(type, resType, copyLen);
981
982 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
983 for (i = 0; i < copyLen; i++) {
984 if (*(type + i) == ':') {
985 *(type + i) = '/';
986 }
987 }
988 }
989
990 return u_terminateChars(type, typeCapacity, resultLen, status);
991 }
992
993 static int32_t
994 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
995 char buf[ULOC_LANG_CAPACITY];
996 UErrorCode tmpStatus = U_ZERO_ERROR;
997 int32_t len, i;
998 int32_t reslen = 0;
999
1000 if (U_FAILURE(*status)) {
1001 return 0;
1002 }
1003
1004 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1005 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1006 if (strict) {
1007 *status = U_ILLEGAL_ARGUMENT_ERROR;
1008 return 0;
1009 }
1010 len = 0;
1011 }
1012
1013 /* Note: returned language code is in lower case letters */
1014
1015 if (len == 0) {
1016 if (reslen < capacity) {
1017 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1018 }
1019 reslen += LANG_UND_LEN;
1020 } else if (!_isLanguageSubtag(buf, len)) {
1021 /* invalid language code */
1022 if (strict) {
1023 *status = U_ILLEGAL_ARGUMENT_ERROR;
1024 return 0;
1025 }
1026 if (reslen < capacity) {
1027 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1028 }
1029 reslen += LANG_UND_LEN;
1030 } else {
1031 /* resolve deprecated */
1032 for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) {
1033 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1034 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1035 len = (int32_t)uprv_strlen(buf);
1036 break;
1037 }
1038 }
1039 if (reslen < capacity) {
1040 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1041 }
1042 reslen += len;
1043 }
1044 u_terminateChars(appendAt, capacity, reslen, status);
1045 return reslen;
1046 }
1047
1048 static int32_t
1049 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1050 char buf[ULOC_SCRIPT_CAPACITY];
1051 UErrorCode tmpStatus = U_ZERO_ERROR;
1052 int32_t len;
1053 int32_t reslen = 0;
1054
1055 if (U_FAILURE(*status)) {
1056 return 0;
1057 }
1058
1059 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1060 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1061 if (strict) {
1062 *status = U_ILLEGAL_ARGUMENT_ERROR;
1063 }
1064 return 0;
1065 }
1066
1067 if (len > 0) {
1068 if (!_isScriptSubtag(buf, len)) {
1069 /* invalid script code */
1070 if (strict) {
1071 *status = U_ILLEGAL_ARGUMENT_ERROR;
1072 }
1073 return 0;
1074 } else {
1075 if (reslen < capacity) {
1076 *(appendAt + reslen) = SEP;
1077 }
1078 reslen++;
1079
1080 if (reslen < capacity) {
1081 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1082 }
1083 reslen += len;
1084 }
1085 }
1086 u_terminateChars(appendAt, capacity, reslen, status);
1087 return reslen;
1088 }
1089
1090 static int32_t
1091 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1092 char buf[ULOC_COUNTRY_CAPACITY];
1093 UErrorCode tmpStatus = U_ZERO_ERROR;
1094 int32_t len;
1095 int32_t reslen = 0;
1096
1097 if (U_FAILURE(*status)) {
1098 return 0;
1099 }
1100
1101 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1102 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1103 if (strict) {
1104 *status = U_ILLEGAL_ARGUMENT_ERROR;
1105 }
1106 return 0;
1107 }
1108
1109 if (len > 0) {
1110 if (!_isRegionSubtag(buf, len)) {
1111 /* invalid region code */
1112 if (strict) {
1113 *status = U_ILLEGAL_ARGUMENT_ERROR;
1114 }
1115 return 0;
1116 } else {
1117 if (reslen < capacity) {
1118 *(appendAt + reslen) = SEP;
1119 }
1120 reslen++;
1121
1122 if (reslen < capacity) {
1123 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1124 }
1125 reslen += len;
1126 }
1127 }
1128 u_terminateChars(appendAt, capacity, reslen, status);
1129 return reslen;
1130 }
1131
1132 static int32_t
1133 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1134 char buf[ULOC_FULLNAME_CAPACITY];
1135 UErrorCode tmpStatus = U_ZERO_ERROR;
1136 int32_t len, i;
1137 int32_t reslen = 0;
1138
1139 if (U_FAILURE(*status)) {
1140 return 0;
1141 }
1142
1143 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1144 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1145 if (strict) {
1146 *status = U_ILLEGAL_ARGUMENT_ERROR;
1147 }
1148 return 0;
1149 }
1150
1151 if (len > 0) {
1152 char *p, *pVar;
1153 UBool bNext = TRUE;
1154 VariantListEntry *var;
1155 VariantListEntry *varFirst = NULL;
1156
1157 pVar = NULL;
1158 p = buf;
1159 while (bNext) {
1160 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1161 if (*p == 0) {
1162 bNext = FALSE;
1163 } else {
1164 *p = 0; /* terminate */
1165 }
1166 if (pVar == NULL) {
1167 if (strict) {
1168 *status = U_ILLEGAL_ARGUMENT_ERROR;
1169 break;
1170 }
1171 /* ignore empty variant */
1172 } else {
1173 /* ICU uses upper case letters for variants, but
1174 the canonical format is lowercase in BCP47 */
1175 for (i = 0; *(pVar + i) != 0; i++) {
1176 *(pVar + i) = uprv_tolower(*(pVar + i));
1177 }
1178
1179 /* validate */
1180 if (_isVariantSubtag(pVar, -1)) {
1181 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
1182 /* emit the variant to the list */
1183 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1184 if (var == NULL) {
1185 *status = U_MEMORY_ALLOCATION_ERROR;
1186 break;
1187 }
1188 var->variant = pVar;
1189 if (!_addVariantToList(&varFirst, var)) {
1190 /* duplicated variant */
1191 uprv_free(var);
1192 if (strict) {
1193 *status = U_ILLEGAL_ARGUMENT_ERROR;
1194 break;
1195 }
1196 }
1197 } else {
1198 /* Special handling for POSIX variant, need to remember that we had it and then */
1199 /* treat it like an extension later. */
1200 *hadPosix = TRUE;
1201 }
1202 } else if (strict) {
1203 *status = U_ILLEGAL_ARGUMENT_ERROR;
1204 break;
1205 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1206 /* Handle private use subtags separately */
1207 break;
1208 }
1209 }
1210 /* reset variant starting position */
1211 pVar = NULL;
1212 } else if (pVar == NULL) {
1213 pVar = p;
1214 }
1215 p++;
1216 }
1217
1218 if (U_SUCCESS(*status)) {
1219 if (varFirst != NULL) {
1220 int32_t varLen;
1221
1222 /* write out validated/normalized variants to the target */
1223 var = varFirst;
1224 while (var != NULL) {
1225 if (reslen < capacity) {
1226 *(appendAt + reslen) = SEP;
1227 }
1228 reslen++;
1229 varLen = (int32_t)uprv_strlen(var->variant);
1230 if (reslen < capacity) {
1231 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1232 }
1233 reslen += varLen;
1234 var = var->next;
1235 }
1236 }
1237 }
1238
1239 /* clean up */
1240 var = varFirst;
1241 while (var != NULL) {
1242 VariantListEntry *tmpVar = var->next;
1243 uprv_free(var);
1244 var = tmpVar;
1245 }
1246
1247 if (U_FAILURE(*status)) {
1248 return 0;
1249 }
1250 }
1251
1252 u_terminateChars(appendAt, capacity, reslen, status);
1253 return reslen;
1254 }
1255
1256 static int32_t
1257 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1258 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1259 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1260 int32_t attrBufLength = 0;
1261 UBool isAttribute = FALSE;
1262 UEnumeration *keywordEnum = NULL;
1263 int32_t reslen = 0;
1264
1265 keywordEnum = uloc_openKeywords(localeID, status);
1266 if (U_FAILURE(*status) && !hadPosix) {
1267 uenum_close(keywordEnum);
1268 return 0;
1269 }
1270 if (keywordEnum != NULL || hadPosix) {
1271 /* reorder extensions */
1272 int32_t len;
1273 const char *key;
1274 ExtensionListEntry *firstExt = NULL;
1275 ExtensionListEntry *ext;
1276 AttributeListEntry *firstAttr = NULL;
1277 AttributeListEntry *attr;
1278 char *attrValue;
1279 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1280 char *pExtBuf = extBuf;
1281 int32_t extBufCapacity = sizeof(extBuf);
1282 const char *bcpKey, *bcpValue;
1283 UErrorCode tmpStatus = U_ZERO_ERROR;
1284 int32_t keylen;
1285 UBool isLDMLKeyword;
1286
1287 while (TRUE) {
1288 isAttribute = FALSE;
1289 key = uenum_next(keywordEnum, NULL, status);
1290 if (key == NULL) {
1291 break;
1292 }
1293 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1294 if (U_FAILURE(tmpStatus)) {
1295 if (strict) {
1296 *status = U_ILLEGAL_ARGUMENT_ERROR;
1297 break;
1298 }
1299 /* ignore this keyword */
1300 tmpStatus = U_ZERO_ERROR;
1301 continue;
1302 }
1303
1304 keylen = (int32_t)uprv_strlen(key);
1305 isLDMLKeyword = (keylen > 1);
1306
1307 /* special keyword used for representing Unicode locale attributes */
1308 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1309 isAttribute = TRUE;
1310 if (len > 0) {
1311 int32_t i = 0;
1312 while (TRUE) {
1313 attrBufLength = 0;
1314 for (; i < len; i++) {
1315 if (buf[i] != '-') {
1316 attrBuf[attrBufLength++] = buf[i];
1317 } else {
1318 i++;
1319 break;
1320 }
1321 }
1322 if (attrBufLength > 0) {
1323 attrBuf[attrBufLength] = 0;
1324
1325 } else if (i >= len){
1326 break;
1327 }
1328
1329 /* create AttributeListEntry */
1330 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1331 if (attr == NULL) {
1332 *status = U_MEMORY_ALLOCATION_ERROR;
1333 break;
1334 }
1335 attrValue = (char*)uprv_malloc(attrBufLength + 1);
1336 if (attrValue == NULL) {
1337 *status = U_MEMORY_ALLOCATION_ERROR;
1338 break;
1339 }
1340 uprv_strcpy(attrValue, attrBuf);
1341 attr->attribute = attrValue;
1342
1343 if (!_addAttributeToList(&firstAttr, attr)) {
1344 uprv_free(attr);
1345 uprv_free(attrValue);
1346 if (strict) {
1347 *status = U_ILLEGAL_ARGUMENT_ERROR;
1348 break;
1349 }
1350 }
1351 }
1352 }
1353 } else if (isLDMLKeyword) {
1354 int32_t modKeyLen;
1355
1356 /* transform key and value to bcp47 style */
1357 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1358 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1359 if (strict) {
1360 *status = U_ILLEGAL_ARGUMENT_ERROR;
1361 break;
1362 }
1363 tmpStatus = U_ZERO_ERROR;
1364 continue;
1365 }
1366
1367 bcpKey = pExtBuf;
1368 pExtBuf += (modKeyLen + 1);
1369 extBufCapacity -= (modKeyLen + 1);
1370
1371 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1372 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1373 if (strict) {
1374 *status = U_ILLEGAL_ARGUMENT_ERROR;
1375 break;
1376 }
1377 tmpStatus = U_ZERO_ERROR;
1378 continue;
1379 }
1380 bcpValue = pExtBuf;
1381 pExtBuf += (len + 1);
1382 extBufCapacity -= (len + 1);
1383 } else {
1384 if (*key == PRIVATEUSE) {
1385 if (!_isPrivateuseValueSubtags(buf, len)) {
1386 if (strict) {
1387 *status = U_ILLEGAL_ARGUMENT_ERROR;
1388 break;
1389 }
1390 continue;
1391 }
1392 } else {
1393 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1394 if (strict) {
1395 *status = U_ILLEGAL_ARGUMENT_ERROR;
1396 break;
1397 }
1398 continue;
1399 }
1400 }
1401 bcpKey = key;
1402 if ((len + 1) < extBufCapacity) {
1403 uprv_memcpy(pExtBuf, buf, len);
1404 bcpValue = pExtBuf;
1405
1406 pExtBuf += len;
1407
1408 *pExtBuf = 0;
1409 pExtBuf++;
1410
1411 extBufCapacity -= (len + 1);
1412 } else {
1413 *status = U_ILLEGAL_ARGUMENT_ERROR;
1414 break;
1415 }
1416 }
1417
1418 if (!isAttribute) {
1419 /* create ExtensionListEntry */
1420 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1421 if (ext == NULL) {
1422 *status = U_MEMORY_ALLOCATION_ERROR;
1423 break;
1424 }
1425 ext->key = bcpKey;
1426 ext->value = bcpValue;
1427
1428 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1429 uprv_free(ext);
1430 if (strict) {
1431 *status = U_ILLEGAL_ARGUMENT_ERROR;
1432 break;
1433 }
1434 }
1435 }
1436 }
1437
1438 /* Special handling for POSIX variant - add the keywords for POSIX */
1439 if (hadPosix) {
1440 /* create ExtensionListEntry for POSIX */
1441 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1442 if (ext == NULL) {
1443 *status = U_MEMORY_ALLOCATION_ERROR;
1444 goto cleanup;
1445 }
1446 ext->key = POSIX_KEY;
1447 ext->value = POSIX_VALUE;
1448
1449 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1450 uprv_free(ext);
1451 }
1452 }
1453
1454 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1455 UBool startLDMLExtension = FALSE;
1456
1457 attr = firstAttr;
1458 ext = firstExt;
1459 do {
1460 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1461 /* write LDML singleton extension */
1462 if (reslen < capacity) {
1463 *(appendAt + reslen) = SEP;
1464 }
1465 reslen++;
1466 if (reslen < capacity) {
1467 *(appendAt + reslen) = LDMLEXT;
1468 }
1469 reslen++;
1470
1471 startLDMLExtension = TRUE;
1472 }
1473
1474 /* write out the sorted BCP47 attributes, extensions and private use */
1475 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1476 if (reslen < capacity) {
1477 *(appendAt + reslen) = SEP;
1478 }
1479 reslen++;
1480 len = (int32_t)uprv_strlen(ext->key);
1481 if (reslen < capacity) {
1482 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1483 }
1484 reslen += len;
1485 if (reslen < capacity) {
1486 *(appendAt + reslen) = SEP;
1487 }
1488 reslen++;
1489 len = (int32_t)uprv_strlen(ext->value);
1490 if (reslen < capacity) {
1491 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1492 }
1493 reslen += len;
1494
1495 ext = ext->next;
1496 } else if (attr) {
1497 /* write the value for the attributes */
1498 if (reslen < capacity) {
1499 *(appendAt + reslen) = SEP;
1500 }
1501 reslen++;
1502 len = (int32_t)uprv_strlen(attr->attribute);
1503 if (reslen < capacity) {
1504 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1505 }
1506 reslen += len;
1507
1508 attr = attr->next;
1509 }
1510 } while (attr != NULL || ext != NULL);
1511 }
1512 cleanup:
1513 /* clean up */
1514 ext = firstExt;
1515 while (ext != NULL) {
1516 ExtensionListEntry *tmpExt = ext->next;
1517 uprv_free(ext);
1518 ext = tmpExt;
1519 }
1520
1521 attr = firstAttr;
1522 while (attr != NULL) {
1523 AttributeListEntry *tmpAttr = attr->next;
1524 char *pValue = (char *)attr->attribute;
1525 uprv_free(pValue);
1526 uprv_free(attr);
1527 attr = tmpAttr;
1528 }
1529
1530 uenum_close(keywordEnum);
1531
1532 if (U_FAILURE(*status)) {
1533 return 0;
1534 }
1535 }
1536
1537 return u_terminateChars(appendAt, capacity, reslen, status);
1538 }
1539
1540 /**
1541 * Append keywords parsed from LDML extension value
1542 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1543 * Note: char* buf is used for storing keywords
1544 */
1545 static void
1546 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1547 const char *pTag; /* beginning of current subtag */
1548 const char *pKwds; /* beginning of key-type pairs */
1549 UBool variantExists = *posixVariant;
1550
1551 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1552 ExtensionListEntry *kwd, *nextKwd;
1553
1554 AttributeListEntry *attrFirst = NULL; /* first attribute */
1555 AttributeListEntry *attr, *nextAttr;
1556
1557 int32_t len;
1558 int32_t bufIdx = 0;
1559
1560 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561 int32_t attrBufIdx = 0;
1562
1563 /* Reset the posixVariant value */
1564 *posixVariant = FALSE;
1565
1566 pTag = ldmlext;
1567 pKwds = NULL;
1568
1569 /* Iterate through u extension attributes */
1570 while (*pTag) {
1571 /* locate next separator char */
1572 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1573
1574 if (_isLDMLKey(pTag, len)) {
1575 pKwds = pTag;
1576 break;
1577 }
1578
1579 /* add this attribute to the list */
1580 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1581 if (attr == NULL) {
1582 *status = U_MEMORY_ALLOCATION_ERROR;
1583 goto cleanup;
1584 }
1585
1586 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1587 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1588 attrBuf[attrBufIdx + len] = 0;
1589 attr->attribute = &attrBuf[attrBufIdx];
1590 attrBufIdx += (len + 1);
1591 } else {
1592 *status = U_ILLEGAL_ARGUMENT_ERROR;
1593 goto cleanup;
1594 }
1595
1596 if (!_addAttributeToList(&attrFirst, attr)) {
1597 *status = U_ILLEGAL_ARGUMENT_ERROR;
1598 uprv_free(attr);
1599 goto cleanup;
1600 }
1601
1602 /* next tag */
1603 pTag += len;
1604 if (*pTag) {
1605 /* next to the separator */
1606 pTag++;
1607 }
1608 }
1609
1610 if (attrFirst) {
1611 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1612
1613 if (attrBufIdx > bufSize) {
1614 /* attrBufIdx == <total length of attribute subtag> + 1 */
1615 *status = U_ILLEGAL_ARGUMENT_ERROR;
1616 goto cleanup;
1617 }
1618
1619 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1620 if (kwd == NULL) {
1621 *status = U_MEMORY_ALLOCATION_ERROR;
1622 goto cleanup;
1623 }
1624
1625 kwd->key = LOCALE_ATTRIBUTE_KEY;
1626 kwd->value = buf;
1627
1628 /* attribute subtags sorted in alphabetical order as type */
1629 attr = attrFirst;
1630 while (attr != NULL) {
1631 nextAttr = attr->next;
1632
1633 /* buffer size check is done above */
1634 if (attr != attrFirst) {
1635 *(buf + bufIdx) = SEP;
1636 bufIdx++;
1637 }
1638
1639 len = uprv_strlen(attr->attribute);
1640 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1641 bufIdx += len;
1642
1643 attr = nextAttr;
1644 }
1645 *(buf + bufIdx) = 0;
1646 bufIdx++;
1647
1648 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1649 *status = U_ILLEGAL_ARGUMENT_ERROR;
1650 uprv_free(kwd);
1651 goto cleanup;
1652 }
1653
1654 /* once keyword entry is created, delete the attribute list */
1655 attr = attrFirst;
1656 while (attr != NULL) {
1657 nextAttr = attr->next;
1658 uprv_free(attr);
1659 attr = nextAttr;
1660 }
1661 attrFirst = NULL;
1662 }
1663
1664 if (pKwds) {
1665 const char *pBcpKey = NULL; /* u extenstion key subtag */
1666 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1667 int32_t bcpKeyLen = 0;
1668 int32_t bcpTypeLen = 0;
1669 UBool isDone = FALSE;
1670
1671 pTag = pKwds;
1672 /* BCP47 representation of LDML key/type pairs */
1673 while (!isDone) {
1674 const char *pNextBcpKey = NULL;
1675 int32_t nextBcpKeyLen;
1676 UBool emitKeyword = FALSE;
1677
1678 if (*pTag) {
1679 /* locate next separator char */
1680 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1681
1682 if (_isLDMLKey(pTag, len)) {
1683 if (pBcpKey) {
1684 emitKeyword = TRUE;
1685 pNextBcpKey = pTag;
1686 nextBcpKeyLen = len;
1687 } else {
1688 pBcpKey = pTag;
1689 bcpKeyLen = len;
1690 }
1691 } else {
1692 U_ASSERT(pBcpKey != NULL);
1693 /* within LDML type subtags */
1694 if (pBcpType) {
1695 bcpTypeLen += (len + 1);
1696 } else {
1697 pBcpType = pTag;
1698 bcpTypeLen = len;
1699 }
1700 }
1701
1702 /* next tag */
1703 pTag += len;
1704 if (*pTag) {
1705 /* next to the separator */
1706 pTag++;
1707 }
1708 } else {
1709 /* processing last one */
1710 emitKeyword = TRUE;
1711 isDone = TRUE;
1712 }
1713
1714 if (emitKeyword) {
1715 const char *pKey = NULL; /* LDML key */
1716 const char *pType = NULL; /* LDML type */
1717
1718 U_ASSERT(pBcpKey != NULL);
1719
1720 /* u extension key to LDML key */
1721 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1722 if (U_FAILURE(*status)) {
1723 goto cleanup;
1724 }
1725 pKey = buf + bufIdx;
1726 bufIdx += len;
1727 *(buf + bufIdx) = 0;
1728 bufIdx++;
1729
1730 if (pBcpType) {
1731 /* BCP type to locale type */
1732 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1733 if (U_FAILURE(*status)) {
1734 goto cleanup;
1735 }
1736 pType = buf + bufIdx;
1737 bufIdx += len;
1738 *(buf + bufIdx) = 0;
1739 bufIdx++;
1740 } else {
1741 /* typeless - default type value is "yes" */
1742 pType = LOCALE_TYPE_YES;
1743 }
1744
1745 /* Special handling for u-va-posix, since we want to treat this as a variant,
1746 not as a keyword */
1747 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1748 *posixVariant = TRUE;
1749 } else {
1750 /* create an ExtensionListEntry for this keyword */
1751 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1752 if (kwd == NULL) {
1753 *status = U_MEMORY_ALLOCATION_ERROR;
1754 goto cleanup;
1755 }
1756
1757 kwd->key = pKey;
1758 kwd->value = pType;
1759
1760 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1761 *status = U_ILLEGAL_ARGUMENT_ERROR;
1762 uprv_free(kwd);
1763 goto cleanup;
1764 }
1765 }
1766
1767 pBcpKey = pNextBcpKey;
1768 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1769 pBcpType = NULL;
1770 bcpTypeLen = 0;
1771 }
1772 }
1773 }
1774
1775 kwd = kwdFirst;
1776 while (kwd != NULL) {
1777 nextKwd = kwd->next;
1778 _addExtensionToList(appendTo, kwd, FALSE);
1779 kwd = nextKwd;
1780 }
1781
1782 return;
1783
1784 cleanup:
1785 attr = attrFirst;
1786 while (attr != NULL) {
1787 nextAttr = attr->next;
1788 uprv_free(attr);
1789 attr = nextAttr;
1790 }
1791
1792 kwd = kwdFirst;
1793 while (kwd != NULL) {
1794 nextKwd = kwd->next;
1795 uprv_free(kwd);
1796 kwd = nextKwd;
1797 }
1798 }
1799
1800
1801 static int32_t
1802 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1803 int32_t reslen = 0;
1804 int32_t i, n;
1805 int32_t len;
1806 ExtensionListEntry *kwdFirst = NULL;
1807 ExtensionListEntry *kwd;
1808 const char *key, *type;
1809 char *kwdBuf = NULL;
1810 int32_t kwdBufLength = capacity;
1811 UBool posixVariant = FALSE;
1812
1813 if (U_FAILURE(*status)) {
1814 return 0;
1815 }
1816
1817 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1818 if (kwdBuf == NULL) {
1819 *status = U_MEMORY_ALLOCATION_ERROR;
1820 return 0;
1821 }
1822
1823 /* Determine if variants already exists */
1824 if (ultag_getVariantsSize(langtag)) {
1825 posixVariant = TRUE;
1826 }
1827
1828 n = ultag_getExtensionsSize(langtag);
1829
1830 /* resolve locale keywords and reordering keys */
1831 for (i = 0; i < n; i++) {
1832 key = ultag_getExtensionKey(langtag, i);
1833 type = ultag_getExtensionValue(langtag, i);
1834 if (*key == LDMLEXT) {
1835 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1836 if (U_FAILURE(*status)) {
1837 break;
1838 }
1839 } else {
1840 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1841 if (kwd == NULL) {
1842 *status = U_MEMORY_ALLOCATION_ERROR;
1843 break;
1844 }
1845 kwd->key = key;
1846 kwd->value = type;
1847 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1848 uprv_free(kwd);
1849 *status = U_ILLEGAL_ARGUMENT_ERROR;
1850 break;
1851 }
1852 }
1853 }
1854
1855 if (U_SUCCESS(*status)) {
1856 type = ultag_getPrivateUse(langtag);
1857 if ((int32_t)uprv_strlen(type) > 0) {
1858 /* add private use as a keyword */
1859 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1860 if (kwd == NULL) {
1861 *status = U_MEMORY_ALLOCATION_ERROR;
1862 } else {
1863 kwd->key = PRIVATEUSE_KEY;
1864 kwd->value = type;
1865 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1866 uprv_free(kwd);
1867 *status = U_ILLEGAL_ARGUMENT_ERROR;
1868 }
1869 }
1870 }
1871 }
1872
1873 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1874
1875 if (U_SUCCESS(*status) && posixVariant) {
1876 len = (int32_t) uprv_strlen(_POSIX);
1877 if (reslen < capacity) {
1878 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1879 }
1880 reslen += len;
1881 }
1882
1883 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1884 /* write out the sorted keywords */
1885 UBool firstValue = TRUE;
1886 kwd = kwdFirst;
1887 do {
1888 if (reslen < capacity) {
1889 if (firstValue) {
1890 /* '@' */
1891 *(appendAt + reslen) = LOCALE_EXT_SEP;
1892 firstValue = FALSE;
1893 } else {
1894 /* ';' */
1895 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1896 }
1897 }
1898 reslen++;
1899
1900 /* key */
1901 len = (int32_t)uprv_strlen(kwd->key);
1902 if (reslen < capacity) {
1903 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1904 }
1905 reslen += len;
1906
1907 /* '=' */
1908 if (reslen < capacity) {
1909 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1910 }
1911 reslen++;
1912
1913 /* type */
1914 len = (int32_t)uprv_strlen(kwd->value);
1915 if (reslen < capacity) {
1916 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1917 }
1918 reslen += len;
1919
1920 kwd = kwd->next;
1921 } while (kwd);
1922 }
1923
1924 /* clean up */
1925 kwd = kwdFirst;
1926 while (kwd != NULL) {
1927 ExtensionListEntry *tmpKwd = kwd->next;
1928 uprv_free(kwd);
1929 kwd = tmpKwd;
1930 }
1931
1932 uprv_free(kwdBuf);
1933
1934 if (U_FAILURE(*status)) {
1935 return 0;
1936 }
1937
1938 return u_terminateChars(appendAt, capacity, reslen, status);
1939 }
1940
1941 static int32_t
1942 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1943 char buf[ULOC_FULLNAME_CAPACITY];
1944 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1945 UErrorCode tmpStatus = U_ZERO_ERROR;
1946 int32_t len, i;
1947 int32_t reslen = 0;
1948
1949 if (U_FAILURE(*status)) {
1950 return 0;
1951 }
1952
1953 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1954 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1955 if (strict) {
1956 *status = U_ILLEGAL_ARGUMENT_ERROR;
1957 }
1958 return 0;
1959 }
1960
1961 if (len > 0) {
1962 char *p, *pPriv;
1963 UBool bNext = TRUE;
1964 UBool firstValue = TRUE;
1965 UBool writeValue;
1966
1967 pPriv = NULL;
1968 p = buf;
1969 while (bNext) {
1970 writeValue = FALSE;
1971 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1972 if (*p == 0) {
1973 bNext = FALSE;
1974 } else {
1975 *p = 0; /* terminate */
1976 }
1977 if (pPriv != NULL) {
1978 /* Private use in the canonical format is lowercase in BCP47 */
1979 for (i = 0; *(pPriv + i) != 0; i++) {
1980 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1981 }
1982
1983 /* validate */
1984 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1985 if (firstValue) {
1986 if (!_isVariantSubtag(pPriv, -1)) {
1987 writeValue = TRUE;
1988 }
1989 } else {
1990 writeValue = TRUE;
1991 }
1992 } else if (strict) {
1993 *status = U_ILLEGAL_ARGUMENT_ERROR;
1994 break;
1995 } else {
1996 break;
1997 }
1998
1999 if (writeValue) {
2000 if (reslen < capacity) {
2001 tmpAppend[reslen++] = SEP;
2002 }
2003
2004 if (firstValue) {
2005 if (reslen < capacity) {
2006 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
2007 }
2008
2009 if (reslen < capacity) {
2010 tmpAppend[reslen++] = SEP;
2011 }
2012
2013 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
2014 if (reslen < capacity) {
2015 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
2016 }
2017 reslen += len;
2018
2019 if (reslen < capacity) {
2020 tmpAppend[reslen++] = SEP;
2021 }
2022
2023 firstValue = FALSE;
2024 }
2025
2026 len = (int32_t)uprv_strlen(pPriv);
2027 if (reslen < capacity) {
2028 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
2029 }
2030 reslen += len;
2031 }
2032 }
2033 /* reset private use starting position */
2034 pPriv = NULL;
2035 } else if (pPriv == NULL) {
2036 pPriv = p;
2037 }
2038 p++;
2039 }
2040
2041 if (U_FAILURE(*status)) {
2042 return 0;
2043 }
2044 }
2045
2046 if (U_SUCCESS(*status)) {
2047 len = reslen;
2048 if (reslen < capacity) {
2049 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2050 }
2051 }
2052
2053 u_terminateChars(appendAt, capacity, reslen, status);
2054
2055 return reslen;
2056 }
2057
2058 /*
2059 * -------------------------------------------------
2060 *
2061 * ultag_ functions
2062 *
2063 * -------------------------------------------------
2064 */
2065
/*
 * Bit flags used by the parser.  ultag_parse() keeps a mask of these in
 * `next` to record which kinds of subtag may follow the current one.
 */
#define LANG (1 << 0)   /* language */
#define EXTL (1 << 1)   /* extlang */
#define SCRT (1 << 2)   /* script */
#define REGN (1 << 3)   /* region */
#define VART (1 << 4)   /* variant */
#define EXTS (1 << 5)   /* extension singleton */
#define EXTV (1 << 6)   /* extension value subtag */
#define PRIV (1 << 7)   /* private use */
2075
2076 static ULanguageTag*
2077 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2078 ULanguageTag *t;
2079 char *tagBuf;
2080 int16_t next;
2081 char *pSubtag, *pNext, *pLastGoodPosition;
2082 int32_t subtagLen;
2083 int32_t extlangIdx;
2084 ExtensionListEntry *pExtension;
2085 char *pExtValueSubtag, *pExtValueSubtagEnd;
2086 int32_t i;
2087 UBool privateuseVar = FALSE;
2088 int32_t grandfatheredLen = 0;
2089
2090 if (parsedLen != NULL) {
2091 *parsedLen = 0;
2092 }
2093
2094 if (U_FAILURE(*status)) {
2095 return NULL;
2096 }
2097
2098 if (tagLen < 0) {
2099 tagLen = (int32_t)uprv_strlen(tag);
2100 }
2101
2102 /* copy the entire string */
2103 tagBuf = (char*)uprv_malloc(tagLen + 1);
2104 if (tagBuf == NULL) {
2105 *status = U_MEMORY_ALLOCATION_ERROR;
2106 return NULL;
2107 }
2108 uprv_memcpy(tagBuf, tag, tagLen);
2109 *(tagBuf + tagLen) = 0;
2110
2111 /* create a ULanguageTag */
2112 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2113 if (t == NULL) {
2114 uprv_free(tagBuf);
2115 *status = U_MEMORY_ALLOCATION_ERROR;
2116 return NULL;
2117 }
2118 _initializeULanguageTag(t);
2119 t->buf = tagBuf;
2120
2121 if (tagLen < MINLEN) {
2122 /* the input tag is too short - return empty ULanguageTag */
2123 return t;
2124 }
2125
2126 /* check if the tag is grandfathered */
2127 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
2128 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2129 int32_t newTagLength;
2130
2131 grandfatheredLen = tagLen; /* back up for output parsedLen */
2132 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
2133 if (tagLen < newTagLength) {
2134 uprv_free(tagBuf);
2135 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2136 if (tagBuf == NULL) {
2137 *status = U_MEMORY_ALLOCATION_ERROR;
2138 return NULL;
2139 }
2140 t->buf = tagBuf;
2141 tagLen = newTagLength;
2142 }
2143 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2144 break;
2145 }
2146 }
2147
2148 /*
2149 * langtag = language
2150 * ["-" script]
2151 * ["-" region]
2152 * *("-" variant)
2153 * *("-" extension)
2154 * ["-" privateuse]
2155 */
2156
2157 next = LANG | PRIV;
2158 pNext = pLastGoodPosition = tagBuf;
2159 extlangIdx = 0;
2160 pExtension = NULL;
2161 pExtValueSubtag = NULL;
2162 pExtValueSubtagEnd = NULL;
2163
2164 while (pNext) {
2165 char *pSep;
2166
2167 pSubtag = pNext;
2168
2169 /* locate next separator char */
2170 pSep = pSubtag;
2171 while (*pSep) {
2172 if (*pSep == SEP) {
2173 break;
2174 }
2175 pSep++;
2176 }
2177 if (*pSep == 0) {
2178 /* last subtag */
2179 pNext = NULL;
2180 } else {
2181 pNext = pSep + 1;
2182 }
2183 subtagLen = (int32_t)(pSep - pSubtag);
2184
2185 if (next & LANG) {
2186 if (_isLanguageSubtag(pSubtag, subtagLen)) {
2187 *pSep = 0; /* terminate */
2188 t->language = T_CString_toLowerCase(pSubtag);
2189
2190 pLastGoodPosition = pSep;
2191 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2192 continue;
2193 }
2194 }
2195 if (next & EXTL) {
2196 if (_isExtlangSubtag(pSubtag, subtagLen)) {
2197 *pSep = 0;
2198 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2199
2200 pLastGoodPosition = pSep;
2201 if (extlangIdx < 3) {
2202 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2203 } else {
2204 next = SCRT | REGN | VART | EXTS | PRIV;
2205 }
2206 continue;
2207 }
2208 }
2209 if (next & SCRT) {
2210 if (_isScriptSubtag(pSubtag, subtagLen)) {
2211 char *p = pSubtag;
2212
2213 *pSep = 0;
2214
2215 /* to title case */
2216 *p = uprv_toupper(*p);
2217 p++;
2218 for (; *p; p++) {
2219 *p = uprv_tolower(*p);
2220 }
2221
2222 t->script = pSubtag;
2223
2224 pLastGoodPosition = pSep;
2225 next = REGN | VART | EXTS | PRIV;
2226 continue;
2227 }
2228 }
2229 if (next & REGN) {
2230 if (_isRegionSubtag(pSubtag, subtagLen)) {
2231 *pSep = 0;
2232 t->region = T_CString_toUpperCase(pSubtag);
2233
2234 pLastGoodPosition = pSep;
2235 next = VART | EXTS | PRIV;
2236 continue;
2237 }
2238 }
2239 if (next & VART) {
2240 if (_isVariantSubtag(pSubtag, subtagLen) ||
2241 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2242 VariantListEntry *var;
2243 UBool isAdded;
2244
2245 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2246 if (var == NULL) {
2247 *status = U_MEMORY_ALLOCATION_ERROR;
2248 goto error;
2249 }
2250 *pSep = 0;
2251 var->variant = T_CString_toUpperCase(pSubtag);
2252 isAdded = _addVariantToList(&(t->variants), var);
2253 if (!isAdded) {
2254 /* duplicated variant entry */
2255 uprv_free(var);
2256 break;
2257 }
2258 pLastGoodPosition = pSep;
2259 next = VART | EXTS | PRIV;
2260 continue;
2261 }
2262 }
2263 if (next & EXTS) {
2264 if (_isExtensionSingleton(pSubtag, subtagLen)) {
2265 if (pExtension != NULL) {
2266 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2267 /* the previous extension is incomplete */
2268 uprv_free(pExtension);
2269 pExtension = NULL;
2270 break;
2271 }
2272
2273 /* terminate the previous extension value */
2274 *pExtValueSubtagEnd = 0;
2275 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2276
2277 /* insert the extension to the list */
2278 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2279 pLastGoodPosition = pExtValueSubtagEnd;
2280 } else {
2281 /* stop parsing here */
2282 uprv_free(pExtension);
2283 pExtension = NULL;
2284 break;
2285 }
2286 }
2287
2288 /* create a new extension */
2289 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2290 if (pExtension == NULL) {
2291 *status = U_MEMORY_ALLOCATION_ERROR;
2292 goto error;
2293 }
2294 *pSep = 0;
2295 pExtension->key = T_CString_toLowerCase(pSubtag);
2296 pExtension->value = NULL; /* will be set later */
2297
2298 /*
2299 * reset the start and the end location of extension value
2300 * subtags for this extension
2301 */
2302 pExtValueSubtag = NULL;
2303 pExtValueSubtagEnd = NULL;
2304
2305 next = EXTV;
2306 continue;
2307 }
2308 }
2309 if (next & EXTV) {
2310 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2311 if (pExtValueSubtag == NULL) {
2312 /* if the start postion of this extension's value is not yet,
2313 this one is the first value subtag */
2314 pExtValueSubtag = pSubtag;
2315 }
2316
2317 /* Mark the end of this subtag */
2318 pExtValueSubtagEnd = pSep;
2319 next = EXTS | EXTV | PRIV;
2320
2321 continue;
2322 }
2323 }
2324 if (next & PRIV) {
2325 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2326 char *pPrivuseVal;
2327
2328 if (pExtension != NULL) {
2329 /* Process the last extension */
2330 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2331 /* the previous extension is incomplete */
2332 uprv_free(pExtension);
2333 pExtension = NULL;
2334 break;
2335 } else {
2336 /* terminate the previous extension value */
2337 *pExtValueSubtagEnd = 0;
2338 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2339
2340 /* insert the extension to the list */
2341 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2342 pLastGoodPosition = pExtValueSubtagEnd;
2343 pExtension = NULL;
2344 } else {
2345 /* stop parsing here */
2346 uprv_free(pExtension);
2347 pExtension = NULL;
2348 break;
2349 }
2350 }
2351 }
2352
2353 /* The rest of part will be private use value subtags */
2354 if (pNext == NULL) {
2355 /* empty private use subtag */
2356 break;
2357 }
2358 /* back up the private use value start position */
2359 pPrivuseVal = pNext;
2360
2361 /* validate private use value subtags */
2362 while (pNext) {
2363 pSubtag = pNext;
2364 pSep = pSubtag;
2365 while (*pSep) {
2366 if (*pSep == SEP) {
2367 break;
2368 }
2369 pSep++;
2370 }
2371 if (*pSep == 0) {
2372 /* last subtag */
2373 pNext = NULL;
2374 } else {
2375 pNext = pSep + 1;
2376 }
2377 subtagLen = (int32_t)(pSep - pSubtag);
2378
2379 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2380 *pSep = 0;
2381 next = VART;
2382 privateuseVar = TRUE;
2383 break;
2384 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2385 pLastGoodPosition = pSep;
2386 } else {
2387 break;
2388 }
2389 }
2390
2391 if (next == VART) {
2392 continue;
2393 }
2394
2395 if (pLastGoodPosition - pPrivuseVal > 0) {
2396 *pLastGoodPosition = 0;
2397 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2398 }
2399 /* No more subtags, exiting the parse loop */
2400 break;
2401 }
2402 break;
2403 }
2404
2405 /* If we fell through here, it means this subtag is illegal - quit parsing */
2406 break;
2407 }
2408
2409 if (pExtension != NULL) {
2410 /* Process the last extension */
2411 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2412 /* the previous extension is incomplete */
2413 uprv_free(pExtension);
2414 } else {
2415 /* terminate the previous extension value */
2416 *pExtValueSubtagEnd = 0;
2417 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2418 /* insert the extension to the list */
2419 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2420 pLastGoodPosition = pExtValueSubtagEnd;
2421 } else {
2422 uprv_free(pExtension);
2423 }
2424 }
2425 }
2426
2427 if (parsedLen != NULL) {
2428 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2429 }
2430
2431 return t;
2432
2433 error:
2434 uprv_free(t);
2435 return NULL;
2436 }
2437
2438 static void
2439 ultag_close(ULanguageTag* langtag) {
2440
2441 if (langtag == NULL) {
2442 return;
2443 }
2444
2445 uprv_free(langtag->buf);
2446
2447 if (langtag->variants) {
2448 VariantListEntry *curVar = langtag->variants;
2449 while (curVar) {
2450 VariantListEntry *nextVar = curVar->next;
2451 uprv_free(curVar);
2452 curVar = nextVar;
2453 }
2454 }
2455
2456 if (langtag->extensions) {
2457 ExtensionListEntry *curExt = langtag->extensions;
2458 while (curExt) {
2459 ExtensionListEntry *nextExt = curExt->next;
2460 uprv_free(curExt);
2461 curExt = nextExt;
2462 }
2463 }
2464
2465 uprv_free(langtag);
2466 }
2467
2468 static const char*
2469 ultag_getLanguage(const ULanguageTag* langtag) {
2470 return langtag->language;
2471 }
2472
#if 0
/*
 * Disabled code: looks up the tag's language in the DEPRECATEDLANGS pair
 * table and returns the second element of the matching pair, or the
 * language itself when there is no match.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return langtag->language;
}
#endif
2485
2486 static const char*
2487 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2488 if (idx >= 0 && idx < MAXEXTLANG) {
2489 return langtag->extlang[idx];
2490 }
2491 return NULL;
2492 }
2493
2494 static int32_t
2495 ultag_getExtlangSize(const ULanguageTag* langtag) {
2496 int32_t size = 0;
2497 int32_t i;
2498 for (i = 0; i < MAXEXTLANG; i++) {
2499 if (langtag->extlang[i]) {
2500 size++;
2501 }
2502 }
2503 return size;
2504 }
2505
2506 static const char*
2507 ultag_getScript(const ULanguageTag* langtag) {
2508 return langtag->script;
2509 }
2510
2511 static const char*
2512 ultag_getRegion(const ULanguageTag* langtag) {
2513 return langtag->region;
2514 }
2515
2516 static const char*
2517 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2518 const char *var = NULL;
2519 VariantListEntry *cur = langtag->variants;
2520 int32_t i = 0;
2521 while (cur) {
2522 if (i == idx) {
2523 var = cur->variant;
2524 break;
2525 }
2526 cur = cur->next;
2527 i++;
2528 }
2529 return var;
2530 }
2531
2532 static int32_t
2533 ultag_getVariantsSize(const ULanguageTag* langtag) {
2534 int32_t size = 0;
2535 VariantListEntry *cur = langtag->variants;
2536 while (TRUE) {
2537 if (cur == NULL) {
2538 break;
2539 }
2540 size++;
2541 cur = cur->next;
2542 }
2543 return size;
2544 }
2545
2546 static const char*
2547 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2548 const char *key = NULL;
2549 ExtensionListEntry *cur = langtag->extensions;
2550 int32_t i = 0;
2551 while (cur) {
2552 if (i == idx) {
2553 key = cur->key;
2554 break;
2555 }
2556 cur = cur->next;
2557 i++;
2558 }
2559 return key;
2560 }
2561
2562 static const char*
2563 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2564 const char *val = NULL;
2565 ExtensionListEntry *cur = langtag->extensions;
2566 int32_t i = 0;
2567 while (cur) {
2568 if (i == idx) {
2569 val = cur->value;
2570 break;
2571 }
2572 cur = cur->next;
2573 i++;
2574 }
2575 return val;
2576 }
2577
2578 static int32_t
2579 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2580 int32_t size = 0;
2581 ExtensionListEntry *cur = langtag->extensions;
2582 while (TRUE) {
2583 if (cur == NULL) {
2584 break;
2585 }
2586 size++;
2587 cur = cur->next;
2588 }
2589 return size;
2590 }
2591
2592 static const char*
2593 ultag_getPrivateUse(const ULanguageTag* langtag) {
2594 return langtag->privateuse;
2595 }
2596
#if 0
/*
 * Currently compiled out.
 *
 * Accessor for the 'grandfathered' field of a parsed tag; returns the
 * stored pointer as-is.
 */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag)
{
    const char *tag = langtag->grandfathered;
    return tag;
}
#endif
2603
2604
2605 /*
2606 * -------------------------------------------------
2607 *
2608 * Locale/BCP47 conversion APIs, exposed as uloc_*
2609 *
2610 * -------------------------------------------------
2611 */
2612 U_CAPI int32_t U_EXPORT2
2613 uloc_toLanguageTag(const char* localeID,
2614 char* langtag,
2615 int32_t langtagCapacity,
2616 UBool strict,
2617 UErrorCode* status) {
2618 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2619 char canonical[256];
2620 int32_t reslen = 0;
2621 UErrorCode tmpStatus = U_ZERO_ERROR;
2622 UBool hadPosix = FALSE;
2623 const char* pKeywordStart;
2624
2625 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2626 canonical[0] = 0;
2627 if (uprv_strlen(localeID) > 0) {
2628 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2629 if (tmpStatus != U_ZERO_ERROR) {
2630 *status = U_ILLEGAL_ARGUMENT_ERROR;
2631 return 0;
2632 }
2633 }
2634
2635 /* For handling special case - private use only tag */
2636 pKeywordStart = locale_getKeywordsStart(canonical);
2637 if (pKeywordStart == canonical) {
2638 UEnumeration *kwdEnum;
2639 int kwdCnt = 0;
2640 UBool done = FALSE;
2641
2642 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2643 if (kwdEnum != NULL) {
2644 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2645 if (kwdCnt == 1) {
2646 const char *key;
2647 int32_t len = 0;
2648
2649 key = uenum_next(kwdEnum, &len, &tmpStatus);
2650 if (len == 1 && *key == PRIVATEUSE) {
2651 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2652 buf[0] = PRIVATEUSE;
2653 buf[1] = SEP;
2654 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2655 if (U_SUCCESS(tmpStatus)) {
2656 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2657 /* return private use only tag */
2658 reslen = len + 2;
2659 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2660 u_terminateChars(langtag, langtagCapacity, reslen, status);
2661 done = TRUE;
2662 } else if (strict) {
2663 *status = U_ILLEGAL_ARGUMENT_ERROR;
2664 done = TRUE;
2665 }
2666 /* if not strict mode, then "und" will be returned */
2667 } else {
2668 *status = U_ILLEGAL_ARGUMENT_ERROR;
2669 done = TRUE;
2670 }
2671 }
2672 }
2673 uenum_close(kwdEnum);
2674 if (done) {
2675 return reslen;
2676 }
2677 }
2678 }
2679
2680 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2681 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2682 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2683 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2684 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2685 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2686
2687 return reslen;
2688 }
2689
2690
2691 U_CAPI int32_t U_EXPORT2
2692 uloc_forLanguageTag(const char* langtag,
2693 char* localeID,
2694 int32_t localeIDCapacity,
2695 int32_t* parsedLength,
2696 UErrorCode* status) {
2697 ULanguageTag *lt;
2698 int32_t reslen = 0;
2699 const char *subtag, *p;
2700 int32_t len;
2701 int32_t i, n;
2702 UBool noRegion = TRUE;
2703
2704 lt = ultag_parse(langtag, -1, parsedLength, status);
2705 if (U_FAILURE(*status)) {
2706 return 0;
2707 }
2708
2709 /* language */
2710 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2711 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2712 len = (int32_t)uprv_strlen(subtag);
2713 if (len > 0) {
2714 if (reslen < localeIDCapacity) {
2715 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2716 }
2717 reslen += len;
2718 }
2719 }
2720
2721 /* script */
2722 subtag = ultag_getScript(lt);
2723 len = (int32_t)uprv_strlen(subtag);
2724 if (len > 0) {
2725 if (reslen < localeIDCapacity) {
2726 *(localeID + reslen) = LOCALE_SEP;
2727 }
2728 reslen++;
2729
2730 /* write out the script in title case */
2731 p = subtag;
2732 while (*p) {
2733 if (reslen < localeIDCapacity) {
2734 if (p == subtag) {
2735 *(localeID + reslen) = uprv_toupper(*p);
2736 } else {
2737 *(localeID + reslen) = *p;
2738 }
2739 }
2740 reslen++;
2741 p++;
2742 }
2743 }
2744
2745 /* region */
2746 subtag = ultag_getRegion(lt);
2747 len = (int32_t)uprv_strlen(subtag);
2748 if (len > 0) {
2749 if (reslen < localeIDCapacity) {
2750 *(localeID + reslen) = LOCALE_SEP;
2751 }
2752 reslen++;
2753 /* write out the retion in upper case */
2754 p = subtag;
2755 while (*p) {
2756 if (reslen < localeIDCapacity) {
2757 *(localeID + reslen) = uprv_toupper(*p);
2758 }
2759 reslen++;
2760 p++;
2761 }
2762 noRegion = FALSE;
2763 }
2764
2765 /* variants */
2766 n = ultag_getVariantsSize(lt);
2767 if (n > 0) {
2768 if (noRegion) {
2769 if (reslen < localeIDCapacity) {
2770 *(localeID + reslen) = LOCALE_SEP;
2771 }
2772 reslen++;
2773 }
2774
2775 for (i = 0; i < n; i++) {
2776 subtag = ultag_getVariant(lt, i);
2777 if (reslen < localeIDCapacity) {
2778 *(localeID + reslen) = LOCALE_SEP;
2779 }
2780 reslen++;
2781 /* write out the variant in upper case */
2782 p = subtag;
2783 while (*p) {
2784 if (reslen < localeIDCapacity) {
2785 *(localeID + reslen) = uprv_toupper(*p);
2786 }
2787 reslen++;
2788 p++;
2789 }
2790 }
2791 }
2792
2793 /* keywords */
2794 n = ultag_getExtensionsSize(lt);
2795 subtag = ultag_getPrivateUse(lt);
2796 if (n > 0 || uprv_strlen(subtag) > 0) {
2797 if (reslen == 0 && n > 0) {
2798 /* need a language */
2799 if (reslen < localeIDCapacity) {
2800 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2801 }
2802 reslen += LANG_UND_LEN;
2803 }
2804 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2805 reslen += len;
2806 }
2807
2808 ultag_close(lt);
2809 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2810 }
2811
2812