]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc_tag.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / common / uloc_tag.c
1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18
/*
 * One node of a singly linked list of variant subtags.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* text of the variant subtag */
    struct VariantListEntry *next;      /* following node; NULL terminates the list */
} VariantListEntry;
24
/*
 * One node of a singly linked list of extensions (key/value pairs).
 */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key */
    const char                *value;   /* value belonging to the key */
    struct ExtensionListEntry *next;    /* following node; NULL terminates the list */
} ExtensionListEntry;
31
32 #define MAXEXTLANG 3
33 typedef struct ULanguageTag {
34 char *buf; /* holding parsed subtags */
35 const char *language;
36 const char *extlang[MAXEXTLANG];
37 const char *script;
38 const char *region;
39 VariantListEntry *variants;
40 ExtensionListEntry *extensions;
41 const char *privateuse;
42 const char *grandfathered;
43 } ULanguageTag;
44
/* Language tag (BCP47 side) constants */
#define MINLEN 2
#define SEP '-'             /* separator between subtags */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT 'u'         /* singleton used for LDML extensions */

/* Punctuation of ICU locale identifiers */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/*
 * ASCII-only character classification. Both macros evaluate their
 * argument more than once, so pass expressions without side effects.
 */
#define ISALPHA(c) (((c)>='a' && (c)<='z') || ((c)>='A' && (c)<='Z'))
#define ISNUMERIC(c) ((c)<='9' && (c)>='0')

/* Shared string constants */
static const char* EMPTY = "";
static const char* LANG_UND = "und";
static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";

/* Length of LANG_UND, excluding the terminator */
#define LANG_UND_LEN 3
66
/*
 * Grandfathered tags, stored as consecutive pairs: the grandfathered tag
 * followed by its preferred replacement. An empty replacement string means
 * no preferred value is listed. A NULL pair terminates the table.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "",
    "en-GB-oed",    "",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "",
    "i-enochian",   "",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
97
/*
 * Deprecated language codes, stored as consecutive pairs: the deprecated
 * code followed by the code that replaces it. A NULL pair terminates the
 * table.
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
105
106 /*
107 * -------------------------------------------------
108 *
109 * These ultag_ functions may be exposed as APIs later
110 *
111 * -------------------------------------------------
112 */
113
114 static ULanguageTag*
115 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
116
117 static void
118 ultag_close(ULanguageTag* langtag);
119
120 static const char*
121 ultag_getLanguage(const ULanguageTag* langtag);
122
123 #if 0
124 static const char*
125 ultag_getJDKLanguage(const ULanguageTag* langtag);
126 #endif
127
128 static const char*
129 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
130
131 static int32_t
132 ultag_getExtlangSize(const ULanguageTag* langtag);
133
134 static const char*
135 ultag_getScript(const ULanguageTag* langtag);
136
137 static const char*
138 ultag_getRegion(const ULanguageTag* langtag);
139
140 static const char*
141 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
142
143 static int32_t
144 ultag_getVariantsSize(const ULanguageTag* langtag);
145
146 static const char*
147 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
148
149 static const char*
150 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
151
152 static int32_t
153 ultag_getExtensionsSize(const ULanguageTag* langtag);
154
155 static const char*
156 ultag_getPrivateUse(const ULanguageTag* langtag);
157
158 #if 0
159 static const char*
160 ultag_getGrandfathered(const ULanguageTag* langtag);
161 #endif
162
163 /*
164 * -------------------------------------------------
165 *
166 * Language subtag syntax validation functions
167 *
168 * -------------------------------------------------
169 */
170
171 static UBool
172 _isAlphaString(const char* s, int32_t len) {
173 int32_t i;
174 for (i = 0; i < len; i++) {
175 if (!ISALPHA(*(s + i))) {
176 return FALSE;
177 }
178 }
179 return TRUE;
180 }
181
182 static UBool
183 _isNumericString(const char* s, int32_t len) {
184 int32_t i;
185 for (i = 0; i < len; i++) {
186 if (!ISNUMERIC(*(s + i))) {
187 return FALSE;
188 }
189 }
190 return TRUE;
191 }
192
193 static UBool
194 _isAlphaNumericString(const char* s, int32_t len) {
195 int32_t i;
196 for (i = 0; i < len; i++) {
197 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
198 return FALSE;
199 }
200 }
201 return TRUE;
202 }
203
204 static UBool
205 _isLanguageSubtag(const char* s, int32_t len) {
206 /*
207 * language = 2*3ALPHA ; shortest ISO 639 code
208 * ["-" extlang] ; sometimes followed by
209 * ; extended language subtags
210 * / 4ALPHA ; or reserved for future use
211 * / 5*8ALPHA ; or registered language subtag
212 */
213 if (len < 0) {
214 len = (int32_t)uprv_strlen(s);
215 }
216 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
217 return TRUE;
218 }
219 return FALSE;
220 }
221
222 static UBool
223 _isExtlangSubtag(const char* s, int32_t len) {
224 /*
225 * extlang = 3ALPHA ; selected ISO 639 codes
226 * *2("-" 3ALPHA) ; permanently reserved
227 */
228 if (len < 0) {
229 len = (int32_t)uprv_strlen(s);
230 }
231 if (len == 3 && _isAlphaString(s, len)) {
232 return TRUE;
233 }
234 return FALSE;
235 }
236
237 static UBool
238 _isScriptSubtag(const char* s, int32_t len) {
239 /*
240 * script = 4ALPHA ; ISO 15924 code
241 */
242 if (len < 0) {
243 len = (int32_t)uprv_strlen(s);
244 }
245 if (len == 4 && _isAlphaString(s, len)) {
246 return TRUE;
247 }
248 return FALSE;
249 }
250
251 static UBool
252 _isRegionSubtag(const char* s, int32_t len) {
253 /*
254 * region = 2ALPHA ; ISO 3166-1 code
255 * / 3DIGIT ; UN M.49 code
256 */
257 if (len < 0) {
258 len = (int32_t)uprv_strlen(s);
259 }
260 if (len == 2 && _isAlphaString(s, len)) {
261 return TRUE;
262 }
263 if (len == 3 && _isNumericString(s, len)) {
264 return TRUE;
265 }
266 return FALSE;
267 }
268
269 static UBool
270 _isVariantSubtag(const char* s, int32_t len) {
271 /*
272 * variant = 5*8alphanum ; registered variants
273 * / (DIGIT 3alphanum)
274 */
275 if (len < 0) {
276 len = (int32_t)uprv_strlen(s);
277 }
278 if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
279 return TRUE;
280 }
281 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
282 return TRUE;
283 }
284 return FALSE;
285 }
286
287 static UBool
288 _isExtensionSingleton(const char* s, int32_t len) {
289 /*
290 * extension = singleton 1*("-" (2*8alphanum))
291 */
292 if (len < 0) {
293 len = (int32_t)uprv_strlen(s);
294 }
295 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
296 return TRUE;
297 }
298 return FALSE;
299 }
300
301 static UBool
302 _isExtensionSubtag(const char* s, int32_t len) {
303 /*
304 * extension = singleton 1*("-" (2*8alphanum))
305 */
306 if (len < 0) {
307 len = (int32_t)uprv_strlen(s);
308 }
309 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
310 return TRUE;
311 }
312 return FALSE;
313 }
314
315 static UBool
316 _isExtensionSubtags(const char* s, int32_t len) {
317 const char *p = s;
318 const char *pSubtag = NULL;
319
320 if (len < 0) {
321 len = (int32_t)uprv_strlen(s);
322 }
323
324 while ((p - s) < len) {
325 if (*p == SEP) {
326 if (pSubtag == NULL) {
327 return FALSE;
328 }
329 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
330 return FALSE;
331 }
332 pSubtag = NULL;
333 } else if (pSubtag == NULL) {
334 pSubtag = p;
335 }
336 p++;
337 }
338 if (pSubtag == NULL) {
339 return FALSE;
340 }
341 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
342 }
343
344 static UBool
345 _isPrivateuseValueSubtag(const char* s, int32_t len) {
346 /*
347 * privateuse = "x" 1*("-" (1*8alphanum))
348 */
349 if (len < 0) {
350 len = (int32_t)uprv_strlen(s);
351 }
352 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
353 return TRUE;
354 }
355 return FALSE;
356 }
357
358 static UBool
359 _isPrivateuseValueSubtags(const char* s, int32_t len) {
360 const char *p = s;
361 const char *pSubtag = NULL;
362
363 if (len < 0) {
364 len = (int32_t)uprv_strlen(s);
365 }
366
367 while ((p - s) < len) {
368 if (*p == SEP) {
369 if (pSubtag == NULL) {
370 return FALSE;
371 }
372 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
373 return FALSE;
374 }
375 pSubtag = NULL;
376 } else if (pSubtag == NULL) {
377 pSubtag = p;
378 }
379 p++;
380 }
381 if (pSubtag == NULL) {
382 return FALSE;
383 }
384 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
385 }
386
387 static UBool
388 _isLDMLKey(const char* s, int32_t len) {
389 if (len < 0) {
390 len = (int32_t)uprv_strlen(s);
391 }
392 if (len == 2 && _isAlphaNumericString(s, len)) {
393 return TRUE;
394 }
395 return FALSE;
396 }
397
398 static UBool
399 _isLDMLType(const char* s, int32_t len) {
400 if (len < 0) {
401 len = (int32_t)uprv_strlen(s);
402 }
403 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
404 return TRUE;
405 }
406 return FALSE;
407 }
408
409 /*
410 * -------------------------------------------------
411 *
412 * Helper functions
413 *
414 * -------------------------------------------------
415 */
416
417 static UBool
418 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
419 UBool bAdded = TRUE;
420
421 if (*first == NULL) {
422 var->next = NULL;
423 *first = var;
424 } else {
425 VariantListEntry *prev, *cur;
426 int32_t cmp;
427
428 /* reorder variants in alphabetical order */
429 prev = NULL;
430 cur = *first;
431 while (TRUE) {
432 if (cur == NULL) {
433 prev->next = var;
434 var->next = NULL;
435 break;
436 }
437 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
438 if (cmp < 0) {
439 if (prev == NULL) {
440 *first = var;
441 } else {
442 prev->next = var;
443 }
444 var->next = cur;
445 break;
446 }
447 if (cmp == 0) {
448 /* duplicated variant */
449 bAdded = FALSE;
450 break;
451 }
452 prev = cur;
453 cur = cur->next;
454 }
455 }
456
457 return bAdded;
458 }
459
460
461 static UBool
462 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
463 UBool bAdded = TRUE;
464
465 if (*first == NULL) {
466 ext->next = NULL;
467 *first = ext;
468 } else {
469 ExtensionListEntry *prev, *cur;
470 int32_t cmp;
471
472 /* reorder variants in alphabetical order */
473 prev = NULL;
474 cur = *first;
475 while (TRUE) {
476 if (cur == NULL) {
477 prev->next = ext;
478 ext->next = NULL;
479 break;
480 }
481 if (localeToBCP) {
482 /* special handling for locale to bcp conversion */
483 int32_t len, curlen;
484
485 len = (int32_t)uprv_strlen(ext->key);
486 curlen = (int32_t)uprv_strlen(cur->key);
487
488 if (len == 1 && curlen == 1) {
489 if (*(ext->key) == *(cur->key)) {
490 cmp = 0;
491 } else if (*(ext->key) == PRIVATEUSE) {
492 cmp = 1;
493 } else if (*(cur->key) == PRIVATEUSE) {
494 cmp = -1;
495 } else {
496 cmp = *(ext->key) - *(cur->key);
497 }
498 } else if (len == 1) {
499 cmp = *(ext->key) - LDMLEXT;
500 } else if (curlen == 1) {
501 cmp = LDMLEXT - *(cur->key);
502 } else {
503 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
504 }
505 } else {
506 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
507 }
508 if (cmp < 0) {
509 if (prev == NULL) {
510 *first = ext;
511 } else {
512 prev->next = ext;
513 }
514 ext->next = cur;
515 break;
516 }
517 if (cmp == 0) {
518 /* duplicated extension key */
519 bAdded = FALSE;
520 break;
521 }
522 prev = cur;
523 cur = cur->next;
524 }
525 }
526
527 return bAdded;
528 }
529
530 static void
531 _initializeULanguageTag(ULanguageTag* langtag) {
532 int32_t i;
533
534 langtag->buf = NULL;
535
536 langtag->language = EMPTY;
537 for (i = 0; i < MAXEXTLANG; i++) {
538 langtag->extlang[i] = NULL;
539 }
540
541 langtag->script = EMPTY;
542 langtag->region = EMPTY;
543
544 langtag->variants = NULL;
545 langtag->extensions = NULL;
546
547 langtag->grandfathered = EMPTY;
548 langtag->privateuse = EMPTY;
549 }
550
/* Name of the resource bundle and of the tables looked up inside it */
#define KEYTYPEDATA "keyTypeData"
#define KEYMAP "keyMap"
#define TYPEMAP "typeMap"
#define TYPEALIAS "typeAlias"

/* Sizes of the scratch buffers used by the key/type conversion functions */
#define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
#define MAX_LDML_KEY_LEN 22
#define MAX_LDML_TYPE_LEN 32
558
559 static int32_t
560 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
561 char* bcpKey, int32_t bcpKeyCapacity,
562 UErrorCode *status) {
563 UResourceBundle *rb;
564 char keyBuf[MAX_LDML_KEY_LEN];
565 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
566 int32_t resultLen = 0;
567 int32_t i;
568 UErrorCode tmpStatus = U_ZERO_ERROR;
569 const UChar *uBcpKey;
570 int32_t bcpKeyLen;
571
572 if (keyLen < 0) {
573 keyLen = (int32_t)uprv_strlen(key);
574 }
575
576 if (keyLen >= sizeof(keyBuf)) {
577 /* no known valid LDML key exceeding 21 */
578 *status = U_ILLEGAL_ARGUMENT_ERROR;
579 return 0;
580 }
581
582 uprv_memcpy(keyBuf, key, keyLen);
583 keyBuf[keyLen] = 0;
584
585 /* to lower case */
586 for (i = 0; i < keyLen; i++) {
587 keyBuf[i] = uprv_tolower(keyBuf[i]);
588 }
589
590 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
591 ures_getByKey(rb, KEYMAP, rb, status);
592
593 if (U_FAILURE(*status)) {
594 ures_close(rb);
595 return 0;
596 }
597
598 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
599 if (U_SUCCESS(tmpStatus)) {
600 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
601 bcpKeyBuf[bcpKeyLen] = 0;
602 resultLen = bcpKeyLen;
603 } else {
604 if (_isLDMLKey(key, keyLen)) {
605 uprv_memcpy(bcpKeyBuf, key, keyLen);
606 bcpKeyBuf[keyLen] = 0;
607 resultLen = keyLen;
608 } else {
609 /* mapping not availabe */
610 *status = U_ILLEGAL_ARGUMENT_ERROR;
611 }
612 }
613 ures_close(rb);
614
615 if (U_FAILURE(*status)) {
616 return 0;
617 }
618
619 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
620 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
621 }
622
623 static int32_t
624 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
625 char* key, int32_t keyCapacity,
626 UErrorCode *status) {
627 UResourceBundle *rb;
628 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
629 int32_t resultLen = 0;
630 int32_t i;
631 const char *resKey = NULL;
632 UResourceBundle *mapData;
633
634 if (bcpKeyLen < 0) {
635 bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
636 }
637
638 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
639 *status = U_ILLEGAL_ARGUMENT_ERROR;
640 return 0;
641 }
642
643 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
644 bcpKeyBuf[bcpKeyLen] = 0;
645
646 /* to lower case */
647 for (i = 0; i < bcpKeyLen; i++) {
648 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
649 }
650
651 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
652 ures_getByKey(rb, KEYMAP, rb, status);
653 if (U_FAILURE(*status)) {
654 ures_close(rb);
655 return 0;
656 }
657
658 mapData = ures_getNextResource(rb, NULL, status);
659 while (U_SUCCESS(*status)) {
660 const UChar *uBcpKey;
661 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
662 int32_t tmpBcpKeyLen;
663
664 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
665 if (U_FAILURE(*status)) {
666 break;
667 }
668 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
669 tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
670 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
671 /* found a matching BCP47 key */
672 resKey = ures_getKey(mapData);
673 resultLen = (int32_t)uprv_strlen(resKey);
674 break;
675 }
676 if (!ures_hasNext(rb)) {
677 break;
678 }
679 ures_getNextResource(rb, mapData, status);
680 }
681 ures_close(mapData);
682 ures_close(rb);
683
684 if (U_FAILURE(*status)) {
685 return 0;
686 }
687
688 if (resKey == NULL) {
689 resKey = bcpKeyBuf;
690 resultLen = bcpKeyLen;
691 }
692
693 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
694 return u_terminateChars(key, keyCapacity, resultLen, status);
695 }
696
697 static int32_t
698 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
699 const char* type, int32_t typeLen,
700 char* bcpType, int32_t bcpTypeCapacity,
701 UErrorCode *status) {
702 UResourceBundle *rb, *keyTypeData, *typeMapForKey;
703 char keyBuf[MAX_LDML_KEY_LEN];
704 char typeBuf[MAX_LDML_TYPE_LEN];
705 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
706 int32_t resultLen = 0;
707 int32_t i;
708 UErrorCode tmpStatus = U_ZERO_ERROR;
709 const UChar *uBcpType, *uCanonicalType;
710 int32_t bcpTypeLen, canonicalTypeLen;
711 UBool isTimezone = FALSE;
712
713 if (keyLen < 0) {
714 keyLen = (int32_t)uprv_strlen(key);
715 }
716 if (keyLen >= sizeof(keyBuf)) {
717 /* no known valid LDML key exceeding 21 */
718 *status = U_ILLEGAL_ARGUMENT_ERROR;
719 return 0;
720 }
721 uprv_memcpy(keyBuf, key, keyLen);
722 keyBuf[keyLen] = 0;
723
724 /* to lower case */
725 for (i = 0; i < keyLen; i++) {
726 keyBuf[i] = uprv_tolower(keyBuf[i]);
727 }
728 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
729 isTimezone = TRUE;
730 }
731
732 if (typeLen < 0) {
733 typeLen = (int32_t)uprv_strlen(type);
734 }
735 if (typeLen >= sizeof(typeBuf)) {
736 *status = U_ILLEGAL_ARGUMENT_ERROR;
737 return 0;
738 }
739
740 if (isTimezone) {
741 /* replace '/' with ':' */
742 for (i = 0; i < typeLen; i++) {
743 if (*(type + i) == '/') {
744 typeBuf[i] = ':';
745 } else {
746 typeBuf[i] = *(type + i);
747 }
748 }
749 typeBuf[typeLen] = 0;
750 type = &typeBuf[0];
751 }
752
753 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
754 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
755 if (U_FAILURE(*status)) {
756 ures_close(rb);
757 ures_close(keyTypeData);
758 return 0;
759 }
760
761 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
762 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
763 if (U_SUCCESS(tmpStatus)) {
764 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
765 resultLen = bcpTypeLen;
766 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
767 /* is this type alias? */
768 tmpStatus = U_ZERO_ERROR;
769 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
770 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
771 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
772 if (U_SUCCESS(tmpStatus)) {
773 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
774 if (isTimezone) {
775 /* replace '/' with ':' */
776 for (i = 0; i < canonicalTypeLen; i++) {
777 if (typeBuf[i] == '/') {
778 typeBuf[i] = ':';
779 }
780 }
781 }
782 typeBuf[canonicalTypeLen] = 0;
783
784 /* look up the canonical type */
785 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
786 if (U_SUCCESS(tmpStatus)) {
787 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
788 resultLen = bcpTypeLen;
789 }
790 }
791 if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
792 if (_isLDMLType(type, typeLen)) {
793 uprv_memcpy(bcpTypeBuf, type, typeLen);
794 resultLen = typeLen;
795 } else {
796 /* mapping not availabe */
797 *status = U_ILLEGAL_ARGUMENT_ERROR;
798 }
799 }
800 } else {
801 *status = tmpStatus;
802 }
803 ures_close(rb);
804 ures_close(typeMapForKey);
805 ures_close(keyTypeData);
806
807 if (U_FAILURE(*status)) {
808 return 0;
809 }
810
811 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
812 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
813 }
814
815 static int32_t
816 _bcp47ToLDMLType(const char* key, int32_t keyLen,
817 const char* bcpType, int32_t bcpTypeLen,
818 char* type, int32_t typeCapacity,
819 UErrorCode *status) {
820 UResourceBundle *rb;
821 char keyBuf[MAX_LDML_KEY_LEN];
822 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
823 int32_t resultLen = 0;
824 int32_t i;
825 const char *resType = NULL;
826 UResourceBundle *mapData;
827 UErrorCode tmpStatus = U_ZERO_ERROR;
828 int32_t copyLen;
829
830 if (keyLen < 0) {
831 keyLen = (int32_t)uprv_strlen(key);
832 }
833
834 if (keyLen >= sizeof(keyBuf)) {
835 /* no known valid LDML key exceeding 21 */
836 *status = U_ILLEGAL_ARGUMENT_ERROR;
837 return 0;
838 }
839 uprv_memcpy(keyBuf, key, keyLen);
840 keyBuf[keyLen] = 0;
841
842 /* to lower case */
843 for (i = 0; i < keyLen; i++) {
844 keyBuf[i] = uprv_tolower(keyBuf[i]);
845 }
846
847
848 if (bcpTypeLen < 0) {
849 bcpTypeLen = (int32_t)uprv_strlen(bcpType);
850 }
851
852 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
853 *status = U_ILLEGAL_ARGUMENT_ERROR;
854 return 0;
855 }
856
857 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
858 bcpTypeBuf[bcpTypeLen] = 0;
859
860 /* to lower case */
861 for (i = 0; i < bcpTypeLen; i++) {
862 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
863 }
864
865 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
866 ures_getByKey(rb, TYPEMAP, rb, status);
867 if (U_FAILURE(*status)) {
868 ures_close(rb);
869 return 0;
870 }
871
872 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
873 mapData = ures_getNextResource(rb, NULL, &tmpStatus);
874 while (U_SUCCESS(tmpStatus)) {
875 const UChar *uBcpType;
876 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
877 int32_t tmpBcpTypeLen;
878
879 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
880 if (U_FAILURE(tmpStatus)) {
881 break;
882 }
883 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
884 tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
885 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
886 /* found a matching BCP47 type */
887 resType = ures_getKey(mapData);
888 resultLen = (int32_t)uprv_strlen(resType);
889 break;
890 }
891 if (!ures_hasNext(rb)) {
892 break;
893 }
894 ures_getNextResource(rb, mapData, &tmpStatus);
895 }
896 ures_close(mapData);
897 ures_close(rb);
898
899 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
900 *status = tmpStatus;
901 return 0;
902 }
903
904 if (resType == NULL) {
905 resType = bcpTypeBuf;
906 resultLen = bcpTypeLen;
907 }
908
909 copyLen = uprv_min(resultLen, typeCapacity);
910 uprv_memcpy(type, resType, copyLen);
911
912 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
913 for (i = 0; i < copyLen; i++) {
914 if (*(type + i) == ':') {
915 *(type + i) = '/';
916 }
917 }
918 }
919
920 return u_terminateChars(type, typeCapacity, resultLen, status);
921 }
922
923 static int32_t
924 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
925 char buf[ULOC_LANG_CAPACITY];
926 UErrorCode tmpStatus = U_ZERO_ERROR;
927 int32_t len, i;
928 int32_t reslen = 0;
929
930 if (U_FAILURE(*status)) {
931 return 0;
932 }
933
934 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
935 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
936 if (strict) {
937 *status = U_ILLEGAL_ARGUMENT_ERROR;
938 return 0;
939 }
940 len = 0;
941 }
942
943 /* Note: returned language code is in lower case letters */
944
945 if (len == 0) {
946 if (reslen < capacity) {
947 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
948 }
949 reslen += LANG_UND_LEN;
950 } else if (!_isLanguageSubtag(buf, len)) {
951 /* invalid language code */
952 if (strict) {
953 *status = U_ILLEGAL_ARGUMENT_ERROR;
954 return 0;
955 }
956 if (reslen < capacity) {
957 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
958 }
959 reslen += LANG_UND_LEN;
960 } else {
961 /* resolve deprecated */
962 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
963 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
964 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
965 len = (int32_t)uprv_strlen(buf);
966 break;
967 }
968 }
969 if (reslen < capacity) {
970 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
971 }
972 reslen += len;
973 }
974 u_terminateChars(appendAt, capacity, reslen, status);
975 return reslen;
976 }
977
978 static int32_t
979 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
980 char buf[ULOC_SCRIPT_CAPACITY];
981 UErrorCode tmpStatus = U_ZERO_ERROR;
982 int32_t len;
983 int32_t reslen = 0;
984
985 if (U_FAILURE(*status)) {
986 return 0;
987 }
988
989 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
990 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
991 if (strict) {
992 *status = U_ILLEGAL_ARGUMENT_ERROR;
993 }
994 return 0;
995 }
996
997 if (len > 0) {
998 if (!_isScriptSubtag(buf, len)) {
999 /* invalid script code */
1000 if (strict) {
1001 *status = U_ILLEGAL_ARGUMENT_ERROR;
1002 }
1003 return 0;
1004 } else {
1005 if (reslen < capacity) {
1006 *(appendAt + reslen) = SEP;
1007 }
1008 reslen++;
1009
1010 if (reslen < capacity) {
1011 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1012 }
1013 reslen += len;
1014 }
1015 }
1016 u_terminateChars(appendAt, capacity, reslen, status);
1017 return reslen;
1018 }
1019
1020 static int32_t
1021 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1022 char buf[ULOC_COUNTRY_CAPACITY];
1023 UErrorCode tmpStatus = U_ZERO_ERROR;
1024 int32_t len;
1025 int32_t reslen = 0;
1026
1027 if (U_FAILURE(*status)) {
1028 return 0;
1029 }
1030
1031 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1032 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1033 if (strict) {
1034 *status = U_ILLEGAL_ARGUMENT_ERROR;
1035 }
1036 return 0;
1037 }
1038
1039 if (len > 0) {
1040 if (!_isRegionSubtag(buf, len)) {
1041 /* invalid region code */
1042 if (strict) {
1043 *status = U_ILLEGAL_ARGUMENT_ERROR;
1044 }
1045 return 0;
1046 } else {
1047 if (reslen < capacity) {
1048 *(appendAt + reslen) = SEP;
1049 }
1050 reslen++;
1051
1052 if (reslen < capacity) {
1053 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1054 }
1055 reslen += len;
1056 }
1057 }
1058 u_terminateChars(appendAt, capacity, reslen, status);
1059 return reslen;
1060 }
1061
1062 static int32_t
1063 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1064 char buf[ULOC_FULLNAME_CAPACITY];
1065 UErrorCode tmpStatus = U_ZERO_ERROR;
1066 int32_t len, i;
1067 int32_t reslen = 0;
1068
1069 if (U_FAILURE(*status)) {
1070 return 0;
1071 }
1072
1073 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1074 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1075 if (strict) {
1076 *status = U_ILLEGAL_ARGUMENT_ERROR;
1077 }
1078 return 0;
1079 }
1080
1081 if (len > 0) {
1082 char *p, *pVar;
1083 UBool bNext = TRUE;
1084 VariantListEntry *var;
1085 VariantListEntry *varFirst = NULL;
1086
1087 pVar = NULL;
1088 p = buf;
1089 while (bNext) {
1090 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1091 if (*p == 0) {
1092 bNext = FALSE;
1093 } else {
1094 *p = 0; /* terminate */
1095 }
1096 if (pVar == NULL) {
1097 if (strict) {
1098 *status = U_ILLEGAL_ARGUMENT_ERROR;
1099 break;
1100 }
1101 /* ignore empty variant */
1102 } else {
1103 /* ICU uses upper case letters for variants, but
1104 the canonical format is lowercase in BCP47 */
1105 for (i = 0; *(pVar + i) != 0; i++) {
1106 *(pVar + i) = uprv_tolower(*(pVar + i));
1107 }
1108
1109 /* validate */
1110 if (_isVariantSubtag(pVar, -1)) {
1111 if (uprv_strcmp(pVar,POSIX_VALUE)) {
1112 /* emit the variant to the list */
1113 var = uprv_malloc(sizeof(VariantListEntry));
1114 if (var == NULL) {
1115 *status = U_MEMORY_ALLOCATION_ERROR;
1116 break;
1117 }
1118 var->variant = pVar;
1119 if (!_addVariantToList(&varFirst, var)) {
1120 /* duplicated variant */
1121 uprv_free(var);
1122 if (strict) {
1123 *status = U_ILLEGAL_ARGUMENT_ERROR;
1124 break;
1125 }
1126 }
1127 } else {
1128 /* Special handling for POSIX variant, need to remember that we had it and then */
1129 /* treat it like an extension later. */
1130 *hadPosix = TRUE;
1131 }
1132 } else if (strict) {
1133 *status = U_ILLEGAL_ARGUMENT_ERROR;
1134 break;
1135 }
1136 }
1137 /* reset variant starting position */
1138 pVar = NULL;
1139 } else if (pVar == NULL) {
1140 pVar = p;
1141 }
1142 p++;
1143 }
1144
1145 if (U_SUCCESS(*status)) {
1146 if (varFirst != NULL) {
1147 int32_t varLen;
1148
1149 /* write out sorted/validated/normalized variants to the target */
1150 var = varFirst;
1151 while (var != NULL) {
1152 if (reslen < capacity) {
1153 *(appendAt + reslen) = SEP;
1154 }
1155 reslen++;
1156 varLen = (int32_t)uprv_strlen(var->variant);
1157 if (reslen < capacity) {
1158 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1159 }
1160 reslen += varLen;
1161 var = var->next;
1162 }
1163 }
1164 }
1165
1166 /* clean up */
1167 var = varFirst;
1168 while (var != NULL) {
1169 VariantListEntry *tmpVar = var->next;
1170 uprv_free(var);
1171 var = tmpVar;
1172 }
1173
1174 if (U_FAILURE(*status)) {
1175 return 0;
1176 }
1177 }
1178
1179 u_terminateChars(appendAt, capacity, reslen, status);
1180 return reslen;
1181 }
1182
1183 static int32_t
1184 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1185 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1186 UEnumeration *keywordEnum = NULL;
1187 int32_t reslen = 0;
1188
1189 keywordEnum = uloc_openKeywords(localeID, status);
1190 if (U_FAILURE(*status) && !hadPosix) {
1191 uenum_close(keywordEnum);
1192 return 0;
1193 }
1194 if (keywordEnum != NULL || hadPosix) {
1195 /* reorder extensions */
1196 int32_t len;
1197 const char *key;
1198 ExtensionListEntry *firstExt = NULL;
1199 ExtensionListEntry *ext;
1200 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1201 char *pExtBuf = extBuf;
1202 int32_t extBufCapacity = sizeof(extBuf);
1203 const char *bcpKey, *bcpValue;
1204 UErrorCode tmpStatus = U_ZERO_ERROR;
1205 int32_t keylen;
1206 UBool isLDMLKeyword;
1207
1208 while (TRUE) {
1209 key = uenum_next(keywordEnum, NULL, status);
1210 if (key == NULL) {
1211 break;
1212 }
1213 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1214 if (U_FAILURE(tmpStatus)) {
1215 if (strict) {
1216 *status = U_ILLEGAL_ARGUMENT_ERROR;
1217 break;
1218 }
1219 /* ignore this keyword */
1220 tmpStatus = U_ZERO_ERROR;
1221 continue;
1222 }
1223
1224 keylen = (int32_t)uprv_strlen(key);
1225 isLDMLKeyword = (keylen > 1);
1226
1227 if (isLDMLKeyword) {
1228 int32_t modKeyLen;
1229
1230 /* transform key and value to bcp47 style */
1231 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1232 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1233 if (strict) {
1234 *status = U_ILLEGAL_ARGUMENT_ERROR;
1235 break;
1236 }
1237 tmpStatus = U_ZERO_ERROR;
1238 continue;
1239 }
1240
1241 bcpKey = pExtBuf;
1242 pExtBuf += (modKeyLen + 1);
1243 extBufCapacity -= (modKeyLen + 1);
1244
1245 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1246 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1247 if (strict) {
1248 *status = U_ILLEGAL_ARGUMENT_ERROR;
1249 break;
1250 }
1251 tmpStatus = U_ZERO_ERROR;
1252 continue;
1253 }
1254 bcpValue = pExtBuf;
1255 pExtBuf += (len + 1);
1256 extBufCapacity -= (len + 1);
1257 } else {
1258 if (*key == PRIVATEUSE) {
1259 if (!_isPrivateuseValueSubtags(buf, len)) {
1260 if (strict) {
1261 *status = U_ILLEGAL_ARGUMENT_ERROR;
1262 break;
1263 }
1264 continue;
1265 }
1266 } else {
1267 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1268 if (strict) {
1269 *status = U_ILLEGAL_ARGUMENT_ERROR;
1270 break;
1271 }
1272 continue;
1273 }
1274 }
1275 bcpKey = key;
1276 if ((len + 1) < extBufCapacity) {
1277 uprv_memcpy(pExtBuf, buf, len);
1278 bcpValue = pExtBuf;
1279
1280 pExtBuf += len;
1281
1282 *pExtBuf = 0;
1283 pExtBuf++;
1284
1285 extBufCapacity -= (len + 1);
1286 } else {
1287 *status = U_ILLEGAL_ARGUMENT_ERROR;
1288 break;
1289 }
1290 }
1291
1292 /* create ExtensionListEntry */
1293 ext = uprv_malloc(sizeof(ExtensionListEntry));
1294 if (ext == NULL) {
1295 *status = U_MEMORY_ALLOCATION_ERROR;
1296 break;
1297 }
1298 ext->key = bcpKey;
1299 ext->value = bcpValue;
1300
1301 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1302 uprv_free(ext);
1303 if (strict) {
1304 *status = U_ILLEGAL_ARGUMENT_ERROR;
1305 break;
1306 }
1307 }
1308 }
1309
1310 /* Special handling for POSIX variant - add the keywords for POSIX */
1311 if (hadPosix) {
1312 /* create ExtensionListEntry for POSIX */
1313 ext = uprv_malloc(sizeof(ExtensionListEntry));
1314 if (ext == NULL) {
1315 *status = U_MEMORY_ALLOCATION_ERROR;
1316 }
1317 ext->key = POSIX_KEY;
1318 ext->value = POSIX_VALUE;
1319
1320 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1321 uprv_free(ext);
1322 }
1323 }
1324
1325 if (U_SUCCESS(*status) && (firstExt != NULL)) {
1326 UBool startLDMLExtension = FALSE;
1327
1328 /* write out the sorted BCP47 extensions and private use */
1329 ext = firstExt;
1330 while (ext != NULL) {
1331 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
1332 /* write LDML singleton extension */
1333 if (reslen < capacity) {
1334 *(appendAt + reslen) = SEP;
1335 }
1336 reslen++;
1337 if (reslen < capacity) {
1338 *(appendAt + reslen) = LDMLEXT;
1339 }
1340 reslen++;
1341 startLDMLExtension = TRUE;
1342 }
1343
1344 if (reslen < capacity) {
1345 *(appendAt + reslen) = SEP;
1346 }
1347 reslen++;
1348 len = (int32_t)uprv_strlen(ext->key);
1349 if (reslen < capacity) {
1350 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1351 }
1352 reslen += len;
1353 if (reslen < capacity) {
1354 *(appendAt + reslen) = SEP;
1355 }
1356 reslen++;
1357 len = (int32_t)uprv_strlen(ext->value);
1358 if (reslen < capacity) {
1359 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1360 }
1361 reslen += len;
1362
1363 ext = ext->next;
1364 }
1365 }
1366 /* clean up */
1367 ext = firstExt;
1368 while (ext != NULL) {
1369 ExtensionListEntry *tmpExt = ext->next;
1370 uprv_free(ext);
1371 ext = tmpExt;
1372 }
1373
1374 uenum_close(keywordEnum);
1375
1376 if (U_FAILURE(*status)) {
1377 return 0;
1378 }
1379 }
1380
1381 return u_terminateChars(appendAt, capacity, reslen, status);
1382 }
1383
1384 /**
1385 * Append keywords parsed from LDML extension value
1386 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1387 * Note: char* buf is used for storing keywords
1388 */
1389 static void
1390 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1391 const char *p, *pNext, *pSep;
1392 const char *pBcpKey, *pBcpType;
1393 const char *pKey, *pType;
1394 int32_t bcpKeyLen = 0, bcpTypeLen;
1395 ExtensionListEntry *kwd, *nextKwd;
1396 ExtensionListEntry *kwdFirst = NULL;
1397 int32_t bufIdx = 0;
1398 int32_t len;
1399
1400 pNext = ldmlext;
1401 pBcpKey = pBcpType = NULL;
1402 while (pNext) {
1403 p = pSep = pNext;
1404
1405 /* locate next separator char */
1406 while (*pSep) {
1407 if (*pSep == SEP) {
1408 break;
1409 }
1410 pSep++;
1411 }
1412 if (*pSep == 0) {
1413 /* last subtag */
1414 pNext = NULL;
1415 } else {
1416 pNext = pSep + 1;
1417 }
1418
1419 if (pBcpKey == NULL) {
1420 pBcpKey = p;
1421 bcpKeyLen = (int32_t)(pSep - p);
1422 } else {
1423 pBcpType = p;
1424 bcpTypeLen = (int32_t)(pSep - p);
1425
1426 /* BCP key to locale key */
1427 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1428 if (U_FAILURE(*status)) {
1429 goto cleanup;
1430 }
1431 pKey = buf + bufIdx;
1432 bufIdx += len;
1433 *(buf + bufIdx) = 0;
1434 bufIdx++;
1435
1436 /* BCP type to locale type */
1437 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1438 if (U_FAILURE(*status)) {
1439 goto cleanup;
1440 }
1441 pType = buf + bufIdx;
1442 bufIdx += len;
1443 *(buf + bufIdx) = 0;
1444 bufIdx++;
1445
1446 /* Special handling for u-va-posix, since we want to treat this as a variant, not */
1447 /* as a keyword. */
1448
1449 if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
1450 *posixVariant = TRUE;
1451 } else {
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1454 if (kwd == NULL) {
1455 *status = U_MEMORY_ALLOCATION_ERROR;
1456 goto cleanup;
1457 }
1458
1459 kwd->key = pKey;
1460 kwd->value = pType;
1461
1462 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1463 *status = U_ILLEGAL_ARGUMENT_ERROR;
1464 uprv_free(kwd);
1465 goto cleanup;
1466 }
1467 }
1468
1469 /* for next pair */
1470 pBcpKey = NULL;
1471 pBcpType = NULL;
1472 }
1473 }
1474
1475 if (pBcpKey != NULL) {
1476 *status = U_ILLEGAL_ARGUMENT_ERROR;
1477 goto cleanup;
1478 }
1479
1480 kwd = kwdFirst;
1481 while (kwd != NULL) {
1482 nextKwd = kwd->next;
1483 _addExtensionToList(appendTo, kwd, FALSE);
1484 kwd = nextKwd;
1485 }
1486
1487 return;
1488
1489 cleanup:
1490 kwd = kwdFirst;
1491 while (kwd != NULL) {
1492 nextKwd = kwd->next;
1493 uprv_free(kwd);
1494 kwd = nextKwd;
1495 }
1496 }
1497
1498
1499 static int32_t
1500 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1501 int32_t reslen = 0;
1502 int32_t i, n;
1503 int32_t len;
1504 ExtensionListEntry *kwdFirst = NULL;
1505 ExtensionListEntry *kwd;
1506 const char *key, *type;
1507 char kwdBuf[ULOC_KEYWORDS_CAPACITY];
1508 UBool posixVariant = FALSE;
1509
1510 if (U_FAILURE(*status)) {
1511 return 0;
1512 }
1513
1514 n = ultag_getExtensionsSize(langtag);
1515
1516 /* resolve locale keywords and reordering keys */
1517 for (i = 0; i < n; i++) {
1518 key = ultag_getExtensionKey(langtag, i);
1519 type = ultag_getExtensionValue(langtag, i);
1520 if (*key == LDMLEXT) {
1521 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), &posixVariant, status);
1522 if (U_FAILURE(*status)) {
1523 break;
1524 }
1525 } else {
1526 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1527 if (kwd == NULL) {
1528 *status = U_MEMORY_ALLOCATION_ERROR;
1529 break;
1530 }
1531 kwd->key = key;
1532 kwd->value = type;
1533 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1534 uprv_free(kwd);
1535 *status = U_ILLEGAL_ARGUMENT_ERROR;
1536 break;
1537 }
1538 }
1539 }
1540
1541 if (U_SUCCESS(*status)) {
1542 type = ultag_getPrivateUse(langtag);
1543 if ((int32_t)uprv_strlen(type) > 0) {
1544 /* add private use as a keyword */
1545 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1546 if (kwd == NULL) {
1547 *status = U_MEMORY_ALLOCATION_ERROR;
1548 } else {
1549 kwd->key = PRIVATEUSE_KEY;
1550 kwd->value = type;
1551 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1552 uprv_free(kwd);
1553 *status = U_ILLEGAL_ARGUMENT_ERROR;
1554 }
1555 }
1556 }
1557 }
1558
1559 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1560
1561 if (U_SUCCESS(*status) && posixVariant) {
1562 len = (int32_t) uprv_strlen(_POSIX);
1563 if (reslen < capacity) {
1564 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1565 }
1566 reslen += len;
1567 }
1568
1569 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1570 /* write out the sorted keywords */
1571 kwd = kwdFirst;
1572 while (kwd != NULL) {
1573 if (reslen < capacity) {
1574 if (kwd == kwdFirst) {
1575 /* '@' */
1576 *(appendAt + reslen) = LOCALE_EXT_SEP;
1577 } else {
1578 /* ';' */
1579 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1580 }
1581 }
1582 reslen++;
1583
1584 /* key */
1585 len = (int32_t)uprv_strlen(kwd->key);
1586 if (reslen < capacity) {
1587 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1588 }
1589 reslen += len;
1590
1591 /* '=' */
1592 if (reslen < capacity) {
1593 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1594 }
1595 reslen++;
1596
1597 /* type */
1598 len = (int32_t)uprv_strlen(kwd->value);
1599 if (reslen < capacity) {
1600 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1601 }
1602 reslen += len;
1603
1604 kwd = kwd->next;
1605 }
1606 }
1607
1608 /* clean up */
1609 kwd = kwdFirst;
1610 while (kwd != NULL) {
1611 ExtensionListEntry *tmpKwd = kwd->next;
1612 uprv_free(kwd);
1613 kwd = tmpKwd;
1614 }
1615
1616 if (U_FAILURE(*status)) {
1617 return 0;
1618 }
1619
1620 return u_terminateChars(appendAt, capacity, reslen, status);
1621 }
1622
1623 /*
1624 * -------------------------------------------------
1625 *
1626 * ultag_ functions
1627 *
1628 * -------------------------------------------------
1629 */
1630
/*
 * Bit flags used by the parser.
 * ultag_parse() keeps a mask of these in its `next` variable; each set bit
 * names a kind of subtag that is acceptable at the current position.
 */
#define LANG 0x0001 /* language subtag */
#define EXTL 0x0002 /* extlang subtag */
#define SCRT 0x0004 /* script subtag */
#define REGN 0x0008 /* region subtag */
#define VART 0x0010 /* variant subtag */
#define EXTS 0x0020 /* extension singleton */
#define EXTV 0x0040 /* extension value subtag */
#define PRIV 0x0080 /* private use singleton */
1641 static ULanguageTag*
1642 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1643 ULanguageTag *t;
1644 char *tagBuf;
1645 int16_t next;
1646 char *pSubtag, *pNext, *pLastGoodPosition;
1647 int32_t subtagLen;
1648 int32_t extlangIdx;
1649 ExtensionListEntry *pExtension;
1650 char *pExtValueSubtag, *pExtValueSubtagEnd;
1651 int32_t i;
1652 UBool isLDMLExtension, reqLDMLType;
1653
1654 if (parsedLen != NULL) {
1655 *parsedLen = 0;
1656 }
1657
1658 if (U_FAILURE(*status)) {
1659 return NULL;
1660 }
1661
1662 if (tagLen < 0) {
1663 tagLen = (int32_t)uprv_strlen(tag);
1664 }
1665
1666 /* copy the entire string */
1667 tagBuf = (char*)uprv_malloc(tagLen + 1);
1668 if (tagBuf == NULL) {
1669 *status = U_MEMORY_ALLOCATION_ERROR;
1670 return NULL;
1671 }
1672 uprv_memcpy(tagBuf, tag, tagLen);
1673 *(tagBuf + tagLen) = 0;
1674
1675 /* create a ULanguageTag */
1676 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1677 _initializeULanguageTag(t);
1678 t->buf = tagBuf;
1679 if (t == NULL) {
1680 uprv_free(tagBuf);
1681 *status = U_MEMORY_ALLOCATION_ERROR;
1682 return NULL;
1683 }
1684
1685 if (tagLen < MINLEN) {
1686 /* the input tag is too short - return empty ULanguageTag */
1687 return t;
1688 }
1689
1690 /* check if the tag is grandfathered */
1691 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1692 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1693 /* a grandfathered tag is always longer than its preferred mapping */
1694 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1695 t->language = t->buf;
1696 if (parsedLen != NULL) {
1697 *parsedLen = tagLen;
1698 }
1699 return t;
1700 }
1701 }
1702
1703 /*
1704 * langtag = language
1705 * ["-" script]
1706 * ["-" region]
1707 * *("-" variant)
1708 * *("-" extension)
1709 * ["-" privateuse]
1710 */
1711
1712 next = LANG | PRIV;
1713 pNext = pLastGoodPosition = tagBuf;
1714 extlangIdx = 0;
1715 pExtension = NULL;
1716 pExtValueSubtag = NULL;
1717 pExtValueSubtagEnd = NULL;
1718 isLDMLExtension = FALSE;
1719 reqLDMLType = FALSE;
1720
1721 while (pNext) {
1722 char *pSep;
1723
1724 pSubtag = pNext;
1725
1726 /* locate next separator char */
1727 pSep = pSubtag;
1728 while (*pSep) {
1729 if (*pSep == SEP) {
1730 break;
1731 }
1732 pSep++;
1733 }
1734 if (*pSep == 0) {
1735 /* last subtag */
1736 pNext = NULL;
1737 } else {
1738 pNext = pSep + 1;
1739 }
1740 subtagLen = (int32_t)(pSep - pSubtag);
1741
1742 if (next & LANG) {
1743 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1744 *pSep = 0; /* terminate */
1745 t->language = T_CString_toLowerCase(pSubtag);
1746
1747 pLastGoodPosition = pSep;
1748 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1749 continue;
1750 }
1751 }
1752 if (next & EXTL) {
1753 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1754 *pSep = 0;
1755 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1756
1757 pLastGoodPosition = pSep;
1758 if (extlangIdx < 3) {
1759 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1760 } else {
1761 next = SCRT | REGN | VART | EXTS | PRIV;
1762 }
1763 continue;
1764 }
1765 }
1766 if (next & SCRT) {
1767 if (_isScriptSubtag(pSubtag, subtagLen)) {
1768 char *p = pSubtag;
1769
1770 *pSep = 0;
1771
1772 /* to title case */
1773 *p = uprv_toupper(*p);
1774 p++;
1775 for (; *p; p++) {
1776 *p = uprv_tolower(*p);
1777 }
1778
1779 t->script = pSubtag;
1780
1781 pLastGoodPosition = pSep;
1782 next = REGN | VART | EXTS | PRIV;
1783 continue;
1784 }
1785 }
1786 if (next & REGN) {
1787 if (_isRegionSubtag(pSubtag, subtagLen)) {
1788 *pSep = 0;
1789 t->region = T_CString_toUpperCase(pSubtag);
1790
1791 pLastGoodPosition = pSep;
1792 next = VART | EXTS | PRIV;
1793 continue;
1794 }
1795 }
1796 if (next & VART) {
1797 if (_isVariantSubtag(pSubtag, subtagLen)) {
1798 VariantListEntry *var;
1799 UBool isAdded;
1800
1801 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1802 if (var == NULL) {
1803 *status = U_MEMORY_ALLOCATION_ERROR;
1804 goto error;
1805 }
1806 *pSep = 0;
1807 var->variant = T_CString_toUpperCase(pSubtag);
1808 isAdded = _addVariantToList(&(t->variants), var);
1809 if (!isAdded) {
1810 /* duplicated variant entry */
1811 uprv_free(var);
1812 break;
1813 }
1814 pLastGoodPosition = pSep;
1815 next = VART | EXTS | PRIV;
1816 continue;
1817 }
1818 }
1819 if (next & EXTS) {
1820 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1821 if (pExtension != NULL) {
1822 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1823 /* the previous extension is incomplete */
1824 uprv_free(pExtension);
1825 pExtension = NULL;
1826 break;
1827 }
1828
1829 /* terminate the previous extension value */
1830 *pExtValueSubtagEnd = 0;
1831 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1832
1833 /* insert the extension to the list */
1834 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1835 pLastGoodPosition = pExtValueSubtagEnd;
1836 } else {
1837 /* stop parsing here */
1838 uprv_free(pExtension);
1839 pExtension = NULL;
1840 break;
1841 }
1842
1843 if (isLDMLExtension && reqLDMLType) {
1844 /* incomplete LDML extension key and type pair */
1845 pExtension = NULL;
1846 break;
1847 }
1848 }
1849
1850 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
1851
1852 /* create a new extension */
1853 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
1854 if (pExtension == NULL) {
1855 *status = U_MEMORY_ALLOCATION_ERROR;
1856 goto error;
1857 }
1858 *pSep = 0;
1859 pExtension->key = T_CString_toLowerCase(pSubtag);
1860 pExtension->value = NULL; /* will be set later */
1861
1862 /*
1863 * reset the start and the end location of extension value
1864 * subtags for this extension
1865 */
1866 pExtValueSubtag = NULL;
1867 pExtValueSubtagEnd = NULL;
1868
1869 next = EXTV;
1870 continue;
1871 }
1872 }
1873 if (next & EXTV) {
1874 if (_isExtensionSubtag(pSubtag, subtagLen)) {
1875 if (isLDMLExtension) {
1876 if (reqLDMLType) {
1877 /* already saw an LDML key */
1878 if (!_isLDMLType(pSubtag, subtagLen)) {
1879 /* stop parsing here and let the valid LDML extension key/type
1880 pairs processed by the code out of this while loop */
1881 break;
1882 }
1883 pExtValueSubtagEnd = pSep;
1884 reqLDMLType = FALSE;
1885 next = EXTS | EXTV | PRIV;
1886 } else {
1887 /* LDML key */
1888 if (!_isLDMLKey(pSubtag, subtagLen)) {
1889 /* stop parsing here and let the valid LDML extension key/type
1890 pairs processed by the code out of this while loop */
1891 break;
1892 }
1893 reqLDMLType = TRUE;
1894 next = EXTV;
1895 }
1896 } else {
1897 /* Mark the end of this subtag */
1898 pExtValueSubtagEnd = pSep;
1899 next = EXTS | EXTV | PRIV;
1900 }
1901
1902 if (pExtValueSubtag == NULL) {
1903 /* if the start postion of this extension's value is not yet,
1904 this one is the first value subtag */
1905 pExtValueSubtag = pSubtag;
1906 }
1907 continue;
1908 }
1909 }
1910 if (next & PRIV) {
1911 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
1912 char *pPrivuseVal;
1913
1914 if (pExtension != NULL) {
1915 /* Process the last extension */
1916 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1917 /* the previous extension is incomplete */
1918 uprv_free(pExtension);
1919 pExtension = NULL;
1920 break;
1921 } else {
1922 /* terminate the previous extension value */
1923 *pExtValueSubtagEnd = 0;
1924 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1925
1926 /* insert the extension to the list */
1927 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1928 pLastGoodPosition = pExtValueSubtagEnd;
1929 pExtension = NULL;
1930 } else {
1931 /* stop parsing here */
1932 uprv_free(pExtension);
1933 pExtension = NULL;
1934 break;
1935 }
1936 }
1937 }
1938
1939 /* The rest of part will be private use value subtags */
1940 if (pNext == NULL) {
1941 /* empty private use subtag */
1942 break;
1943 }
1944 /* back up the private use value start position */
1945 pPrivuseVal = pNext;
1946
1947 /* validate private use value subtags */
1948 while (pNext) {
1949 pSubtag = pNext;
1950 pSep = pSubtag;
1951 while (*pSep) {
1952 if (*pSep == SEP) {
1953 break;
1954 }
1955 pSep++;
1956 }
1957 if (*pSep == 0) {
1958 /* last subtag */
1959 pNext = NULL;
1960 } else {
1961 pNext = pSep + 1;
1962 }
1963 subtagLen = (int32_t)(pSep - pSubtag);
1964
1965 if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
1966 pLastGoodPosition = pSep;
1967 } else {
1968 break;
1969 }
1970 }
1971 if (pLastGoodPosition - pPrivuseVal > 0) {
1972 *pLastGoodPosition = 0;
1973 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
1974 }
1975 /* No more subtags, exiting the parse loop */
1976 break;
1977 }
1978 break;
1979 }
1980 /* If we fell through here, it means this subtag is illegal - quit parsing */
1981 break;
1982 }
1983
1984 if (pExtension != NULL) {
1985 /* Process the last extension */
1986 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1987 /* the previous extension is incomplete */
1988 uprv_free(pExtension);
1989 } else {
1990 /* terminate the previous extension value */
1991 *pExtValueSubtagEnd = 0;
1992 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1993 /* insert the extension to the list */
1994 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1995 pLastGoodPosition = pExtValueSubtagEnd;
1996 } else {
1997 uprv_free(pExtension);
1998 }
1999 }
2000 }
2001
2002 if (parsedLen != NULL) {
2003 *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
2004 }
2005
2006 return t;
2007
2008 error:
2009 uprv_free(t);
2010 return NULL;
2011 }
2012
2013 static void
2014 ultag_close(ULanguageTag* langtag) {
2015
2016 if (langtag == NULL) {
2017 return;
2018 }
2019
2020 uprv_free(langtag->buf);
2021
2022 if (langtag->variants) {
2023 VariantListEntry *curVar = langtag->variants;
2024 while (curVar) {
2025 VariantListEntry *nextVar = curVar->next;
2026 uprv_free(curVar);
2027 curVar = nextVar;
2028 }
2029 }
2030
2031 if (langtag->extensions) {
2032 ExtensionListEntry *curExt = langtag->extensions;
2033 while (curExt) {
2034 ExtensionListEntry *nextExt = curExt->next;
2035 uprv_free(curExt);
2036 curExt = nextExt;
2037 }
2038 }
2039
2040 uprv_free(langtag);
2041 }
2042
2043 static const char*
2044 ultag_getLanguage(const ULanguageTag* langtag) {
2045 return langtag->language;
2046 }
2047
#if 0
/*
 * Currently compiled out.
 * Looks the tag's language up in the DEPRECATEDLANGS pair table and returns
 * the second element of the matching pair, or the language itself when no
 * pair matches.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char *language = langtag->language;
    int32_t pairIdx = 0;

    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return language;
}
#endif
2060
2061 static const char*
2062 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2063 if (idx >= 0 && idx < MAXEXTLANG) {
2064 return langtag->extlang[idx];
2065 }
2066 return NULL;
2067 }
2068
2069 static int32_t
2070 ultag_getExtlangSize(const ULanguageTag* langtag) {
2071 int32_t size = 0;
2072 int32_t i;
2073 for (i = 0; i < MAXEXTLANG; i++) {
2074 if (langtag->extlang[i]) {
2075 size++;
2076 }
2077 }
2078 return size;
2079 }
2080
2081 static const char*
2082 ultag_getScript(const ULanguageTag* langtag) {
2083 return langtag->script;
2084 }
2085
2086 static const char*
2087 ultag_getRegion(const ULanguageTag* langtag) {
2088 return langtag->region;
2089 }
2090
2091 static const char*
2092 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2093 const char *var = NULL;
2094 VariantListEntry *cur = langtag->variants;
2095 int32_t i = 0;
2096 while (cur) {
2097 if (i == idx) {
2098 var = cur->variant;
2099 break;
2100 }
2101 cur = cur->next;
2102 i++;
2103 }
2104 return var;
2105 }
2106
2107 static int32_t
2108 ultag_getVariantsSize(const ULanguageTag* langtag) {
2109 int32_t size = 0;
2110 VariantListEntry *cur = langtag->variants;
2111 while (TRUE) {
2112 if (cur == NULL) {
2113 break;
2114 }
2115 size++;
2116 cur = cur->next;
2117 }
2118 return size;
2119 }
2120
2121 static const char*
2122 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2123 const char *key = NULL;
2124 ExtensionListEntry *cur = langtag->extensions;
2125 int32_t i = 0;
2126 while (cur) {
2127 if (i == idx) {
2128 key = cur->key;
2129 break;
2130 }
2131 cur = cur->next;
2132 i++;
2133 }
2134 return key;
2135 }
2136
2137 static const char*
2138 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2139 const char *val = NULL;
2140 ExtensionListEntry *cur = langtag->extensions;
2141 int32_t i = 0;
2142 while (cur) {
2143 if (i == idx) {
2144 val = cur->value;
2145 break;
2146 }
2147 cur = cur->next;
2148 i++;
2149 }
2150 return val;
2151 }
2152
2153 static int32_t
2154 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2155 int32_t size = 0;
2156 ExtensionListEntry *cur = langtag->extensions;
2157 while (TRUE) {
2158 if (cur == NULL) {
2159 break;
2160 }
2161 size++;
2162 cur = cur->next;
2163 }
2164 return size;
2165 }
2166
2167 static const char*
2168 ultag_getPrivateUse(const ULanguageTag* langtag) {
2169 return langtag->privateuse;
2170 }
2171
#if 0
/* Currently compiled out. Returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;

    return grandfathered;
}
#endif
2178
2179
2180 /*
2181 * -------------------------------------------------
2182 *
2183 * Locale/BCP47 conversion APIs, exposed as uloc_*
2184 *
2185 * -------------------------------------------------
2186 */
2187 U_DRAFT int32_t U_EXPORT2
2188 uloc_toLanguageTag(const char* localeID,
2189 char* langtag,
2190 int32_t langtagCapacity,
2191 UBool strict,
2192 UErrorCode* status) {
2193 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2194 char canonical[256];
2195 int32_t reslen = 0;
2196 UErrorCode tmpStatus = U_ZERO_ERROR;
2197 UBool hadPosix = FALSE;
2198 const char* pKeywordStart;
2199
2200 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2201 canonical[0] = 0;
2202 if (uprv_strlen(localeID) > 0) {
2203 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2204 if (tmpStatus != U_ZERO_ERROR) {
2205 *status = U_ILLEGAL_ARGUMENT_ERROR;
2206 return 0;
2207 }
2208 }
2209
2210 /* For handling special case - private use only tag */
2211 pKeywordStart = locale_getKeywordsStart(canonical);
2212 if (pKeywordStart == canonical) {
2213 UEnumeration *kwdEnum;
2214 int kwdCnt = 0;
2215 UBool done = FALSE;
2216
2217 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2218 if (kwdEnum != NULL) {
2219 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2220 if (kwdCnt == 1) {
2221 const char *key;
2222 int32_t len = 0;
2223
2224 key = uenum_next(kwdEnum, &len, &tmpStatus);
2225 if (len == 1 && *key == PRIVATEUSE) {
2226 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2227 buf[0] = PRIVATEUSE;
2228 buf[1] = SEP;
2229 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2230 if (U_SUCCESS(tmpStatus)) {
2231 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2232 /* return private use only tag */
2233 reslen = len + 2;
2234 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2235 u_terminateChars(langtag, langtagCapacity, reslen, status);
2236 done = TRUE;
2237 } else if (strict) {
2238 *status = U_ILLEGAL_ARGUMENT_ERROR;
2239 done = TRUE;
2240 }
2241 /* if not strict mode, then "und" will be returned */
2242 } else {
2243 *status = U_ILLEGAL_ARGUMENT_ERROR;
2244 done = TRUE;
2245 }
2246 }
2247 }
2248 uenum_close(kwdEnum);
2249 if (done) {
2250 return reslen;
2251 }
2252 }
2253 }
2254
2255 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2256 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2257 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2258 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2259 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2260
2261 return reslen;
2262 }
2263
2264
2265 U_DRAFT int32_t U_EXPORT2
2266 uloc_forLanguageTag(const char* langtag,
2267 char* localeID,
2268 int32_t localeIDCapacity,
2269 int32_t* parsedLength,
2270 UErrorCode* status) {
2271 ULanguageTag *lt;
2272 int32_t reslen = 0;
2273 const char *subtag, *p;
2274 int32_t len;
2275 int32_t i, n;
2276 UBool noRegion = TRUE;
2277
2278 lt = ultag_parse(langtag, -1, parsedLength, status);
2279 if (U_FAILURE(*status)) {
2280 return 0;
2281 }
2282
2283 /* language */
2284 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2285 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2286 len = (int32_t)uprv_strlen(subtag);
2287 if (len > 0) {
2288 if (reslen < localeIDCapacity) {
2289 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2290 }
2291 reslen += len;
2292 }
2293 }
2294
2295 /* script */
2296 subtag = ultag_getScript(lt);
2297 len = (int32_t)uprv_strlen(subtag);
2298 if (len > 0) {
2299 if (reslen < localeIDCapacity) {
2300 *(localeID + reslen) = LOCALE_SEP;
2301 }
2302 reslen++;
2303
2304 /* write out the script in title case */
2305 p = subtag;
2306 while (*p) {
2307 if (reslen < localeIDCapacity) {
2308 if (p == subtag) {
2309 *(localeID + reslen) = uprv_toupper(*p);
2310 } else {
2311 *(localeID + reslen) = *p;
2312 }
2313 }
2314 reslen++;
2315 p++;
2316 }
2317 }
2318
2319 /* region */
2320 subtag = ultag_getRegion(lt);
2321 len = (int32_t)uprv_strlen(subtag);
2322 if (len > 0) {
2323 if (reslen < localeIDCapacity) {
2324 *(localeID + reslen) = LOCALE_SEP;
2325 }
2326 reslen++;
2327 /* write out the retion in upper case */
2328 p = subtag;
2329 while (*p) {
2330 if (reslen < localeIDCapacity) {
2331 *(localeID + reslen) = uprv_toupper(*p);
2332 }
2333 reslen++;
2334 p++;
2335 }
2336 noRegion = FALSE;
2337 }
2338
2339 /* variants */
2340 n = ultag_getVariantsSize(lt);
2341 if (n > 0) {
2342 if (noRegion) {
2343 if (reslen < localeIDCapacity) {
2344 *(localeID + reslen) = LOCALE_SEP;
2345 }
2346 reslen++;
2347 }
2348
2349 for (i = 0; i < n; i++) {
2350 subtag = ultag_getVariant(lt, i);
2351 if (reslen < localeIDCapacity) {
2352 *(localeID + reslen) = LOCALE_SEP;
2353 }
2354 reslen++;
2355 /* write out the variant in upper case */
2356 p = subtag;
2357 while (*p) {
2358 if (reslen < localeIDCapacity) {
2359 *(localeID + reslen) = uprv_toupper(*p);
2360 }
2361 reslen++;
2362 p++;
2363 }
2364 }
2365 }
2366
2367 /* keywords */
2368 n = ultag_getExtensionsSize(lt);
2369 subtag = ultag_getPrivateUse(lt);
2370 if (n > 0 || uprv_strlen(subtag) > 0) {
2371 if (reslen == 0 && n > 0) {
2372 /* need a language */
2373 if (reslen < localeIDCapacity) {
2374 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2375 }
2376 reslen += LANG_UND_LEN;
2377 }
2378 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2379 reslen += len;
2380 }
2381
2382 ultag_close(lt);
2383 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2384 }
2385
2386