2 **********************************************************************
3 * Copyright (C) 2009-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
19 /* struct holding a single variant */
20 typedef struct VariantListEntry
{
22 struct VariantListEntry
*next
;
25 /* struct holding a single extension */
26 typedef struct ExtensionListEntry
{
29 struct ExtensionListEntry
*next
;
33 typedef struct ULanguageTag
{
34 char *buf
; /* holding parsed subtags */
36 const char *extlang
[MAXEXTLANG
];
39 VariantListEntry
*variants
;
40 ExtensionListEntry
*extensions
;
41 const char *privateuse
;
42 const char *grandfathered
;
47 #define PRIVATEUSE 'x'
50 #define LOCALE_SEP '_'
51 #define LOCALE_EXT_SEP '@'
52 #define LOCALE_KEYWORD_SEP ';'
53 #define LOCALE_KEY_TYPE_SEP '='
55 #define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
56 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
58 static const char* EMPTY
= "";
59 static const char* LANG_UND
= "und";
60 static const char* PRIVATEUSE_KEY
= "x";
61 static const char* _POSIX
= "_POSIX";
62 static const char* POSIX_KEY
= "va";
63 static const char* POSIX_VALUE
= "posix";
65 #define LANG_UND_LEN 3
67 static const char* GRANDFATHERED
[] = {
68 /* grandfathered preferred */
98 static const char* DEPRECATEDLANGS
[] = {
107 * -------------------------------------------------
109 * These ultag_ functions may be exposed as APIs later
111 * -------------------------------------------------
115 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
);
118 ultag_close(ULanguageTag
* langtag
);
121 ultag_getLanguage(const ULanguageTag
* langtag
);
125 ultag_getJDKLanguage(const ULanguageTag
* langtag
);
129 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
);
132 ultag_getExtlangSize(const ULanguageTag
* langtag
);
135 ultag_getScript(const ULanguageTag
* langtag
);
138 ultag_getRegion(const ULanguageTag
* langtag
);
141 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
);
144 ultag_getVariantsSize(const ULanguageTag
* langtag
);
147 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
);
150 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
);
153 ultag_getExtensionsSize(const ULanguageTag
* langtag
);
156 ultag_getPrivateUse(const ULanguageTag
* langtag
);
160 ultag_getGrandfathered(const ULanguageTag
* langtag
);
164 * -------------------------------------------------
166 * Language subtag syntax validation functions
168 * -------------------------------------------------
172 _isAlphaString(const char* s
, int32_t len
) {
174 for (i
= 0; i
< len
; i
++) {
175 if (!ISALPHA(*(s
+ i
))) {
183 _isNumericString(const char* s
, int32_t len
) {
185 for (i
= 0; i
< len
; i
++) {
186 if (!ISNUMERIC(*(s
+ i
))) {
194 _isAlphaNumericString(const char* s
, int32_t len
) {
196 for (i
= 0; i
< len
; i
++) {
197 if (!ISALPHA(*(s
+ i
)) && !ISNUMERIC(*(s
+ i
))) {
205 _isLanguageSubtag(const char* s
, int32_t len
) {
207 * language = 2*3ALPHA ; shortest ISO 639 code
208 * ["-" extlang] ; sometimes followed by
209 * ; extended language subtags
210 * / 4ALPHA ; or reserved for future use
211 * / 5*8ALPHA ; or registered language subtag
214 len
= (int32_t)uprv_strlen(s
);
216 if (len
>= 2 && len
<= 8 && _isAlphaString(s
, len
)) {
223 _isExtlangSubtag(const char* s
, int32_t len
) {
225 * extlang = 3ALPHA ; selected ISO 639 codes
226 * *2("-" 3ALPHA) ; permanently reserved
229 len
= (int32_t)uprv_strlen(s
);
231 if (len
== 3 && _isAlphaString(s
, len
)) {
238 _isScriptSubtag(const char* s
, int32_t len
) {
240 * script = 4ALPHA ; ISO 15924 code
243 len
= (int32_t)uprv_strlen(s
);
245 if (len
== 4 && _isAlphaString(s
, len
)) {
252 _isRegionSubtag(const char* s
, int32_t len
) {
254 * region = 2ALPHA ; ISO 3166-1 code
255 * / 3DIGIT ; UN M.49 code
258 len
= (int32_t)uprv_strlen(s
);
260 if (len
== 2 && _isAlphaString(s
, len
)) {
263 if (len
== 3 && _isNumericString(s
, len
)) {
270 _isVariantSubtag(const char* s
, int32_t len
) {
272 * variant = 5*8alphanum ; registered variants
273 * / (DIGIT 3alphanum)
276 len
= (int32_t)uprv_strlen(s
);
278 if (len
>= 5 && len
<= 8 && _isAlphaString(s
, len
)) {
281 if (len
== 4 && ISNUMERIC(*s
) && _isAlphaNumericString(s
+ 1, 3)) {
288 _isExtensionSingleton(const char* s
, int32_t len
) {
290 * extension = singleton 1*("-" (2*8alphanum))
293 len
= (int32_t)uprv_strlen(s
);
295 if (len
== 1 && ISALPHA(*s
) && (uprv_tolower(*s
) != PRIVATEUSE
)) {
302 _isExtensionSubtag(const char* s
, int32_t len
) {
304 * extension = singleton 1*("-" (2*8alphanum))
307 len
= (int32_t)uprv_strlen(s
);
309 if (len
>= 2 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
316 _isExtensionSubtags(const char* s
, int32_t len
) {
318 const char *pSubtag
= NULL
;
321 len
= (int32_t)uprv_strlen(s
);
324 while ((p
- s
) < len
) {
326 if (pSubtag
== NULL
) {
329 if (!_isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
333 } else if (pSubtag
== NULL
) {
338 if (pSubtag
== NULL
) {
341 return _isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
345 _isPrivateuseValueSubtag(const char* s
, int32_t len
) {
347 * privateuse = "x" 1*("-" (1*8alphanum))
350 len
= (int32_t)uprv_strlen(s
);
352 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
359 _isPrivateuseValueSubtags(const char* s
, int32_t len
) {
361 const char *pSubtag
= NULL
;
364 len
= (int32_t)uprv_strlen(s
);
367 while ((p
- s
) < len
) {
369 if (pSubtag
== NULL
) {
372 if (!_isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
376 } else if (pSubtag
== NULL
) {
381 if (pSubtag
== NULL
) {
384 return _isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
388 _isLDMLKey(const char* s
, int32_t len
) {
390 len
= (int32_t)uprv_strlen(s
);
392 if (len
== 2 && _isAlphaNumericString(s
, len
)) {
399 _isLDMLType(const char* s
, int32_t len
) {
401 len
= (int32_t)uprv_strlen(s
);
403 if (len
>= 3 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
410 * -------------------------------------------------
414 * -------------------------------------------------
418 _addVariantToList(VariantListEntry
**first
, VariantListEntry
*var
) {
421 if (*first
== NULL
) {
425 VariantListEntry
*prev
, *cur
;
428 /* reorder variants in alphabetical order */
437 cmp
= uprv_compareInvCharsAsAscii(var
->variant
, cur
->variant
);
448 /* duplicated variant */
462 _addExtensionToList(ExtensionListEntry
**first
, ExtensionListEntry
*ext
, UBool localeToBCP
) {
465 if (*first
== NULL
) {
469 ExtensionListEntry
*prev
, *cur
;
472 /* reorder extensions in alphabetical order */
482 /* special handling for locale to bcp conversion */
485 len
= (int32_t)uprv_strlen(ext
->key
);
486 curlen
= (int32_t)uprv_strlen(cur
->key
);
488 if (len
== 1 && curlen
== 1) {
489 if (*(ext
->key
) == *(cur
->key
)) {
491 } else if (*(ext
->key
) == PRIVATEUSE
) {
493 } else if (*(cur
->key
) == PRIVATEUSE
) {
496 cmp
= *(ext
->key
) - *(cur
->key
);
498 } else if (len
== 1) {
499 cmp
= *(ext
->key
) - LDMLEXT
;
500 } else if (curlen
== 1) {
501 cmp
= LDMLEXT
- *(cur
->key
);
503 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
506 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
518 /* duplicated extension key */
531 _initializeULanguageTag(ULanguageTag
* langtag
) {
536 langtag
->language
= EMPTY
;
537 for (i
= 0; i
< MAXEXTLANG
; i
++) {
538 langtag
->extlang
[i
] = NULL
;
541 langtag
->script
= EMPTY
;
542 langtag
->region
= EMPTY
;
544 langtag
->variants
= NULL
;
545 langtag
->extensions
= NULL
;
547 langtag
->grandfathered
= EMPTY
;
548 langtag
->privateuse
= EMPTY
;
551 #define KEYTYPEDATA "keyTypeData"
552 #define KEYMAP "keyMap"
553 #define TYPEMAP "typeMap"
554 #define TYPEALIAS "typeAlias"
555 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
556 #define MAX_LDML_KEY_LEN 22
557 #define MAX_LDML_TYPE_LEN 32
560 _ldmlKeyToBCP47(const char* key
, int32_t keyLen
,
561 char* bcpKey
, int32_t bcpKeyCapacity
,
562 UErrorCode
*status
) {
564 char keyBuf
[MAX_LDML_KEY_LEN
];
565 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
566 int32_t resultLen
= 0;
568 UErrorCode tmpStatus
= U_ZERO_ERROR
;
569 const UChar
*uBcpKey
;
573 keyLen
= (int32_t)uprv_strlen(key
);
576 if (keyLen
>= sizeof(keyBuf
)) {
577 /* no known valid LDML key exceeding 21 */
578 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
582 uprv_memcpy(keyBuf
, key
, keyLen
);
586 for (i
= 0; i
< keyLen
; i
++) {
587 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
590 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
591 ures_getByKey(rb
, KEYMAP
, rb
, status
);
593 if (U_FAILURE(*status
)) {
598 uBcpKey
= ures_getStringByKey(rb
, keyBuf
, &bcpKeyLen
, &tmpStatus
);
599 if (U_SUCCESS(tmpStatus
)) {
600 u_UCharsToChars(uBcpKey
, bcpKeyBuf
, bcpKeyLen
);
601 bcpKeyBuf
[bcpKeyLen
] = 0;
602 resultLen
= bcpKeyLen
;
604 if (_isLDMLKey(key
, keyLen
)) {
605 uprv_memcpy(bcpKeyBuf
, key
, keyLen
);
606 bcpKeyBuf
[keyLen
] = 0;
609 /* mapping not available */
610 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
615 if (U_FAILURE(*status
)) {
619 uprv_memcpy(bcpKey
, bcpKeyBuf
, uprv_min(resultLen
, bcpKeyCapacity
));
620 return u_terminateChars(bcpKey
, bcpKeyCapacity
, resultLen
, status
);
624 _bcp47ToLDMLKey(const char* bcpKey
, int32_t bcpKeyLen
,
625 char* key
, int32_t keyCapacity
,
626 UErrorCode
*status
) {
628 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
629 int32_t resultLen
= 0;
631 const char *resKey
= NULL
;
632 UResourceBundle
*mapData
;
635 bcpKeyLen
= (int32_t)uprv_strlen(bcpKey
);
638 if (bcpKeyLen
>= sizeof(bcpKeyBuf
)) {
639 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
643 uprv_memcpy(bcpKeyBuf
, bcpKey
, bcpKeyLen
);
644 bcpKeyBuf
[bcpKeyLen
] = 0;
647 for (i
= 0; i
< bcpKeyLen
; i
++) {
648 bcpKeyBuf
[i
] = uprv_tolower(bcpKeyBuf
[i
]);
651 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
652 ures_getByKey(rb
, KEYMAP
, rb
, status
);
653 if (U_FAILURE(*status
)) {
658 mapData
= ures_getNextResource(rb
, NULL
, status
);
659 while (U_SUCCESS(*status
)) {
660 const UChar
*uBcpKey
;
661 char tmpBcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
662 int32_t tmpBcpKeyLen
;
664 uBcpKey
= ures_getString(mapData
, &tmpBcpKeyLen
, status
);
665 if (U_FAILURE(*status
)) {
668 u_UCharsToChars(uBcpKey
, tmpBcpKeyBuf
, tmpBcpKeyLen
);
669 tmpBcpKeyBuf
[tmpBcpKeyLen
] = 0;
670 if (uprv_compareInvCharsAsAscii(bcpKeyBuf
, tmpBcpKeyBuf
) == 0) {
671 /* found a matching BCP47 key */
672 resKey
= ures_getKey(mapData
);
673 resultLen
= (int32_t)uprv_strlen(resKey
);
676 if (!ures_hasNext(rb
)) {
679 ures_getNextResource(rb
, mapData
, status
);
684 if (U_FAILURE(*status
)) {
688 if (resKey
== NULL
) {
690 resultLen
= bcpKeyLen
;
693 uprv_memcpy(key
, resKey
, uprv_min(resultLen
, keyCapacity
));
694 return u_terminateChars(key
, keyCapacity
, resultLen
, status
);
698 _ldmlTypeToBCP47(const char* key
, int32_t keyLen
,
699 const char* type
, int32_t typeLen
,
700 char* bcpType
, int32_t bcpTypeCapacity
,
701 UErrorCode
*status
) {
702 UResourceBundle
*rb
, *keyTypeData
, *typeMapForKey
;
703 char keyBuf
[MAX_LDML_KEY_LEN
];
704 char typeBuf
[MAX_LDML_TYPE_LEN
];
705 char bcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
706 int32_t resultLen
= 0;
708 UErrorCode tmpStatus
= U_ZERO_ERROR
;
709 const UChar
*uBcpType
, *uCanonicalType
;
710 int32_t bcpTypeLen
, canonicalTypeLen
;
711 UBool isTimezone
= FALSE
;
714 keyLen
= (int32_t)uprv_strlen(key
);
716 if (keyLen
>= sizeof(keyBuf
)) {
717 /* no known valid LDML key exceeding 21 */
718 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
721 uprv_memcpy(keyBuf
, key
, keyLen
);
725 for (i
= 0; i
< keyLen
; i
++) {
726 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
728 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
733 typeLen
= (int32_t)uprv_strlen(type
);
735 if (typeLen
>= sizeof(typeBuf
)) {
736 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
741 /* replace '/' with ':' */
742 for (i
= 0; i
< typeLen
; i
++) {
743 if (*(type
+ i
) == '/') {
746 typeBuf
[i
] = *(type
+ i
);
749 typeBuf
[typeLen
] = 0;
753 keyTypeData
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
754 rb
= ures_getByKey(keyTypeData
, TYPEMAP
, NULL
, status
);
755 if (U_FAILURE(*status
)) {
757 ures_close(keyTypeData
);
761 typeMapForKey
= ures_getByKey(rb
, keyBuf
, NULL
, &tmpStatus
);
762 uBcpType
= ures_getStringByKey(typeMapForKey
, type
, &bcpTypeLen
, &tmpStatus
);
763 if (U_SUCCESS(tmpStatus
)) {
764 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
765 resultLen
= bcpTypeLen
;
766 } else if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
767 /* is this type alias? */
768 tmpStatus
= U_ZERO_ERROR
;
769 ures_getByKey(keyTypeData
, TYPEALIAS
, rb
, &tmpStatus
);
770 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
771 uCanonicalType
= ures_getStringByKey(rb
, type
, &canonicalTypeLen
, &tmpStatus
);
772 if (U_SUCCESS(tmpStatus
)) {
773 u_UCharsToChars(uCanonicalType
, typeBuf
, canonicalTypeLen
);
775 /* replace '/' with ':' */
776 for (i
= 0; i
< canonicalTypeLen
; i
++) {
777 if (typeBuf
[i
] == '/') {
782 typeBuf
[canonicalTypeLen
] = 0;
784 /* look up the canonical type */
785 uBcpType
= ures_getStringByKey(typeMapForKey
, typeBuf
, &bcpTypeLen
, &tmpStatus
);
786 if (U_SUCCESS(tmpStatus
)) {
787 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
788 resultLen
= bcpTypeLen
;
791 if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
792 if (_isLDMLType(type
, typeLen
)) {
793 uprv_memcpy(bcpTypeBuf
, type
, typeLen
);
796 /* mapping not available */
797 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
804 ures_close(typeMapForKey
);
805 ures_close(keyTypeData
);
807 if (U_FAILURE(*status
)) {
811 uprv_memcpy(bcpType
, bcpTypeBuf
, uprv_min(resultLen
, bcpTypeCapacity
));
812 return u_terminateChars(bcpType
, bcpTypeCapacity
, resultLen
, status
);
816 _bcp47ToLDMLType(const char* key
, int32_t keyLen
,
817 const char* bcpType
, int32_t bcpTypeLen
,
818 char* type
, int32_t typeCapacity
,
819 UErrorCode
*status
) {
821 char keyBuf
[MAX_LDML_KEY_LEN
];
822 char bcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
823 int32_t resultLen
= 0;
825 const char *resType
= NULL
;
826 UResourceBundle
*mapData
;
827 UErrorCode tmpStatus
= U_ZERO_ERROR
;
831 keyLen
= (int32_t)uprv_strlen(key
);
834 if (keyLen
>= sizeof(keyBuf
)) {
835 /* no known valid LDML key exceeding 21 */
836 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
839 uprv_memcpy(keyBuf
, key
, keyLen
);
843 for (i
= 0; i
< keyLen
; i
++) {
844 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
848 if (bcpTypeLen
< 0) {
849 bcpTypeLen
= (int32_t)uprv_strlen(bcpType
);
852 if (bcpTypeLen
>= sizeof(bcpTypeBuf
)) {
853 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
857 uprv_memcpy(bcpTypeBuf
, bcpType
, bcpTypeLen
);
858 bcpTypeBuf
[bcpTypeLen
] = 0;
861 for (i
= 0; i
< bcpTypeLen
; i
++) {
862 bcpTypeBuf
[i
] = uprv_tolower(bcpTypeBuf
[i
]);
865 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
866 ures_getByKey(rb
, TYPEMAP
, rb
, status
);
867 if (U_FAILURE(*status
)) {
872 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
873 mapData
= ures_getNextResource(rb
, NULL
, &tmpStatus
);
874 while (U_SUCCESS(tmpStatus
)) {
875 const UChar
*uBcpType
;
876 char tmpBcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
877 int32_t tmpBcpTypeLen
;
879 uBcpType
= ures_getString(mapData
, &tmpBcpTypeLen
, &tmpStatus
);
880 if (U_FAILURE(tmpStatus
)) {
883 u_UCharsToChars(uBcpType
, tmpBcpTypeBuf
, tmpBcpTypeLen
);
884 tmpBcpTypeBuf
[tmpBcpTypeLen
] = 0;
885 if (uprv_compareInvCharsAsAscii(bcpTypeBuf
, tmpBcpTypeBuf
) == 0) {
886 /* found a matching BCP47 type */
887 resType
= ures_getKey(mapData
);
888 resultLen
= (int32_t)uprv_strlen(resType
);
891 if (!ures_hasNext(rb
)) {
894 ures_getNextResource(rb
, mapData
, &tmpStatus
);
899 if (U_FAILURE(tmpStatus
) && tmpStatus
!= U_MISSING_RESOURCE_ERROR
) {
904 if (resType
== NULL
) {
905 resType
= bcpTypeBuf
;
906 resultLen
= bcpTypeLen
;
909 copyLen
= uprv_min(resultLen
, typeCapacity
);
910 uprv_memcpy(type
, resType
, copyLen
);
912 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
913 for (i
= 0; i
< copyLen
; i
++) {
914 if (*(type
+ i
) == ':') {
920 return u_terminateChars(type
, typeCapacity
, resultLen
, status
);
924 _appendLanguageToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
925 char buf
[ULOC_LANG_CAPACITY
];
926 UErrorCode tmpStatus
= U_ZERO_ERROR
;
930 if (U_FAILURE(*status
)) {
934 len
= uloc_getLanguage(localeID
, buf
, sizeof(buf
), &tmpStatus
);
935 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
937 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
943 /* Note: returned language code is in lower case letters */
946 if (reslen
< capacity
) {
947 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
949 reslen
+= LANG_UND_LEN
;
950 } else if (!_isLanguageSubtag(buf
, len
)) {
951 /* invalid language code */
953 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
956 if (reslen
< capacity
) {
957 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
959 reslen
+= LANG_UND_LEN
;
961 /* resolve deprecated */
962 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
963 if (uprv_compareInvCharsAsAscii(buf
, DEPRECATEDLANGS
[i
]) == 0) {
964 uprv_strcpy(buf
, DEPRECATEDLANGS
[i
+ 1]);
965 len
= (int32_t)uprv_strlen(buf
);
969 if (reslen
< capacity
) {
970 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
974 u_terminateChars(appendAt
, capacity
, reslen
, status
);
979 _appendScriptToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
980 char buf
[ULOC_SCRIPT_CAPACITY
];
981 UErrorCode tmpStatus
= U_ZERO_ERROR
;
985 if (U_FAILURE(*status
)) {
989 len
= uloc_getScript(localeID
, buf
, sizeof(buf
), &tmpStatus
);
990 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
992 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
998 if (!_isScriptSubtag(buf
, len
)) {
999 /* invalid script code */
1001 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1005 if (reslen
< capacity
) {
1006 *(appendAt
+ reslen
) = SEP
;
1010 if (reslen
< capacity
) {
1011 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1016 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1021 _appendRegionToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1022 char buf
[ULOC_COUNTRY_CAPACITY
];
1023 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1027 if (U_FAILURE(*status
)) {
1031 len
= uloc_getCountry(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1032 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1034 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1040 if (!_isRegionSubtag(buf
, len
)) {
1041 /* invalid region code */
1043 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1047 if (reslen
< capacity
) {
1048 *(appendAt
+ reslen
) = SEP
;
1052 if (reslen
< capacity
) {
1053 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1058 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1063 _appendVariantsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool
*hadPosix
, UErrorCode
* status
) {
1064 char buf
[ULOC_FULLNAME_CAPACITY
];
1065 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1069 if (U_FAILURE(*status
)) {
1073 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1074 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1076 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1084 VariantListEntry
*var
;
1085 VariantListEntry
*varFirst
= NULL
;
1090 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1094 *p
= 0; /* terminate */
1098 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1101 /* ignore empty variant */
1103 /* ICU uses upper case letters for variants, but
1104 the canonical format is lowercase in BCP47 */
1105 for (i
= 0; *(pVar
+ i
) != 0; i
++) {
1106 *(pVar
+ i
) = uprv_tolower(*(pVar
+ i
));
1110 if (_isVariantSubtag(pVar
, -1)) {
1111 if (uprv_strcmp(pVar
,POSIX_VALUE
)) {
1112 /* emit the variant to the list */
1113 var
= uprv_malloc(sizeof(VariantListEntry
));
1115 *status
= U_MEMORY_ALLOCATION_ERROR
;
1118 var
->variant
= pVar
;
1119 if (!_addVariantToList(&varFirst
, var
)) {
1120 /* duplicated variant */
1123 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1128 /* Special handling for POSIX variant, need to remember that we had it and then */
1129 /* treat it like an extension later. */
1132 } else if (strict
) {
1133 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1137 /* reset variant starting position */
1139 } else if (pVar
== NULL
) {
1145 if (U_SUCCESS(*status
)) {
1146 if (varFirst
!= NULL
) {
1149 /* write out sorted/validated/normalized variants to the target */
1151 while (var
!= NULL
) {
1152 if (reslen
< capacity
) {
1153 *(appendAt
+ reslen
) = SEP
;
1156 varLen
= (int32_t)uprv_strlen(var
->variant
);
1157 if (reslen
< capacity
) {
1158 uprv_memcpy(appendAt
+ reslen
, var
->variant
, uprv_min(varLen
, capacity
- reslen
));
1168 while (var
!= NULL
) {
1169 VariantListEntry
*tmpVar
= var
->next
;
1174 if (U_FAILURE(*status
)) {
1179 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1184 _appendKeywordsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1185 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1186 UEnumeration
*keywordEnum
= NULL
;
1189 keywordEnum
= uloc_openKeywords(localeID
, status
);
1190 if (U_FAILURE(*status
) && !hadPosix
) {
1191 uenum_close(keywordEnum
);
1194 if (keywordEnum
!= NULL
|| hadPosix
) {
1195 /* reorder extensions */
1198 ExtensionListEntry
*firstExt
= NULL
;
1199 ExtensionListEntry
*ext
;
1200 char extBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1201 char *pExtBuf
= extBuf
;
1202 int32_t extBufCapacity
= sizeof(extBuf
);
1203 const char *bcpKey
, *bcpValue
;
1204 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1206 UBool isLDMLKeyword
;
1209 key
= uenum_next(keywordEnum
, NULL
, status
);
1213 len
= uloc_getKeywordValue(localeID
, key
, buf
, sizeof(buf
), &tmpStatus
);
1214 if (U_FAILURE(tmpStatus
)) {
1216 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1219 /* ignore this keyword */
1220 tmpStatus
= U_ZERO_ERROR
;
1224 keylen
= (int32_t)uprv_strlen(key
);
1225 isLDMLKeyword
= (keylen
> 1);
1227 if (isLDMLKeyword
) {
1230 /* transform key and value to bcp47 style */
1231 modKeyLen
= _ldmlKeyToBCP47(key
, keylen
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1232 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1234 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1237 tmpStatus
= U_ZERO_ERROR
;
1242 pExtBuf
+= (modKeyLen
+ 1);
1243 extBufCapacity
-= (modKeyLen
+ 1);
1245 len
= _ldmlTypeToBCP47(key
, keylen
, buf
, len
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1246 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1248 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1251 tmpStatus
= U_ZERO_ERROR
;
1255 pExtBuf
+= (len
+ 1);
1256 extBufCapacity
-= (len
+ 1);
1258 if (*key
== PRIVATEUSE
) {
1259 if (!_isPrivateuseValueSubtags(buf
, len
)) {
1261 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1267 if (!_isExtensionSingleton(key
, keylen
) || !_isExtensionSubtags(buf
, len
)) {
1269 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1276 if ((len
+ 1) < extBufCapacity
) {
1277 uprv_memcpy(pExtBuf
, buf
, len
);
1285 extBufCapacity
-= (len
+ 1);
1287 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1292 /* create ExtensionListEntry */
1293 ext
= uprv_malloc(sizeof(ExtensionListEntry
));
1295 *status
= U_MEMORY_ALLOCATION_ERROR
;
1299 ext
->value
= bcpValue
;
1301 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1304 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1310 /* Special handling for POSIX variant - add the keywords for POSIX */
1312 /* create ExtensionListEntry for POSIX */
1313 ext
= uprv_malloc(sizeof(ExtensionListEntry
));
1315 *status
= U_MEMORY_ALLOCATION_ERROR
;
1317 ext
->key
= POSIX_KEY
;
1318 ext
->value
= POSIX_VALUE
;
1320 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1325 if (U_SUCCESS(*status
) && (firstExt
!= NULL
)) {
1326 UBool startLDMLExtension
= FALSE
;
1328 /* write out the sorted BCP47 extensions and private use */
1330 while (ext
!= NULL
) {
1331 if ((int32_t)uprv_strlen(ext
->key
) > 1 && !startLDMLExtension
) {
1332 /* write LDML singleton extension */
1333 if (reslen
< capacity
) {
1334 *(appendAt
+ reslen
) = SEP
;
1337 if (reslen
< capacity
) {
1338 *(appendAt
+ reslen
) = LDMLEXT
;
1341 startLDMLExtension
= TRUE
;
1344 if (reslen
< capacity
) {
1345 *(appendAt
+ reslen
) = SEP
;
1348 len
= (int32_t)uprv_strlen(ext
->key
);
1349 if (reslen
< capacity
) {
1350 uprv_memcpy(appendAt
+ reslen
, ext
->key
, uprv_min(len
, capacity
- reslen
));
1353 if (reslen
< capacity
) {
1354 *(appendAt
+ reslen
) = SEP
;
1357 len
= (int32_t)uprv_strlen(ext
->value
);
1358 if (reslen
< capacity
) {
1359 uprv_memcpy(appendAt
+ reslen
, ext
->value
, uprv_min(len
, capacity
- reslen
));
1368 while (ext
!= NULL
) {
1369 ExtensionListEntry
*tmpExt
= ext
->next
;
1374 uenum_close(keywordEnum
);
1376 if (U_FAILURE(*status
)) {
1381 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1385 * Append keywords parsed from LDML extension value
1386 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1387 * Note: char* buf is used for storing keywords
1390 _appendLDMLExtensionAsKeywords(const char* ldmlext
, ExtensionListEntry
** appendTo
, char* buf
, int32_t bufSize
, UBool
*posixVariant
, UErrorCode
*status
) {
1391 const char *p
, *pNext
, *pSep
;
1392 const char *pBcpKey
, *pBcpType
;
1393 const char *pKey
, *pType
;
1394 int32_t bcpKeyLen
= 0, bcpTypeLen
;
1395 ExtensionListEntry
*kwd
, *nextKwd
;
1396 ExtensionListEntry
*kwdFirst
= NULL
;
1401 pBcpKey
= pBcpType
= NULL
;
1405 /* locate next separator char */
1419 if (pBcpKey
== NULL
) {
1421 bcpKeyLen
= (int32_t)(pSep
- p
);
1424 bcpTypeLen
= (int32_t)(pSep
- p
);
1426 /* BCP key to locale key */
1427 len
= _bcp47ToLDMLKey(pBcpKey
, bcpKeyLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1428 if (U_FAILURE(*status
)) {
1431 pKey
= buf
+ bufIdx
;
1433 *(buf
+ bufIdx
) = 0;
1436 /* BCP type to locale type */
1437 len
= _bcp47ToLDMLType(pKey
, -1, pBcpType
, bcpTypeLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1438 if (U_FAILURE(*status
)) {
1441 pType
= buf
+ bufIdx
;
1443 *(buf
+ bufIdx
) = 0;
1446 /* Special handling for u-va-posix, since we want to treat this as a variant, not */
1449 if ( !uprv_strcmp(pKey
,POSIX_KEY
) && !uprv_strcmp(pType
,POSIX_VALUE
) ) {
1450 *posixVariant
= TRUE
;
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1455 *status
= U_MEMORY_ALLOCATION_ERROR
;
1462 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1463 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1475 if (pBcpKey
!= NULL
) {
1476 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1481 while (kwd
!= NULL
) {
1482 nextKwd
= kwd
->next
;
1483 _addExtensionToList(appendTo
, kwd
, FALSE
);
1491 while (kwd
!= NULL
) {
1492 nextKwd
= kwd
->next
;
1500 _appendKeywords(ULanguageTag
* langtag
, char* appendAt
, int32_t capacity
, UErrorCode
* status
) {
1504 ExtensionListEntry
*kwdFirst
= NULL
;
1505 ExtensionListEntry
*kwd
;
1506 const char *key
, *type
;
1507 char kwdBuf
[ULOC_KEYWORDS_CAPACITY
];
1508 UBool posixVariant
= FALSE
;
1510 if (U_FAILURE(*status
)) {
1514 n
= ultag_getExtensionsSize(langtag
);
1516 /* resolve locale keywords and reordering keys */
1517 for (i
= 0; i
< n
; i
++) {
1518 key
= ultag_getExtensionKey(langtag
, i
);
1519 type
= ultag_getExtensionValue(langtag
, i
);
1520 if (*key
== LDMLEXT
) {
1521 _appendLDMLExtensionAsKeywords(type
, &kwdFirst
, kwdBuf
, sizeof(kwdBuf
), &posixVariant
, status
);
1522 if (U_FAILURE(*status
)) {
1526 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1528 *status
= U_MEMORY_ALLOCATION_ERROR
;
1533 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1535 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1541 if (U_SUCCESS(*status
)) {
1542 type
= ultag_getPrivateUse(langtag
);
1543 if ((int32_t)uprv_strlen(type
) > 0) {
1544 /* add private use as a keyword */
1545 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1547 *status
= U_MEMORY_ALLOCATION_ERROR
;
1549 kwd
->key
= PRIVATEUSE_KEY
;
1551 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1553 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1559 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1561 if (U_SUCCESS(*status
) && posixVariant
) {
1562 len
= (int32_t) uprv_strlen(_POSIX
);
1563 if (reslen
< capacity
) {
1564 uprv_memcpy(appendAt
+ reslen
, _POSIX
, uprv_min(len
, capacity
- reslen
));
1569 if (U_SUCCESS(*status
) && kwdFirst
!= NULL
) {
1570 /* write out the sorted keywords */
1572 while (kwd
!= NULL
) {
1573 if (reslen
< capacity
) {
1574 if (kwd
== kwdFirst
) {
1576 *(appendAt
+ reslen
) = LOCALE_EXT_SEP
;
1579 *(appendAt
+ reslen
) = LOCALE_KEYWORD_SEP
;
1585 len
= (int32_t)uprv_strlen(kwd
->key
);
1586 if (reslen
< capacity
) {
1587 uprv_memcpy(appendAt
+ reslen
, kwd
->key
, uprv_min(len
, capacity
- reslen
));
1592 if (reslen
< capacity
) {
1593 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1598 len
= (int32_t)uprv_strlen(kwd
->value
);
1599 if (reslen
< capacity
) {
1600 uprv_memcpy(appendAt
+ reslen
, kwd
->value
, uprv_min(len
, capacity
- reslen
));
1610 while (kwd
!= NULL
) {
1611 ExtensionListEntry
*tmpKwd
= kwd
->next
;
1616 if (U_FAILURE(*status
)) {
1620 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1624 * -------------------------------------------------
1628 * -------------------------------------------------
1631 /* Bit flags used by the parser */
1641 static ULanguageTag
*
1642 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
) {
1646 char *pSubtag
, *pNext
, *pLastGoodPosition
;
1649 ExtensionListEntry
*pExtension
;
1650 char *pExtValueSubtag
, *pExtValueSubtagEnd
;
1652 UBool isLDMLExtension
, reqLDMLType
;
1654 if (parsedLen
!= NULL
) {
1658 if (U_FAILURE(*status
)) {
1663 tagLen
= (int32_t)uprv_strlen(tag
);
1666 /* copy the entire string */
1667 tagBuf
= (char*)uprv_malloc(tagLen
+ 1);
1668 if (tagBuf
== NULL
) {
1669 *status
= U_MEMORY_ALLOCATION_ERROR
;
1672 uprv_memcpy(tagBuf
, tag
, tagLen
);
1673 *(tagBuf
+ tagLen
) = 0;
1675 /* create a ULanguageTag */
1676 t
= (ULanguageTag
*)uprv_malloc(sizeof(ULanguageTag
));
1677 _initializeULanguageTag(t
);
1681 *status
= U_MEMORY_ALLOCATION_ERROR
;
1685 if (tagLen
< MINLEN
) {
1686 /* the input tag is too short - return empty ULanguageTag */
1690 /* check if the tag is grandfathered */
1691 for (i
= 0; GRANDFATHERED
[i
] != NULL
; i
+= 2) {
1692 if (T_CString_stricmp(GRANDFATHERED
[i
], tagBuf
) == 0) {
1693 /* a grandfathered tag is always longer than its preferred mapping */
1694 uprv_strcpy(t
->buf
, GRANDFATHERED
[i
+ 1]);
1695 t
->language
= t
->buf
;
1696 if (parsedLen
!= NULL
) {
1697 *parsedLen
= tagLen
;
1704 * langtag = language
1713 pNext
= pLastGoodPosition
= tagBuf
;
1716 pExtValueSubtag
= NULL
;
1717 pExtValueSubtagEnd
= NULL
;
1718 isLDMLExtension
= FALSE
;
1719 reqLDMLType
= FALSE
;
1726 /* locate next separator char */
1740 subtagLen
= (int32_t)(pSep
- pSubtag
);
1743 if (_isLanguageSubtag(pSubtag
, subtagLen
)) {
1744 *pSep
= 0; /* terminate */
1745 t
->language
= T_CString_toLowerCase(pSubtag
);
1747 pLastGoodPosition
= pSep
;
1748 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
1753 if (_isExtlangSubtag(pSubtag
, subtagLen
)) {
1755 t
->extlang
[extlangIdx
++] = T_CString_toLowerCase(pSubtag
);
1757 pLastGoodPosition
= pSep
;
1758 if (extlangIdx
< 3) {
1759 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
1761 next
= SCRT
| REGN
| VART
| EXTS
| PRIV
;
1767 if (_isScriptSubtag(pSubtag
, subtagLen
)) {
1773 *p
= uprv_toupper(*p
);
1776 *p
= uprv_tolower(*p
);
1779 t
->script
= pSubtag
;
1781 pLastGoodPosition
= pSep
;
1782 next
= REGN
| VART
| EXTS
| PRIV
;
1787 if (_isRegionSubtag(pSubtag
, subtagLen
)) {
1789 t
->region
= T_CString_toUpperCase(pSubtag
);
1791 pLastGoodPosition
= pSep
;
1792 next
= VART
| EXTS
| PRIV
;
1797 if (_isVariantSubtag(pSubtag
, subtagLen
)) {
1798 VariantListEntry
*var
;
1801 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
1803 *status
= U_MEMORY_ALLOCATION_ERROR
;
1807 var
->variant
= T_CString_toUpperCase(pSubtag
);
1808 isAdded
= _addVariantToList(&(t
->variants
), var
);
1810 /* duplicated variant entry */
1814 pLastGoodPosition
= pSep
;
1815 next
= VART
| EXTS
| PRIV
;
1820 if (_isExtensionSingleton(pSubtag
, subtagLen
)) {
1821 if (pExtension
!= NULL
) {
1822 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
1823 /* the previous extension is incomplete */
1824 uprv_free(pExtension
);
1829 /* terminate the previous extension value */
1830 *pExtValueSubtagEnd
= 0;
1831 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
1833 /* insert the extension to the list */
1834 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
1835 pLastGoodPosition
= pExtValueSubtagEnd
;
1837 /* stop parsing here */
1838 uprv_free(pExtension
);
1843 if (isLDMLExtension
&& reqLDMLType
) {
1844 /* incomplete LDML extension key and type pair */
1850 isLDMLExtension
= (uprv_tolower(*pSubtag
) == LDMLEXT
);
1852 /* create a new extension */
1853 pExtension
= uprv_malloc(sizeof(ExtensionListEntry
));
1854 if (pExtension
== NULL
) {
1855 *status
= U_MEMORY_ALLOCATION_ERROR
;
1859 pExtension
->key
= T_CString_toLowerCase(pSubtag
);
1860 pExtension
->value
= NULL
; /* will be set later */
1863 * reset the start and the end location of extension value
1864 * subtags for this extension
1866 pExtValueSubtag
= NULL
;
1867 pExtValueSubtagEnd
= NULL
;
1874 if (_isExtensionSubtag(pSubtag
, subtagLen
)) {
1875 if (isLDMLExtension
) {
1877 /* already saw an LDML key */
1878 if (!_isLDMLType(pSubtag
, subtagLen
)) {
1879 /* stop parsing here and let the valid LDML extension key/type
1880 pairs be processed by the code outside this while loop */
1883 pExtValueSubtagEnd
= pSep
;
1884 reqLDMLType
= FALSE
;
1885 next
= EXTS
| EXTV
| PRIV
;
1888 if (!_isLDMLKey(pSubtag
, subtagLen
)) {
1889 /* stop parsing here and let the valid LDML extension key/type
1890 pairs be processed by the code outside this while loop */
1897 /* Mark the end of this subtag */
1898 pExtValueSubtagEnd
= pSep
;
1899 next
= EXTS
| EXTV
| PRIV
;
1902 if (pExtValueSubtag
== NULL
) {
1903 /* if the start position of this extension's value is not yet set,
1904 this one is the first value subtag */
1905 pExtValueSubtag
= pSubtag
;
1911 if (uprv_tolower(*pSubtag
) == PRIVATEUSE
) {
1914 if (pExtension
!= NULL
) {
1915 /* Process the last extension */
1916 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
1917 /* the previous extension is incomplete */
1918 uprv_free(pExtension
);
1922 /* terminate the previous extension value */
1923 *pExtValueSubtagEnd
= 0;
1924 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
1926 /* insert the extension to the list */
1927 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
1928 pLastGoodPosition
= pExtValueSubtagEnd
;
1931 /* stop parsing here */
1932 uprv_free(pExtension
);
1939 /* The rest of part will be private use value subtags */
1940 if (pNext
== NULL
) {
1941 /* empty private use subtag */
1944 /* back up the private use value start position */
1945 pPrivuseVal
= pNext
;
1947 /* validate private use value subtags */
1963 subtagLen
= (int32_t)(pSep
- pSubtag
);
1965 if (_isPrivateuseValueSubtag(pSubtag
, subtagLen
)) {
1966 pLastGoodPosition
= pSep
;
1971 if (pLastGoodPosition
- pPrivuseVal
> 0) {
1972 *pLastGoodPosition
= 0;
1973 t
->privateuse
= T_CString_toLowerCase(pPrivuseVal
);
1975 /* No more subtags, exiting the parse loop */
1980 /* If we fell through here, it means this subtag is illegal - quit parsing */
1984 if (pExtension
!= NULL
) {
1985 /* Process the last extension */
1986 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
1987 /* the previous extension is incomplete */
1988 uprv_free(pExtension
);
1990 /* terminate the previous extension value */
1991 *pExtValueSubtagEnd
= 0;
1992 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
1993 /* insert the extension to the list */
1994 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
1995 pLastGoodPosition
= pExtValueSubtagEnd
;
1997 uprv_free(pExtension
);
2002 if (parsedLen
!= NULL
) {
2003 *parsedLen
= (int32_t)(pLastGoodPosition
- t
->buf
);
/*
 * Release the resources referenced by a ULanguageTag.
 *
 * langtag: the tag to release; it is compared against NULL before any
 *          member is touched.
 *
 * The visible statements free langtag->buf and then start a walk over the
 * variants list and the extensions list.
 * NOTE(review): the loop headers, the per-entry frees and the final free of
 * langtag itself are not visible in this listing - confirm against the
 * full source.
 */
2014 ultag_close(ULanguageTag
* langtag
) {
/* NULL guard: nothing below may dereference a NULL tag */
2016 if (langtag
== NULL
) {
/* buf is the storage that holds the parsed subtag strings */
2020 uprv_free(langtag
->buf
);
/* variants list: only entered when the list head is non-NULL */
2022 if (langtag
->variants
) {
2023 VariantListEntry
*curVar
= langtag
->variants
;
/* the successor is read into nextVar; presumably so curVar can be
   released before advancing - TODO confirm */
2025 VariantListEntry
*nextVar
= curVar
->next
;
/* extensions list: same pattern as the variants list above */
2031 if (langtag
->extensions
) {
2032 ExtensionListEntry
*curExt
= langtag
->extensions
;
2034 ExtensionListEntry
*nextExt
= curExt
->next
;
/*
 * Accessor for the language subtag of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * Returns the pointer stored in langtag->language (not a copy).
 */
2044 ultag_getLanguage(const ULanguageTag
* langtag
) {
2045 return langtag
->language
;
/*
 * Return the language subtag after mapping it through DEPRECATEDLANGS.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 *
 * DEPRECATEDLANGS is read as a NULL-terminated array of pairs: entry [i] is
 * compared with langtag->language and, on a match, entry [i + 1] is
 * returned. When no pair matches, langtag->language is returned unchanged.
 * NOTE(review): which side of each pair is the deprecated code is decided by
 * the table contents, which are not visible here.
 */
2050 ultag_getJDKLanguage(const ULanguageTag
* langtag
) {
/* step by two: each iteration looks at one (key, replacement) pair */
2052 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
2053 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS
[i
], langtag
->language
) == 0) {
/* match: hand back the second element of the pair */
2054 return DEPRECATEDLANGS
[i
+ 1];
/* no pair matched: the stored language is returned as is */
2057 return langtag
->language
;
/*
 * Return the extlang subtag stored at position idx.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * idx:     position in the extlang array; used only when
 *          0 <= idx < MAXEXTLANG.
 *
 * NOTE(review): the value returned for an out-of-range idx is not visible
 * in this listing - confirm against the full source.
 */
2062 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
) {
/* bounds check keeps the array access inside extlang[MAXEXTLANG] */
2063 if (idx
>= 0 && idx
< MAXEXTLANG
) {
2064 return langtag
->extlang
[idx
];
/*
 * Inspect every slot of langtag->extlang (indices 0 .. MAXEXTLANG - 1) and
 * test whether it is non-NULL.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 *
 * NOTE(review): presumably the non-NULL slots are counted and the count is
 * returned; the counter and the return statement are not visible in this
 * listing.
 */
2070 ultag_getExtlangSize(const ULanguageTag
* langtag
) {
2073 for (i
= 0; i
< MAXEXTLANG
; i
++) {
/* a non-NULL slot means an extlang subtag is stored at this index */
2074 if (langtag
->extlang
[i
]) {
/*
 * Accessor for the script subtag of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * Returns the pointer stored in langtag->script (not a copy).
 */
2082 ultag_getScript(const ULanguageTag
* langtag
) {
2083 return langtag
->script
;
/*
 * Accessor for the region subtag of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * Returns the pointer stored in langtag->region (not a copy).
 */
2087 ultag_getRegion(const ULanguageTag
* langtag
) {
2088 return langtag
->region
;
/*
 * Look up a variant subtag by position in the variants list.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * idx:     position of the wanted entry.
 *
 * The result variable starts as NULL and the cursor starts at the list
 * head.
 * NOTE(review): the traversal and the return statement are not visible in
 * this listing; presumably NULL is returned when idx is past the end.
 */
2092 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
) {
/* NULL is the initial result value */
2093 const char *var
= NULL
;
/* cursor over the singly linked variants list */
2094 VariantListEntry
*cur
= langtag
->variants
;
/*
 * Start a walk over the variants list of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 *
 * NOTE(review): presumably the entries are counted and the count returned;
 * the loop and the return statement are not visible in this listing.
 */
2108 ultag_getVariantsSize(const ULanguageTag
* langtag
) {
/* cursor starts at the list head */
2110 VariantListEntry
*cur
= langtag
->variants
;
/*
 * Look up the key of an extension by position in the extensions list.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * idx:     position of the wanted entry.
 *
 * The result variable starts as NULL and the cursor starts at the list
 * head.
 * NOTE(review): the traversal and the return statement are not visible in
 * this listing; presumably NULL is returned when idx is past the end.
 */
2122 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
) {
/* NULL is the initial result value */
2123 const char *key
= NULL
;
/* cursor over the singly linked extensions list */
2124 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Look up the value of an extension by position in the extensions list.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * idx:     position of the wanted entry.
 *
 * The result variable starts as NULL and the cursor starts at the list
 * head.
 * NOTE(review): the traversal and the return statement are not visible in
 * this listing; presumably NULL is returned when idx is past the end.
 */
2138 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
) {
/* NULL is the initial result value */
2139 const char *val
= NULL
;
/* cursor over the singly linked extensions list */
2140 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Start a walk over the extensions list of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 *
 * NOTE(review): presumably the entries are counted and the count returned;
 * the loop and the return statement are not visible in this listing.
 */
2154 ultag_getExtensionsSize(const ULanguageTag
* langtag
) {
/* cursor starts at the list head */
2156 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Accessor for the private use part of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * Returns the pointer stored in langtag->privateuse (not a copy).
 */
2168 ultag_getPrivateUse(const ULanguageTag
* langtag
) {
2169 return langtag
->privateuse
;
/*
 * Accessor for the grandfathered member of a parsed tag.
 *
 * langtag: parsed tag; dereferenced without a NULL check.
 * Returns the pointer stored in langtag->grandfathered (not a copy).
 */
2174 ultag_getGrandfathered(const ULanguageTag
* langtag
) {
2175 return langtag
->grandfathered
;
2181 * -------------------------------------------------
2183 * Locale/BCP47 conversion APIs, exposed as uloc_*
2185 * -------------------------------------------------
/*
 * Convert a locale ID into a language tag written to the caller's buffer.
 *
 * localeID:        input locale ID; canonicalized only when it is non-empty.
 * langtagCapacity: capacity of the output buffer (langtag).
 * status:          receives U_ILLEGAL_ARGUMENT_ERROR on the error paths
 *                  visible below; also passed to the _append* helpers and to
 *                  u_terminateChars.
 *
 * The body also uses langtag (the output buffer) and strict; their
 * parameter lines are not visible in this listing.
 *
 * Visible flow:
 *   1. canonicalize localeID into a local buffer;
 *   2. special-case a locale that consists only of keywords and whose first
 *      keyword is the single-character private use key;
 *   3. otherwise append language, script, region, variants and keywords in
 *      that order, accumulating the written length in reslen.
 * NOTE(review): the return statements and several branch lines are missing
 * from this listing - confirm the details against the full source.
 */
2187 U_DRAFT
int32_t U_EXPORT2
2188 uloc_toLanguageTag(const char* localeID
,
2190 int32_t langtagCapacity
,
2192 UErrorCode
* status
) {
2193 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
/* fixed 256-byte buffer used in place of the commented-out declaration */
2194 char canonical
[256];
/* tmpStatus keeps internal failures separate from the caller's status */
2196 UErrorCode tmpStatus
= U_ZERO_ERROR
;
/* filled in by the variants step and handed to the keywords step */
2197 UBool hadPosix
= FALSE
;
2198 const char* pKeywordStart
;
2200 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
/* an empty localeID therefore skips canonicalization entirely */
2202 if (uprv_strlen(localeID
) > 0) {
2203 uloc_canonicalize(localeID
, canonical
, sizeof(canonical
), &tmpStatus
);
/* any status other than U_ZERO_ERROR (not only U_FAILURE codes) is
   reported to the caller as an illegal argument */
2204 if (tmpStatus
!= U_ZERO_ERROR
) {
2205 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2210 /* For handling special case - private use only tag */
2211 pKeywordStart
= locale_getKeywordsStart(canonical
);
/* keywords starting at the very first character: nothing precedes them */
2212 if (pKeywordStart
== canonical
) {
2213 UEnumeration
*kwdEnum
;
2217 kwdEnum
= uloc_openKeywords((const char*)canonical
, &tmpStatus
);
2218 if (kwdEnum
!= NULL
) {
2219 kwdCnt
= uenum_count(kwdEnum
, &tmpStatus
);
/* fetch the first keyword and its length */
2224 key
= uenum_next(kwdEnum
, &len
, &tmpStatus
);
/* a one-character key equal to PRIVATEUSE ('x') */
2225 if (len
== 1 && *key
== PRIVATEUSE
) {
2226 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
/* buf[0] holds the key; the value is written from buf[2] onward.
   Presumably buf[1] receives the separator on a line not shown here. */
2227 buf
[0] = PRIVATEUSE
;
/* NOTE(review): the value is read from localeID, while the key was
   enumerated from the canonical form */
2229 len
= uloc_getKeywordValue(localeID
, key
, &buf
[2], sizeof(buf
) - 2, &tmpStatus
);
2230 if (U_SUCCESS(tmpStatus
)) {
2231 if (_isPrivateuseValueSubtags(&buf
[2], len
)) {
2232 /* return private use only tag */
/* copy at most langtagCapacity bytes, then let u_terminateChars
   terminate the output and update status */
2234 uprv_memcpy(langtag
, buf
, uprv_min(reslen
, langtagCapacity
));
2235 u_terminateChars(langtag
, langtagCapacity
, reslen
, status
);
/* invalid private use value: an error only in strict mode */
2237 } else if (strict
) {
2238 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2241 /* if not strict mode, then "und" will be returned */
2243 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
/* the enumeration opened above is closed here */
2248 uenum_close(kwdEnum
);
/* general path: each helper writes at langtag + reslen with the remaining
   capacity, and its return value is added to reslen */
2255 reslen
+= _appendLanguageToLanguageTag(canonical
, langtag
, langtagCapacity
, strict
, status
);
2256 reslen
+= _appendScriptToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2257 reslen
+= _appendRegionToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
/* the variants step reports through hadPosix, which the keywords step
   receives by value */
2258 reslen
+= _appendVariantsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, &hadPosix
, status
);
2259 reslen
+= _appendKeywordsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2265 U_DRAFT
int32_t U_EXPORT2
2266 uloc_forLanguageTag(const char* langtag
,
2268 int32_t localeIDCapacity
,
2269 int32_t* parsedLength
,
2270 UErrorCode
* status
) {
2273 const char *subtag
, *p
;
2276 UBool noRegion
= TRUE
;
2278 lt
= ultag_parse(langtag
, -1, parsedLength
, status
);
2279 if (U_FAILURE(*status
)) {
2284 subtag
= ultag_getExtlangSize(lt
) > 0 ? ultag_getExtlang(lt
, 0) : ultag_getLanguage(lt
);
2285 if (uprv_compareInvCharsAsAscii(subtag
, LANG_UND
) != 0) {
2286 len
= (int32_t)uprv_strlen(subtag
);
2288 if (reslen
< localeIDCapacity
) {
2289 uprv_memcpy(localeID
, subtag
, uprv_min(len
, localeIDCapacity
- reslen
));
2296 subtag
= ultag_getScript(lt
);
2297 len
= (int32_t)uprv_strlen(subtag
);
2299 if (reslen
< localeIDCapacity
) {
2300 *(localeID
+ reslen
) = LOCALE_SEP
;
2304 /* write out the script in title case */
2307 if (reslen
< localeIDCapacity
) {
2309 *(localeID
+ reslen
) = uprv_toupper(*p
);
2311 *(localeID
+ reslen
) = *p
;
2320 subtag
= ultag_getRegion(lt
);
2321 len
= (int32_t)uprv_strlen(subtag
);
2323 if (reslen
< localeIDCapacity
) {
2324 *(localeID
+ reslen
) = LOCALE_SEP
;
2327 /* write out the region in upper case */
2330 if (reslen
< localeIDCapacity
) {
2331 *(localeID
+ reslen
) = uprv_toupper(*p
);
2340 n
= ultag_getVariantsSize(lt
);
2343 if (reslen
< localeIDCapacity
) {
2344 *(localeID
+ reslen
) = LOCALE_SEP
;
2349 for (i
= 0; i
< n
; i
++) {
2350 subtag
= ultag_getVariant(lt
, i
);
2351 if (reslen
< localeIDCapacity
) {
2352 *(localeID
+ reslen
) = LOCALE_SEP
;
2355 /* write out the variant in upper case */
2358 if (reslen
< localeIDCapacity
) {
2359 *(localeID
+ reslen
) = uprv_toupper(*p
);
2368 n
= ultag_getExtensionsSize(lt
);
2369 subtag
= ultag_getPrivateUse(lt
);
2370 if (n
> 0 || uprv_strlen(subtag
) > 0) {
2371 if (reslen
== 0 && n
> 0) {
2372 /* need a language */
2373 if (reslen
< localeIDCapacity
) {
2374 uprv_memcpy(localeID
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, localeIDCapacity
- reslen
));
2376 reslen
+= LANG_UND_LEN
;
2378 len
= _appendKeywords(lt
, localeID
+ reslen
, localeIDCapacity
- reslen
, status
);
2383 return u_terminateChars(localeID
, localeIDCapacity
, reslen
, status
);