2 **********************************************************************
3 * Copyright (C) 2009-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
20 /* struct holding a single variant */
21 typedef struct VariantListEntry
{
23 struct VariantListEntry
*next
;
26 /* struct holding a single attribute value */
27 typedef struct AttributeListEntry
{
28 const char *attribute
;
29 struct AttributeListEntry
*next
;
32 /* struct holding a single extension */
33 typedef struct ExtensionListEntry
{
36 struct ExtensionListEntry
*next
;
40 typedef struct ULanguageTag
{
41 char *buf
; /* holding parsed subtags */
43 const char *extlang
[MAXEXTLANG
];
46 VariantListEntry
*variants
;
47 ExtensionListEntry
*extensions
;
48 const char *privateuse
;
49 const char *grandfathered
;
54 #define PRIVATEUSE 'x'
57 #define LOCALE_SEP '_'
58 #define LOCALE_EXT_SEP '@'
59 #define LOCALE_KEYWORD_SEP ';'
60 #define LOCALE_KEY_TYPE_SEP '='
62 #define ISALPHA(c) uprv_isASCIILetter(c)
63 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
65 static const char EMPTY
[] = "";
66 static const char LANG_UND
[] = "und";
67 static const char PRIVATEUSE_KEY
[] = "x";
68 static const char _POSIX
[] = "_POSIX";
69 static const char POSIX_KEY
[] = "va";
70 static const char POSIX_VALUE
[] = "posix";
71 static const char LOCALE_ATTRIBUTE_KEY
[] = "attribute";
72 static const char PRIVUSE_VARIANT_PREFIX
[] = "lvariant";
73 static const char LOCALE_TYPE_YES
[] = "yes";
75 #define LANG_UND_LEN 3
77 static const char* const GRANDFATHERED
[] = {
78 /* grandfathered preferred */
80 "cel-gaulish", "xtg-x-cel-gaulish",
81 "en-GB-oed", "en-GB-x-oed",
84 "i-default", "en-x-i-default",
85 "i-enochian", "und-x-i-enochian",
89 "i-mingo", "see-x-i-mingo",
102 "zh-min", "nan-x-zh-min",
108 static const char DEPRECATEDLANGS
[][4] = {
116 * -------------------------------------------------
118 * These ultag_ functions may be exposed as APIs later
120 * -------------------------------------------------
124 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
);
127 ultag_close(ULanguageTag
* langtag
);
130 ultag_getLanguage(const ULanguageTag
* langtag
);
134 ultag_getJDKLanguage(const ULanguageTag
* langtag
);
138 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
);
141 ultag_getExtlangSize(const ULanguageTag
* langtag
);
144 ultag_getScript(const ULanguageTag
* langtag
);
147 ultag_getRegion(const ULanguageTag
* langtag
);
150 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
);
153 ultag_getVariantsSize(const ULanguageTag
* langtag
);
156 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
);
159 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
);
162 ultag_getExtensionsSize(const ULanguageTag
* langtag
);
165 ultag_getPrivateUse(const ULanguageTag
* langtag
);
169 ultag_getGrandfathered(const ULanguageTag
* langtag
);
173 * -------------------------------------------------
175 * Language subtag syntax validation functions
177 * -------------------------------------------------
181 _isAlphaString(const char* s
, int32_t len
) {
183 for (i
= 0; i
< len
; i
++) {
184 if (!ISALPHA(*(s
+ i
))) {
192 _isNumericString(const char* s
, int32_t len
) {
194 for (i
= 0; i
< len
; i
++) {
195 if (!ISNUMERIC(*(s
+ i
))) {
203 _isAlphaNumericString(const char* s
, int32_t len
) {
205 for (i
= 0; i
< len
; i
++) {
206 if (!ISALPHA(*(s
+ i
)) && !ISNUMERIC(*(s
+ i
))) {
214 _isLanguageSubtag(const char* s
, int32_t len
) {
216 * language = 2*3ALPHA ; shortest ISO 639 code
217 * ["-" extlang] ; sometimes followed by
218 * ; extended language subtags
219 * / 4ALPHA ; or reserved for future use
220 * / 5*8ALPHA ; or registered language subtag
223 len
= (int32_t)uprv_strlen(s
);
225 if (len
>= 2 && len
<= 8 && _isAlphaString(s
, len
)) {
232 _isExtlangSubtag(const char* s
, int32_t len
) {
234 * extlang = 3ALPHA ; selected ISO 639 codes
235 * *2("-" 3ALPHA) ; permanently reserved
238 len
= (int32_t)uprv_strlen(s
);
240 if (len
== 3 && _isAlphaString(s
, len
)) {
247 _isScriptSubtag(const char* s
, int32_t len
) {
249 * script = 4ALPHA ; ISO 15924 code
252 len
= (int32_t)uprv_strlen(s
);
254 if (len
== 4 && _isAlphaString(s
, len
)) {
261 _isRegionSubtag(const char* s
, int32_t len
) {
263 * region = 2ALPHA ; ISO 3166-1 code
264 * / 3DIGIT ; UN M.49 code
267 len
= (int32_t)uprv_strlen(s
);
269 if (len
== 2 && _isAlphaString(s
, len
)) {
272 if (len
== 3 && _isNumericString(s
, len
)) {
279 _isVariantSubtag(const char* s
, int32_t len
) {
281 * variant = 5*8alphanum ; registered variants
282 * / (DIGIT 3alphanum)
285 len
= (int32_t)uprv_strlen(s
);
287 if (len
>= 5 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
290 if (len
== 4 && ISNUMERIC(*s
) && _isAlphaNumericString(s
+ 1, 3)) {
297 _isPrivateuseVariantSubtag(const char* s
, int32_t len
) {
299 * variant = 1*8alphanum ; registered variants
300 * / (DIGIT 3alphanum)
303 len
= (int32_t)uprv_strlen(s
);
305 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
312 _isExtensionSingleton(const char* s
, int32_t len
) {
314 * extension = singleton 1*("-" (2*8alphanum))
317 len
= (int32_t)uprv_strlen(s
);
319 if (len
== 1 && ISALPHA(*s
) && (uprv_tolower(*s
) != PRIVATEUSE
)) {
326 _isExtensionSubtag(const char* s
, int32_t len
) {
328 * extension = singleton 1*("-" (2*8alphanum))
331 len
= (int32_t)uprv_strlen(s
);
333 if (len
>= 2 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
340 _isExtensionSubtags(const char* s
, int32_t len
) {
342 const char *pSubtag
= NULL
;
345 len
= (int32_t)uprv_strlen(s
);
348 while ((p
- s
) < len
) {
350 if (pSubtag
== NULL
) {
353 if (!_isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
357 } else if (pSubtag
== NULL
) {
362 if (pSubtag
== NULL
) {
365 return _isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
369 _isPrivateuseValueSubtag(const char* s
, int32_t len
) {
371 * privateuse = "x" 1*("-" (1*8alphanum))
374 len
= (int32_t)uprv_strlen(s
);
376 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
383 _isPrivateuseValueSubtags(const char* s
, int32_t len
) {
385 const char *pSubtag
= NULL
;
388 len
= (int32_t)uprv_strlen(s
);
391 while ((p
- s
) < len
) {
393 if (pSubtag
== NULL
) {
396 if (!_isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
400 } else if (pSubtag
== NULL
) {
405 if (pSubtag
== NULL
) {
408 return _isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
412 ultag_isUnicodeLocaleKey(const char* s
, int32_t len
) {
414 len
= (int32_t)uprv_strlen(s
);
416 if (len
== 2 && _isAlphaNumericString(s
, len
)) {
423 ultag_isUnicodeLocaleType(const char*s
, int32_t len
) {
425 int32_t subtagLen
= 0;
428 len
= (int32_t)uprv_strlen(s
);
431 for (p
= s
; len
> 0; p
++, len
--) {
437 } else if (ISALPHA(*p
) || ISNUMERIC(*p
)) {
447 return (subtagLen
>= 3);
450 * -------------------------------------------------
454 * -------------------------------------------------
458 _addVariantToList(VariantListEntry
**first
, VariantListEntry
*var
) {
461 if (*first
== NULL
) {
465 VariantListEntry
*prev
, *cur
;
468 /* variants order should be preserved */
478 /* Checking for duplicate variant */
479 cmp
= uprv_compareInvCharsAsAscii(var
->variant
, cur
->variant
);
481 /* duplicated variant */
494 _addAttributeToList(AttributeListEntry
**first
, AttributeListEntry
*attr
) {
497 if (*first
== NULL
) {
501 AttributeListEntry
*prev
, *cur
;
504 /* reorder attributes in alphabetical order */
513 cmp
= uprv_compareInvCharsAsAscii(attr
->attribute
, cur
->attribute
);
524 /* duplicated attribute */
538 _addExtensionToList(ExtensionListEntry
**first
, ExtensionListEntry
*ext
, UBool localeToBCP
) {
541 if (*first
== NULL
) {
545 ExtensionListEntry
*prev
, *cur
;
548 /* reorder extensions in alphabetical order */
558 /* special handling for locale to bcp conversion */
561 len
= (int32_t)uprv_strlen(ext
->key
);
562 curlen
= (int32_t)uprv_strlen(cur
->key
);
564 if (len
== 1 && curlen
== 1) {
565 if (*(ext
->key
) == *(cur
->key
)) {
567 } else if (*(ext
->key
) == PRIVATEUSE
) {
569 } else if (*(cur
->key
) == PRIVATEUSE
) {
572 cmp
= *(ext
->key
) - *(cur
->key
);
574 } else if (len
== 1) {
575 cmp
= *(ext
->key
) - LDMLEXT
;
576 } else if (curlen
== 1) {
577 cmp
= LDMLEXT
- *(cur
->key
);
579 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
582 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
594 /* duplicated extension key */
607 _initializeULanguageTag(ULanguageTag
* langtag
) {
612 langtag
->language
= EMPTY
;
613 for (i
= 0; i
< MAXEXTLANG
; i
++) {
614 langtag
->extlang
[i
] = NULL
;
617 langtag
->script
= EMPTY
;
618 langtag
->region
= EMPTY
;
620 langtag
->variants
= NULL
;
621 langtag
->extensions
= NULL
;
623 langtag
->grandfathered
= EMPTY
;
624 langtag
->privateuse
= EMPTY
;
628 _appendLanguageToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
629 char buf
[ULOC_LANG_CAPACITY
];
630 UErrorCode tmpStatus
= U_ZERO_ERROR
;
634 if (U_FAILURE(*status
)) {
638 len
= uloc_getLanguage(localeID
, buf
, sizeof(buf
), &tmpStatus
);
639 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
641 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
647 /* Note: returned language code is in lower case letters */
650 if (reslen
< capacity
) {
651 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
653 reslen
+= LANG_UND_LEN
;
654 } else if (!_isLanguageSubtag(buf
, len
)) {
655 /* invalid language code */
657 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
660 if (reslen
< capacity
) {
661 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
663 reslen
+= LANG_UND_LEN
;
665 /* resolve deprecated */
666 for (i
= 0; i
< UPRV_LENGTHOF(DEPRECATEDLANGS
); i
+= 2) {
667 if (uprv_compareInvCharsAsAscii(buf
, DEPRECATEDLANGS
[i
]) == 0) {
668 uprv_strcpy(buf
, DEPRECATEDLANGS
[i
+ 1]);
669 len
= (int32_t)uprv_strlen(buf
);
673 if (reslen
< capacity
) {
674 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
678 u_terminateChars(appendAt
, capacity
, reslen
, status
);
683 _appendScriptToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
684 char buf
[ULOC_SCRIPT_CAPACITY
];
685 UErrorCode tmpStatus
= U_ZERO_ERROR
;
689 if (U_FAILURE(*status
)) {
693 len
= uloc_getScript(localeID
, buf
, sizeof(buf
), &tmpStatus
);
694 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
696 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
702 if (!_isScriptSubtag(buf
, len
)) {
703 /* invalid script code */
705 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
709 if (reslen
< capacity
) {
710 *(appendAt
+ reslen
) = SEP
;
714 if (reslen
< capacity
) {
715 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
720 u_terminateChars(appendAt
, capacity
, reslen
, status
);
725 _appendRegionToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
726 char buf
[ULOC_COUNTRY_CAPACITY
];
727 UErrorCode tmpStatus
= U_ZERO_ERROR
;
731 if (U_FAILURE(*status
)) {
735 len
= uloc_getCountry(localeID
, buf
, sizeof(buf
), &tmpStatus
);
736 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
738 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
744 if (!_isRegionSubtag(buf
, len
)) {
745 /* invalid region code */
747 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
751 if (reslen
< capacity
) {
752 *(appendAt
+ reslen
) = SEP
;
756 if (reslen
< capacity
) {
757 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
762 u_terminateChars(appendAt
, capacity
, reslen
, status
);
767 _appendVariantsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool
*hadPosix
, UErrorCode
* status
) {
768 char buf
[ULOC_FULLNAME_CAPACITY
];
769 UErrorCode tmpStatus
= U_ZERO_ERROR
;
773 if (U_FAILURE(*status
)) {
777 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
778 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
780 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
788 VariantListEntry
*var
;
789 VariantListEntry
*varFirst
= NULL
;
794 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
798 *p
= 0; /* terminate */
802 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
805 /* ignore empty variant */
807 /* ICU uses upper case letters for variants, but
808 the canonical format is lowercase in BCP47 */
809 for (i
= 0; *(pVar
+ i
) != 0; i
++) {
810 *(pVar
+ i
) = uprv_tolower(*(pVar
+ i
));
814 if (_isVariantSubtag(pVar
, -1)) {
815 if (uprv_strcmp(pVar
,POSIX_VALUE
) || len
!= uprv_strlen(POSIX_VALUE
)) {
816 /* emit the variant to the list */
817 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
819 *status
= U_MEMORY_ALLOCATION_ERROR
;
823 if (!_addVariantToList(&varFirst
, var
)) {
824 /* duplicated variant */
827 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
832 /* Special handling for POSIX variant, need to remember that we had it and then */
833 /* treat it like an extension later. */
837 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
839 } else if (_isPrivateuseValueSubtag(pVar
, -1)) {
840 /* Handle private use subtags separately */
844 /* reset variant starting position */
846 } else if (pVar
== NULL
) {
852 if (U_SUCCESS(*status
)) {
853 if (varFirst
!= NULL
) {
856 /* write out validated/normalized variants to the target */
858 while (var
!= NULL
) {
859 if (reslen
< capacity
) {
860 *(appendAt
+ reslen
) = SEP
;
863 varLen
= (int32_t)uprv_strlen(var
->variant
);
864 if (reslen
< capacity
) {
865 uprv_memcpy(appendAt
+ reslen
, var
->variant
, uprv_min(varLen
, capacity
- reslen
));
875 while (var
!= NULL
) {
876 VariantListEntry
*tmpVar
= var
->next
;
881 if (U_FAILURE(*status
)) {
886 u_terminateChars(appendAt
, capacity
, reslen
, status
);
891 _appendKeywordsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
892 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
893 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
] = { 0 };
894 int32_t attrBufLength
= 0;
895 UBool isAttribute
= FALSE
;
896 UEnumeration
*keywordEnum
= NULL
;
899 keywordEnum
= uloc_openKeywords(localeID
, status
);
900 if (U_FAILURE(*status
) && !hadPosix
) {
901 uenum_close(keywordEnum
);
904 if (keywordEnum
!= NULL
|| hadPosix
) {
905 /* reorder extensions */
908 ExtensionListEntry
*firstExt
= NULL
;
909 ExtensionListEntry
*ext
;
910 AttributeListEntry
*firstAttr
= NULL
;
911 AttributeListEntry
*attr
;
913 char extBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
914 char *pExtBuf
= extBuf
;
915 int32_t extBufCapacity
= sizeof(extBuf
);
916 const char *bcpKey
, *bcpValue
;
917 UErrorCode tmpStatus
= U_ZERO_ERROR
;
923 key
= uenum_next(keywordEnum
, NULL
, status
);
927 len
= uloc_getKeywordValue(localeID
, key
, buf
, sizeof(buf
), &tmpStatus
);
928 /* buf must be null-terminated */
929 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
931 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
934 /* ignore this keyword */
935 tmpStatus
= U_ZERO_ERROR
;
939 keylen
= (int32_t)uprv_strlen(key
);
940 isBcpUExt
= (keylen
> 1);
942 /* special keyword used for representing Unicode locale attributes */
943 if (uprv_strcmp(key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
949 for (; i
< len
; i
++) {
951 attrBuf
[attrBufLength
++] = buf
[i
];
957 if (attrBufLength
> 0) {
958 attrBuf
[attrBufLength
] = 0;
960 } else if (i
>= len
){
964 /* create AttributeListEntry */
965 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
967 *status
= U_MEMORY_ALLOCATION_ERROR
;
970 attrValue
= (char*)uprv_malloc(attrBufLength
+ 1);
971 if (attrValue
== NULL
) {
972 *status
= U_MEMORY_ALLOCATION_ERROR
;
975 uprv_strcpy(attrValue
, attrBuf
);
976 attr
->attribute
= attrValue
;
978 if (!_addAttributeToList(&firstAttr
, attr
)) {
980 uprv_free(attrValue
);
982 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
988 } else if (isBcpUExt
) {
989 bcpKey
= uloc_toUnicodeLocaleKey(key
);
990 if (bcpKey
== NULL
) {
992 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
998 /* we've checked buf is null-terminated above */
999 bcpValue
= uloc_toUnicodeLocaleType(key
, buf
);
1000 if (bcpValue
== NULL
) {
1002 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1007 if (bcpValue
== buf
) {
1009 When uloc_toUnicodeLocaleType(key, buf) returns the
1010 input value as is, the value is well-formed, but has
1011 no known mapping. This implementation normalizes the
1012 value to lower case
1014 int32_t bcpValueLen
= uprv_strlen(bcpValue
);
1015 if (bcpValueLen
< extBufCapacity
) {
1016 uprv_strcpy(pExtBuf
, bcpValue
);
1017 T_CString_toLowerCase(pExtBuf
);
1021 pExtBuf
+= (bcpValueLen
+ 1);
1022 extBufCapacity
-= (bcpValueLen
+ 1);
1025 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1032 if (*key
== PRIVATEUSE
) {
1033 if (!_isPrivateuseValueSubtags(buf
, len
)) {
1035 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1041 if (!_isExtensionSingleton(key
, keylen
) || !_isExtensionSubtags(buf
, len
)) {
1043 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1050 if ((len
+ 1) < extBufCapacity
) {
1051 uprv_memcpy(pExtBuf
, buf
, len
);
1059 extBufCapacity
-= (len
+ 1);
1061 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1067 /* create ExtensionListEntry */
1068 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1070 *status
= U_MEMORY_ALLOCATION_ERROR
;
1074 ext
->value
= bcpValue
;
1076 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1079 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1086 /* Special handling for POSIX variant - add the keywords for POSIX */
1088 /* create ExtensionListEntry for POSIX */
1089 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1091 *status
= U_MEMORY_ALLOCATION_ERROR
;
1094 ext
->key
= POSIX_KEY
;
1095 ext
->value
= POSIX_VALUE
;
1097 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1102 if (U_SUCCESS(*status
) && (firstExt
!= NULL
|| firstAttr
!= NULL
)) {
1103 UBool startLDMLExtension
= FALSE
;
1108 if (!startLDMLExtension
&& (ext
&& uprv_strlen(ext
->key
) > 1)) {
1109 /* write LDML singleton extension */
1110 if (reslen
< capacity
) {
1111 *(appendAt
+ reslen
) = SEP
;
1114 if (reslen
< capacity
) {
1115 *(appendAt
+ reslen
) = LDMLEXT
;
1119 startLDMLExtension
= TRUE
;
1122 /* write out the sorted BCP47 attributes, extensions and private use */
1123 if (ext
&& (uprv_strlen(ext
->key
) == 1 || attr
== NULL
)) {
1124 if (reslen
< capacity
) {
1125 *(appendAt
+ reslen
) = SEP
;
1128 len
= (int32_t)uprv_strlen(ext
->key
);
1129 if (reslen
< capacity
) {
1130 uprv_memcpy(appendAt
+ reslen
, ext
->key
, uprv_min(len
, capacity
- reslen
));
1133 if (reslen
< capacity
) {
1134 *(appendAt
+ reslen
) = SEP
;
1137 len
= (int32_t)uprv_strlen(ext
->value
);
1138 if (reslen
< capacity
) {
1139 uprv_memcpy(appendAt
+ reslen
, ext
->value
, uprv_min(len
, capacity
- reslen
));
1145 /* write the value for the attributes */
1146 if (reslen
< capacity
) {
1147 *(appendAt
+ reslen
) = SEP
;
1150 len
= (int32_t)uprv_strlen(attr
->attribute
);
1151 if (reslen
< capacity
) {
1152 uprv_memcpy(appendAt
+ reslen
, attr
->attribute
, uprv_min(len
, capacity
- reslen
));
1158 } while (attr
!= NULL
|| ext
!= NULL
);
1163 while (ext
!= NULL
) {
1164 ExtensionListEntry
*tmpExt
= ext
->next
;
1170 while (attr
!= NULL
) {
1171 AttributeListEntry
*tmpAttr
= attr
->next
;
1172 char *pValue
= (char *)attr
->attribute
;
1178 uenum_close(keywordEnum
);
1180 if (U_FAILURE(*status
)) {
1185 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1189 * Append keywords parsed from LDML extension value
1190 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1191 * Note: char* buf is used for storing keywords
1194 _appendLDMLExtensionAsKeywords(const char* ldmlext
, ExtensionListEntry
** appendTo
, char* buf
, int32_t bufSize
, UBool
*posixVariant
, UErrorCode
*status
) {
1195 const char *pTag
; /* beginning of current subtag */
1196 const char *pKwds
; /* beginning of key-type pairs */
1197 UBool variantExists
= *posixVariant
;
1199 ExtensionListEntry
*kwdFirst
= NULL
; /* first LDML keyword */
1200 ExtensionListEntry
*kwd
, *nextKwd
;
1202 AttributeListEntry
*attrFirst
= NULL
; /* first attribute */
1203 AttributeListEntry
*attr
, *nextAttr
;
1208 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1209 int32_t attrBufIdx
= 0;
1211 /* Reset the posixVariant value */
1212 *posixVariant
= FALSE
;
1217 /* Iterate through u extension attributes */
1219 /* locate next separator char */
1220 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1222 if (ultag_isUnicodeLocaleKey(pTag
, len
)) {
1227 /* add this attribute to the list */
1228 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
1230 *status
= U_MEMORY_ALLOCATION_ERROR
;
1234 if (len
< (int32_t)sizeof(attrBuf
) - attrBufIdx
) {
1235 uprv_memcpy(&attrBuf
[attrBufIdx
], pTag
, len
);
1236 attrBuf
[attrBufIdx
+ len
] = 0;
1237 attr
->attribute
= &attrBuf
[attrBufIdx
];
1238 attrBufIdx
+= (len
+ 1);
1240 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1244 if (!_addAttributeToList(&attrFirst
, attr
)) {
1245 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1253 /* next to the separator */
1259 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1261 if (attrBufIdx
> bufSize
) {
1262 /* attrBufIdx == <total length of attribute subtag> + 1 */
1263 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1267 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1269 *status
= U_MEMORY_ALLOCATION_ERROR
;
1273 kwd
->key
= LOCALE_ATTRIBUTE_KEY
;
1276 /* attribute subtags sorted in alphabetical order as type */
1278 while (attr
!= NULL
) {
1279 nextAttr
= attr
->next
;
1281 /* buffer size check is done above */
1282 if (attr
!= attrFirst
) {
1283 *(buf
+ bufIdx
) = SEP
;
1287 len
= uprv_strlen(attr
->attribute
);
1288 uprv_memcpy(buf
+ bufIdx
, attr
->attribute
, len
);
1293 *(buf
+ bufIdx
) = 0;
1296 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1297 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1302 /* once keyword entry is created, delete the attribute list */
1304 while (attr
!= NULL
) {
1305 nextAttr
= attr
->next
;
1313 const char *pBcpKey
= NULL
; /* u extension key subtag */
1314 const char *pBcpType
= NULL
; /* beginning of u extension type subtag(s) */
1315 int32_t bcpKeyLen
= 0;
1316 int32_t bcpTypeLen
= 0;
1317 UBool isDone
= FALSE
;
1320 /* BCP47 representation of LDML key/type pairs */
1322 const char *pNextBcpKey
= NULL
;
1323 int32_t nextBcpKeyLen
= 0;
1324 UBool emitKeyword
= FALSE
;
1327 /* locate next separator char */
1328 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1330 if (ultag_isUnicodeLocaleKey(pTag
, len
)) {
1334 nextBcpKeyLen
= len
;
1340 U_ASSERT(pBcpKey
!= NULL
);
1341 /* within LDML type subtags */
1343 bcpTypeLen
+= (len
+ 1);
1353 /* next to the separator */
1357 /* processing last one */
1363 const char *pKey
= NULL
; /* LDML key */
1364 const char *pType
= NULL
; /* LDML type */
1366 char bcpKeyBuf
[9]; /* BCP key length is always 2 for now */
1368 U_ASSERT(pBcpKey
!= NULL
);
1370 if (bcpKeyLen
>= sizeof(bcpKeyBuf
)) {
1371 /* the BCP key is invalid */
1372 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1376 uprv_strncpy(bcpKeyBuf
, pBcpKey
, bcpKeyLen
);
1377 bcpKeyBuf
[bcpKeyLen
] = 0;
1379 /* u extension key to LDML key */
1380 pKey
= uloc_toLegacyKey(bcpKeyBuf
);
1382 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1385 if (pKey
== bcpKeyBuf
) {
1387 The key returned by toLegacyKey points to the input buffer.
1388 We normalize the result key to lower case.
1390 T_CString_toLowerCase(bcpKeyBuf
);
1391 if (bufSize
- bufIdx
- 1 >= bcpKeyLen
) {
1392 uprv_memcpy(buf
+ bufIdx
, bcpKeyBuf
, bcpKeyLen
);
1393 pKey
= buf
+ bufIdx
;
1394 bufIdx
+= bcpKeyLen
;
1395 *(buf
+ bufIdx
) = 0;
1398 *status
= U_BUFFER_OVERFLOW_ERROR
;
1404 char bcpTypeBuf
[128]; /* practically long enough even considering multiple subtag type */
1405 if (bcpTypeLen
>= sizeof(bcpTypeBuf
)) {
1406 /* the BCP type is too long */
1407 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1411 uprv_strncpy(bcpTypeBuf
, pBcpType
, bcpTypeLen
);
1412 bcpTypeBuf
[bcpTypeLen
] = 0;
1414 /* BCP type to locale type */
1415 pType
= uloc_toLegacyType(pKey
, bcpTypeBuf
);
1416 if (pType
== NULL
) {
1417 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1420 if (pType
== bcpTypeBuf
) {
1422 The type returned by toLegacyType points to the input buffer.
1423 We normalize the result type to lower case.
1425 /* normalize to lower case */
1426 T_CString_toLowerCase(bcpTypeBuf
);
1427 if (bufSize
- bufIdx
- 1 >= bcpTypeLen
) {
1428 uprv_memcpy(buf
+ bufIdx
, bcpTypeBuf
, bcpTypeLen
);
1429 pType
= buf
+ bufIdx
;
1430 bufIdx
+= bcpTypeLen
;
1431 *(buf
+ bufIdx
) = 0;
1434 *status
= U_BUFFER_OVERFLOW_ERROR
;
1439 /* typeless - default type value is "yes" */
1440 pType
= LOCALE_TYPE_YES
;
1443 /* Special handling for u-va-posix, since we want to treat this as a variant,
1445 if (!variantExists
&& !uprv_strcmp(pKey
, POSIX_KEY
) && !uprv_strcmp(pType
, POSIX_VALUE
) ) {
1446 *posixVariant
= TRUE
;
1448 /* create an ExtensionListEntry for this keyword */
1449 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1451 *status
= U_MEMORY_ALLOCATION_ERROR
;
1458 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1459 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1465 pBcpKey
= pNextBcpKey
;
1466 bcpKeyLen
= pNextBcpKey
!= NULL
? nextBcpKeyLen
: 0;
1474 while (kwd
!= NULL
) {
1475 nextKwd
= kwd
->next
;
1476 _addExtensionToList(appendTo
, kwd
, FALSE
);
1484 while (attr
!= NULL
) {
1485 nextAttr
= attr
->next
;
1491 while (kwd
!= NULL
) {
1492 nextKwd
= kwd
->next
;
1500 _appendKeywords(ULanguageTag
* langtag
, char* appendAt
, int32_t capacity
, UErrorCode
* status
) {
1504 ExtensionListEntry
*kwdFirst
= NULL
;
1505 ExtensionListEntry
*kwd
;
1506 const char *key
, *type
;
1507 char *kwdBuf
= NULL
;
1508 int32_t kwdBufLength
= capacity
;
1509 UBool posixVariant
= FALSE
;
1511 if (U_FAILURE(*status
)) {
1515 kwdBuf
= (char*)uprv_malloc(kwdBufLength
);
1516 if (kwdBuf
== NULL
) {
1517 *status
= U_MEMORY_ALLOCATION_ERROR
;
1521 /* Determine if variants already exist */
1522 if (ultag_getVariantsSize(langtag
)) {
1523 posixVariant
= TRUE
;
1526 n
= ultag_getExtensionsSize(langtag
);
1528 /* resolve locale keywords and reordering keys */
1529 for (i
= 0; i
< n
; i
++) {
1530 key
= ultag_getExtensionKey(langtag
, i
);
1531 type
= ultag_getExtensionValue(langtag
, i
);
1532 if (*key
== LDMLEXT
) {
1533 _appendLDMLExtensionAsKeywords(type
, &kwdFirst
, kwdBuf
, kwdBufLength
, &posixVariant
, status
);
1534 if (U_FAILURE(*status
)) {
1538 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1540 *status
= U_MEMORY_ALLOCATION_ERROR
;
1545 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1547 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1553 if (U_SUCCESS(*status
)) {
1554 type
= ultag_getPrivateUse(langtag
);
1555 if ((int32_t)uprv_strlen(type
) > 0) {
1556 /* add private use as a keyword */
1557 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1559 *status
= U_MEMORY_ALLOCATION_ERROR
;
1561 kwd
->key
= PRIVATEUSE_KEY
;
1563 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1565 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1571 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1573 if (U_SUCCESS(*status
) && posixVariant
) {
1574 len
= (int32_t) uprv_strlen(_POSIX
);
1575 if (reslen
< capacity
) {
1576 uprv_memcpy(appendAt
+ reslen
, _POSIX
, uprv_min(len
, capacity
- reslen
));
1581 if (U_SUCCESS(*status
) && kwdFirst
!= NULL
) {
1582 /* write out the sorted keywords */
1583 UBool firstValue
= TRUE
;
1586 if (reslen
< capacity
) {
1589 *(appendAt
+ reslen
) = LOCALE_EXT_SEP
;
1593 *(appendAt
+ reslen
) = LOCALE_KEYWORD_SEP
;
1599 len
= (int32_t)uprv_strlen(kwd
->key
);
1600 if (reslen
< capacity
) {
1601 uprv_memcpy(appendAt
+ reslen
, kwd
->key
, uprv_min(len
, capacity
- reslen
));
1606 if (reslen
< capacity
) {
1607 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1612 len
= (int32_t)uprv_strlen(kwd
->value
);
1613 if (reslen
< capacity
) {
1614 uprv_memcpy(appendAt
+ reslen
, kwd
->value
, uprv_min(len
, capacity
- reslen
));
1624 while (kwd
!= NULL
) {
1625 ExtensionListEntry
*tmpKwd
= kwd
->next
;
1632 if (U_FAILURE(*status
)) {
1636 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1640 _appendPrivateuseToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1641 char buf
[ULOC_FULLNAME_CAPACITY
];
1642 char tmpAppend
[ULOC_FULLNAME_CAPACITY
];
1643 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1647 if (U_FAILURE(*status
)) {
1651 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1652 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1654 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1662 UBool firstValue
= TRUE
;
1669 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1673 *p
= 0; /* terminate */
1675 if (pPriv
!= NULL
) {
1676 /* Private use in the canonical format is lowercase in BCP47 */
1677 for (i
= 0; *(pPriv
+ i
) != 0; i
++) {
1678 *(pPriv
+ i
) = uprv_tolower(*(pPriv
+ i
));
1682 if (_isPrivateuseValueSubtag(pPriv
, -1)) {
1684 if (!_isVariantSubtag(pPriv
, -1)) {
1690 } else if (strict
) {
1691 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1698 if (reslen
< capacity
) {
1699 tmpAppend
[reslen
++] = SEP
;
1703 if (reslen
< capacity
) {
1704 tmpAppend
[reslen
++] = *PRIVATEUSE_KEY
;
1707 if (reslen
< capacity
) {
1708 tmpAppend
[reslen
++] = SEP
;
1711 len
= (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX
);
1712 if (reslen
< capacity
) {
1713 uprv_memcpy(tmpAppend
+ reslen
, PRIVUSE_VARIANT_PREFIX
, uprv_min(len
, capacity
- reslen
));
1717 if (reslen
< capacity
) {
1718 tmpAppend
[reslen
++] = SEP
;
1724 len
= (int32_t)uprv_strlen(pPriv
);
1725 if (reslen
< capacity
) {
1726 uprv_memcpy(tmpAppend
+ reslen
, pPriv
, uprv_min(len
, capacity
- reslen
));
1731 /* reset private use starting position */
1733 } else if (pPriv
== NULL
) {
1739 if (U_FAILURE(*status
)) {
1744 if (U_SUCCESS(*status
)) {
1746 if (reslen
< capacity
) {
1747 uprv_memcpy(appendAt
, tmpAppend
, uprv_min(len
, capacity
- reslen
));
1751 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1757 * -------------------------------------------------
1761 * -------------------------------------------------
1764 /* Bit flags used by the parser */
1774 static ULanguageTag
*
1775 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
) {
1779 char *pSubtag
, *pNext
, *pLastGoodPosition
;
1782 ExtensionListEntry
*pExtension
;
1783 char *pExtValueSubtag
, *pExtValueSubtagEnd
;
1785 UBool privateuseVar
= FALSE
;
1786 int32_t grandfatheredLen
= 0;
1788 if (parsedLen
!= NULL
) {
1792 if (U_FAILURE(*status
)) {
1797 tagLen
= (int32_t)uprv_strlen(tag
);
1800 /* copy the entire string */
1801 tagBuf
= (char*)uprv_malloc(tagLen
+ 1);
1802 if (tagBuf
== NULL
) {
1803 *status
= U_MEMORY_ALLOCATION_ERROR
;
1806 uprv_memcpy(tagBuf
, tag
, tagLen
);
1807 *(tagBuf
+ tagLen
) = 0;
1809 /* create a ULanguageTag */
1810 t
= (ULanguageTag
*)uprv_malloc(sizeof(ULanguageTag
));
1813 *status
= U_MEMORY_ALLOCATION_ERROR
;
1816 _initializeULanguageTag(t
);
1819 if (tagLen
< MINLEN
) {
1820 /* the input tag is too short - return empty ULanguageTag */
1824 /* check if the tag is grandfathered */
1825 for (i
= 0; GRANDFATHERED
[i
] != NULL
; i
+= 2) {
1826 if (uprv_stricmp(GRANDFATHERED
[i
], tagBuf
) == 0) {
1827 int32_t newTagLength
;
1829 grandfatheredLen
= tagLen
; /* back up for output parsedLen */
1830 newTagLength
= uprv_strlen(GRANDFATHERED
[i
+1]);
1831 if (tagLen
< newTagLength
) {
1833 tagBuf
= (char*)uprv_malloc(newTagLength
+ 1);
1834 if (tagBuf
== NULL
) {
1835 *status
= U_MEMORY_ALLOCATION_ERROR
;
1839 tagLen
= newTagLength
;
1841 uprv_strcpy(t
->buf
, GRANDFATHERED
[i
+ 1]);
1847 * langtag = language
1856 pNext
= pLastGoodPosition
= tagBuf
;
1859 pExtValueSubtag
= NULL
;
1860 pExtValueSubtagEnd
= NULL
;
1867 /* locate next separator char */
1881 subtagLen
= (int32_t)(pSep
- pSubtag
);
1884 if (_isLanguageSubtag(pSubtag
, subtagLen
)) {
1885 *pSep
= 0; /* terminate */
1886 t
->language
= T_CString_toLowerCase(pSubtag
);
1888 pLastGoodPosition
= pSep
;
1889 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
1894 if (_isExtlangSubtag(pSubtag
, subtagLen
)) {
1896 t
->extlang
[extlangIdx
++] = T_CString_toLowerCase(pSubtag
);
1898 pLastGoodPosition
= pSep
;
1899 if (extlangIdx
< 3) {
1900 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
1902 next
= SCRT
| REGN
| VART
| EXTS
| PRIV
;
1908 if (_isScriptSubtag(pSubtag
, subtagLen
)) {
1914 *p
= uprv_toupper(*p
);
1917 *p
= uprv_tolower(*p
);
1920 t
->script
= pSubtag
;
1922 pLastGoodPosition
= pSep
;
1923 next
= REGN
| VART
| EXTS
| PRIV
;
1928 if (_isRegionSubtag(pSubtag
, subtagLen
)) {
1930 t
->region
= T_CString_toUpperCase(pSubtag
);
1932 pLastGoodPosition
= pSep
;
1933 next
= VART
| EXTS
| PRIV
;
1938 if (_isVariantSubtag(pSubtag
, subtagLen
) ||
1939 (privateuseVar
&& _isPrivateuseVariantSubtag(pSubtag
, subtagLen
))) {
1940 VariantListEntry
*var
;
1943 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
1945 *status
= U_MEMORY_ALLOCATION_ERROR
;
1949 var
->variant
= T_CString_toUpperCase(pSubtag
);
1950 isAdded
= _addVariantToList(&(t
->variants
), var
);
1952 /* duplicated variant entry */
1956 pLastGoodPosition
= pSep
;
1957 next
= VART
| EXTS
| PRIV
;
1962 if (_isExtensionSingleton(pSubtag
, subtagLen
)) {
1963 if (pExtension
!= NULL
) {
1964 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
1965 /* the previous extension is incomplete */
1966 uprv_free(pExtension
);
1971 /* terminate the previous extension value */
1972 *pExtValueSubtagEnd
= 0;
1973 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
1975 /* insert the extension to the list */
1976 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
1977 pLastGoodPosition
= pExtValueSubtagEnd
;
1979 /* stop parsing here */
1980 uprv_free(pExtension
);
1986 /* create a new extension */
1987 pExtension
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1988 if (pExtension
== NULL
) {
1989 *status
= U_MEMORY_ALLOCATION_ERROR
;
1993 pExtension
->key
= T_CString_toLowerCase(pSubtag
);
1994 pExtension
->value
= NULL
; /* will be set later */
1997 * reset the start and the end location of extension value
1998 * subtags for this extension
2000 pExtValueSubtag
= NULL
;
2001 pExtValueSubtagEnd
= NULL
;
2008 if (_isExtensionSubtag(pSubtag
, subtagLen
)) {
2009 if (pExtValueSubtag
== NULL
) {
2010 /* if the start postion of this extension's value is not yet,
2011 this one is the first value subtag */
2012 pExtValueSubtag
= pSubtag
;
2015 /* Mark the end of this subtag */
2016 pExtValueSubtagEnd
= pSep
;
2017 next
= EXTS
| EXTV
| PRIV
;
2023 if (uprv_tolower(*pSubtag
) == PRIVATEUSE
) {
2026 if (pExtension
!= NULL
) {
2027 /* Process the last extension */
2028 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2029 /* the previous extension is incomplete */
2030 uprv_free(pExtension
);
2034 /* terminate the previous extension value */
2035 *pExtValueSubtagEnd
= 0;
2036 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2038 /* insert the extension to the list */
2039 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2040 pLastGoodPosition
= pExtValueSubtagEnd
;
2043 /* stop parsing here */
2044 uprv_free(pExtension
);
2051 /* The rest of part will be private use value subtags */
2052 if (pNext
== NULL
) {
2053 /* empty private use subtag */
2056 /* back up the private use value start position */
2057 pPrivuseVal
= pNext
;
2059 /* validate private use value subtags */
2075 subtagLen
= (int32_t)(pSep
- pSubtag
);
2077 if (uprv_strncmp(pSubtag
, PRIVUSE_VARIANT_PREFIX
, uprv_strlen(PRIVUSE_VARIANT_PREFIX
)) == 0) {
2080 privateuseVar
= TRUE
;
2082 } else if (_isPrivateuseValueSubtag(pSubtag
, subtagLen
)) {
2083 pLastGoodPosition
= pSep
;
2093 if (pLastGoodPosition
- pPrivuseVal
> 0) {
2094 *pLastGoodPosition
= 0;
2095 t
->privateuse
= T_CString_toLowerCase(pPrivuseVal
);
2097 /* No more subtags, exiting the parse loop */
2103 /* If we fell through here, it means this subtag is illegal - quit parsing */
2107 if (pExtension
!= NULL
) {
2108 /* Process the last extension */
2109 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2110 /* the previous extension is incomplete */
2111 uprv_free(pExtension
);
2113 /* terminate the previous extension value */
2114 *pExtValueSubtagEnd
= 0;
2115 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2116 /* insert the extension to the list */
2117 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2118 pLastGoodPosition
= pExtValueSubtagEnd
;
2120 uprv_free(pExtension
);
2125 if (parsedLen
!= NULL
) {
2126 *parsedLen
= (grandfatheredLen
> 0) ? grandfatheredLen
: (int32_t)(pLastGoodPosition
- t
->buf
);
2137 ultag_close(ULanguageTag
* langtag
) {
2139 if (langtag
== NULL
) {
2143 uprv_free(langtag
->buf
);
2145 if (langtag
->variants
) {
2146 VariantListEntry
*curVar
= langtag
->variants
;
2148 VariantListEntry
*nextVar
= curVar
->next
;
2154 if (langtag
->extensions
) {
2155 ExtensionListEntry
*curExt
= langtag
->extensions
;
2157 ExtensionListEntry
*nextExt
= curExt
->next
;
2167 ultag_getLanguage(const ULanguageTag
* langtag
) {
2168 return langtag
->language
;
2173 ultag_getJDKLanguage(const ULanguageTag
* langtag
) {
2175 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
2176 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS
[i
], langtag
->language
) == 0) {
2177 return DEPRECATEDLANGS
[i
+ 1];
2180 return langtag
->language
;
2185 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
) {
2186 if (idx
>= 0 && idx
< MAXEXTLANG
) {
2187 return langtag
->extlang
[idx
];
2193 ultag_getExtlangSize(const ULanguageTag
* langtag
) {
2196 for (i
= 0; i
< MAXEXTLANG
; i
++) {
2197 if (langtag
->extlang
[i
]) {
2205 ultag_getScript(const ULanguageTag
* langtag
) {
2206 return langtag
->script
;
2210 ultag_getRegion(const ULanguageTag
* langtag
) {
2211 return langtag
->region
;
2215 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
) {
2216 const char *var
= NULL
;
2217 VariantListEntry
*cur
= langtag
->variants
;
2231 ultag_getVariantsSize(const ULanguageTag
* langtag
) {
2233 VariantListEntry
*cur
= langtag
->variants
;
2245 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
) {
2246 const char *key
= NULL
;
2247 ExtensionListEntry
*cur
= langtag
->extensions
;
2261 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
) {
2262 const char *val
= NULL
;
2263 ExtensionListEntry
*cur
= langtag
->extensions
;
2277 ultag_getExtensionsSize(const ULanguageTag
* langtag
) {
2279 ExtensionListEntry
*cur
= langtag
->extensions
;
2291 ultag_getPrivateUse(const ULanguageTag
* langtag
) {
2292 return langtag
->privateuse
;
2297 ultag_getGrandfathered(const ULanguageTag
* langtag
) {
2298 return langtag
->grandfathered
;
2304 * -------------------------------------------------
2306 * Locale/BCP47 conversion APIs, exposed as uloc_*
2308 * -------------------------------------------------
2310 U_CAPI
int32_t U_EXPORT2
2311 uloc_toLanguageTag(const char* localeID
,
2313 int32_t langtagCapacity
,
2315 UErrorCode
* status
) {
2316 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2317 char canonical
[256];
2319 UErrorCode tmpStatus
= U_ZERO_ERROR
;
2320 UBool hadPosix
= FALSE
;
2321 const char* pKeywordStart
;
2323 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2325 if (uprv_strlen(localeID
) > 0) {
2326 uloc_canonicalize(localeID
, canonical
, sizeof(canonical
), &tmpStatus
);
2327 if (tmpStatus
!= U_ZERO_ERROR
) {
2328 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2333 /* For handling special case - private use only tag */
2334 pKeywordStart
= locale_getKeywordsStart(canonical
);
2335 if (pKeywordStart
== canonical
) {
2336 UEnumeration
*kwdEnum
;
2340 kwdEnum
= uloc_openKeywords((const char*)canonical
, &tmpStatus
);
2341 if (kwdEnum
!= NULL
) {
2342 kwdCnt
= uenum_count(kwdEnum
, &tmpStatus
);
2347 key
= uenum_next(kwdEnum
, &len
, &tmpStatus
);
2348 if (len
== 1 && *key
== PRIVATEUSE
) {
2349 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
2350 buf
[0] = PRIVATEUSE
;
2352 len
= uloc_getKeywordValue(localeID
, key
, &buf
[2], sizeof(buf
) - 2, &tmpStatus
);
2353 if (U_SUCCESS(tmpStatus
)) {
2354 if (_isPrivateuseValueSubtags(&buf
[2], len
)) {
2355 /* return private use only tag */
2357 uprv_memcpy(langtag
, buf
, uprv_min(reslen
, langtagCapacity
));
2358 u_terminateChars(langtag
, langtagCapacity
, reslen
, status
);
2360 } else if (strict
) {
2361 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2364 /* if not strict mode, then "und" will be returned */
2366 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2371 uenum_close(kwdEnum
);
2378 reslen
+= _appendLanguageToLanguageTag(canonical
, langtag
, langtagCapacity
, strict
, status
);
2379 reslen
+= _appendScriptToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2380 reslen
+= _appendRegionToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2381 reslen
+= _appendVariantsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, &hadPosix
, status
);
2382 reslen
+= _appendKeywordsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2383 reslen
+= _appendPrivateuseToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2389 U_CAPI
int32_t U_EXPORT2
2390 uloc_forLanguageTag(const char* langtag
,
2392 int32_t localeIDCapacity
,
2393 int32_t* parsedLength
,
2394 UErrorCode
* status
) {
2397 const char *subtag
, *p
;
2400 UBool noRegion
= TRUE
;
2402 lt
= ultag_parse(langtag
, -1, parsedLength
, status
);
2403 if (U_FAILURE(*status
)) {
2408 subtag
= ultag_getExtlangSize(lt
) > 0 ? ultag_getExtlang(lt
, 0) : ultag_getLanguage(lt
);
2409 if (uprv_compareInvCharsAsAscii(subtag
, LANG_UND
) != 0) {
2410 len
= (int32_t)uprv_strlen(subtag
);
2412 if (reslen
< localeIDCapacity
) {
2413 uprv_memcpy(localeID
, subtag
, uprv_min(len
, localeIDCapacity
- reslen
));
2420 subtag
= ultag_getScript(lt
);
2421 len
= (int32_t)uprv_strlen(subtag
);
2423 if (reslen
< localeIDCapacity
) {
2424 *(localeID
+ reslen
) = LOCALE_SEP
;
2428 /* write out the script in title case */
2431 if (reslen
< localeIDCapacity
) {
2433 *(localeID
+ reslen
) = uprv_toupper(*p
);
2435 *(localeID
+ reslen
) = *p
;
2444 subtag
= ultag_getRegion(lt
);
2445 len
= (int32_t)uprv_strlen(subtag
);
2447 if (reslen
< localeIDCapacity
) {
2448 *(localeID
+ reslen
) = LOCALE_SEP
;
2451 /* write out the retion in upper case */
2454 if (reslen
< localeIDCapacity
) {
2455 *(localeID
+ reslen
) = uprv_toupper(*p
);
2464 n
= ultag_getVariantsSize(lt
);
2467 if (reslen
< localeIDCapacity
) {
2468 *(localeID
+ reslen
) = LOCALE_SEP
;
2473 for (i
= 0; i
< n
; i
++) {
2474 subtag
= ultag_getVariant(lt
, i
);
2475 if (reslen
< localeIDCapacity
) {
2476 *(localeID
+ reslen
) = LOCALE_SEP
;
2479 /* write out the variant in upper case */
2482 if (reslen
< localeIDCapacity
) {
2483 *(localeID
+ reslen
) = uprv_toupper(*p
);
2492 n
= ultag_getExtensionsSize(lt
);
2493 subtag
= ultag_getPrivateUse(lt
);
2494 if (n
> 0 || uprv_strlen(subtag
) > 0) {
2495 if (reslen
== 0 && n
> 0) {
2496 /* need a language */
2497 if (reslen
< localeIDCapacity
) {
2498 uprv_memcpy(localeID
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, localeIDCapacity
- reslen
));
2500 reslen
+= LANG_UND_LEN
;
2502 len
= _appendKeywords(lt
, localeID
+ reslen
, localeIDCapacity
- reslen
, status
);
2507 return u_terminateChars(localeID
, localeIDCapacity
, reslen
, status
);