1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2009-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
23 /* struct holding a single variant */
24 typedef struct VariantListEntry
{
26 struct VariantListEntry
*next
;
29 /* struct holding a single attribute value */
30 typedef struct AttributeListEntry
{
31 const char *attribute
;
32 struct AttributeListEntry
*next
;
35 /* struct holding a single extension */
36 typedef struct ExtensionListEntry
{
39 struct ExtensionListEntry
*next
;
43 typedef struct ULanguageTag
{
44 char *buf
; /* holding parsed subtags */
46 const char *extlang
[MAXEXTLANG
];
49 VariantListEntry
*variants
;
50 ExtensionListEntry
*extensions
;
51 const char *privateuse
;
52 const char *grandfathered
;
57 #define PRIVATEUSE 'x'
60 #define LOCALE_SEP '_'
61 #define LOCALE_EXT_SEP '@'
62 #define LOCALE_KEYWORD_SEP ';'
63 #define LOCALE_KEY_TYPE_SEP '='
65 #define ISALPHA(c) uprv_isASCIILetter(c)
66 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
68 static const char EMPTY
[] = "";
69 static const char LANG_UND
[] = "und";
70 static const char PRIVATEUSE_KEY
[] = "x";
71 static const char _POSIX
[] = "_POSIX";
72 static const char POSIX_KEY
[] = "va";
73 static const char POSIX_VALUE
[] = "posix";
74 static const char LOCALE_ATTRIBUTE_KEY
[] = "attribute";
75 static const char PRIVUSE_VARIANT_PREFIX
[] = "lvariant";
76 static const char LOCALE_TYPE_YES
[] = "yes";
78 #define LANG_UND_LEN 3
80 static const char* const GRANDFATHERED
[] = {
81 /* grandfathered preferred */
83 "cel-gaulish", "xtg-x-cel-gaulish",
84 "en-GB-oed", "en-GB-x-oed",
87 "i-default", "en-x-i-default",
88 "i-enochian", "und-x-i-enochian",
92 "i-mingo", "see-x-i-mingo",
105 "zh-min", "nan-x-zh-min",
111 static const char DEPRECATEDLANGS
[][4] = {
119 * -------------------------------------------------
121 * These ultag_ functions may be exposed as APIs later
123 * -------------------------------------------------
127 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
);
130 ultag_close(ULanguageTag
* langtag
);
133 ultag_getLanguage(const ULanguageTag
* langtag
);
137 ultag_getJDKLanguage(const ULanguageTag
* langtag
);
141 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
);
144 ultag_getExtlangSize(const ULanguageTag
* langtag
);
147 ultag_getScript(const ULanguageTag
* langtag
);
150 ultag_getRegion(const ULanguageTag
* langtag
);
153 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
);
156 ultag_getVariantsSize(const ULanguageTag
* langtag
);
159 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
);
162 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
);
165 ultag_getExtensionsSize(const ULanguageTag
* langtag
);
168 ultag_getPrivateUse(const ULanguageTag
* langtag
);
172 ultag_getGrandfathered(const ULanguageTag
* langtag
);
176 * -------------------------------------------------
178 * Language subtag syntax validation functions
180 * -------------------------------------------------
184 _isAlphaString(const char* s
, int32_t len
) {
186 for (i
= 0; i
< len
; i
++) {
187 if (!ISALPHA(*(s
+ i
))) {
195 _isNumericString(const char* s
, int32_t len
) {
197 for (i
= 0; i
< len
; i
++) {
198 if (!ISNUMERIC(*(s
+ i
))) {
206 _isAlphaNumericString(const char* s
, int32_t len
) {
208 for (i
= 0; i
< len
; i
++) {
209 if (!ISALPHA(*(s
+ i
)) && !ISNUMERIC(*(s
+ i
))) {
217 _isLanguageSubtag(const char* s
, int32_t len
) {
219 * language = 2*3ALPHA ; shortest ISO 639 code
220 * ["-" extlang] ; sometimes followed by
221 * ; extended language subtags
222 * / 4ALPHA ; or reserved for future use
223 * / 5*8ALPHA ; or registered language subtag
226 len
= (int32_t)uprv_strlen(s
);
228 if (len
>= 2 && len
<= 8 && _isAlphaString(s
, len
)) {
235 _isExtlangSubtag(const char* s
, int32_t len
) {
237 * extlang = 3ALPHA ; selected ISO 639 codes
238 * *2("-" 3ALPHA) ; permanently reserved
241 len
= (int32_t)uprv_strlen(s
);
243 if (len
== 3 && _isAlphaString(s
, len
)) {
250 _isScriptSubtag(const char* s
, int32_t len
) {
252 * script = 4ALPHA ; ISO 15924 code
255 len
= (int32_t)uprv_strlen(s
);
257 if (len
== 4 && _isAlphaString(s
, len
)) {
264 _isRegionSubtag(const char* s
, int32_t len
) {
266 * region = 2ALPHA ; ISO 3166-1 code
267 * / 3DIGIT ; UN M.49 code
270 len
= (int32_t)uprv_strlen(s
);
272 if (len
== 2 && _isAlphaString(s
, len
)) {
275 if (len
== 3 && _isNumericString(s
, len
)) {
282 _isVariantSubtag(const char* s
, int32_t len
) {
284 * variant = 5*8alphanum ; registered variants
285 * / (DIGIT 3alphanum)
288 len
= (int32_t)uprv_strlen(s
);
290 if (len
>= 5 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
293 if (len
== 4 && ISNUMERIC(*s
) && _isAlphaNumericString(s
+ 1, 3)) {
300 _isPrivateuseVariantSubtag(const char* s
, int32_t len
) {
302 * variant = 1*8alphanum ; registered variants
303 * / (DIGIT 3alphanum)
306 len
= (int32_t)uprv_strlen(s
);
308 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
315 _isExtensionSingleton(const char* s
, int32_t len
) {
317 * extension = singleton 1*("-" (2*8alphanum))
320 len
= (int32_t)uprv_strlen(s
);
322 if (len
== 1 && ISALPHA(*s
) && (uprv_tolower(*s
) != PRIVATEUSE
)) {
329 _isExtensionSubtag(const char* s
, int32_t len
) {
331 * extension = singleton 1*("-" (2*8alphanum))
334 len
= (int32_t)uprv_strlen(s
);
336 if (len
>= 2 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
343 _isExtensionSubtags(const char* s
, int32_t len
) {
345 const char *pSubtag
= NULL
;
348 len
= (int32_t)uprv_strlen(s
);
351 while ((p
- s
) < len
) {
353 if (pSubtag
== NULL
) {
356 if (!_isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
360 } else if (pSubtag
== NULL
) {
365 if (pSubtag
== NULL
) {
368 return _isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
372 _isPrivateuseValueSubtag(const char* s
, int32_t len
) {
374 * privateuse = "x" 1*("-" (1*8alphanum))
377 len
= (int32_t)uprv_strlen(s
);
379 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
386 _isPrivateuseValueSubtags(const char* s
, int32_t len
) {
388 const char *pSubtag
= NULL
;
391 len
= (int32_t)uprv_strlen(s
);
394 while ((p
- s
) < len
) {
396 if (pSubtag
== NULL
) {
399 if (!_isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
403 } else if (pSubtag
== NULL
) {
408 if (pSubtag
== NULL
) {
411 return _isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
415 ultag_isUnicodeLocaleKey(const char* s
, int32_t len
) {
417 len
= (int32_t)uprv_strlen(s
);
419 if (len
== 2 && _isAlphaNumericString(s
, len
)) {
426 ultag_isUnicodeLocaleType(const char*s
, int32_t len
) {
428 int32_t subtagLen
= 0;
431 len
= (int32_t)uprv_strlen(s
);
434 for (p
= s
; len
> 0; p
++, len
--) {
440 } else if (ISALPHA(*p
) || ISNUMERIC(*p
)) {
450 return (subtagLen
>= 3);
453 * -------------------------------------------------
457 * -------------------------------------------------
461 _addVariantToList(VariantListEntry
**first
, VariantListEntry
*var
) {
464 if (*first
== NULL
) {
468 VariantListEntry
*prev
, *cur
;
471 /* variants order should be preserved */
481 /* Checking for duplicate variant */
482 cmp
= uprv_compareInvCharsAsAscii(var
->variant
, cur
->variant
);
484 /* duplicated variant */
497 _addAttributeToList(AttributeListEntry
**first
, AttributeListEntry
*attr
) {
500 if (*first
== NULL
) {
504 AttributeListEntry
*prev
, *cur
;
507 /* reorder attributes in alphabetical order */
516 cmp
= uprv_compareInvCharsAsAscii(attr
->attribute
, cur
->attribute
);
527 /* duplicated attribute */
541 _addExtensionToList(ExtensionListEntry
**first
, ExtensionListEntry
*ext
, UBool localeToBCP
) {
544 if (*first
== NULL
) {
548 ExtensionListEntry
*prev
, *cur
;
551 /* reorder extensions in alphabetical order */
561 /* special handling for locale to bcp conversion */
564 len
= (int32_t)uprv_strlen(ext
->key
);
565 curlen
= (int32_t)uprv_strlen(cur
->key
);
567 if (len
== 1 && curlen
== 1) {
568 if (*(ext
->key
) == *(cur
->key
)) {
570 } else if (*(ext
->key
) == PRIVATEUSE
) {
572 } else if (*(cur
->key
) == PRIVATEUSE
) {
575 cmp
= *(ext
->key
) - *(cur
->key
);
577 } else if (len
== 1) {
578 cmp
= *(ext
->key
) - LDMLEXT
;
579 } else if (curlen
== 1) {
580 cmp
= LDMLEXT
- *(cur
->key
);
582 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
583 /* Both are u extension keys - we need special handling for 'attribute' */
585 if (uprv_strcmp(cur
->key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
587 } else if (uprv_strcmp(ext
->key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
593 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
605 /* duplicated extension key */
618 _initializeULanguageTag(ULanguageTag
* langtag
) {
623 langtag
->language
= EMPTY
;
624 for (i
= 0; i
< MAXEXTLANG
; i
++) {
625 langtag
->extlang
[i
] = NULL
;
628 langtag
->script
= EMPTY
;
629 langtag
->region
= EMPTY
;
631 langtag
->variants
= NULL
;
632 langtag
->extensions
= NULL
;
634 langtag
->grandfathered
= EMPTY
;
635 langtag
->privateuse
= EMPTY
;
639 _appendLanguageToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
640 char buf
[ULOC_LANG_CAPACITY
];
641 UErrorCode tmpStatus
= U_ZERO_ERROR
;
645 if (U_FAILURE(*status
)) {
649 len
= uloc_getLanguage(localeID
, buf
, sizeof(buf
), &tmpStatus
);
650 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
652 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
658 /* Note: returned language code is in lower case letters */
661 if (reslen
< capacity
) {
662 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
664 reslen
+= LANG_UND_LEN
;
665 } else if (!_isLanguageSubtag(buf
, len
)) {
666 /* invalid language code */
668 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
671 if (reslen
< capacity
) {
672 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
674 reslen
+= LANG_UND_LEN
;
676 /* resolve deprecated */
677 for (i
= 0; i
< UPRV_LENGTHOF(DEPRECATEDLANGS
); i
+= 2) {
678 if (uprv_compareInvCharsAsAscii(buf
, DEPRECATEDLANGS
[i
]) == 0) {
679 uprv_strcpy(buf
, DEPRECATEDLANGS
[i
+ 1]);
680 len
= (int32_t)uprv_strlen(buf
);
684 if (reslen
< capacity
) {
685 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
689 u_terminateChars(appendAt
, capacity
, reslen
, status
);
694 _appendScriptToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
695 char buf
[ULOC_SCRIPT_CAPACITY
];
696 UErrorCode tmpStatus
= U_ZERO_ERROR
;
700 if (U_FAILURE(*status
)) {
704 len
= uloc_getScript(localeID
, buf
, sizeof(buf
), &tmpStatus
);
705 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
707 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
713 if (!_isScriptSubtag(buf
, len
)) {
714 /* invalid script code */
716 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
720 if (reslen
< capacity
) {
721 *(appendAt
+ reslen
) = SEP
;
725 if (reslen
< capacity
) {
726 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
731 u_terminateChars(appendAt
, capacity
, reslen
, status
);
736 _appendRegionToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
737 char buf
[ULOC_COUNTRY_CAPACITY
];
738 UErrorCode tmpStatus
= U_ZERO_ERROR
;
742 if (U_FAILURE(*status
)) {
746 len
= uloc_getCountry(localeID
, buf
, sizeof(buf
), &tmpStatus
);
747 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
749 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
755 if (!_isRegionSubtag(buf
, len
)) {
756 /* invalid region code */
758 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
762 if (reslen
< capacity
) {
763 *(appendAt
+ reslen
) = SEP
;
767 if (reslen
< capacity
) {
768 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
773 u_terminateChars(appendAt
, capacity
, reslen
, status
);
778 _appendVariantsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool
*hadPosix
, UErrorCode
* status
) {
779 char buf
[ULOC_FULLNAME_CAPACITY
];
780 UErrorCode tmpStatus
= U_ZERO_ERROR
;
784 if (U_FAILURE(*status
)) {
788 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
789 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
791 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
799 VariantListEntry
*var
;
800 VariantListEntry
*varFirst
= NULL
;
805 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
809 *p
= 0; /* terminate */
813 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
816 /* ignore empty variant */
818 /* ICU uses upper case letters for variants, but
819 the canonical format is lowercase in BCP47 */
820 for (i
= 0; *(pVar
+ i
) != 0; i
++) {
821 *(pVar
+ i
) = uprv_tolower(*(pVar
+ i
));
825 if (_isVariantSubtag(pVar
, -1)) {
826 if (uprv_strcmp(pVar
,POSIX_VALUE
) || len
!= (int32_t)uprv_strlen(POSIX_VALUE
)) {
827 /* emit the variant to the list */
828 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
830 *status
= U_MEMORY_ALLOCATION_ERROR
;
834 if (!_addVariantToList(&varFirst
, var
)) {
835 /* duplicated variant */
838 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
843 /* Special handling for POSIX variant, need to remember that we had it and then */
844 /* treat it like an extension later. */
848 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
850 } else if (_isPrivateuseValueSubtag(pVar
, -1)) {
851 /* Handle private use subtags separately */
855 /* reset variant starting position */
857 } else if (pVar
== NULL
) {
863 if (U_SUCCESS(*status
)) {
864 if (varFirst
!= NULL
) {
867 /* write out validated/normalized variants to the target */
869 while (var
!= NULL
) {
870 if (reslen
< capacity
) {
871 *(appendAt
+ reslen
) = SEP
;
874 varLen
= (int32_t)uprv_strlen(var
->variant
);
875 if (reslen
< capacity
) {
876 uprv_memcpy(appendAt
+ reslen
, var
->variant
, uprv_min(varLen
, capacity
- reslen
));
886 while (var
!= NULL
) {
887 VariantListEntry
*tmpVar
= var
->next
;
892 if (U_FAILURE(*status
)) {
897 u_terminateChars(appendAt
, capacity
, reslen
, status
);
902 _appendKeywordsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
903 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
904 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
] = { 0 };
905 int32_t attrBufLength
= 0;
906 UEnumeration
*keywordEnum
= NULL
;
909 keywordEnum
= uloc_openKeywords(localeID
, status
);
910 if (U_FAILURE(*status
) && !hadPosix
) {
911 uenum_close(keywordEnum
);
914 if (keywordEnum
!= NULL
|| hadPosix
) {
915 /* reorder extensions */
918 ExtensionListEntry
*firstExt
= NULL
;
919 ExtensionListEntry
*ext
;
920 AttributeListEntry
*firstAttr
= NULL
;
921 AttributeListEntry
*attr
;
923 char extBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
924 char *pExtBuf
= extBuf
;
925 int32_t extBufCapacity
= sizeof(extBuf
);
926 const char *bcpKey
=nullptr, *bcpValue
=nullptr;
927 UErrorCode tmpStatus
= U_ZERO_ERROR
;
932 key
= uenum_next(keywordEnum
, NULL
, status
);
936 len
= uloc_getKeywordValue(localeID
, key
, buf
, sizeof(buf
), &tmpStatus
);
937 /* buf must be null-terminated */
938 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
940 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
943 /* ignore this keyword */
944 tmpStatus
= U_ZERO_ERROR
;
948 keylen
= (int32_t)uprv_strlen(key
);
949 isBcpUExt
= (keylen
> 1);
951 /* special keyword used for representing Unicode locale attributes */
952 if (uprv_strcmp(key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
957 for (; i
< len
; i
++) {
959 attrBuf
[attrBufLength
++] = buf
[i
];
965 if (attrBufLength
> 0) {
966 attrBuf
[attrBufLength
] = 0;
968 } else if (i
>= len
){
972 /* create AttributeListEntry */
973 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
975 *status
= U_MEMORY_ALLOCATION_ERROR
;
978 attrValue
= (char*)uprv_malloc(attrBufLength
+ 1);
979 if (attrValue
== NULL
) {
980 *status
= U_MEMORY_ALLOCATION_ERROR
;
983 uprv_strcpy(attrValue
, attrBuf
);
984 attr
->attribute
= attrValue
;
986 if (!_addAttributeToList(&firstAttr
, attr
)) {
988 uprv_free(attrValue
);
990 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
995 /* for a place holder ExtensionListEntry */
996 bcpKey
= LOCALE_ATTRIBUTE_KEY
;
999 } else if (isBcpUExt
) {
1000 bcpKey
= uloc_toUnicodeLocaleKey(key
);
1001 if (bcpKey
== NULL
) {
1003 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1009 /* we've checked buf is null-terminated above */
1010 bcpValue
= uloc_toUnicodeLocaleType(key
, buf
);
1011 if (bcpValue
== NULL
) {
1013 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1018 if (bcpValue
== buf
) {
1020 When uloc_toUnicodeLocaleType(key, buf) returns the
1021 input value as is, the value is well-formed, but has
1022 no known mapping. This implementation normalizes the
1023 value to lower case
1025 int32_t bcpValueLen
= static_cast<int32_t>(uprv_strlen(bcpValue
));
1026 if (bcpValueLen
< extBufCapacity
) {
1027 uprv_strcpy(pExtBuf
, bcpValue
);
1028 T_CString_toLowerCase(pExtBuf
);
1032 pExtBuf
+= (bcpValueLen
+ 1);
1033 extBufCapacity
-= (bcpValueLen
+ 1);
1036 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1043 if (*key
== PRIVATEUSE
) {
1044 if (!_isPrivateuseValueSubtags(buf
, len
)) {
1046 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1052 if (!_isExtensionSingleton(key
, keylen
) || !_isExtensionSubtags(buf
, len
)) {
1054 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1061 if ((len
+ 1) < extBufCapacity
) {
1062 uprv_memcpy(pExtBuf
, buf
, len
);
1070 extBufCapacity
-= (len
+ 1);
1072 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1077 /* create ExtensionListEntry */
1078 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1080 *status
= U_MEMORY_ALLOCATION_ERROR
;
1084 ext
->value
= bcpValue
;
1086 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1089 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1095 /* Special handling for POSIX variant - add the keywords for POSIX */
1097 /* create ExtensionListEntry for POSIX */
1098 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1100 *status
= U_MEMORY_ALLOCATION_ERROR
;
1103 ext
->key
= POSIX_KEY
;
1104 ext
->value
= POSIX_VALUE
;
1106 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1111 if (U_SUCCESS(*status
) && (firstExt
!= NULL
|| firstAttr
!= NULL
)) {
1112 UBool startLDMLExtension
= FALSE
;
1113 for (ext
= firstExt
; ext
; ext
= ext
->next
) {
1114 if (!startLDMLExtension
&& uprv_strlen(ext
->key
) > 1) {
1115 /* first LDML u singleton extension */
1116 if (reslen
< capacity
) {
1117 *(appendAt
+ reslen
) = SEP
;
1120 if (reslen
< capacity
) {
1121 *(appendAt
+ reslen
) = LDMLEXT
;
1125 startLDMLExtension
= TRUE
;
1128 /* write out the sorted BCP47 attributes, extensions and private use */
1129 if (uprv_strcmp(ext
->key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
1130 /* write the value for the attributes */
1131 for (attr
= firstAttr
; attr
; attr
= attr
->next
) {
1132 if (reslen
< capacity
) {
1133 *(appendAt
+ reslen
) = SEP
;
1136 len
= (int32_t)uprv_strlen(attr
->attribute
);
1137 if (reslen
< capacity
) {
1138 uprv_memcpy(appendAt
+ reslen
, attr
->attribute
, uprv_min(len
, capacity
- reslen
));
1143 if (reslen
< capacity
) {
1144 *(appendAt
+ reslen
) = SEP
;
1147 len
= (int32_t)uprv_strlen(ext
->key
);
1148 if (reslen
< capacity
) {
1149 uprv_memcpy(appendAt
+ reslen
, ext
->key
, uprv_min(len
, capacity
- reslen
));
1152 if (reslen
< capacity
) {
1153 *(appendAt
+ reslen
) = SEP
;
1156 len
= (int32_t)uprv_strlen(ext
->value
);
1157 if (reslen
< capacity
) {
1158 uprv_memcpy(appendAt
+ reslen
, ext
->value
, uprv_min(len
, capacity
- reslen
));
1167 while (ext
!= NULL
) {
1168 ExtensionListEntry
*tmpExt
= ext
->next
;
1174 while (attr
!= NULL
) {
1175 AttributeListEntry
*tmpAttr
= attr
->next
;
1176 char *pValue
= (char *)attr
->attribute
;
1182 uenum_close(keywordEnum
);
1184 if (U_FAILURE(*status
)) {
1189 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1193 * Append keywords parsed from LDML extension value
1194 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1195 * Note: char* buf is used for storing keywords
1198 _appendLDMLExtensionAsKeywords(const char* ldmlext
, ExtensionListEntry
** appendTo
, char* buf
, int32_t bufSize
, UBool
*posixVariant
, UErrorCode
*status
) {
1199 const char *pTag
; /* beginning of current subtag */
1200 const char *pKwds
; /* beginning of key-type pairs */
1201 UBool variantExists
= *posixVariant
;
1203 ExtensionListEntry
*kwdFirst
= NULL
; /* first LDML keyword */
1204 ExtensionListEntry
*kwd
, *nextKwd
;
1206 AttributeListEntry
*attrFirst
= NULL
; /* first attribute */
1207 AttributeListEntry
*attr
, *nextAttr
;
1212 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1213 int32_t attrBufIdx
= 0;
1215 /* Reset the posixVariant value */
1216 *posixVariant
= FALSE
;
1221 /* Iterate through u extension attributes */
1223 /* locate next separator char */
1224 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1226 if (ultag_isUnicodeLocaleKey(pTag
, len
)) {
1231 /* add this attribute to the list */
1232 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
1234 *status
= U_MEMORY_ALLOCATION_ERROR
;
1238 if (len
< (int32_t)sizeof(attrBuf
) - attrBufIdx
) {
1239 uprv_memcpy(&attrBuf
[attrBufIdx
], pTag
, len
);
1240 attrBuf
[attrBufIdx
+ len
] = 0;
1241 attr
->attribute
= &attrBuf
[attrBufIdx
];
1242 attrBufIdx
+= (len
+ 1);
1244 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1248 if (!_addAttributeToList(&attrFirst
, attr
)) {
1249 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1257 /* next to the separator */
1263 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1265 if (attrBufIdx
> bufSize
) {
1266 /* attrBufIdx == <total length of attribute subtag> + 1 */
1267 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1271 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1273 *status
= U_MEMORY_ALLOCATION_ERROR
;
1277 kwd
->key
= LOCALE_ATTRIBUTE_KEY
;
1280 /* attribute subtags sorted in alphabetical order as type */
1282 while (attr
!= NULL
) {
1283 nextAttr
= attr
->next
;
1285 /* buffer size check is done above */
1286 if (attr
!= attrFirst
) {
1287 *(buf
+ bufIdx
) = SEP
;
1291 len
= static_cast<int32_t>(uprv_strlen(attr
->attribute
));
1292 uprv_memcpy(buf
+ bufIdx
, attr
->attribute
, len
);
1297 *(buf
+ bufIdx
) = 0;
1300 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1301 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1306 /* once keyword entry is created, delete the attribute list */
1308 while (attr
!= NULL
) {
1309 nextAttr
= attr
->next
;
1317 const char *pBcpKey
= NULL
; /* u extension key subtag */
1318 const char *pBcpType
= NULL
; /* beginning of u extension type subtag(s) */
1319 int32_t bcpKeyLen
= 0;
1320 int32_t bcpTypeLen
= 0;
1321 UBool isDone
= FALSE
;
1324 /* BCP47 representation of LDML key/type pairs */
1326 const char *pNextBcpKey
= NULL
;
1327 int32_t nextBcpKeyLen
= 0;
1328 UBool emitKeyword
= FALSE
;
1331 /* locate next separator char */
1332 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1334 if (ultag_isUnicodeLocaleKey(pTag
, len
)) {
1338 nextBcpKeyLen
= len
;
1344 U_ASSERT(pBcpKey
!= NULL
);
1345 /* within LDML type subtags */
1347 bcpTypeLen
+= (len
+ 1);
1357 /* next to the separator */
1361 /* processing last one */
1367 const char *pKey
= NULL
; /* LDML key */
1368 const char *pType
= NULL
; /* LDML type */
1370 char bcpKeyBuf
[9]; /* BCP key length is always 2 for now */
1372 U_ASSERT(pBcpKey
!= NULL
);
1374 if (bcpKeyLen
>= (int32_t)sizeof(bcpKeyBuf
)) {
1375 /* the BCP key is invalid */
1376 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1380 uprv_strncpy(bcpKeyBuf
, pBcpKey
, bcpKeyLen
);
1381 bcpKeyBuf
[bcpKeyLen
] = 0;
1383 /* u extension key to LDML key */
1384 pKey
= uloc_toLegacyKey(bcpKeyBuf
);
1386 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1389 if (pKey
== bcpKeyBuf
) {
1391 The key returned by toLegacyKey points to the input buffer.
1392 We normalize the result key to lower case.
1394 T_CString_toLowerCase(bcpKeyBuf
);
1395 if (bufSize
- bufIdx
- 1 >= bcpKeyLen
) {
1396 uprv_memcpy(buf
+ bufIdx
, bcpKeyBuf
, bcpKeyLen
);
1397 pKey
= buf
+ bufIdx
;
1398 bufIdx
+= bcpKeyLen
;
1399 *(buf
+ bufIdx
) = 0;
1402 *status
= U_BUFFER_OVERFLOW_ERROR
;
1408 char bcpTypeBuf
[128]; /* practically long enough even considering multiple subtag type */
1409 if (bcpTypeLen
>= (int32_t)sizeof(bcpTypeBuf
)) {
1410 /* the BCP type is too long */
1411 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1415 uprv_strncpy(bcpTypeBuf
, pBcpType
, bcpTypeLen
);
1416 bcpTypeBuf
[bcpTypeLen
] = 0;
1418 /* BCP type to locale type */
1419 pType
= uloc_toLegacyType(pKey
, bcpTypeBuf
);
1420 if (pType
== NULL
) {
1421 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1424 if (pType
== bcpTypeBuf
) {
1426 The type returned by toLegacyType points to the input buffer.
1427 We normalize the result type to lower case.
1429 /* normalize to lower case */
1430 T_CString_toLowerCase(bcpTypeBuf
);
1431 if (bufSize
- bufIdx
- 1 >= bcpTypeLen
) {
1432 uprv_memcpy(buf
+ bufIdx
, bcpTypeBuf
, bcpTypeLen
);
1433 pType
= buf
+ bufIdx
;
1434 bufIdx
+= bcpTypeLen
;
1435 *(buf
+ bufIdx
) = 0;
1438 *status
= U_BUFFER_OVERFLOW_ERROR
;
1443 /* typeless - default type value is "yes" */
1444 pType
= LOCALE_TYPE_YES
;
1447 /* Special handling for u-va-posix, since we want to treat this as a variant,
1449 if (!variantExists
&& !uprv_strcmp(pKey
, POSIX_KEY
) && !uprv_strcmp(pType
, POSIX_VALUE
) ) {
1450 *posixVariant
= TRUE
;
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1455 *status
= U_MEMORY_ALLOCATION_ERROR
;
1462 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1463 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1469 pBcpKey
= pNextBcpKey
;
1470 bcpKeyLen
= pNextBcpKey
!= NULL
? nextBcpKeyLen
: 0;
1478 while (kwd
!= NULL
) {
1479 nextKwd
= kwd
->next
;
1480 _addExtensionToList(appendTo
, kwd
, FALSE
);
1488 while (attr
!= NULL
) {
1489 nextAttr
= attr
->next
;
1495 while (kwd
!= NULL
) {
1496 nextKwd
= kwd
->next
;
1504 _appendKeywords(ULanguageTag
* langtag
, char* appendAt
, int32_t capacity
, UErrorCode
* status
) {
1508 ExtensionListEntry
*kwdFirst
= NULL
;
1509 ExtensionListEntry
*kwd
;
1510 const char *key
, *type
;
1511 char *kwdBuf
= NULL
;
1512 int32_t kwdBufLength
= capacity
;
1513 UBool posixVariant
= FALSE
;
1515 if (U_FAILURE(*status
)) {
1519 kwdBuf
= (char*)uprv_malloc(kwdBufLength
);
1520 if (kwdBuf
== NULL
) {
1521 *status
= U_MEMORY_ALLOCATION_ERROR
;
1525 /* Determine if variants already exist */
1526 if (ultag_getVariantsSize(langtag
)) {
1527 posixVariant
= TRUE
;
1530 n
= ultag_getExtensionsSize(langtag
);
1532 /* resolve locale keywords and reordering keys */
1533 for (i
= 0; i
< n
; i
++) {
1534 key
= ultag_getExtensionKey(langtag
, i
);
1535 type
= ultag_getExtensionValue(langtag
, i
);
1536 if (*key
== LDMLEXT
) {
1537 _appendLDMLExtensionAsKeywords(type
, &kwdFirst
, kwdBuf
, kwdBufLength
, &posixVariant
, status
);
1538 if (U_FAILURE(*status
)) {
1542 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1544 *status
= U_MEMORY_ALLOCATION_ERROR
;
1549 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1551 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1557 if (U_SUCCESS(*status
)) {
1558 type
= ultag_getPrivateUse(langtag
);
1559 if ((int32_t)uprv_strlen(type
) > 0) {
1560 /* add private use as a keyword */
1561 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1563 *status
= U_MEMORY_ALLOCATION_ERROR
;
1565 kwd
->key
= PRIVATEUSE_KEY
;
1567 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1569 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1575 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1577 if (U_SUCCESS(*status
) && posixVariant
) {
1578 len
= (int32_t) uprv_strlen(_POSIX
);
1579 if (reslen
< capacity
) {
1580 uprv_memcpy(appendAt
+ reslen
, _POSIX
, uprv_min(len
, capacity
- reslen
));
1585 if (U_SUCCESS(*status
) && kwdFirst
!= NULL
) {
1586 /* write out the sorted keywords */
1587 UBool firstValue
= TRUE
;
1590 if (reslen
< capacity
) {
1593 *(appendAt
+ reslen
) = LOCALE_EXT_SEP
;
1597 *(appendAt
+ reslen
) = LOCALE_KEYWORD_SEP
;
1603 len
= (int32_t)uprv_strlen(kwd
->key
);
1604 if (reslen
< capacity
) {
1605 uprv_memcpy(appendAt
+ reslen
, kwd
->key
, uprv_min(len
, capacity
- reslen
));
1610 if (reslen
< capacity
) {
1611 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1616 len
= (int32_t)uprv_strlen(kwd
->value
);
1617 if (reslen
< capacity
) {
1618 uprv_memcpy(appendAt
+ reslen
, kwd
->value
, uprv_min(len
, capacity
- reslen
));
1628 while (kwd
!= NULL
) {
1629 ExtensionListEntry
*tmpKwd
= kwd
->next
;
1636 if (U_FAILURE(*status
)) {
1640 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1644 _appendPrivateuseToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1646 char buf
[ULOC_FULLNAME_CAPACITY
];
1647 char tmpAppend
[ULOC_FULLNAME_CAPACITY
];
1648 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1652 if (U_FAILURE(*status
)) {
1656 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1657 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1659 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1667 UBool firstValue
= TRUE
;
1674 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1678 *p
= 0; /* terminate */
1680 if (pPriv
!= NULL
) {
1681 /* Private use in the canonical format is lowercase in BCP47 */
1682 for (i
= 0; *(pPriv
+ i
) != 0; i
++) {
1683 *(pPriv
+ i
) = uprv_tolower(*(pPriv
+ i
));
1687 if (_isPrivateuseValueSubtag(pPriv
, -1)) {
1689 if (!_isVariantSubtag(pPriv
, -1)) {
1695 } else if (strict
) {
1696 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1703 if (reslen
< capacity
) {
1704 tmpAppend
[reslen
++] = SEP
;
1708 if (reslen
< capacity
) {
1709 tmpAppend
[reslen
++] = *PRIVATEUSE_KEY
;
1712 if (reslen
< capacity
) {
1713 tmpAppend
[reslen
++] = SEP
;
1716 len
= (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX
);
1717 if (reslen
< capacity
) {
1718 uprv_memcpy(tmpAppend
+ reslen
, PRIVUSE_VARIANT_PREFIX
, uprv_min(len
, capacity
- reslen
));
1722 if (reslen
< capacity
) {
1723 tmpAppend
[reslen
++] = SEP
;
1729 len
= (int32_t)uprv_strlen(pPriv
);
1730 if (reslen
< capacity
) {
1731 uprv_memcpy(tmpAppend
+ reslen
, pPriv
, uprv_min(len
, capacity
- reslen
));
1736 /* reset private use starting position */
1738 } else if (pPriv
== NULL
) {
1744 if (U_FAILURE(*status
)) {
1749 if (U_SUCCESS(*status
)) {
1751 if (reslen
< capacity
) {
1752 uprv_memcpy(appendAt
, tmpAppend
, uprv_min(len
, capacity
- reslen
));
1756 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1762 * -------------------------------------------------
1766 * -------------------------------------------------
1769 /* Bit flags used by the parser */
1780 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
1781 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
1782 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
1784 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
1785 #pragma optimize( "", off )
/*
 * ultag_parse - parse the language tag string `tag` into a ULanguageTag.
 *
 * The input is copied into a heap buffer (tagBuf) which is then cut into
 * NUL-terminated subtags in place (see `*pSep = 0`); the result's fields are
 * set from pointers into that copy.
 *
 * tag       - tag text to parse.
 * tagLen    - length of `tag`. It can be recomputed with uprv_strlen(tag);
 *             the guarding condition is not visible in this listing, but the
 *             caller visible in this file passes -1.
 * parsedLen - optional output (only touched behind a NULL check). At the end
 *             it receives the original length of a grandfathered tag, or
 *             otherwise the offset of the last accepted position from t->buf.
 * status    - ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when one of
 *             the allocations below fails.
 *
 * NOTE(review): several source lines (declarations, returns, closing braces)
 * are not visible in this listing; the comments added here describe only
 * what the visible statements establish.
 */
1788 static ULanguageTag
*
1789 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
) {
1793 char *pSubtag
, *pNext
, *pLastGoodPosition
;
1796 ExtensionListEntry
*pExtension
;
1797 char *pExtValueSubtag
, *pExtValueSubtagEnd
;
1799 UBool privateuseVar
= FALSE
;
1800 int32_t grandfatheredLen
= 0;
/* parsedLen is an optional out-parameter */
1802 if (parsedLen
!= NULL
) {
/* check for a failure already recorded in *status by the caller */
1806 if (U_FAILURE(*status
)) {
/* derive the length from the NUL-terminated input */
1811 tagLen
= (int32_t)uprv_strlen(tag
);
1814 /* copy the entire string */
/* tagLen + 1 leaves room for the terminator written just below */
1815 tagBuf
= (char*)uprv_malloc(tagLen
+ 1);
1816 if (tagBuf
== NULL
) {
1817 *status
= U_MEMORY_ALLOCATION_ERROR
;
1820 uprv_memcpy(tagBuf
, tag
, tagLen
);
1821 *(tagBuf
+ tagLen
) = 0;
1823 /* create a ULanguageTag */
1824 t
= (ULanguageTag
*)uprv_malloc(sizeof(ULanguageTag
));
1827 *status
= U_MEMORY_ALLOCATION_ERROR
;
1830 _initializeULanguageTag(t
);
1833 if (tagLen
< MINLEN
) {
1834 /* the input tag is too short - return empty ULanguageTag */
1838 /* check if the tag is grandfathered */
/* GRANDFATHERED is walked two entries at a time: entry [i] is compared
   against the whole input with uprv_stricmp, entry [i+1] is the text that
   replaces it in t->buf */
1839 for (i
= 0; GRANDFATHERED
[i
] != NULL
; i
+= 2) {
1840 if (uprv_stricmp(GRANDFATHERED
[i
], tagBuf
) == 0) {
1841 int32_t newTagLength
;
1843 grandfatheredLen
= tagLen
; /* back up for output parsedLen */
1844 newTagLength
= static_cast<int32_t>(uprv_strlen(GRANDFATHERED
[i
+1]));
/* the replacement is longer than the copy of the input: a larger buffer
   is allocated for it */
1845 if (tagLen
< newTagLength
) {
1847 tagBuf
= (char*)uprv_malloc(newTagLength
+ 1);
1848 if (tagBuf
== NULL
) {
1849 *status
= U_MEMORY_ALLOCATION_ERROR
;
1854 tagLen
= newTagLength
;
1856 uprv_strcpy(t
->buf
, GRANDFATHERED
[i
+ 1]);
1862 * langtag = language
/* parsing starts at the beginning of the buffer; pLastGoodPosition tracks
   the end of the last subtag that was accepted */
1871 pNext
= pLastGoodPosition
= tagBuf
;
1874 pExtValueSubtag
= NULL
;
1875 pExtValueSubtagEnd
= NULL
;
1882 /* locate next separator char */
1896 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* language subtag; `next` is then set to the kinds of subtag that may
   follow (presumably the parser bit flags - verify against their
   definitions) */
1899 if (_isLanguageSubtag(pSubtag
, subtagLen
)) {
1900 *pSep
= 0; /* terminate */
1901 t
->language
= T_CString_toLowerCase(pSubtag
);
1903 pLastGoodPosition
= pSep
;
1904 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* extlang subtag, stored in the next free t->extlang slot */
1909 if (_isExtlangSubtag(pSubtag
, subtagLen
)) {
1911 t
->extlang
[extlangIdx
++] = T_CString_toLowerCase(pSubtag
);
1913 pLastGoodPosition
= pSep
;
/* a further extlang (EXTL) is only allowed while fewer than 3 were stored */
1914 if (extlangIdx
< 3) {
1915 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
1917 next
= SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* script subtag: its characters are case-adjusted in place with
   uprv_toupper / uprv_tolower (which characters get which is decided by
   lines not visible here) */
1923 if (_isScriptSubtag(pSubtag
, subtagLen
)) {
1929 *p
= uprv_toupper(*p
);
1932 *p
= uprv_tolower(*p
);
1935 t
->script
= pSubtag
;
1937 pLastGoodPosition
= pSep
;
1938 next
= REGN
| VART
| EXTS
| PRIV
;
/* region subtag */
1943 if (_isRegionSubtag(pSubtag
, subtagLen
)) {
1945 t
->region
= T_CString_toUpperCase(pSubtag
);
1947 pLastGoodPosition
= pSep
;
1948 next
= VART
| EXTS
| PRIV
;
/* variant subtag; once privateuseVar has been set (see the private use
   handling below), subtags accepted by _isPrivateuseVariantSubtag are
   taken as variants too */
1953 if (_isVariantSubtag(pSubtag
, subtagLen
) ||
1954 (privateuseVar
&& _isPrivateuseVariantSubtag(pSubtag
, subtagLen
))) {
1955 VariantListEntry
*var
;
1958 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
1960 *status
= U_MEMORY_ALLOCATION_ERROR
;
1964 var
->variant
= T_CString_toUpperCase(pSubtag
);
1965 isAdded
= _addVariantToList(&(t
->variants
), var
);
1967 /* duplicated variant entry */
1971 pLastGoodPosition
= pSep
;
1972 next
= VART
| EXTS
| PRIV
;
/* extension singleton: a pending extension is finished first, then a new
   entry is started for this singleton */
1977 if (_isExtensionSingleton(pSubtag
, subtagLen
)) {
1978 if (pExtension
!= NULL
) {
1979 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
1980 /* the previous extension is incomplete */
1981 uprv_free(pExtension
);
1986 /* terminate the previous extension value */
1987 *pExtValueSubtagEnd
= 0;
1988 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
1990 /* insert the extension to the list */
1991 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
1992 pLastGoodPosition
= pExtValueSubtagEnd
;
1994 /* stop parsing here */
1995 uprv_free(pExtension
);
2001 /* create a new extension */
2002 pExtension
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
2003 if (pExtension
== NULL
) {
2004 *status
= U_MEMORY_ALLOCATION_ERROR
;
2008 pExtension
->key
= T_CString_toLowerCase(pSubtag
);
2009 pExtension
->value
= NULL
; /* will be set later */
2012 * reset the start and the end location of extension value
2013 * subtags for this extension
2015 pExtValueSubtag
= NULL
;
2016 pExtValueSubtagEnd
= NULL
;
/* extension value subtag: the value's start is remembered once and its end
   is moved forward for every value subtag */
2023 if (_isExtensionSubtag(pSubtag
, subtagLen
)) {
2024 if (pExtValueSubtag
== NULL
) {
2025 /* if the start position of this extension's value is not yet,
2026 this one is the first value subtag */
2027 pExtValueSubtag
= pSubtag
;
2030 /* Mark the end of this subtag */
2031 pExtValueSubtagEnd
= pSep
;
2032 next
= EXTS
| EXTV
| PRIV
;
/* private use introducer, compared case-insensitively against PRIVATEUSE */
2038 if (uprv_tolower(*pSubtag
) == PRIVATEUSE
) {
2041 if (pExtension
!= NULL
) {
2042 /* Process the last extension */
2043 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2044 /* the previous extension is incomplete */
2045 uprv_free(pExtension
);
2049 /* terminate the previous extension value */
2050 *pExtValueSubtagEnd
= 0;
2051 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2053 /* insert the extension to the list */
2054 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2055 pLastGoodPosition
= pExtValueSubtagEnd
;
2058 /* stop parsing here */
2059 uprv_free(pExtension
);
2066 /* The rest of part will be private use value subtags */
2067 if (pNext
== NULL
) {
2068 /* empty private use subtag */
2071 /* back up the private use value start position */
2072 pPrivuseVal
= pNext
;
2074 /* validate private use value subtags */
2090 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* a private use subtag that begins with PRIVUSE_VARIANT_PREFIX turns on
   privateuseVar, which relaxes the variant check above */
2092 if (uprv_strncmp(pSubtag
, PRIVUSE_VARIANT_PREFIX
, uprv_strlen(PRIVUSE_VARIANT_PREFIX
)) == 0) {
2095 privateuseVar
= TRUE
;
2097 } else if (_isPrivateuseValueSubtag(pSubtag
, subtagLen
)) {
2098 pLastGoodPosition
= pSep
;
/* only a non-empty run of accepted private use subtags is stored */
2108 if (pLastGoodPosition
- pPrivuseVal
> 0) {
2109 *pLastGoodPosition
= 0;
2110 t
->privateuse
= T_CString_toLowerCase(pPrivuseVal
);
2112 /* No more subtags, exiting the parse loop */
2118 /* If we fell through here, it means this subtag is illegal - quit parsing */
/* an extension that is still pending at this point is finished the same way
   as in the singleton and private use branches above */
2122 if (pExtension
!= NULL
) {
2123 /* Process the last extension */
2124 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2125 /* the previous extension is incomplete */
2126 uprv_free(pExtension
);
2128 /* terminate the previous extension value */
2129 *pExtValueSubtagEnd
= 0;
2130 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2131 /* insert the extension to the list */
2132 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2133 pLastGoodPosition
= pExtValueSubtagEnd
;
2135 uprv_free(pExtension
);
/* report how much input was consumed: the saved original length for a
   grandfathered tag, otherwise the offset of the last good position */
2140 if (parsedLen
!= NULL
) {
2141 *parsedLen
= (grandfatheredLen
> 0) ? grandfatheredLen
: (int32_t)(pLastGoodPosition
- t
->buf
);
2152 * Ticket #12705 - Turn optimization back on.
2154 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2155 #pragma optimize( "", on )
/*
 * ultag_close - release a ULanguageTag.
 *
 * A NULL langtag is checked for first. The subtag buffer (langtag->buf) is
 * freed, then the variant list and the extension list are walked; each step
 * reads the node's `next` pointer into a local (nextVar / nextExt).
 * NOTE(review): the statements that free the list nodes and langtag itself
 * are not visible in this listing - confirm against the full source.
 */
2159 ultag_close(ULanguageTag
* langtag
) {
2161 if (langtag
== NULL
) {
2165 uprv_free(langtag
->buf
);
2167 if (langtag
->variants
) {
2168 VariantListEntry
*curVar
= langtag
->variants
;
2170 VariantListEntry
*nextVar
= curVar
->next
;
2176 if (langtag
->extensions
) {
2177 ExtensionListEntry
*curExt
= langtag
->extensions
;
2179 ExtensionListEntry
*nextExt
= curExt
->next
;
/*
 * ultag_getLanguage - return the `language` field of a parsed tag.
 * langtag is dereferenced without a NULL check.
 */
2189 ultag_getLanguage(const ULanguageTag
* langtag
) {
2190 return langtag
->language
;
/*
 * ultag_getJDKLanguage - return the language subtag after a lookup in
 * DEPRECATEDLANGS.
 *
 * DEPRECATEDLANGS is walked two entries at a time until a NULL entry: when
 * entry [i] equals langtag->language (uprv_compareInvCharsAsAscii() == 0),
 * the paired entry [i + 1] is returned. When nothing matches,
 * langtag->language is returned unchanged.
 */
2195 ultag_getJDKLanguage(const ULanguageTag
* langtag
) {
2197 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
2198 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS
[i
], langtag
->language
) == 0) {
2199 return DEPRECATEDLANGS
[i
+ 1];
/* no table entry matched */
2202 return langtag
->language
;
/*
 * ultag_getExtlang - return the extlang subtag stored in slot `idx`.
 *
 * The slot is read only when 0 <= idx < MAXEXTLANG.
 * NOTE(review): the value returned for an out-of-range idx is not visible
 * in this listing.
 */
2207 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
) {
2208 if (idx
>= 0 && idx
< MAXEXTLANG
) {
2209 return langtag
->extlang
[idx
];
/*
 * ultag_getExtlangSize - examine the extlang slots of a parsed tag.
 *
 * All MAXEXTLANG slots are visited and each is tested for being non-NULL.
 * NOTE(review): presumably the non-NULL slots are counted and the count is
 * returned; the counter and the return are not visible in this listing.
 */
2215 ultag_getExtlangSize(const ULanguageTag
* langtag
) {
2218 for (i
= 0; i
< MAXEXTLANG
; i
++) {
2219 if (langtag
->extlang
[i
]) {
/*
 * ultag_getScript - return the `script` field of a parsed tag.
 * langtag is dereferenced without a NULL check.
 */
2227 ultag_getScript(const ULanguageTag
* langtag
) {
2228 return langtag
->script
;
/*
 * ultag_getRegion - return the `region` field of a parsed tag.
 * langtag is dereferenced without a NULL check.
 */
2232 ultag_getRegion(const ULanguageTag
* langtag
) {
2233 return langtag
->region
;
/*
 * ultag_getVariant - look up a variant subtag by index.
 *
 * The result starts out as NULL and the search starts at the head of
 * langtag->variants.
 * NOTE(review): presumably the list is walked to the idx-th entry and its
 * variant string is returned (NULL when idx is out of range); the loop and
 * the return are not visible in this listing.
 */
2237 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
) {
2238 const char *var
= NULL
;
2239 VariantListEntry
*cur
= langtag
->variants
;
/*
 * ultag_getVariantsSize - examine the variant list of a parsed tag.
 *
 * Starts from the head of langtag->variants.
 * NOTE(review): presumably counts the list entries and returns the count;
 * the loop and the return are not visible in this listing.
 */
2253 ultag_getVariantsSize(const ULanguageTag
* langtag
) {
2255 VariantListEntry
*cur
= langtag
->variants
;
/*
 * ultag_getExtensionKey - look up an extension key by index.
 *
 * The result starts out as NULL and the search starts at the head of
 * langtag->extensions.
 * NOTE(review): presumably the list is walked to the idx-th entry and its
 * key is returned (NULL when idx is out of range); the loop and the return
 * are not visible in this listing.
 */
2267 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
) {
2268 const char *key
= NULL
;
2269 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * ultag_getExtensionValue - look up an extension value by index.
 *
 * The result starts out as NULL and the search starts at the head of
 * langtag->extensions.
 * NOTE(review): presumably the list is walked to the idx-th entry and its
 * value is returned (NULL when idx is out of range); the loop and the
 * return are not visible in this listing.
 */
2283 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
) {
2284 const char *val
= NULL
;
2285 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * ultag_getExtensionsSize - examine the extension list of a parsed tag.
 *
 * Starts from the head of langtag->extensions.
 * NOTE(review): presumably counts the list entries and returns the count;
 * the loop and the return are not visible in this listing.
 */
2299 ultag_getExtensionsSize(const ULanguageTag
* langtag
) {
2301 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * ultag_getPrivateUse - return the `privateuse` field of a parsed tag.
 * langtag is dereferenced without a NULL check.
 */
2313 ultag_getPrivateUse(const ULanguageTag
* langtag
) {
2314 return langtag
->privateuse
;
/*
 * ultag_getGrandfathered - return the `grandfathered` field of a parsed tag.
 * langtag is dereferenced without a NULL check.
 */
2319 ultag_getGrandfathered(const ULanguageTag
* langtag
) {
2320 return langtag
->grandfathered
;
2326 * -------------------------------------------------
2328 * Locale/BCP47 conversion APIs, exposed as uloc_*
2330 * -------------------------------------------------
/*
 * uloc_toLanguageTag - convert an ICU locale ID into a language tag.
 *
 * localeID        - locale ID to convert; canonicalized into a local
 *                   256-byte buffer when it is not empty.
 * langtagCapacity - capacity of the output buffer `langtag`.
 * status          - ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when
 *                   canonicalization does not finish with exactly
 *                   U_ZERO_ERROR, and on the error paths of the private use
 *                   special case below.
 *
 * The tag is assembled by appending language, script, region, variants,
 * keywords and private use in that order; each helper is handed the output
 * position `langtag + reslen` and the remaining capacity, and its return
 * value is added to reslen.
 *
 * NOTE(review): the declarations of `langtag` and `strict`, several locals
 * and the return statements are not visible in this listing.
 */
2332 U_CAPI
int32_t U_EXPORT2
2333 uloc_toLanguageTag(const char* localeID
,
2335 int32_t langtagCapacity
,
2337 UErrorCode
* status
) {
2338 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2339 char canonical
[256];
2341 UErrorCode tmpStatus
= U_ZERO_ERROR
;
2342 UBool hadPosix
= FALSE
;
2343 const char* pKeywordStart
;
2345 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
/* canonicalization is skipped for an empty localeID */
2347 if (uprv_strlen(localeID
) > 0) {
2348 uloc_canonicalize(localeID
, canonical
, sizeof(canonical
), &tmpStatus
);
/* compared against U_ZERO_ERROR exactly rather than with U_FAILURE, so any
   non-zero code from uloc_canonicalize is rejected */
2349 if (tmpStatus
!= U_ZERO_ERROR
) {
2350 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2355 /* For handling special case - private use only tag */
/* the keywords start at the very beginning of the canonical ID */
2356 pKeywordStart
= locale_getKeywordsStart(canonical
);
2357 if (pKeywordStart
== canonical
) {
2358 UEnumeration
*kwdEnum
;
2362 kwdEnum
= uloc_openKeywords((const char*)canonical
, &tmpStatus
);
2363 if (kwdEnum
!= NULL
) {
2364 kwdCnt
= uenum_count(kwdEnum
, &tmpStatus
);
2369 key
= uenum_next(kwdEnum
, &len
, &tmpStatus
);
/* the keyword is the single character PRIVATEUSE */
2370 if (len
== 1 && *key
== PRIVATEUSE
) {
2371 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
2372 buf
[0] = PRIVATEUSE
;
/* the keyword value is read from the original localeID (not from
   `canonical`) into buf starting at index 2; buf[0] holds PRIVATEUSE and the
   assignment of buf[1] is not visible in this listing */
2374 len
= uloc_getKeywordValue(localeID
, key
, &buf
[2], sizeof(buf
) - 2, &tmpStatus
);
2375 if (U_SUCCESS(tmpStatus
)) {
2376 if (_isPrivateuseValueSubtags(&buf
[2], len
)) {
2377 /* return private use only tag */
/* at most langtagCapacity bytes are copied; u_terminateChars is given the
   untruncated reslen */
2379 uprv_memcpy(langtag
, buf
, uprv_min(reslen
, langtagCapacity
));
2380 u_terminateChars(langtag
, langtagCapacity
, reslen
, status
);
2382 } else if (strict
) {
2383 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2386 /* if not strict mode, then "und" will be returned */
2388 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2393 uenum_close(kwdEnum
);
/* regular path: each helper writes at langtag + reslen with the remaining
   capacity; hadPosix is filled in by the variants step (passed by address)
   and handed by value to the keywords and private use steps */
2400 reslen
+= _appendLanguageToLanguageTag(canonical
, langtag
, langtagCapacity
, strict
, status
);
2401 reslen
+= _appendScriptToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2402 reslen
+= _appendRegionToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2403 reslen
+= _appendVariantsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, &hadPosix
, status
);
2404 reslen
+= _appendKeywordsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2405 reslen
+= _appendPrivateuseToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
/*
 * uloc_forLanguageTag - convert a language tag into an ICU locale ID.
 *
 * langtag          - tag text; parsed with ultag_parse using a length of -1.
 * localeIDCapacity - capacity of the output buffer `localeID`.
 * parsedLength     - forwarded to ultag_parse as its parsedLen output.
 * status           - ICU error code; checked right after parsing and passed
 *                    on to _appendKeywords and u_terminateChars.
 *
 * The parsed parts are written in the order language, script, region,
 * variants, keywords. Every write is guarded by `reslen < localeIDCapacity`,
 * and block copies are clamped to the remaining capacity.
 * Returns the value of u_terminateChars(localeID, localeIDCapacity, reslen,
 * status).
 *
 * NOTE(review): the declaration of `localeID`, several locals, the reslen
 * increments and the release of the parsed tag are not visible in this
 * listing.
 */
2411 U_CAPI
int32_t U_EXPORT2
2412 uloc_forLanguageTag(const char* langtag
,
2414 int32_t localeIDCapacity
,
2415 int32_t* parsedLength
,
2416 UErrorCode
* status
) {
2419 const char *subtag
, *p
;
2422 UBool noRegion
= TRUE
;
2424 lt
= ultag_parse(langtag
, -1, parsedLength
, status
);
2425 if (U_FAILURE(*status
)) {
/* language: the first extlang is used when there is one, otherwise the
   language subtag; nothing is written when that subtag equals LANG_UND */
2430 subtag
= ultag_getExtlangSize(lt
) > 0 ? ultag_getExtlang(lt
, 0) : ultag_getLanguage(lt
);
2431 if (uprv_compareInvCharsAsAscii(subtag
, LANG_UND
) != 0) {
2432 len
= (int32_t)uprv_strlen(subtag
);
2434 if (reslen
< localeIDCapacity
) {
2435 uprv_memcpy(localeID
, subtag
, uprv_min(len
, localeIDCapacity
- reslen
));
/* script, preceded by LOCALE_SEP */
2442 subtag
= ultag_getScript(lt
);
2443 len
= (int32_t)uprv_strlen(subtag
);
2445 if (reslen
< localeIDCapacity
) {
2446 *(localeID
+ reslen
) = LOCALE_SEP
;
2450 /* write out the script in title case */
2453 if (reslen
< localeIDCapacity
) {
2455 *(localeID
+ reslen
) = uprv_toupper(*p
);
2457 *(localeID
+ reslen
) = *p
;
/* region, preceded by LOCALE_SEP */
2466 subtag
= ultag_getRegion(lt
);
2467 len
= (int32_t)uprv_strlen(subtag
);
2469 if (reslen
< localeIDCapacity
) {
2470 *(localeID
+ reslen
) = LOCALE_SEP
;
2473 /* write out the region in upper case */
2476 if (reslen
< localeIDCapacity
) {
2477 *(localeID
+ reslen
) = uprv_toupper(*p
);
/* variants: a LOCALE_SEP is written before the loop (its guarding condition
   is not visible here), and another one before each variant */
2486 n
= ultag_getVariantsSize(lt
);
2489 if (reslen
< localeIDCapacity
) {
2490 *(localeID
+ reslen
) = LOCALE_SEP
;
2495 for (i
= 0; i
< n
; i
++) {
2496 subtag
= ultag_getVariant(lt
, i
);
2497 if (reslen
< localeIDCapacity
) {
2498 *(localeID
+ reslen
) = LOCALE_SEP
;
2501 /* write out the variant in upper case */
2504 if (reslen
< localeIDCapacity
) {
2505 *(localeID
+ reslen
) = uprv_toupper(*p
);
/* keywords are produced when there is at least one extension or a non-empty
   private use value */
2514 n
= ultag_getExtensionsSize(lt
);
2515 subtag
= ultag_getPrivateUse(lt
);
2516 if (n
> 0 || uprv_strlen(subtag
) > 0) {
/* nothing has been written so far but there are extensions: LANG_UND is
   written as the language */
2517 if (reslen
== 0 && n
> 0) {
2518 /* need a language */
2519 if (reslen
< localeIDCapacity
) {
2520 uprv_memcpy(localeID
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, localeIDCapacity
- reslen
));
2522 reslen
+= LANG_UND_LEN
;
2524 len
= _appendKeywords(lt
, localeID
+ reslen
, localeIDCapacity
- reslen
, status
);
2529 return u_terminateChars(localeID
, localeIDCapacity
, reslen
, status
);