2 **********************************************************************
3 * Copyright (C) 2009-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
19 /* struct holding a single variant */
20 typedef struct VariantListEntry
{
22 struct VariantListEntry
*next
;
25 /* struct holding a single attribute value */
26 typedef struct AttributeListEntry
{
27 const char *attribute
;
28 struct AttributeListEntry
*next
;
31 /* struct holding a single extension */
32 typedef struct ExtensionListEntry
{
35 struct ExtensionListEntry
*next
;
39 typedef struct ULanguageTag
{
40 char *buf
; /* holding parsed subtags */
42 const char *extlang
[MAXEXTLANG
];
45 VariantListEntry
*variants
;
46 ExtensionListEntry
*extensions
;
47 AttributeListEntry
*attributes
;
48 const char *privateuse
;
49 const char *grandfathered
;
/* Singleton letter that introduces the private use part of a tag
   (privateuse = "x" followed by 1..8 alphanum subtags). Extension
   singletons are required to differ from it. */
54 #define PRIVATEUSE 'x'
/* Accepted alongside SEP as a delimiter when the locale's variant string
   is split into individual variants. */
57 #define LOCALE_SEP '_'
/* Separators written by _appendKeywords while emitting the keyword list.
   NOTE(review): presumably '@' opens the list and ';' goes between
   entries; the selecting conditions are not visible here, confirm. */
58 #define LOCALE_EXT_SEP '@'
59 #define LOCALE_KEYWORD_SEP ';'
/* Written by _appendKeywords right after a keyword key (key/value separator). */
60 #define LOCALE_KEY_TYPE_SEP '='
/* Letter test; simply delegates to uprv_isASCIILetter. */
62 #define ISALPHA(c) uprv_isASCIILetter(c)
/* Digit test for '0'..'9'. The argument is expanded twice, so do not pass
   an expression with side effects. */
63 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
/* Shared empty string; _initializeULanguageTag uses it as the initial value
   of the language, script, region, grandfathered and privateuse fields. */
65 static const char* EMPTY
= "";
/* Language subtag emitted by _appendLanguageToLanguageTag in place of an
   unusable language code. */
66 static const char* LANG_UND
= "und";
/* Keyword key under which _appendKeywords stores the private use value. */
67 static const char* PRIVATEUSE_KEY
= "x";
/* Text appended to the locale by _appendKeywords when a POSIX variant was
   found in the extensions. */
68 static const char* _POSIX
= "_POSIX";
/* Key/value pair of the extension entry that stands for the POSIX variant
   (the "u-va-posix" form). */
69 static const char* POSIX_KEY
= "va";
70 static const char* POSIX_VALUE
= "posix";
/* Special keyword name used for representing Unicode locale attributes. */
71 static const char* LOCALE_ATTRIBUTE_KEY
= "attribute";
/* NOTE(review): no use of this constant is visible in this part of the
   file; presumably the subtag marking variants kept in the private use
   section. Confirm against the callers. */
72 static const char* PRIVUSE_VARIANT_PREFIX
= "lvariant";
/* Length of LANG_UND ("und") without the terminator; must be kept in sync
   with that string. */
74 #define LANG_UND_LEN 3
76 static const char* GRANDFATHERED
[] = {
77 /* grandfathered preferred */
79 "cel-gaulish", "xtg-x-cel-gaulish",
80 "en-GB-oed", "en-GB-x-oed",
83 "i-default", "en-x-i-default",
84 "i-enochian", "und-x-i-enochian",
88 "i-mingo", "see-x-i-mingo",
101 "zh-min", "nan-x-zh-min",
107 static const char* DEPRECATEDLANGS
[] = {
116 * -------------------------------------------------
118 * These ultag_ functions may be exposed as APIs later
120 * -------------------------------------------------
124 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
);
127 ultag_close(ULanguageTag
* langtag
);
130 ultag_getLanguage(const ULanguageTag
* langtag
);
134 ultag_getJDKLanguage(const ULanguageTag
* langtag
);
138 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
);
141 ultag_getExtlangSize(const ULanguageTag
* langtag
);
144 ultag_getScript(const ULanguageTag
* langtag
);
147 ultag_getRegion(const ULanguageTag
* langtag
);
150 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
);
153 ultag_getVariantsSize(const ULanguageTag
* langtag
);
156 /* Currently not being used. */
158 ultag_getAttribute(const ULanguageTag
* langtag
, int32_t idx
);
162 ultag_getAttributesSize(const ULanguageTag
* langtag
);
165 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
);
168 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
);
171 ultag_getExtensionsSize(const ULanguageTag
* langtag
);
174 ultag_getPrivateUse(const ULanguageTag
* langtag
);
178 ultag_getGrandfathered(const ULanguageTag
* langtag
);
182 * -------------------------------------------------
184 * Language subtag syntax validation functions
186 * -------------------------------------------------
190 _isAlphaString(const char* s
, int32_t len
) {
192 for (i
= 0; i
< len
; i
++) {
193 if (!ISALPHA(*(s
+ i
))) {
201 _isNumericString(const char* s
, int32_t len
) {
203 for (i
= 0; i
< len
; i
++) {
204 if (!ISNUMERIC(*(s
+ i
))) {
212 _isAlphaNumericString(const char* s
, int32_t len
) {
214 for (i
= 0; i
< len
; i
++) {
215 if (!ISALPHA(*(s
+ i
)) && !ISNUMERIC(*(s
+ i
))) {
223 _isLanguageSubtag(const char* s
, int32_t len
) {
225 * language = 2*3ALPHA ; shortest ISO 639 code
226 * ["-" extlang] ; sometimes followed by
227 * ; extended language subtags
228 * / 4ALPHA ; or reserved for future use
229 * / 5*8ALPHA ; or registered language subtag
232 len
= (int32_t)uprv_strlen(s
);
234 if (len
>= 2 && len
<= 8 && _isAlphaString(s
, len
)) {
241 _isExtlangSubtag(const char* s
, int32_t len
) {
243 * extlang = 3ALPHA ; selected ISO 639 codes
244 * *2("-" 3ALPHA) ; permanently reserved
247 len
= (int32_t)uprv_strlen(s
);
249 if (len
== 3 && _isAlphaString(s
, len
)) {
256 _isScriptSubtag(const char* s
, int32_t len
) {
258 * script = 4ALPHA ; ISO 15924 code
261 len
= (int32_t)uprv_strlen(s
);
263 if (len
== 4 && _isAlphaString(s
, len
)) {
270 _isRegionSubtag(const char* s
, int32_t len
) {
272 * region = 2ALPHA ; ISO 3166-1 code
273 * / 3DIGIT ; UN M.49 code
276 len
= (int32_t)uprv_strlen(s
);
278 if (len
== 2 && _isAlphaString(s
, len
)) {
281 if (len
== 3 && _isNumericString(s
, len
)) {
288 _isVariantSubtag(const char* s
, int32_t len
) {
290 * variant = 5*8alphanum ; registered variants
291 * / (DIGIT 3alphanum)
294 len
= (int32_t)uprv_strlen(s
);
296 if (len
>= 5 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
299 if (len
== 4 && ISNUMERIC(*s
) && _isAlphaNumericString(s
+ 1, 3)) {
306 _isPrivateuseVariantSubtag(const char* s
, int32_t len
) {
308 * variant = 1*8alphanum ; registered variants
309 * / (DIGIT 3alphanum)
312 len
= (int32_t)uprv_strlen(s
);
314 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
321 _isAttributeSubtag(const char* s
, int32_t len
) {
323 * attribute = 3*8alphanum
326 len
= (int32_t)uprv_strlen(s
);
328 if (len
>= 3 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
335 _isExtensionSingleton(const char* s
, int32_t len
) {
337 * extension = singleton 1*("-" (2*8alphanum))
340 len
= (int32_t)uprv_strlen(s
);
342 if (len
== 1 && ISALPHA(*s
) && (uprv_tolower(*s
) != PRIVATEUSE
)) {
349 _isExtensionSubtag(const char* s
, int32_t len
) {
351 * extension = singleton 1*("-" (2*8alphanum))
354 len
= (int32_t)uprv_strlen(s
);
356 if (len
>= 2 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
363 _isExtensionSubtags(const char* s
, int32_t len
) {
365 const char *pSubtag
= NULL
;
368 len
= (int32_t)uprv_strlen(s
);
371 while ((p
- s
) < len
) {
373 if (pSubtag
== NULL
) {
376 if (!_isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
380 } else if (pSubtag
== NULL
) {
385 if (pSubtag
== NULL
) {
388 return _isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
392 _isPrivateuseValueSubtag(const char* s
, int32_t len
) {
394 * privateuse = "x" 1*("-" (1*8alphanum))
397 len
= (int32_t)uprv_strlen(s
);
399 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
406 _isPrivateuseValueSubtags(const char* s
, int32_t len
) {
408 const char *pSubtag
= NULL
;
411 len
= (int32_t)uprv_strlen(s
);
414 while ((p
- s
) < len
) {
416 if (pSubtag
== NULL
) {
419 if (!_isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
423 } else if (pSubtag
== NULL
) {
428 if (pSubtag
== NULL
) {
431 return _isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
435 _isLDMLKey(const char* s
, int32_t len
) {
437 len
= (int32_t)uprv_strlen(s
);
439 if (len
== 2 && _isAlphaNumericString(s
, len
)) {
446 _isLDMLType(const char* s
, int32_t len
) {
448 len
= (int32_t)uprv_strlen(s
);
450 if (len
>= 3 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
457 * -------------------------------------------------
461 * -------------------------------------------------
465 _addVariantToList(VariantListEntry
**first
, VariantListEntry
*var
) {
468 if (*first
== NULL
) {
472 VariantListEntry
*prev
, *cur
;
475 /* variants order should be preserved */
485 /* Checking for duplicate variant */
486 cmp
= uprv_compareInvCharsAsAscii(var
->variant
, cur
->variant
);
488 /* duplicated variant */
501 _addAttributeToList(AttributeListEntry
**first
, AttributeListEntry
*attr
) {
504 if (*first
== NULL
) {
508 AttributeListEntry
*prev
, *cur
;
511 /* reorder attributes in alphabetical order */
520 cmp
= uprv_compareInvCharsAsAscii(attr
->attribute
, cur
->attribute
);
531 /* duplicated attribute */
545 _addExtensionToList(ExtensionListEntry
**first
, ExtensionListEntry
*ext
, UBool localeToBCP
) {
548 if (*first
== NULL
) {
552 ExtensionListEntry
*prev
, *cur
;
555 /* reorder extensions in alphabetical order of their keys */
565 /* special handling for locale to bcp conversion */
568 len
= (int32_t)uprv_strlen(ext
->key
);
569 curlen
= (int32_t)uprv_strlen(cur
->key
);
571 if (len
== 1 && curlen
== 1) {
572 if (*(ext
->key
) == *(cur
->key
)) {
574 } else if (*(ext
->key
) == PRIVATEUSE
) {
576 } else if (*(cur
->key
) == PRIVATEUSE
) {
579 cmp
= *(ext
->key
) - *(cur
->key
);
581 } else if (len
== 1) {
582 cmp
= *(ext
->key
) - LDMLEXT
;
583 } else if (curlen
== 1) {
584 cmp
= LDMLEXT
- *(cur
->key
);
586 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
589 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
601 /* duplicated extension key */
614 _initializeULanguageTag(ULanguageTag
* langtag
) {
619 langtag
->language
= EMPTY
;
620 for (i
= 0; i
< MAXEXTLANG
; i
++) {
621 langtag
->extlang
[i
] = NULL
;
624 langtag
->script
= EMPTY
;
625 langtag
->region
= EMPTY
;
627 langtag
->variants
= NULL
;
628 langtag
->extensions
= NULL
;
630 langtag
->attributes
= NULL
;
632 langtag
->grandfathered
= EMPTY
;
633 langtag
->privateuse
= EMPTY
;
/* Name of the resource bundle opened with ures_openDirect for key/type
   mapping lookups. */
636 #define KEYTYPEDATA "keyTypeData"
/* Names of the tables fetched from that bundle with ures_getByKey:
   KEYMAP by the key converters, TYPEMAP by the type converters, and
   TYPEALIAS by _ldmlTypeToBCP47 when the direct type lookup is missing. */
637 #define KEYMAP "keyMap"
638 #define TYPEMAP "typeMap"
639 #define TYPEALIAS "typeAlias"
/* Size of the stack buffers that hold one BCP 47 key or type subtag. */
640 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
/* Size of the stack buffers holding an LDML key; inputs of this length or
   longer are rejected with U_ILLEGAL_ARGUMENT_ERROR. */
641 #define MAX_LDML_KEY_LEN 22
/* Size of the stack buffer holding an LDML type in _ldmlTypeToBCP47;
   inputs of this length or longer are rejected the same way. */
642 #define MAX_LDML_TYPE_LEN 32
645 _ldmlKeyToBCP47(const char* key
, int32_t keyLen
,
646 char* bcpKey
, int32_t bcpKeyCapacity
,
647 UErrorCode
*status
) {
649 char keyBuf
[MAX_LDML_KEY_LEN
];
650 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
651 int32_t resultLen
= 0;
653 UErrorCode tmpStatus
= U_ZERO_ERROR
;
654 const UChar
*uBcpKey
;
658 keyLen
= (int32_t)uprv_strlen(key
);
661 if (keyLen
>= sizeof(keyBuf
)) {
662 /* no known valid LDML key exceeding 21 */
663 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
667 uprv_memcpy(keyBuf
, key
, keyLen
);
671 for (i
= 0; i
< keyLen
; i
++) {
672 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
675 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
676 ures_getByKey(rb
, KEYMAP
, rb
, status
);
678 if (U_FAILURE(*status
)) {
683 uBcpKey
= ures_getStringByKey(rb
, keyBuf
, &bcpKeyLen
, &tmpStatus
);
684 if (U_SUCCESS(tmpStatus
)) {
685 u_UCharsToChars(uBcpKey
, bcpKeyBuf
, bcpKeyLen
);
686 bcpKeyBuf
[bcpKeyLen
] = 0;
687 resultLen
= bcpKeyLen
;
689 if (_isLDMLKey(key
, keyLen
)) {
690 uprv_memcpy(bcpKeyBuf
, key
, keyLen
);
691 bcpKeyBuf
[keyLen
] = 0;
694 /* mapping not available */
695 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
700 if (U_FAILURE(*status
)) {
704 uprv_memcpy(bcpKey
, bcpKeyBuf
, uprv_min(resultLen
, bcpKeyCapacity
));
705 return u_terminateChars(bcpKey
, bcpKeyCapacity
, resultLen
, status
);
709 _bcp47ToLDMLKey(const char* bcpKey
, int32_t bcpKeyLen
,
710 char* key
, int32_t keyCapacity
,
711 UErrorCode
*status
) {
713 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
714 int32_t resultLen
= 0;
716 const char *resKey
= NULL
;
717 UResourceBundle
*mapData
;
720 bcpKeyLen
= (int32_t)uprv_strlen(bcpKey
);
723 if (bcpKeyLen
>= sizeof(bcpKeyBuf
)) {
724 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
728 uprv_memcpy(bcpKeyBuf
, bcpKey
, bcpKeyLen
);
729 bcpKeyBuf
[bcpKeyLen
] = 0;
732 for (i
= 0; i
< bcpKeyLen
; i
++) {
733 bcpKeyBuf
[i
] = uprv_tolower(bcpKeyBuf
[i
]);
736 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
737 ures_getByKey(rb
, KEYMAP
, rb
, status
);
738 if (U_FAILURE(*status
)) {
743 mapData
= ures_getNextResource(rb
, NULL
, status
);
744 while (U_SUCCESS(*status
)) {
745 const UChar
*uBcpKey
;
746 char tmpBcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
747 int32_t tmpBcpKeyLen
;
749 uBcpKey
= ures_getString(mapData
, &tmpBcpKeyLen
, status
);
750 if (U_FAILURE(*status
)) {
753 u_UCharsToChars(uBcpKey
, tmpBcpKeyBuf
, tmpBcpKeyLen
);
754 tmpBcpKeyBuf
[tmpBcpKeyLen
] = 0;
755 if (uprv_compareInvCharsAsAscii(bcpKeyBuf
, tmpBcpKeyBuf
) == 0) {
756 /* found a matching BCP47 key */
757 resKey
= ures_getKey(mapData
);
758 resultLen
= (int32_t)uprv_strlen(resKey
);
761 if (!ures_hasNext(rb
)) {
764 ures_getNextResource(rb
, mapData
, status
);
769 if (U_FAILURE(*status
)) {
773 if (resKey
== NULL
) {
775 resultLen
= bcpKeyLen
;
778 uprv_memcpy(key
, resKey
, uprv_min(resultLen
, keyCapacity
));
779 return u_terminateChars(key
, keyCapacity
, resultLen
, status
);
783 _ldmlTypeToBCP47(const char* key
, int32_t keyLen
,
784 const char* type
, int32_t typeLen
,
785 char* bcpType
, int32_t bcpTypeCapacity
,
786 UErrorCode
*status
) {
787 UResourceBundle
*rb
, *keyTypeData
, *typeMapForKey
;
788 char keyBuf
[MAX_LDML_KEY_LEN
];
789 char typeBuf
[MAX_LDML_TYPE_LEN
];
790 char bcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
791 int32_t resultLen
= 0;
793 UErrorCode tmpStatus
= U_ZERO_ERROR
;
794 const UChar
*uBcpType
, *uCanonicalType
;
795 int32_t bcpTypeLen
, canonicalTypeLen
;
796 UBool isTimezone
= FALSE
;
799 keyLen
= (int32_t)uprv_strlen(key
);
801 if (keyLen
>= sizeof(keyBuf
)) {
802 /* no known valid LDML key exceeding 21 */
803 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
806 uprv_memcpy(keyBuf
, key
, keyLen
);
810 for (i
= 0; i
< keyLen
; i
++) {
811 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
813 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
818 typeLen
= (int32_t)uprv_strlen(type
);
820 if (typeLen
>= sizeof(typeBuf
)) {
821 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
826 /* replace '/' with ':' */
827 for (i
= 0; i
< typeLen
; i
++) {
828 if (*(type
+ i
) == '/') {
831 typeBuf
[i
] = *(type
+ i
);
834 typeBuf
[typeLen
] = 0;
838 keyTypeData
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
839 rb
= ures_getByKey(keyTypeData
, TYPEMAP
, NULL
, status
);
840 if (U_FAILURE(*status
)) {
842 ures_close(keyTypeData
);
846 typeMapForKey
= ures_getByKey(rb
, keyBuf
, NULL
, &tmpStatus
);
847 uBcpType
= ures_getStringByKey(typeMapForKey
, type
, &bcpTypeLen
, &tmpStatus
);
848 if (U_SUCCESS(tmpStatus
)) {
849 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
850 resultLen
= bcpTypeLen
;
851 } else if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
852 /* is this type alias? */
853 tmpStatus
= U_ZERO_ERROR
;
854 ures_getByKey(keyTypeData
, TYPEALIAS
, rb
, &tmpStatus
);
855 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
856 uCanonicalType
= ures_getStringByKey(rb
, type
, &canonicalTypeLen
, &tmpStatus
);
857 if (U_SUCCESS(tmpStatus
)) {
858 u_UCharsToChars(uCanonicalType
, typeBuf
, canonicalTypeLen
);
860 /* replace '/' with ':' */
861 for (i
= 0; i
< canonicalTypeLen
; i
++) {
862 if (typeBuf
[i
] == '/') {
867 typeBuf
[canonicalTypeLen
] = 0;
869 /* look up the canonical type */
870 uBcpType
= ures_getStringByKey(typeMapForKey
, typeBuf
, &bcpTypeLen
, &tmpStatus
);
871 if (U_SUCCESS(tmpStatus
)) {
872 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
873 resultLen
= bcpTypeLen
;
876 if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
877 if (_isLDMLType(type
, typeLen
)) {
878 uprv_memcpy(bcpTypeBuf
, type
, typeLen
);
881 /* mapping not available */
882 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
889 ures_close(typeMapForKey
);
890 ures_close(keyTypeData
);
892 if (U_FAILURE(*status
)) {
896 uprv_memcpy(bcpType
, bcpTypeBuf
, uprv_min(resultLen
, bcpTypeCapacity
));
897 return u_terminateChars(bcpType
, bcpTypeCapacity
, resultLen
, status
);
901 _bcp47ToLDMLType(const char* key
, int32_t keyLen
,
902 const char* bcpType
, int32_t bcpTypeLen
,
903 char* type
, int32_t typeCapacity
,
904 UErrorCode
*status
) {
906 char keyBuf
[MAX_LDML_KEY_LEN
];
907 char bcpTypeBuf
[ULOC_KEYWORDS_CAPACITY
]; /* ensure buffer is large enough for multiple values (e.g. buddhist-greg) */
908 int32_t resultLen
= 0;
910 const char *resType
= NULL
;
911 UResourceBundle
*mapData
;
912 UErrorCode tmpStatus
= U_ZERO_ERROR
;
916 keyLen
= (int32_t)uprv_strlen(key
);
919 if (keyLen
>= sizeof(keyBuf
)) {
920 /* no known valid LDML key exceeding 21 */
921 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
924 uprv_memcpy(keyBuf
, key
, keyLen
);
928 for (i
= 0; i
< keyLen
; i
++) {
929 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
933 if (bcpTypeLen
< 0) {
934 bcpTypeLen
= (int32_t)uprv_strlen(bcpType
);
938 for (i
= 0; i
< bcpTypeLen
; i
++) {
939 if (bcpType
[i
] == SEP
) {
940 if (typeSize
>= MAX_BCP47_SUBTAG_LEN
) {
941 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
950 uprv_memcpy(bcpTypeBuf
, bcpType
, bcpTypeLen
);
951 bcpTypeBuf
[bcpTypeLen
] = 0;
954 for (i
= 0; i
< bcpTypeLen
; i
++) {
955 bcpTypeBuf
[i
] = uprv_tolower(bcpTypeBuf
[i
]);
958 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
959 ures_getByKey(rb
, TYPEMAP
, rb
, status
);
960 if (U_FAILURE(*status
)) {
965 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
966 mapData
= ures_getNextResource(rb
, NULL
, &tmpStatus
);
967 while (U_SUCCESS(tmpStatus
)) {
968 const UChar
*uBcpType
;
969 char tmpBcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
970 int32_t tmpBcpTypeLen
;
972 uBcpType
= ures_getString(mapData
, &tmpBcpTypeLen
, &tmpStatus
);
973 if (U_FAILURE(tmpStatus
)) {
976 u_UCharsToChars(uBcpType
, tmpBcpTypeBuf
, tmpBcpTypeLen
);
977 tmpBcpTypeBuf
[tmpBcpTypeLen
] = 0;
978 if (uprv_compareInvCharsAsAscii(bcpTypeBuf
, tmpBcpTypeBuf
) == 0) {
979 /* found a matching BCP47 type */
980 resType
= ures_getKey(mapData
);
981 resultLen
= (int32_t)uprv_strlen(resType
);
984 if (!ures_hasNext(rb
)) {
987 ures_getNextResource(rb
, mapData
, &tmpStatus
);
992 if (U_FAILURE(tmpStatus
) && tmpStatus
!= U_MISSING_RESOURCE_ERROR
) {
997 if (resType
== NULL
) {
998 resType
= bcpTypeBuf
;
999 resultLen
= bcpTypeLen
;
1002 copyLen
= uprv_min(resultLen
, typeCapacity
);
1003 uprv_memcpy(type
, resType
, copyLen
);
1005 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
1006 for (i
= 0; i
< copyLen
; i
++) {
1007 if (*(type
+ i
) == ':') {
1013 return u_terminateChars(type
, typeCapacity
, resultLen
, status
);
1017 _appendLanguageToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1018 char buf
[ULOC_LANG_CAPACITY
];
1019 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1023 if (U_FAILURE(*status
)) {
1027 len
= uloc_getLanguage(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1028 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1030 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1036 /* Note: returned language code is in lower case letters */
1039 if (reslen
< capacity
) {
1040 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
1042 reslen
+= LANG_UND_LEN
;
1043 } else if (!_isLanguageSubtag(buf
, len
)) {
1044 /* invalid language code */
1046 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1049 if (reslen
< capacity
) {
1050 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
1052 reslen
+= LANG_UND_LEN
;
1054 /* resolve deprecated */
1055 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
1056 if (uprv_compareInvCharsAsAscii(buf
, DEPRECATEDLANGS
[i
]) == 0) {
1057 uprv_strcpy(buf
, DEPRECATEDLANGS
[i
+ 1]);
1058 len
= (int32_t)uprv_strlen(buf
);
1062 if (reslen
< capacity
) {
1063 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1067 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1072 _appendScriptToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1073 char buf
[ULOC_SCRIPT_CAPACITY
];
1074 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1078 if (U_FAILURE(*status
)) {
1082 len
= uloc_getScript(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1083 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1085 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1091 if (!_isScriptSubtag(buf
, len
)) {
1092 /* invalid script code */
1094 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1098 if (reslen
< capacity
) {
1099 *(appendAt
+ reslen
) = SEP
;
1103 if (reslen
< capacity
) {
1104 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1109 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1114 _appendRegionToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1115 char buf
[ULOC_COUNTRY_CAPACITY
];
1116 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1120 if (U_FAILURE(*status
)) {
1124 len
= uloc_getCountry(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1125 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1127 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1133 if (!_isRegionSubtag(buf
, len
)) {
1134 /* invalid region code */
1136 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1140 if (reslen
< capacity
) {
1141 *(appendAt
+ reslen
) = SEP
;
1145 if (reslen
< capacity
) {
1146 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1151 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1156 _appendVariantsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool
*hadPosix
, UErrorCode
* status
) {
1157 char buf
[ULOC_FULLNAME_CAPACITY
];
1158 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1162 if (U_FAILURE(*status
)) {
1166 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1167 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1169 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1177 VariantListEntry
*var
;
1178 VariantListEntry
*varFirst
= NULL
;
1183 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1187 *p
= 0; /* terminate */
1191 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1194 /* ignore empty variant */
1196 /* ICU uses upper case letters for variants, but
1197 the canonical format is lowercase in BCP47 */
1198 for (i
= 0; *(pVar
+ i
) != 0; i
++) {
1199 *(pVar
+ i
) = uprv_tolower(*(pVar
+ i
));
1203 if (_isVariantSubtag(pVar
, -1)) {
1204 if (uprv_strcmp(pVar
,POSIX_VALUE
) || len
!= uprv_strlen(POSIX_VALUE
)) {
1205 /* emit the variant to the list */
1206 var
= uprv_malloc(sizeof(VariantListEntry
));
1208 *status
= U_MEMORY_ALLOCATION_ERROR
;
1211 var
->variant
= pVar
;
1212 if (!_addVariantToList(&varFirst
, var
)) {
1213 /* duplicated variant */
1216 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1221 /* Special handling for POSIX variant, need to remember that we had it and then */
1222 /* treat it like an extension later. */
1225 } else if (strict
) {
1226 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1228 } else if (_isPrivateuseValueSubtag(pVar
, -1)) {
1229 /* Handle private use subtags separately */
1233 /* reset variant starting position */
1235 } else if (pVar
== NULL
) {
1241 if (U_SUCCESS(*status
)) {
1242 if (varFirst
!= NULL
) {
1245 /* write out validated/normalized variants to the target */
1247 while (var
!= NULL
) {
1248 if (reslen
< capacity
) {
1249 *(appendAt
+ reslen
) = SEP
;
1252 varLen
= (int32_t)uprv_strlen(var
->variant
);
1253 if (reslen
< capacity
) {
1254 uprv_memcpy(appendAt
+ reslen
, var
->variant
, uprv_min(varLen
, capacity
- reslen
));
1264 while (var
!= NULL
) {
1265 VariantListEntry
*tmpVar
= var
->next
;
1270 if (U_FAILURE(*status
)) {
1275 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1280 _appendKeywordsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1281 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1282 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
] = { 0 };
1283 int32_t attrBufLength
= 0;
1284 UBool isAttribute
= FALSE
;
1285 UEnumeration
*keywordEnum
= NULL
;
1288 keywordEnum
= uloc_openKeywords(localeID
, status
);
1289 if (U_FAILURE(*status
) && !hadPosix
) {
1290 uenum_close(keywordEnum
);
1293 if (keywordEnum
!= NULL
|| hadPosix
) {
1294 /* reorder extensions */
1297 ExtensionListEntry
*firstExt
= NULL
;
1298 ExtensionListEntry
*ext
;
1299 AttributeListEntry
*firstAttr
= NULL
;
1300 AttributeListEntry
*attr
;
1302 char extBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1303 char *pExtBuf
= extBuf
;
1304 int32_t extBufCapacity
= sizeof(extBuf
);
1305 const char *bcpKey
, *bcpValue
;
1306 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1308 UBool isLDMLKeyword
;
1311 isAttribute
= FALSE
;
1312 key
= uenum_next(keywordEnum
, NULL
, status
);
1316 len
= uloc_getKeywordValue(localeID
, key
, buf
, sizeof(buf
), &tmpStatus
);
1317 if (U_FAILURE(tmpStatus
)) {
1319 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1322 /* ignore this keyword */
1323 tmpStatus
= U_ZERO_ERROR
;
1327 keylen
= (int32_t)uprv_strlen(key
);
1328 isLDMLKeyword
= (keylen
> 1);
1330 /* special keyword used for representing Unicode locale attributes */
1331 if (uprv_strcmp(key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
1337 for (; i
< len
; i
++) {
1338 if (buf
[i
] != '-') {
1339 attrBuf
[attrBufLength
++] = buf
[i
];
1345 if (attrBufLength
> 0) {
1346 attrBuf
[attrBufLength
] = 0;
1348 } else if (i
>= len
){
1352 /* create AttributeListEntry */
1353 attr
= uprv_malloc(sizeof(AttributeListEntry
));
1355 *status
= U_MEMORY_ALLOCATION_ERROR
;
1358 attrValue
= uprv_malloc(attrBufLength
+ 1);
1359 if (attrValue
== NULL
) {
1360 *status
= U_MEMORY_ALLOCATION_ERROR
;
1363 uprv_strcpy(attrValue
, attrBuf
);
1364 attr
->attribute
= attrValue
;
1366 if (!_addAttributeToList(&firstAttr
, attr
)) {
1368 uprv_free(attrValue
);
1370 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1376 } else if (isLDMLKeyword
) {
1379 /* transform key and value to bcp47 style */
1380 modKeyLen
= _ldmlKeyToBCP47(key
, keylen
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1381 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1383 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1386 tmpStatus
= U_ZERO_ERROR
;
1391 pExtBuf
+= (modKeyLen
+ 1);
1392 extBufCapacity
-= (modKeyLen
+ 1);
1394 len
= _ldmlTypeToBCP47(key
, keylen
, buf
, len
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1395 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1397 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1400 tmpStatus
= U_ZERO_ERROR
;
1404 pExtBuf
+= (len
+ 1);
1405 extBufCapacity
-= (len
+ 1);
1407 if (*key
== PRIVATEUSE
) {
1408 if (!_isPrivateuseValueSubtags(buf
, len
)) {
1410 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1416 if (!_isExtensionSingleton(key
, keylen
) || !_isExtensionSubtags(buf
, len
)) {
1418 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1425 if ((len
+ 1) < extBufCapacity
) {
1426 uprv_memcpy(pExtBuf
, buf
, len
);
1434 extBufCapacity
-= (len
+ 1);
1436 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1442 /* create ExtensionListEntry */
1443 ext
= uprv_malloc(sizeof(ExtensionListEntry
));
1445 *status
= U_MEMORY_ALLOCATION_ERROR
;
1449 ext
->value
= bcpValue
;
1451 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1454 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1461 /* Special handling for POSIX variant - add the keywords for POSIX */
1463 /* create ExtensionListEntry for POSIX */
1464 ext
= uprv_malloc(sizeof(ExtensionListEntry
));
1466 *status
= U_MEMORY_ALLOCATION_ERROR
;
1469 ext
->key
= POSIX_KEY
;
1470 ext
->value
= POSIX_VALUE
;
1472 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1477 if (U_SUCCESS(*status
) && (firstExt
!= NULL
|| firstAttr
!= NULL
)) {
1478 UBool startLDMLExtension
= FALSE
;
1483 if (!startLDMLExtension
&& (ext
&& uprv_strlen(ext
->key
) > 1)) {
1484 /* write LDML singleton extension */
1485 if (reslen
< capacity
) {
1486 *(appendAt
+ reslen
) = SEP
;
1489 if (reslen
< capacity
) {
1490 *(appendAt
+ reslen
) = LDMLEXT
;
1494 startLDMLExtension
= TRUE
;
1497 /* write out the sorted BCP47 attributes, extensions and private use */
1498 if (ext
&& (uprv_strlen(ext
->key
) == 1 || attr
== NULL
)) {
1499 if (reslen
< capacity
) {
1500 *(appendAt
+ reslen
) = SEP
;
1503 len
= (int32_t)uprv_strlen(ext
->key
);
1504 if (reslen
< capacity
) {
1505 uprv_memcpy(appendAt
+ reslen
, ext
->key
, uprv_min(len
, capacity
- reslen
));
1508 if (reslen
< capacity
) {
1509 *(appendAt
+ reslen
) = SEP
;
1512 len
= (int32_t)uprv_strlen(ext
->value
);
1513 if (reslen
< capacity
) {
1514 uprv_memcpy(appendAt
+ reslen
, ext
->value
, uprv_min(len
, capacity
- reslen
));
1520 /* write the value for the attributes */
1521 if (reslen
< capacity
) {
1522 *(appendAt
+ reslen
) = SEP
;
1525 len
= (int32_t)uprv_strlen(attr
->attribute
);
1526 if (reslen
< capacity
) {
1527 uprv_memcpy(appendAt
+ reslen
, attr
->attribute
, uprv_min(len
, capacity
- reslen
));
1533 } while (attr
!= NULL
|| ext
!= NULL
);
1538 while (ext
!= NULL
) {
1539 ExtensionListEntry
*tmpExt
= ext
->next
;
1545 while (attr
!= NULL
) {
1546 AttributeListEntry
*tmpAttr
= attr
->next
;
1547 char *pValue
= (char *)attr
->attribute
;
1553 uenum_close(keywordEnum
);
1555 if (U_FAILURE(*status
)) {
1560 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1564 * Append keywords parsed from LDML extension value
1565 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1566 * Note: char* buf is used for storing keywords
1569 _appendLDMLExtensionAsKeywords(const char* ldmlext
, ExtensionListEntry
** appendTo
, char* buf
, int32_t bufSize
, UBool
*posixVariant
, UErrorCode
*status
) {
1570 const char *p
, *pNext
, *pSep
, *pTmp
, *pTmpStart
;
1571 const char *pBcpKey
, *pBcpType
;
1572 const char *pKey
, *pType
;
1573 int32_t bcpKeyLen
= 0, bcpTypeLen
;
1574 ExtensionListEntry
*kwd
, *nextKwd
;
1575 ExtensionListEntry
*kwdFirst
= NULL
;
1578 UBool variantExists
= *posixVariant
;
1579 UBool searchFurther
;
1581 /* Reset the posixVariant value */
1582 *posixVariant
= FALSE
;
1585 pBcpKey
= pBcpType
= NULL
;
1589 /* locate next separator char */
1592 searchFurther
= FALSE
;
1593 if (pBcpKey
!= NULL
) {
1594 pTmpStart
= (pSep
+ 1);
1596 /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */
1598 if (*pTmp
== SEP
|| *(pTmp
+ 1) == 0) {
1599 if (!_isLDMLKey(pTmpStart
, (int32_t)(pTmp
- pTmpStart
))) {
1600 searchFurther
= TRUE
;
1607 if (searchFurther
) {
1623 if (pBcpKey
== NULL
) {
1625 bcpKeyLen
= (int32_t)(pSep
- p
);
1628 bcpTypeLen
= (int32_t)(pSep
- p
);
1630 /* BCP key to locale key */
1631 len
= _bcp47ToLDMLKey(pBcpKey
, bcpKeyLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1632 if (U_FAILURE(*status
)) {
1635 pKey
= buf
+ bufIdx
;
1637 *(buf
+ bufIdx
) = 0;
1640 /* BCP type to locale type */
1641 len
= _bcp47ToLDMLType(pKey
, -1, pBcpType
, bcpTypeLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1642 if (U_FAILURE(*status
)) {
1645 pType
= buf
+ bufIdx
;
1647 *(buf
+ bufIdx
) = 0;
1650 /* Special handling for u-va-posix, since we want to treat this as a variant, not */
1653 if (!variantExists
&& !uprv_strcmp(pKey
,POSIX_KEY
) && !uprv_strcmp(pType
,POSIX_VALUE
) ) {
1654 *posixVariant
= TRUE
;
1656 /* create an ExtensionListEntry for this keyword */
1657 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1659 *status
= U_MEMORY_ALLOCATION_ERROR
;
1666 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1667 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1679 if (pBcpKey
!= NULL
) {
1680 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1685 while (kwd
!= NULL
) {
1686 nextKwd
= kwd
->next
;
1687 _addExtensionToList(appendTo
, kwd
, FALSE
);
1695 while (kwd
!= NULL
) {
1696 nextKwd
= kwd
->next
;
1704 _appendKeywords(ULanguageTag
* langtag
, char* appendAt
, int32_t capacity
, UErrorCode
* status
) {
1708 ExtensionListEntry
*kwdFirst
= NULL
;
1709 ExtensionListEntry
*kwd
;
1710 AttributeListEntry
*attrFirst
= NULL
;
1711 AttributeListEntry
*attr
;
1712 const char *key
, *type
;
1713 char *kwdBuf
= NULL
;
1714 int32_t kwdBufLength
= capacity
;
1715 UBool posixVariant
= FALSE
;
1717 if (U_FAILURE(*status
)) {
1721 kwdBuf
= (char *)uprv_malloc(kwdBufLength
);
1722 if (kwdBuf
== NULL
) {
1723 *status
= U_MEMORY_ALLOCATION_ERROR
;
1727 /* Determine if variants already exist */
1728 if (ultag_getVariantsSize(langtag
)) {
1729 posixVariant
= TRUE
;
1732 n
= ultag_getExtensionsSize(langtag
);
1734 /* resolve locale keywords and reordering keys */
1735 for (i
= 0; i
< n
; i
++) {
1736 key
= ultag_getExtensionKey(langtag
, i
);
1737 type
= ultag_getExtensionValue(langtag
, i
);
1738 if (*key
== LDMLEXT
) {
1739 _appendLDMLExtensionAsKeywords(type
, &kwdFirst
, kwdBuf
, kwdBufLength
, &posixVariant
, status
);
1740 if (U_FAILURE(*status
)) {
1744 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1746 *status
= U_MEMORY_ALLOCATION_ERROR
;
1751 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1753 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1759 if (U_SUCCESS(*status
)) {
1760 type
= ultag_getPrivateUse(langtag
);
1761 if ((int32_t)uprv_strlen(type
) > 0) {
1762 /* add private use as a keyword */
1763 kwd
= uprv_malloc(sizeof(ExtensionListEntry
));
1765 *status
= U_MEMORY_ALLOCATION_ERROR
;
1767 kwd
->key
= PRIVATEUSE_KEY
;
1769 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1771 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1777 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1779 if (U_SUCCESS(*status
) && posixVariant
) {
1780 len
= (int32_t) uprv_strlen(_POSIX
);
1781 if (reslen
< capacity
) {
1782 uprv_memcpy(appendAt
+ reslen
, _POSIX
, uprv_min(len
, capacity
- reslen
));
1787 attrFirst
= langtag
->attributes
;
1788 if (U_SUCCESS(*status
) && (kwdFirst
!= NULL
|| attrFirst
!= NULL
)) {
1789 /* write out the sorted keywords */
1790 UBool firstValue
= TRUE
;
1791 UBool firstAttr
= TRUE
;
1795 if (reslen
< capacity
) {
1798 *(appendAt
+ reslen
) = LOCALE_EXT_SEP
;
1802 *(appendAt
+ reslen
) = SEP
;
1805 *(appendAt
+ reslen
) = LOCALE_KEYWORD_SEP
;
1812 len
= (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY
);
1813 if (reslen
< capacity
) {
1814 uprv_memcpy(appendAt
+ reslen
, LOCALE_ATTRIBUTE_KEY
, uprv_min(len
, capacity
- reslen
));
1819 if (reslen
< capacity
) {
1820 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1828 len
= (int32_t)uprv_strlen(attr
->attribute
);
1829 if (reslen
< capacity
) {
1830 uprv_memcpy(appendAt
+ reslen
, attr
->attribute
, uprv_min(len
, capacity
- reslen
));
1837 len
= (int32_t)uprv_strlen(kwd
->key
);
1838 if (reslen
< capacity
) {
1839 uprv_memcpy(appendAt
+ reslen
, kwd
->key
, uprv_min(len
, capacity
- reslen
));
1844 if (reslen
< capacity
) {
1845 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1850 len
= (int32_t)uprv_strlen(kwd
->value
);
1851 if (reslen
< capacity
) {
1852 uprv_memcpy(appendAt
+ reslen
, kwd
->value
, uprv_min(len
, capacity
- reslen
));
1858 } while (kwd
|| attr
);
1863 while (kwd
!= NULL
) {
1864 ExtensionListEntry
*tmpKwd
= kwd
->next
;
1871 if (U_FAILURE(*status
)) {
1875 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
/*
 * Appends a private use sequence derived from the variant part of localeID
 * to appendAt (room for `capacity` bytes).
 *
 * The variant string is fetched with uloc_getVariant() into a local buffer
 * and scanned for subtags delimited by SEP, LOCALE_SEP or the terminating
 * NUL. Each subtag is lower-cased in place and tested with
 * _isPrivateuseValueSubtag() / _isVariantSubtag(). Output is assembled in
 * tmpAppend from SEP, PRIVATEUSE_KEY, SEP, PRIVUSE_VARIANT_PREFIX, SEP and
 * the subtag text, then copied to appendAt and terminated with
 * u_terminateChars().
 *
 * U_ILLEGAL_ARGUMENT_ERROR is stored in *status on the error paths visible
 * below (variant could not be read cleanly; invalid subtag when `strict`).
 */
1879 _appendPrivateuseToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1880 char buf
[ULOC_FULLNAME_CAPACITY
];
1881 char tmpAppend
[ULOC_FULLNAME_CAPACITY
];
1882 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1886 if (U_FAILURE(*status
)) {
1890 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
/* a truncated (unterminated) variant is treated like a failure */
1891 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1893 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1901 UBool firstValue
= TRUE
;
/* a subtag ends at '-', '_' or the end of the variant string */
1908 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1912 *p
= 0; /* terminate */
1914 if (pPriv
!= NULL
) {
1915 /* Private use in the canonical format is lowercase in BCP47 */
1916 for (i
= 0; *(pPriv
+ i
) != 0; i
++) {
1917 *(pPriv
+ i
) = uprv_tolower(*(pPriv
+ i
));
1921 if (_isPrivateuseValueSubtag(pPriv
, -1)) {
1923 if (!_isVariantSubtag(pPriv
, -1)) {
1929 } else if (strict
) {
1930 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
/*
 * NOTE(review): the writes below go to tmpAppend, which holds
 * ULOC_FULLNAME_CAPACITY bytes, but are bounded by the caller's `capacity`.
 * Confirm a capacity larger than ULOC_FULLNAME_CAPACITY cannot overrun
 * tmpAppend.
 */
1937 if (reslen
< capacity
) {
1938 tmpAppend
[reslen
++] = SEP
;
1942 if (reslen
< capacity
) {
1943 tmpAppend
[reslen
++] = *PRIVATEUSE_KEY
;
1946 if (reslen
< capacity
) {
1947 tmpAppend
[reslen
++] = SEP
;
1950 len
= (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX
);
1951 if (reslen
< capacity
) {
1952 uprv_memcpy(tmpAppend
+ reslen
, PRIVUSE_VARIANT_PREFIX
, uprv_min(len
, capacity
- reslen
));
1956 if (reslen
< capacity
) {
1957 tmpAppend
[reslen
++] = SEP
;
1963 len
= (int32_t)uprv_strlen(pPriv
);
1964 if (reslen
< capacity
) {
1965 uprv_memcpy(tmpAppend
+ reslen
, pPriv
, uprv_min(len
, capacity
- reslen
));
1970 /* reset private use starting position */
1972 } else if (pPriv
== NULL
) {
1978 if (U_FAILURE(*status
)) {
1983 if (U_SUCCESS(*status
)) {
/*
 * NOTE(review): the copy starts at offset 0 of appendAt yet its length is
 * clamped to capacity - reslen; verify against the value assigned to len
 * that the whole assembled text is copied when it fits.
 */
1985 if (reslen
< capacity
) {
1986 uprv_memcpy(appendAt
, tmpAppend
, uprv_min(len
, capacity
- reslen
));
1990 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1996 * -------------------------------------------------
2000 * -------------------------------------------------
2003 /* Bit flags used by the parser */
/*
 * Parses a BCP 47 language tag into a newly allocated ULanguageTag.
 *
 * The input is copied into a heap buffer (tagBuf) that the subtag pointers
 * of the result refer to; subtags are NUL-terminated in place and their
 * case is normalized as they are accepted. A tag matching an entry of
 * GRANDFATHERED is replaced by its preferred mapping before parsing.
 *
 * Subtags are consumed in the order language, extlang, script, region,
 * variant, extension (singleton plus value subtags, with extra rules for
 * the LDML extension), private use. `next` holds the bit set of subtag
 * kinds allowed at the current position. pLastGoodPosition tracks the end
 * of the last accepted subtag.
 *
 * tag       - tag text.
 * tagLen    - length of tag; the strlen() call below suggests a negative
 *             value means NUL-terminated (TODO confirm).
 * parsedLen - if non-NULL, receives the offset of the last good position
 *             relative to t->buf.
 * status    - U_MEMORY_ALLOCATION_ERROR is stored on allocation failure.
 */
2014 static ULanguageTag
*
2015 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
) {
2019 char *pSubtag
, *pNext
, *pLastGoodPosition
;
2022 ExtensionListEntry
*pExtension
;
2023 AttributeListEntry
*pAttribute
;
2024 char *pExtValueSubtag
, *pExtValueSubtagEnd
;
2026 UBool isLDMLExtension
, reqLDMLType
, privateuseVar
= FALSE
;
2028 if (parsedLen
!= NULL
) {
2032 if (U_FAILURE(*status
)) {
2037 tagLen
= (int32_t)uprv_strlen(tag
);
2040 /* copy the entire string */
2041 tagBuf
= (char*)uprv_malloc(tagLen
+ 1);
2042 if (tagBuf
== NULL
) {
2043 *status
= U_MEMORY_ALLOCATION_ERROR
;
2046 uprv_memcpy(tagBuf
, tag
, tagLen
);
2047 *(tagBuf
+ tagLen
) = 0;
2049 /* create a ULanguageTag */
2050 t
= (ULanguageTag
*)uprv_malloc(sizeof(ULanguageTag
));
2053 *status
= U_MEMORY_ALLOCATION_ERROR
;
2056 _initializeULanguageTag(t
);
2059 if (tagLen
< MINLEN
) {
2060 /* the input tag is too short - return empty ULanguageTag */
2064 /* check if the tag is grandfathered */
/* GRANDFATHERED holds (grandfathered, preferred) pairs, hence the step of 2 */
2065 for (i
= 0; GRANDFATHERED
[i
] != NULL
; i
+= 2) {
2066 if (uprv_stricmp(GRANDFATHERED
[i
], tagBuf
) == 0) {
2067 /* the preferred mapping can be longer than the grandfathered tag (e.g. "cel-gaulish" -> "xtg-x-cel-gaulish"), so the buffer may have to grow */
2068 int32_t newTagLength
= uprv_strlen(GRANDFATHERED
[i
+1]);
2069 if (tagLen
< newTagLength
) {
2071 tagBuf
= (char*)uprv_malloc(newTagLength
+ 1);
2072 if (tagBuf
== NULL
) {
2073 *status
= U_MEMORY_ALLOCATION_ERROR
;
2077 tagLen
= newTagLength
;
2079 uprv_strcpy(t
->buf
, GRANDFATHERED
[i
+ 1]);
2086 * langtag = language
2095 pNext
= pLastGoodPosition
= tagBuf
;
2098 pExtValueSubtag
= NULL
;
2099 pExtValueSubtagEnd
= NULL
;
2101 isLDMLExtension
= FALSE
;
2102 reqLDMLType
= FALSE
;
2109 /* locate next separator char */
2123 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* language subtag: stored lower-cased; any later subtag kind may follow */
2126 if (_isLanguageSubtag(pSubtag
, subtagLen
)) {
2127 *pSep
= 0; /* terminate */
2128 t
->language
= T_CString_toLowerCase(pSubtag
);
2130 pLastGoodPosition
= pSep
;
2131 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* extlang subtag: stored lower-cased; another extlang is allowed only while fewer than 3 were seen */
2136 if (_isExtlangSubtag(pSubtag
, subtagLen
)) {
2138 t
->extlang
[extlangIdx
++] = T_CString_toLowerCase(pSubtag
);
2140 pLastGoodPosition
= pSep
;
2141 if (extlangIdx
< 3) {
2142 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
2144 next
= SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* script subtag: characters are re-cased in place with uprv_toupper / uprv_tolower */
2150 if (_isScriptSubtag(pSubtag
, subtagLen
)) {
2156 *p
= uprv_toupper(*p
);
2159 *p
= uprv_tolower(*p
);
2162 t
->script
= pSubtag
;
2164 pLastGoodPosition
= pSep
;
2165 next
= REGN
| VART
| EXTS
| PRIV
;
/* region subtag: stored upper-cased */
2170 if (_isRegionSubtag(pSubtag
, subtagLen
)) {
2172 t
->region
= T_CString_toUpperCase(pSubtag
);
2174 pLastGoodPosition
= pSep
;
2175 next
= VART
| EXTS
| PRIV
;
/* variant subtag; once privateuseVar is set, private-use variant subtags are accepted here too */
2180 if (_isVariantSubtag(pSubtag
, subtagLen
) ||
2181 (privateuseVar
&& _isPrivateuseVariantSubtag(pSubtag
, subtagLen
))) {
2182 VariantListEntry
*var
;
2185 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
2187 *status
= U_MEMORY_ALLOCATION_ERROR
;
2191 var
->variant
= T_CString_toUpperCase(pSubtag
);
2192 isAdded
= _addVariantToList(&(t
->variants
), var
);
2194 /* duplicated variant entry */
2198 pLastGoodPosition
= pSep
;
2199 next
= VART
| EXTS
| PRIV
;
/* extension singleton: finish the pending extension (if any), then start a new one */
2204 if (_isExtensionSingleton(pSubtag
, subtagLen
)) {
2205 if (pExtension
!= NULL
) {
2206 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2207 /* the previous extension is incomplete */
2208 uprv_free(pExtension
);
2213 /* terminate the previous extension value */
2214 *pExtValueSubtagEnd
= 0;
2215 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2217 /* insert the extension to the list */
2218 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2219 pLastGoodPosition
= pExtValueSubtagEnd
;
2221 /* stop parsing here */
2222 uprv_free(pExtension
);
2227 if (isLDMLExtension
&& reqLDMLType
) {
2228 /* incomplete LDML extension key and type pair */
2234 isLDMLExtension
= (uprv_tolower(*pSubtag
) == LDMLEXT
);
2236 /* create a new extension */
2237 pExtension
= uprv_malloc(sizeof(ExtensionListEntry
));
2238 if (pExtension
== NULL
) {
2239 *status
= U_MEMORY_ALLOCATION_ERROR
;
2243 pExtension
->key
= T_CString_toLowerCase(pSubtag
);
2244 pExtension
->value
= NULL
; /* will be set later */
2247 * reset the start and the end location of extension value
2248 * subtags for this extension
2250 pExtValueSubtag
= NULL
;
2251 pExtValueSubtagEnd
= NULL
;
/* extension value subtag; LDML extensions additionally track key/type pairing via reqLDMLType */
2258 if (_isExtensionSubtag(pSubtag
, subtagLen
)) {
2259 if (isLDMLExtension
) {
2261 /* already saw an LDML key */
2262 if (!_isLDMLType(pSubtag
, subtagLen
)) {
2263 /* stop parsing here and let the valid LDML extension key/type
2264 pairs processed by the code out of this while loop */
2267 pExtValueSubtagEnd
= pSep
;
2268 reqLDMLType
= FALSE
;
2269 next
= EXTS
| EXTV
| PRIV
;
2272 if (!_isLDMLKey(pSubtag
, subtagLen
)) {
2273 /* May be part of incomplete type */
2274 if (pExtValueSubtag
!= NULL
) {
2275 if (_isLDMLType(pSubtag
, subtagLen
)) {
2276 pExtValueSubtagEnd
= pSep
;
2277 reqLDMLType
= FALSE
;
2278 next
= EXTS
| EXTV
| PRIV
;
2280 } else if (pExtValueSubtag
== NULL
&& _isAttributeSubtag(pSubtag
, subtagLen
)) {
2284 /* stop parsing here and let the valid LDML extension key/type
2285 pairs processed by the code out of this while loop */
2294 /* Mark the end of this subtag */
2295 pExtValueSubtagEnd
= pSep
;
2296 next
= EXTS
| EXTV
| PRIV
;
2300 if (pExtValueSubtag
== NULL
) {
2301 /* if the start position of this extension's value is not yet set,
2302 this one is the first value subtag */
2303 pExtValueSubtag
= pSubtag
;
/* private use singleton: finish the pending extension, then validate the private use subtags */
2311 if (uprv_tolower(*pSubtag
) == PRIVATEUSE
) {
2314 if (pExtension
!= NULL
) {
2315 /* Process the last extension */
2316 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2317 /* the previous extension is incomplete */
2318 uprv_free(pExtension
);
2322 /* terminate the previous extension value */
2323 *pExtValueSubtagEnd
= 0;
2324 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2326 /* insert the extension to the list */
2327 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2328 pLastGoodPosition
= pExtValueSubtagEnd
;
2331 /* stop parsing here */
2332 uprv_free(pExtension
);
2339 /* The rest of part will be private use value subtags */
2340 if (pNext
== NULL
) {
2341 /* empty private use subtag */
2344 /* back up the private use value start position */
2345 pPrivuseVal
= pNext
;
2347 /* validate private use value subtags */
2363 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* a subtag starting with PRIVUSE_VARIANT_PREFIX switches on private-use variant handling */
2365 if (uprv_strncmp(pSubtag
, PRIVUSE_VARIANT_PREFIX
, uprv_strlen(PRIVUSE_VARIANT_PREFIX
)) == 0) {
2368 privateuseVar
= TRUE
;
2370 } else if (_isPrivateuseValueSubtag(pSubtag
, subtagLen
)) {
2371 pLastGoodPosition
= pSep
;
/* keep the private use value only if at least one value subtag was accepted */
2381 if (pLastGoodPosition
- pPrivuseVal
> 0) {
2382 *pLastGoodPosition
= 0;
2383 t
->privateuse
= T_CString_toLowerCase(pPrivuseVal
);
2385 /* No more subtags, exiting the parse loop */
2392 /* create a new attribute */
2393 pAttribute
= uprv_malloc(sizeof(AttributeListEntry
));
2394 if (pAttribute
== NULL
) {
2395 *status
= U_MEMORY_ALLOCATION_ERROR
;
2400 pAttribute
->attribute
=T_CString_toLowerCase(pSubtag
);
2402 if (!_addAttributeToList(&(t
->attributes
), pAttribute
)) {
2403 uprv_free(pAttribute
);
2406 next
= EXTS
| EXTV
| PRIV
;
2409 /* If we fell through here, it means this subtag is illegal - quit parsing */
/* after the loop: flush an extension that was still being collected */
2413 if (pExtension
!= NULL
) {
2414 /* Process the last extension */
2415 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2416 /* the previous extension is incomplete */
2417 uprv_free(pExtension
);
2419 /* terminate the previous extension value */
2420 *pExtValueSubtagEnd
= 0;
2421 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2422 /* insert the extension to the list */
2423 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2424 pLastGoodPosition
= pExtValueSubtagEnd
;
2426 uprv_free(pExtension
);
/* report how much of the buffer was parsed successfully */
2431 if (parsedLen
!= NULL
) {
2432 *parsedLen
= (int32_t)(pLastGoodPosition
- t
->buf
);
/*
 * Releases a ULanguageTag. A NULL langtag is checked for first. The subtag
 * buffer (langtag->buf) is freed, then the variant, extension and attribute
 * lists are walked node by node via their next pointers (each node is
 * presumably freed during the walk - TODO confirm).
 */
2443 ultag_close(ULanguageTag
* langtag
) {
2445 if (langtag
== NULL
) {
2449 uprv_free(langtag
->buf
);
2451 if (langtag
->variants
) {
2452 VariantListEntry
*curVar
= langtag
->variants
;
/* remember the successor before the current node is disposed of */
2454 VariantListEntry
*nextVar
= curVar
->next
;
2460 if (langtag
->extensions
) {
2461 ExtensionListEntry
*curExt
= langtag
->extensions
;
2463 ExtensionListEntry
*nextExt
= curExt
->next
;
2469 if (langtag
->attributes
) {
2470 AttributeListEntry
*curAttr
= langtag
->attributes
;
2472 AttributeListEntry
*nextAttr
= curAttr
->next
;
/* Returns the language field of the parsed tag (langtag must not be NULL; it is dereferenced unchecked). */
2482 ultag_getLanguage(const ULanguageTag
* langtag
) {
2483 return langtag
->language
;
/*
 * Returns the language of the parsed tag after mapping through
 * DEPRECATEDLANGS: the table is scanned in pairs, and when the first element
 * of a pair equals langtag->language the second element is returned.
 * Without a match the language field itself is returned.
 */
2488 ultag_getJDKLanguage(const ULanguageTag
* langtag
) {
2490 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
2491 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS
[i
], langtag
->language
) == 0) {
2492 return DEPRECATEDLANGS
[i
+ 1];
2495 return langtag
->language
;
/*
 * Returns the extlang subtag stored at position idx when idx lies in
 * [0, MAXEXTLANG); the result for an out-of-range idx is not visible here.
 */
2500 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
) {
2501 if (idx
>= 0 && idx
< MAXEXTLANG
) {
2502 return langtag
->extlang
[idx
];
/*
 * Inspects all MAXEXTLANG extlang slots of the tag and tests each for being
 * non-NULL (presumably counting the populated slots - TODO confirm).
 */
2508 ultag_getExtlangSize(const ULanguageTag
* langtag
) {
2511 for (i
= 0; i
< MAXEXTLANG
; i
++) {
2512 if (langtag
->extlang
[i
]) {
/* Returns the script field of the parsed tag (langtag is dereferenced unchecked). */
2520 ultag_getScript(const ULanguageTag
* langtag
) {
2521 return langtag
->script
;
/* Returns the region field of the parsed tag (langtag is dereferenced unchecked). */
2525 ultag_getRegion(const ULanguageTag
* langtag
) {
2526 return langtag
->region
;
/*
 * Looks up a variant by position idx, starting from the head of
 * langtag->variants. The result variable starts out as NULL (presumably what
 * is returned when idx is out of range - TODO confirm).
 */
2530 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
) {
2531 const char *var
= NULL
;
2532 VariantListEntry
*cur
= langtag
->variants
;
/* Walks the variant list from langtag->variants (presumably returning the number of entries - TODO confirm). */
2546 ultag_getVariantsSize(const ULanguageTag
* langtag
) {
2548 VariantListEntry
*cur
= langtag
->variants
;
2560 /* Currently not being used. */
/*
 * Looks up an attribute by position idx, starting from the head of
 * langtag->attributes; on a hit the entry's attribute string is taken as the
 * result. The result variable starts out as NULL.
 */
2562 ultag_getAttribute(const ULanguageTag
* langtag
, int32_t idx
) {
2563 const char *attr
= NULL
;
2564 AttributeListEntry
*cur
= langtag
->attributes
;
2568 attr
= cur
->attribute
;
/* Walks the attribute list from langtag->attributes (presumably returning the number of entries - TODO confirm). */
2579 ultag_getAttributesSize(const ULanguageTag
* langtag
) {
2581 AttributeListEntry
*cur
= langtag
->attributes
;
/*
 * Looks up the key of the extension at position idx, starting from the head
 * of langtag->extensions. The result variable starts out as NULL.
 */
2593 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
) {
2594 const char *key
= NULL
;
2595 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Looks up the value of the extension at position idx, starting from the
 * head of langtag->extensions. The result variable starts out as NULL.
 */
2609 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
) {
2610 const char *val
= NULL
;
2611 ExtensionListEntry
*cur
= langtag
->extensions
;
/* Walks the extension list from langtag->extensions (presumably returning the number of entries - TODO confirm). */
2625 ultag_getExtensionsSize(const ULanguageTag
* langtag
) {
2627 ExtensionListEntry
*cur
= langtag
->extensions
;
/* Returns the private use field of the parsed tag (langtag is dereferenced unchecked). */
2639 ultag_getPrivateUse(const ULanguageTag
* langtag
) {
2640 return langtag
->privateuse
;
/* Returns the grandfathered field of the parsed tag (langtag is dereferenced unchecked). */
2645 ultag_getGrandfathered(const ULanguageTag
* langtag
) {
2646 return langtag
->grandfathered
;
2652 * -------------------------------------------------
2654 * Locale/BCP47 conversion APIs, exposed as uloc_*
2656 * -------------------------------------------------
/*
 * Converts an ICU locale ID into a BCP 47 language tag written to `langtag`
 * (room for langtagCapacity bytes).
 *
 * A non-empty localeID is first canonicalized with uloc_canonicalize() into
 * a 256-byte local buffer; any resulting status other than U_ZERO_ERROR is
 * reported as U_ILLEGAL_ARGUMENT_ERROR. When the keywords start at the very
 * beginning of the canonical ID, the private-use-only case is handled: the
 * value of keyword "x" is validated with _isPrivateuseValueSubtags() and
 * returned as the tag. Otherwise the tag is assembled by appending, in
 * order, language, script, region, variants, keywords and private use; each
 * helper receives the remaining buffer and its return value is added to
 * reslen.
 */
2658 U_DRAFT
int32_t U_EXPORT2
2659 uloc_toLanguageTag(const char* localeID
,
2661 int32_t langtagCapacity
,
2663 UErrorCode
* status
) {
2664 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2665 char canonical
[256];
2667 UErrorCode tmpStatus
= U_ZERO_ERROR
;
2668 UBool hadPosix
= FALSE
;
2669 const char* pKeywordStart
;
2671 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2673 if (uprv_strlen(localeID
) > 0) {
2674 uloc_canonicalize(localeID
, canonical
, sizeof(canonical
), &tmpStatus
);
/* warnings (e.g. truncation) are rejected as well: only U_ZERO_ERROR passes */
2675 if (tmpStatus
!= U_ZERO_ERROR
) {
2676 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2681 /* For handling special case - private use only tag */
2682 pKeywordStart
= locale_getKeywordsStart(canonical
);
2683 if (pKeywordStart
== canonical
) {
2684 UEnumeration
*kwdEnum
;
2688 kwdEnum
= uloc_openKeywords((const char*)canonical
, &tmpStatus
);
2689 if (kwdEnum
!= NULL
) {
2690 kwdCnt
= uenum_count(kwdEnum
, &tmpStatus
);
2695 key
= uenum_next(kwdEnum
, &len
, &tmpStatus
);
2696 if (len
== 1 && *key
== PRIVATEUSE
) {
2697 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
2698 buf
[0] = PRIVATEUSE
;
/* the keyword value is written after the first two bytes of buf */
2700 len
= uloc_getKeywordValue(localeID
, key
, &buf
[2], sizeof(buf
) - 2, &tmpStatus
);
2701 if (U_SUCCESS(tmpStatus
)) {
2702 if (_isPrivateuseValueSubtags(&buf
[2], len
)) {
2703 /* return private use only tag */
2705 uprv_memcpy(langtag
, buf
, uprv_min(reslen
, langtagCapacity
));
2706 u_terminateChars(langtag
, langtagCapacity
, reslen
, status
);
2708 } else if (strict
) {
2709 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2712 /* if not strict mode, then "und" will be returned */
2714 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2719 uenum_close(kwdEnum
);
/* general case: each helper appends at langtag + reslen with the remaining capacity */
2726 reslen
+= _appendLanguageToLanguageTag(canonical
, langtag
, langtagCapacity
, strict
, status
);
2727 reslen
+= _appendScriptToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2728 reslen
+= _appendRegionToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2729 reslen
+= _appendVariantsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, &hadPosix
, status
);
2730 reslen
+= _appendKeywordsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2731 reslen
+= _appendPrivateuseToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
/*
 * Converts a BCP 47 language tag into an ICU locale ID written to `localeID`
 * (room for localeIDCapacity bytes).
 *
 * The tag is parsed with ultag_parse(), which also fills *parsedLength. The
 * locale ID is then assembled from:
 *  - the language: the first extlang if the tag has any, else the language
 *    subtag; it is copied only when it differs from "und";
 *  - the script, preceded by '_' and written in title case;
 *  - the region, preceded by '_' and written in upper case;
 *  - the variants, each preceded by '_' and written in upper case;
 *  - extensions, attributes and private use, via _appendKeywords(); "und" is
 *    written first when nothing was emitted so far and the tag has
 *    extensions or attributes.
 * Every write is guarded by reslen < localeIDCapacity. Returns the value of
 * u_terminateChars(localeID, localeIDCapacity, reslen, status).
 */
2737 U_DRAFT
int32_t U_EXPORT2
2738 uloc_forLanguageTag(const char* langtag
,
2740 int32_t localeIDCapacity
,
2741 int32_t* parsedLength
,
2742 UErrorCode
* status
) {
2745 const char *subtag
, *p
;
2748 UBool noRegion
= TRUE
;
2750 lt
= ultag_parse(langtag
, -1, parsedLength
, status
);
2751 if (U_FAILURE(*status
)) {
/* an extlang, when present, takes the place of the primary language */
2756 subtag
= ultag_getExtlangSize(lt
) > 0 ? ultag_getExtlang(lt
, 0) : ultag_getLanguage(lt
);
2757 if (uprv_compareInvCharsAsAscii(subtag
, LANG_UND
) != 0) {
2758 len
= (int32_t)uprv_strlen(subtag
);
2760 if (reslen
< localeIDCapacity
) {
2761 uprv_memcpy(localeID
, subtag
, uprv_min(len
, localeIDCapacity
- reslen
));
2768 subtag
= ultag_getScript(lt
);
2769 len
= (int32_t)uprv_strlen(subtag
);
2771 if (reslen
< localeIDCapacity
) {
2772 *(localeID
+ reslen
) = LOCALE_SEP
;
2776 /* write out the script in title case */
2779 if (reslen
< localeIDCapacity
) {
2781 *(localeID
+ reslen
) = uprv_toupper(*p
);
2783 *(localeID
+ reslen
) = *p
;
2792 subtag
= ultag_getRegion(lt
);
2793 len
= (int32_t)uprv_strlen(subtag
);
2795 if (reslen
< localeIDCapacity
) {
2796 *(localeID
+ reslen
) = LOCALE_SEP
;
2799 /* write out the region in upper case */
2802 if (reslen
< localeIDCapacity
) {
2803 *(localeID
+ reslen
) = uprv_toupper(*p
);
2812 n
= ultag_getVariantsSize(lt
);
2815 if (reslen
< localeIDCapacity
) {
2816 *(localeID
+ reslen
) = LOCALE_SEP
;
2821 for (i
= 0; i
< n
; i
++) {
2822 subtag
= ultag_getVariant(lt
, i
);
2823 if (reslen
< localeIDCapacity
) {
2824 *(localeID
+ reslen
) = LOCALE_SEP
;
2827 /* write out the variant in upper case */
2830 if (reslen
< localeIDCapacity
) {
2831 *(localeID
+ reslen
) = uprv_toupper(*p
);
2840 n
= ultag_getExtensionsSize(lt
);
2841 m
= ultag_getAttributesSize(lt
);
2842 subtag
= ultag_getPrivateUse(lt
);
2843 if (n
> 0 || m
> 0 || uprv_strlen(subtag
) > 0) {
2844 if (reslen
== 0 && (n
> 0 || m
> 0)) {
2845 /* need a language */
2846 if (reslen
< localeIDCapacity
) {
2847 uprv_memcpy(localeID
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, localeIDCapacity
- reslen
));
2849 reslen
+= LANG_UND_LEN
;
2851 len
= _appendKeywords(lt
, localeID
+ reslen
, localeIDCapacity
- reslen
, status
);
2856 return u_terminateChars(localeID
, localeIDCapacity
, reslen
, status
);