2 **********************************************************************
3 * Copyright (C) 2009-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
20 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
22 /* struct holding a single variant */
23 typedef struct VariantListEntry
{
25 struct VariantListEntry
*next
;
28 /* struct holding a single attribute value */
29 typedef struct AttributeListEntry
{
30 const char *attribute
;
31 struct AttributeListEntry
*next
;
34 /* struct holding a single extension */
35 typedef struct ExtensionListEntry
{
38 struct ExtensionListEntry
*next
;
42 typedef struct ULanguageTag
{
43 char *buf
; /* holding parsed subtags */
45 const char *extlang
[MAXEXTLANG
];
48 VariantListEntry
*variants
;
49 ExtensionListEntry
*extensions
;
50 const char *privateuse
;
51 const char *grandfathered
;
56 #define PRIVATEUSE 'x'
59 #define LOCALE_SEP '_'
60 #define LOCALE_EXT_SEP '@'
61 #define LOCALE_KEYWORD_SEP ';'
62 #define LOCALE_KEY_TYPE_SEP '='
64 #define ISALPHA(c) uprv_isASCIILetter(c)
65 #define ISNUMERIC(c) ((c)>='0' && (c)<='9')
67 static const char EMPTY
[] = "";
68 static const char LANG_UND
[] = "und";
69 static const char PRIVATEUSE_KEY
[] = "x";
70 static const char _POSIX
[] = "_POSIX";
71 static const char POSIX_KEY
[] = "va";
72 static const char POSIX_VALUE
[] = "posix";
73 static const char LOCALE_ATTRIBUTE_KEY
[] = "attribute";
74 static const char PRIVUSE_VARIANT_PREFIX
[] = "lvariant";
75 static const char LOCALE_TYPE_YES
[] = "yes";
77 #define LANG_UND_LEN 3
79 static const char* const GRANDFATHERED
[] = {
80 /* grandfathered preferred */
82 "cel-gaulish", "xtg-x-cel-gaulish",
83 "en-GB-oed", "en-GB-x-oed",
86 "i-default", "en-x-i-default",
87 "i-enochian", "und-x-i-enochian",
91 "i-mingo", "see-x-i-mingo",
104 "zh-min", "nan-x-zh-min",
110 static const char DEPRECATEDLANGS
[][4] = {
118 * -------------------------------------------------
120 * These ultag_ functions may be exposed as APIs later
122 * -------------------------------------------------
126 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
);
129 ultag_close(ULanguageTag
* langtag
);
132 ultag_getLanguage(const ULanguageTag
* langtag
);
136 ultag_getJDKLanguage(const ULanguageTag
* langtag
);
140 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
);
143 ultag_getExtlangSize(const ULanguageTag
* langtag
);
146 ultag_getScript(const ULanguageTag
* langtag
);
149 ultag_getRegion(const ULanguageTag
* langtag
);
152 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
);
155 ultag_getVariantsSize(const ULanguageTag
* langtag
);
158 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
);
161 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
);
164 ultag_getExtensionsSize(const ULanguageTag
* langtag
);
167 ultag_getPrivateUse(const ULanguageTag
* langtag
);
171 ultag_getGrandfathered(const ULanguageTag
* langtag
);
175 * -------------------------------------------------
177 * Language subtag syntax validation functions
179 * -------------------------------------------------
183 _isAlphaString(const char* s
, int32_t len
) {
185 for (i
= 0; i
< len
; i
++) {
186 if (!ISALPHA(*(s
+ i
))) {
194 _isNumericString(const char* s
, int32_t len
) {
196 for (i
= 0; i
< len
; i
++) {
197 if (!ISNUMERIC(*(s
+ i
))) {
205 _isAlphaNumericString(const char* s
, int32_t len
) {
207 for (i
= 0; i
< len
; i
++) {
208 if (!ISALPHA(*(s
+ i
)) && !ISNUMERIC(*(s
+ i
))) {
216 _isLanguageSubtag(const char* s
, int32_t len
) {
218 * language = 2*3ALPHA ; shortest ISO 639 code
219 * ["-" extlang] ; sometimes followed by
220 * ; extended language subtags
221 * / 4ALPHA ; or reserved for future use
222 * / 5*8ALPHA ; or registered language subtag
225 len
= (int32_t)uprv_strlen(s
);
227 if (len
>= 2 && len
<= 8 && _isAlphaString(s
, len
)) {
234 _isExtlangSubtag(const char* s
, int32_t len
) {
236 * extlang = 3ALPHA ; selected ISO 639 codes
237 * *2("-" 3ALPHA) ; permanently reserved
240 len
= (int32_t)uprv_strlen(s
);
242 if (len
== 3 && _isAlphaString(s
, len
)) {
249 _isScriptSubtag(const char* s
, int32_t len
) {
251 * script = 4ALPHA ; ISO 15924 code
254 len
= (int32_t)uprv_strlen(s
);
256 if (len
== 4 && _isAlphaString(s
, len
)) {
263 _isRegionSubtag(const char* s
, int32_t len
) {
265 * region = 2ALPHA ; ISO 3166-1 code
266 * / 3DIGIT ; UN M.49 code
269 len
= (int32_t)uprv_strlen(s
);
271 if (len
== 2 && _isAlphaString(s
, len
)) {
274 if (len
== 3 && _isNumericString(s
, len
)) {
281 _isVariantSubtag(const char* s
, int32_t len
) {
283 * variant = 5*8alphanum ; registered variants
284 * / (DIGIT 3alphanum)
287 len
= (int32_t)uprv_strlen(s
);
289 if (len
>= 5 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
292 if (len
== 4 && ISNUMERIC(*s
) && _isAlphaNumericString(s
+ 1, 3)) {
299 _isPrivateuseVariantSubtag(const char* s
, int32_t len
) {
301 * variant = 1*8alphanum ; registered variants
302 * / (DIGIT 3alphanum)
305 len
= (int32_t)uprv_strlen(s
);
307 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
314 _isExtensionSingleton(const char* s
, int32_t len
) {
316 * extension = singleton 1*("-" (2*8alphanum))
319 len
= (int32_t)uprv_strlen(s
);
321 if (len
== 1 && ISALPHA(*s
) && (uprv_tolower(*s
) != PRIVATEUSE
)) {
328 _isExtensionSubtag(const char* s
, int32_t len
) {
330 * extension = singleton 1*("-" (2*8alphanum))
333 len
= (int32_t)uprv_strlen(s
);
335 if (len
>= 2 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
342 _isExtensionSubtags(const char* s
, int32_t len
) {
344 const char *pSubtag
= NULL
;
347 len
= (int32_t)uprv_strlen(s
);
350 while ((p
- s
) < len
) {
352 if (pSubtag
== NULL
) {
355 if (!_isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
359 } else if (pSubtag
== NULL
) {
364 if (pSubtag
== NULL
) {
367 return _isExtensionSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
371 _isPrivateuseValueSubtag(const char* s
, int32_t len
) {
373 * privateuse = "x" 1*("-" (1*8alphanum))
376 len
= (int32_t)uprv_strlen(s
);
378 if (len
>= 1 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
385 _isPrivateuseValueSubtags(const char* s
, int32_t len
) {
387 const char *pSubtag
= NULL
;
390 len
= (int32_t)uprv_strlen(s
);
393 while ((p
- s
) < len
) {
395 if (pSubtag
== NULL
) {
398 if (!_isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
))) {
402 } else if (pSubtag
== NULL
) {
407 if (pSubtag
== NULL
) {
410 return _isPrivateuseValueSubtag(pSubtag
, (int32_t)(p
- pSubtag
));
414 _isLDMLKey(const char* s
, int32_t len
) {
416 len
= (int32_t)uprv_strlen(s
);
418 if (len
== 2 && _isAlphaNumericString(s
, len
)) {
425 _isLDMLType(const char* s
, int32_t len
) {
427 len
= (int32_t)uprv_strlen(s
);
429 if (len
>= 3 && len
<= 8 && _isAlphaNumericString(s
, len
)) {
436 * -------------------------------------------------
440 * -------------------------------------------------
444 _addVariantToList(VariantListEntry
**first
, VariantListEntry
*var
) {
447 if (*first
== NULL
) {
451 VariantListEntry
*prev
, *cur
;
454 /* variants order should be preserved */
464 /* Checking for duplicate variant */
465 cmp
= uprv_compareInvCharsAsAscii(var
->variant
, cur
->variant
);
467 /* duplicated variant */
480 _addAttributeToList(AttributeListEntry
**first
, AttributeListEntry
*attr
) {
483 if (*first
== NULL
) {
487 AttributeListEntry
*prev
, *cur
;
490 /* reorder attributes in alphabetical order */
499 cmp
= uprv_compareInvCharsAsAscii(attr
->attribute
, cur
->attribute
);
510 /* duplicated attribute */
524 _addExtensionToList(ExtensionListEntry
**first
, ExtensionListEntry
*ext
, UBool localeToBCP
) {
527 if (*first
== NULL
) {
531 ExtensionListEntry
*prev
, *cur
;
534 /* reorder extensions by key */
544 /* special handling for locale to bcp conversion */
547 len
= (int32_t)uprv_strlen(ext
->key
);
548 curlen
= (int32_t)uprv_strlen(cur
->key
);
550 if (len
== 1 && curlen
== 1) {
551 if (*(ext
->key
) == *(cur
->key
)) {
553 } else if (*(ext
->key
) == PRIVATEUSE
) {
555 } else if (*(cur
->key
) == PRIVATEUSE
) {
558 cmp
= *(ext
->key
) - *(cur
->key
);
560 } else if (len
== 1) {
561 cmp
= *(ext
->key
) - LDMLEXT
;
562 } else if (curlen
== 1) {
563 cmp
= LDMLEXT
- *(cur
->key
);
565 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
568 cmp
= uprv_compareInvCharsAsAscii(ext
->key
, cur
->key
);
580 /* duplicated extension key */
593 _initializeULanguageTag(ULanguageTag
* langtag
) {
598 langtag
->language
= EMPTY
;
599 for (i
= 0; i
< MAXEXTLANG
; i
++) {
600 langtag
->extlang
[i
] = NULL
;
603 langtag
->script
= EMPTY
;
604 langtag
->region
= EMPTY
;
606 langtag
->variants
= NULL
;
607 langtag
->extensions
= NULL
;
609 langtag
->grandfathered
= EMPTY
;
610 langtag
->privateuse
= EMPTY
;
613 #define KEYTYPEDATA "keyTypeData"
614 #define KEYMAP "keyMap"
615 #define TYPEMAP "typeMap"
616 #define TYPEALIAS "typeAlias"
617 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
618 #define MAX_LDML_KEY_LEN 22
619 #define MAX_LDML_TYPE_LEN 32
622 _ldmlKeyToBCP47(const char* key
, int32_t keyLen
,
623 char* bcpKey
, int32_t bcpKeyCapacity
,
624 UErrorCode
*status
) {
626 char keyBuf
[MAX_LDML_KEY_LEN
];
627 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
628 int32_t resultLen
= 0;
630 UErrorCode tmpStatus
= U_ZERO_ERROR
;
631 const UChar
*uBcpKey
;
635 keyLen
= (int32_t)uprv_strlen(key
);
638 if (keyLen
>= sizeof(keyBuf
)) {
639 /* no known valid LDML key exceeding 21 */
640 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
644 uprv_memcpy(keyBuf
, key
, keyLen
);
648 for (i
= 0; i
< keyLen
; i
++) {
649 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
652 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
653 ures_getByKey(rb
, KEYMAP
, rb
, status
);
655 if (U_FAILURE(*status
)) {
660 uBcpKey
= ures_getStringByKey(rb
, keyBuf
, &bcpKeyLen
, &tmpStatus
);
661 if (U_SUCCESS(tmpStatus
)) {
662 u_UCharsToChars(uBcpKey
, bcpKeyBuf
, bcpKeyLen
);
663 bcpKeyBuf
[bcpKeyLen
] = 0;
664 resultLen
= bcpKeyLen
;
666 if (_isLDMLKey(key
, keyLen
)) {
667 uprv_memcpy(bcpKeyBuf
, key
, keyLen
);
668 bcpKeyBuf
[keyLen
] = 0;
671 /* mapping not available */
672 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
677 if (U_FAILURE(*status
)) {
681 uprv_memcpy(bcpKey
, bcpKeyBuf
, uprv_min(resultLen
, bcpKeyCapacity
));
682 return u_terminateChars(bcpKey
, bcpKeyCapacity
, resultLen
, status
);
686 _bcp47ToLDMLKey(const char* bcpKey
, int32_t bcpKeyLen
,
687 char* key
, int32_t keyCapacity
,
688 UErrorCode
*status
) {
690 char bcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
691 int32_t resultLen
= 0;
693 const char *resKey
= NULL
;
694 UResourceBundle
*mapData
;
697 bcpKeyLen
= (int32_t)uprv_strlen(bcpKey
);
700 if (bcpKeyLen
>= sizeof(bcpKeyBuf
)) {
701 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
705 uprv_memcpy(bcpKeyBuf
, bcpKey
, bcpKeyLen
);
706 bcpKeyBuf
[bcpKeyLen
] = 0;
709 for (i
= 0; i
< bcpKeyLen
; i
++) {
710 bcpKeyBuf
[i
] = uprv_tolower(bcpKeyBuf
[i
]);
713 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
714 ures_getByKey(rb
, KEYMAP
, rb
, status
);
715 if (U_FAILURE(*status
)) {
720 mapData
= ures_getNextResource(rb
, NULL
, status
);
721 while (U_SUCCESS(*status
)) {
722 const UChar
*uBcpKey
;
723 char tmpBcpKeyBuf
[MAX_BCP47_SUBTAG_LEN
];
724 int32_t tmpBcpKeyLen
;
726 uBcpKey
= ures_getString(mapData
, &tmpBcpKeyLen
, status
);
727 if (U_FAILURE(*status
)) {
730 u_UCharsToChars(uBcpKey
, tmpBcpKeyBuf
, tmpBcpKeyLen
);
731 tmpBcpKeyBuf
[tmpBcpKeyLen
] = 0;
732 if (uprv_compareInvCharsAsAscii(bcpKeyBuf
, tmpBcpKeyBuf
) == 0) {
733 /* found a matching BCP47 key */
734 resKey
= ures_getKey(mapData
);
735 resultLen
= (int32_t)uprv_strlen(resKey
);
738 if (!ures_hasNext(rb
)) {
741 ures_getNextResource(rb
, mapData
, status
);
746 if (U_FAILURE(*status
)) {
750 if (resKey
== NULL
) {
752 resultLen
= bcpKeyLen
;
755 uprv_memcpy(key
, resKey
, uprv_min(resultLen
, keyCapacity
));
756 return u_terminateChars(key
, keyCapacity
, resultLen
, status
);
760 _ldmlTypeToBCP47(const char* key
, int32_t keyLen
,
761 const char* type
, int32_t typeLen
,
762 char* bcpType
, int32_t bcpTypeCapacity
,
763 UErrorCode
*status
) {
764 UResourceBundle
*rb
, *keyTypeData
, *typeMapForKey
;
765 char keyBuf
[MAX_LDML_KEY_LEN
];
766 char typeBuf
[MAX_LDML_TYPE_LEN
];
767 char bcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
768 int32_t resultLen
= 0;
770 UErrorCode tmpStatus
= U_ZERO_ERROR
;
771 const UChar
*uBcpType
, *uCanonicalType
;
772 int32_t bcpTypeLen
, canonicalTypeLen
;
773 UBool isTimezone
= FALSE
;
776 keyLen
= (int32_t)uprv_strlen(key
);
778 if (keyLen
>= sizeof(keyBuf
)) {
779 /* no known valid LDML key exceeding 21 */
780 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
783 uprv_memcpy(keyBuf
, key
, keyLen
);
787 for (i
= 0; i
< keyLen
; i
++) {
788 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
790 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
795 typeLen
= (int32_t)uprv_strlen(type
);
797 if (typeLen
>= sizeof(typeBuf
)) {
798 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
803 /* replace '/' with ':' */
804 for (i
= 0; i
< typeLen
; i
++) {
805 if (*(type
+ i
) == '/') {
808 typeBuf
[i
] = *(type
+ i
);
811 typeBuf
[typeLen
] = 0;
815 keyTypeData
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
816 rb
= ures_getByKey(keyTypeData
, TYPEMAP
, NULL
, status
);
817 if (U_FAILURE(*status
)) {
819 ures_close(keyTypeData
);
823 typeMapForKey
= ures_getByKey(rb
, keyBuf
, NULL
, &tmpStatus
);
824 uBcpType
= ures_getStringByKey(typeMapForKey
, type
, &bcpTypeLen
, &tmpStatus
);
825 if (U_SUCCESS(tmpStatus
)) {
826 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
827 resultLen
= bcpTypeLen
;
828 } else if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
829 /* is this type alias? */
830 tmpStatus
= U_ZERO_ERROR
;
831 ures_getByKey(keyTypeData
, TYPEALIAS
, rb
, &tmpStatus
);
832 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
833 uCanonicalType
= ures_getStringByKey(rb
, type
, &canonicalTypeLen
, &tmpStatus
);
834 if (U_SUCCESS(tmpStatus
)) {
835 u_UCharsToChars(uCanonicalType
, typeBuf
, canonicalTypeLen
);
837 /* replace '/' with ':' */
838 for (i
= 0; i
< canonicalTypeLen
; i
++) {
839 if (typeBuf
[i
] == '/') {
844 typeBuf
[canonicalTypeLen
] = 0;
846 /* look up the canonical type */
847 uBcpType
= ures_getStringByKey(typeMapForKey
, typeBuf
, &bcpTypeLen
, &tmpStatus
);
848 if (U_SUCCESS(tmpStatus
)) {
849 u_UCharsToChars(uBcpType
, bcpTypeBuf
, bcpTypeLen
);
850 resultLen
= bcpTypeLen
;
853 if (tmpStatus
== U_MISSING_RESOURCE_ERROR
) {
854 if (_isLDMLType(type
, typeLen
)) {
855 uprv_memcpy(bcpTypeBuf
, type
, typeLen
);
858 /* mapping not available */
859 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
866 ures_close(typeMapForKey
);
867 ures_close(keyTypeData
);
869 if (U_FAILURE(*status
)) {
873 uprv_memcpy(bcpType
, bcpTypeBuf
, uprv_min(resultLen
, bcpTypeCapacity
));
874 return u_terminateChars(bcpType
, bcpTypeCapacity
, resultLen
, status
);
878 _bcp47ToLDMLType(const char* key
, int32_t keyLen
,
879 const char* bcpType
, int32_t bcpTypeLen
,
880 char* type
, int32_t typeCapacity
,
881 UErrorCode
*status
) {
883 char keyBuf
[MAX_LDML_KEY_LEN
];
884 char bcpTypeBuf
[ULOC_KEYWORDS_CAPACITY
]; /* ensure buffer is large enough for multiple values (e.g. buddhist-greg) */
885 int32_t resultLen
= 0;
887 const char *resType
= NULL
;
888 UResourceBundle
*mapData
;
889 UErrorCode tmpStatus
= U_ZERO_ERROR
;
893 keyLen
= (int32_t)uprv_strlen(key
);
896 if (keyLen
>= sizeof(keyBuf
)) {
897 /* no known valid LDML key exceeding 21 */
898 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
901 uprv_memcpy(keyBuf
, key
, keyLen
);
905 for (i
= 0; i
< keyLen
; i
++) {
906 keyBuf
[i
] = uprv_tolower(keyBuf
[i
]);
910 if (bcpTypeLen
< 0) {
911 bcpTypeLen
= (int32_t)uprv_strlen(bcpType
);
915 for (i
= 0; i
< bcpTypeLen
; i
++) {
916 if (bcpType
[i
] == SEP
) {
917 if (typeSize
>= MAX_BCP47_SUBTAG_LEN
) {
918 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
927 uprv_memcpy(bcpTypeBuf
, bcpType
, bcpTypeLen
);
928 bcpTypeBuf
[bcpTypeLen
] = 0;
931 for (i
= 0; i
< bcpTypeLen
; i
++) {
932 bcpTypeBuf
[i
] = uprv_tolower(bcpTypeBuf
[i
]);
935 rb
= ures_openDirect(NULL
, KEYTYPEDATA
, status
);
936 ures_getByKey(rb
, TYPEMAP
, rb
, status
);
937 if (U_FAILURE(*status
)) {
942 ures_getByKey(rb
, keyBuf
, rb
, &tmpStatus
);
943 mapData
= ures_getNextResource(rb
, NULL
, &tmpStatus
);
944 while (U_SUCCESS(tmpStatus
)) {
945 const UChar
*uBcpType
;
946 char tmpBcpTypeBuf
[MAX_BCP47_SUBTAG_LEN
];
947 int32_t tmpBcpTypeLen
;
949 uBcpType
= ures_getString(mapData
, &tmpBcpTypeLen
, &tmpStatus
);
950 if (U_FAILURE(tmpStatus
)) {
953 u_UCharsToChars(uBcpType
, tmpBcpTypeBuf
, tmpBcpTypeLen
);
954 tmpBcpTypeBuf
[tmpBcpTypeLen
] = 0;
955 if (uprv_compareInvCharsAsAscii(bcpTypeBuf
, tmpBcpTypeBuf
) == 0) {
956 /* found a matching BCP47 type */
957 resType
= ures_getKey(mapData
);
958 resultLen
= (int32_t)uprv_strlen(resType
);
961 if (!ures_hasNext(rb
)) {
964 ures_getNextResource(rb
, mapData
, &tmpStatus
);
969 if (U_FAILURE(tmpStatus
) && tmpStatus
!= U_MISSING_RESOURCE_ERROR
) {
974 if (resType
== NULL
) {
975 resType
= bcpTypeBuf
;
976 resultLen
= bcpTypeLen
;
979 copyLen
= uprv_min(resultLen
, typeCapacity
);
980 uprv_memcpy(type
, resType
, copyLen
);
982 if (uprv_compareInvCharsAsAscii(keyBuf
, "timezone") == 0) {
983 for (i
= 0; i
< copyLen
; i
++) {
984 if (*(type
+ i
) == ':') {
990 return u_terminateChars(type
, typeCapacity
, resultLen
, status
);
994 _appendLanguageToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
995 char buf
[ULOC_LANG_CAPACITY
];
996 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1000 if (U_FAILURE(*status
)) {
1004 len
= uloc_getLanguage(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1005 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1007 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1013 /* Note: returned language code is in lower case letters */
1016 if (reslen
< capacity
) {
1017 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
1019 reslen
+= LANG_UND_LEN
;
1020 } else if (!_isLanguageSubtag(buf
, len
)) {
1021 /* invalid language code */
1023 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1026 if (reslen
< capacity
) {
1027 uprv_memcpy(appendAt
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, capacity
- reslen
));
1029 reslen
+= LANG_UND_LEN
;
1031 /* resolve deprecated */
1032 for (i
= 0; i
< LENGTHOF(DEPRECATEDLANGS
); i
+= 2) {
1033 if (uprv_compareInvCharsAsAscii(buf
, DEPRECATEDLANGS
[i
]) == 0) {
1034 uprv_strcpy(buf
, DEPRECATEDLANGS
[i
+ 1]);
1035 len
= (int32_t)uprv_strlen(buf
);
1039 if (reslen
< capacity
) {
1040 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1044 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1049 _appendScriptToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1050 char buf
[ULOC_SCRIPT_CAPACITY
];
1051 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1055 if (U_FAILURE(*status
)) {
1059 len
= uloc_getScript(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1060 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1062 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1068 if (!_isScriptSubtag(buf
, len
)) {
1069 /* invalid script code */
1071 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1075 if (reslen
< capacity
) {
1076 *(appendAt
+ reslen
) = SEP
;
1080 if (reslen
< capacity
) {
1081 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1086 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1091 _appendRegionToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UErrorCode
* status
) {
1092 char buf
[ULOC_COUNTRY_CAPACITY
];
1093 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1097 if (U_FAILURE(*status
)) {
1101 len
= uloc_getCountry(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1102 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1104 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1110 if (!_isRegionSubtag(buf
, len
)) {
1111 /* invalid region code */
1113 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1117 if (reslen
< capacity
) {
1118 *(appendAt
+ reslen
) = SEP
;
1122 if (reslen
< capacity
) {
1123 uprv_memcpy(appendAt
+ reslen
, buf
, uprv_min(len
, capacity
- reslen
));
1128 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1133 _appendVariantsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool
*hadPosix
, UErrorCode
* status
) {
1134 char buf
[ULOC_FULLNAME_CAPACITY
];
1135 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1139 if (U_FAILURE(*status
)) {
1143 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
1144 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1146 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1154 VariantListEntry
*var
;
1155 VariantListEntry
*varFirst
= NULL
;
1160 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1164 *p
= 0; /* terminate */
1168 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1171 /* ignore empty variant */
1173 /* ICU uses upper case letters for variants, but
1174 the canonical format is lowercase in BCP47 */
1175 for (i
= 0; *(pVar
+ i
) != 0; i
++) {
1176 *(pVar
+ i
) = uprv_tolower(*(pVar
+ i
));
1180 if (_isVariantSubtag(pVar
, -1)) {
1181 if (uprv_strcmp(pVar
,POSIX_VALUE
) || len
!= uprv_strlen(POSIX_VALUE
)) {
1182 /* emit the variant to the list */
1183 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
1185 *status
= U_MEMORY_ALLOCATION_ERROR
;
1188 var
->variant
= pVar
;
1189 if (!_addVariantToList(&varFirst
, var
)) {
1190 /* duplicated variant */
1193 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1198 /* Special handling for POSIX variant, need to remember that we had it and then */
1199 /* treat it like an extension later. */
1202 } else if (strict
) {
1203 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1205 } else if (_isPrivateuseValueSubtag(pVar
, -1)) {
1206 /* Handle private use subtags separately */
1210 /* reset variant starting position */
1212 } else if (pVar
== NULL
) {
1218 if (U_SUCCESS(*status
)) {
1219 if (varFirst
!= NULL
) {
1222 /* write out validated/normalized variants to the target */
1224 while (var
!= NULL
) {
1225 if (reslen
< capacity
) {
1226 *(appendAt
+ reslen
) = SEP
;
1229 varLen
= (int32_t)uprv_strlen(var
->variant
);
1230 if (reslen
< capacity
) {
1231 uprv_memcpy(appendAt
+ reslen
, var
->variant
, uprv_min(varLen
, capacity
- reslen
));
1241 while (var
!= NULL
) {
1242 VariantListEntry
*tmpVar
= var
->next
;
1247 if (U_FAILURE(*status
)) {
1252 u_terminateChars(appendAt
, capacity
, reslen
, status
);
1257 _appendKeywordsToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1258 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1259 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
] = { 0 };
1260 int32_t attrBufLength
= 0;
1261 UBool isAttribute
= FALSE
;
1262 UEnumeration
*keywordEnum
= NULL
;
1265 keywordEnum
= uloc_openKeywords(localeID
, status
);
1266 if (U_FAILURE(*status
) && !hadPosix
) {
1267 uenum_close(keywordEnum
);
1270 if (keywordEnum
!= NULL
|| hadPosix
) {
1271 /* reorder extensions */
1274 ExtensionListEntry
*firstExt
= NULL
;
1275 ExtensionListEntry
*ext
;
1276 AttributeListEntry
*firstAttr
= NULL
;
1277 AttributeListEntry
*attr
;
1279 char extBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1280 char *pExtBuf
= extBuf
;
1281 int32_t extBufCapacity
= sizeof(extBuf
);
1282 const char *bcpKey
, *bcpValue
;
1283 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1285 UBool isLDMLKeyword
;
1288 isAttribute
= FALSE
;
1289 key
= uenum_next(keywordEnum
, NULL
, status
);
1293 len
= uloc_getKeywordValue(localeID
, key
, buf
, sizeof(buf
), &tmpStatus
);
1294 if (U_FAILURE(tmpStatus
)) {
1296 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1299 /* ignore this keyword */
1300 tmpStatus
= U_ZERO_ERROR
;
1304 keylen
= (int32_t)uprv_strlen(key
);
1305 isLDMLKeyword
= (keylen
> 1);
1307 /* special keyword used for representing Unicode locale attributes */
1308 if (uprv_strcmp(key
, LOCALE_ATTRIBUTE_KEY
) == 0) {
1314 for (; i
< len
; i
++) {
1315 if (buf
[i
] != '-') {
1316 attrBuf
[attrBufLength
++] = buf
[i
];
1322 if (attrBufLength
> 0) {
1323 attrBuf
[attrBufLength
] = 0;
1325 } else if (i
>= len
){
1329 /* create AttributeListEntry */
1330 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
1332 *status
= U_MEMORY_ALLOCATION_ERROR
;
1335 attrValue
= (char*)uprv_malloc(attrBufLength
+ 1);
1336 if (attrValue
== NULL
) {
1337 *status
= U_MEMORY_ALLOCATION_ERROR
;
1340 uprv_strcpy(attrValue
, attrBuf
);
1341 attr
->attribute
= attrValue
;
1343 if (!_addAttributeToList(&firstAttr
, attr
)) {
1345 uprv_free(attrValue
);
1347 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1353 } else if (isLDMLKeyword
) {
1356 /* transform key and value to bcp47 style */
1357 modKeyLen
= _ldmlKeyToBCP47(key
, keylen
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1358 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1360 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1363 tmpStatus
= U_ZERO_ERROR
;
1368 pExtBuf
+= (modKeyLen
+ 1);
1369 extBufCapacity
-= (modKeyLen
+ 1);
1371 len
= _ldmlTypeToBCP47(key
, keylen
, buf
, len
, pExtBuf
, extBufCapacity
, &tmpStatus
);
1372 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1374 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1377 tmpStatus
= U_ZERO_ERROR
;
1381 pExtBuf
+= (len
+ 1);
1382 extBufCapacity
-= (len
+ 1);
1384 if (*key
== PRIVATEUSE
) {
1385 if (!_isPrivateuseValueSubtags(buf
, len
)) {
1387 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1393 if (!_isExtensionSingleton(key
, keylen
) || !_isExtensionSubtags(buf
, len
)) {
1395 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1402 if ((len
+ 1) < extBufCapacity
) {
1403 uprv_memcpy(pExtBuf
, buf
, len
);
1411 extBufCapacity
-= (len
+ 1);
1413 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1419 /* create ExtensionListEntry */
1420 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1422 *status
= U_MEMORY_ALLOCATION_ERROR
;
1426 ext
->value
= bcpValue
;
1428 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1431 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1438 /* Special handling for POSIX variant - add the keywords for POSIX */
1440 /* create ExtensionListEntry for POSIX */
1441 ext
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1443 *status
= U_MEMORY_ALLOCATION_ERROR
;
1446 ext
->key
= POSIX_KEY
;
1447 ext
->value
= POSIX_VALUE
;
1449 if (!_addExtensionToList(&firstExt
, ext
, TRUE
)) {
1454 if (U_SUCCESS(*status
) && (firstExt
!= NULL
|| firstAttr
!= NULL
)) {
1455 UBool startLDMLExtension
= FALSE
;
1460 if (!startLDMLExtension
&& (ext
&& uprv_strlen(ext
->key
) > 1)) {
1461 /* write LDML singleton extension */
1462 if (reslen
< capacity
) {
1463 *(appendAt
+ reslen
) = SEP
;
1466 if (reslen
< capacity
) {
1467 *(appendAt
+ reslen
) = LDMLEXT
;
1471 startLDMLExtension
= TRUE
;
1474 /* write out the sorted BCP47 attributes, extensions and private use */
1475 if (ext
&& (uprv_strlen(ext
->key
) == 1 || attr
== NULL
)) {
1476 if (reslen
< capacity
) {
1477 *(appendAt
+ reslen
) = SEP
;
1480 len
= (int32_t)uprv_strlen(ext
->key
);
1481 if (reslen
< capacity
) {
1482 uprv_memcpy(appendAt
+ reslen
, ext
->key
, uprv_min(len
, capacity
- reslen
));
1485 if (reslen
< capacity
) {
1486 *(appendAt
+ reslen
) = SEP
;
1489 len
= (int32_t)uprv_strlen(ext
->value
);
1490 if (reslen
< capacity
) {
1491 uprv_memcpy(appendAt
+ reslen
, ext
->value
, uprv_min(len
, capacity
- reslen
));
1497 /* write the value for the attributes */
1498 if (reslen
< capacity
) {
1499 *(appendAt
+ reslen
) = SEP
;
1502 len
= (int32_t)uprv_strlen(attr
->attribute
);
1503 if (reslen
< capacity
) {
1504 uprv_memcpy(appendAt
+ reslen
, attr
->attribute
, uprv_min(len
, capacity
- reslen
));
1510 } while (attr
!= NULL
|| ext
!= NULL
);
1515 while (ext
!= NULL
) {
1516 ExtensionListEntry
*tmpExt
= ext
->next
;
1522 while (attr
!= NULL
) {
1523 AttributeListEntry
*tmpAttr
= attr
->next
;
1524 char *pValue
= (char *)attr
->attribute
;
1530 uenum_close(keywordEnum
);
1532 if (U_FAILURE(*status
)) {
1537 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
1541 * Append keywords parsed from LDML extension value
1542 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1543 * Note: char* buf is used for storing keywords
1546 _appendLDMLExtensionAsKeywords(const char* ldmlext
, ExtensionListEntry
** appendTo
, char* buf
, int32_t bufSize
, UBool
*posixVariant
, UErrorCode
*status
) {
1547 const char *pTag
; /* beginning of current subtag */
1548 const char *pKwds
; /* beginning of key-type pairs */
1549 UBool variantExists
= *posixVariant
;
1551 ExtensionListEntry
*kwdFirst
= NULL
; /* first LDML keyword */
1552 ExtensionListEntry
*kwd
, *nextKwd
;
1554 AttributeListEntry
*attrFirst
= NULL
; /* first attribute */
1555 AttributeListEntry
*attr
, *nextAttr
;
1560 char attrBuf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
1561 int32_t attrBufIdx
= 0;
1563 /* Reset the posixVariant value */
1564 *posixVariant
= FALSE
;
1569 /* Iterate through u extension attributes */
1571 /* locate next separator char */
1572 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1574 if (_isLDMLKey(pTag
, len
)) {
1579 /* add this attribute to the list */
1580 attr
= (AttributeListEntry
*)uprv_malloc(sizeof(AttributeListEntry
));
1582 *status
= U_MEMORY_ALLOCATION_ERROR
;
1586 if (len
< (int32_t)sizeof(attrBuf
) - attrBufIdx
) {
1587 uprv_memcpy(&attrBuf
[attrBufIdx
], pTag
, len
);
1588 attrBuf
[attrBufIdx
+ len
] = 0;
1589 attr
->attribute
= &attrBuf
[attrBufIdx
];
1590 attrBufIdx
+= (len
+ 1);
1592 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1596 if (!_addAttributeToList(&attrFirst
, attr
)) {
1597 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1605 /* next to the separator */
1611 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1613 if (attrBufIdx
> bufSize
) {
1614 /* attrBufIdx == <total length of attribute subtag> + 1 */
1615 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1619 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1621 *status
= U_MEMORY_ALLOCATION_ERROR
;
1625 kwd
->key
= LOCALE_ATTRIBUTE_KEY
;
1628 /* attribute subtags sorted in alphabetical order as type */
1630 while (attr
!= NULL
) {
1631 nextAttr
= attr
->next
;
1633 /* buffer size check is done above */
1634 if (attr
!= attrFirst
) {
1635 *(buf
+ bufIdx
) = SEP
;
1639 len
= uprv_strlen(attr
->attribute
);
1640 uprv_memcpy(buf
+ bufIdx
, attr
->attribute
, len
);
1645 *(buf
+ bufIdx
) = 0;
1648 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1649 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1654 /* once keyword entry is created, delete the attribute list */
1656 while (attr
!= NULL
) {
1657 nextAttr
= attr
->next
;
1665 const char *pBcpKey
= NULL
; /* u extension key subtag */
1666 const char *pBcpType
= NULL
; /* beginning of u extension type subtag(s) */
1667 int32_t bcpKeyLen
= 0;
1668 int32_t bcpTypeLen
= 0;
1669 UBool isDone
= FALSE
;
1672 /* BCP47 representation of LDML key/type pairs */
1674 const char *pNextBcpKey
= NULL
;
1675 int32_t nextBcpKeyLen
;
1676 UBool emitKeyword
= FALSE
;
1679 /* locate next separator char */
1680 for (len
= 0; *(pTag
+ len
) && *(pTag
+ len
) != SEP
; len
++);
1682 if (_isLDMLKey(pTag
, len
)) {
1686 nextBcpKeyLen
= len
;
1692 U_ASSERT(pBcpKey
!= NULL
);
1693 /* within LDML type subtags */
1695 bcpTypeLen
+= (len
+ 1);
1705 /* next to the separator */
1709 /* processing last one */
1715 const char *pKey
= NULL
; /* LDML key */
1716 const char *pType
= NULL
; /* LDML type */
1718 U_ASSERT(pBcpKey
!= NULL
);
1720 /* u extension key to LDML key */
1721 len
= _bcp47ToLDMLKey(pBcpKey
, bcpKeyLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1722 if (U_FAILURE(*status
)) {
1725 pKey
= buf
+ bufIdx
;
1727 *(buf
+ bufIdx
) = 0;
1731 /* BCP type to locale type */
1732 len
= _bcp47ToLDMLType(pKey
, -1, pBcpType
, bcpTypeLen
, buf
+ bufIdx
, bufSize
- bufIdx
- 1, status
);
1733 if (U_FAILURE(*status
)) {
1736 pType
= buf
+ bufIdx
;
1738 *(buf
+ bufIdx
) = 0;
1741 /* typeless - default type value is "yes" */
1742 pType
= LOCALE_TYPE_YES
;
1745 /* Special handling for u-va-posix, since we want to treat this as a variant,
1747 if (!variantExists
&& !uprv_strcmp(pKey
, POSIX_KEY
) && !uprv_strcmp(pType
, POSIX_VALUE
) ) {
1748 *posixVariant
= TRUE
;
1750 /* create an ExtensionListEntry for this keyword */
1751 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1753 *status
= U_MEMORY_ALLOCATION_ERROR
;
1760 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1761 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1767 pBcpKey
= pNextBcpKey
;
1768 bcpKeyLen
= pNextBcpKey
!= NULL
? nextBcpKeyLen
: 0;
1776 while (kwd
!= NULL
) {
1777 nextKwd
= kwd
->next
;
1778 _addExtensionToList(appendTo
, kwd
, FALSE
);
1786 while (attr
!= NULL
) {
1787 nextAttr
= attr
->next
;
1793 while (kwd
!= NULL
) {
1794 nextKwd
= kwd
->next
;
/*
 * Appends the extensions and the private use value of a parsed language
 * tag (langtag) to appendAt in locale keyword form.
 *
 * langtag  - parsed tag; read through the ultag_get* accessors.
 * appendAt - output position; writes are guarded by reslen < capacity.
 * capacity - space available at appendAt; also used as the size of the
 *            temporary kwdBuf allocation.
 * status   - in/out error code; set to U_MEMORY_ALLOCATION_ERROR or
 *            U_ILLEGAL_ARGUMENT_ERROR on the failure paths shown below.
 *
 * The value returned on the visible exit path is the result of
 * u_terminateChars(appendAt, capacity, reslen, status).
 *
 * NOTE(review): several lines of this function (else branches, cleanup,
 * closing braces) are not visible in this excerpt; the comments below
 * describe only what is shown.
 */
1802 _appendKeywords(ULanguageTag
* langtag
, char* appendAt
, int32_t capacity
, UErrorCode
* status
) {
1806 ExtensionListEntry
*kwdFirst
= NULL
;
1807 ExtensionListEntry
*kwd
;
1808 const char *key
, *type
;
1809 char *kwdBuf
= NULL
;
1810 int32_t kwdBufLength
= capacity
;
1811 UBool posixVariant
= FALSE
;
1813 if (U_FAILURE(*status
)) {
/* scratch buffer handed to _appendLDMLExtensionAsKeywords below */
1817 kwdBuf
= (char*)uprv_malloc(kwdBufLength
);
1818 if (kwdBuf
== NULL
) {
1819 *status
= U_MEMORY_ALLOCATION_ERROR
;
/*
 * NOTE(review): posixVariant is set from the variant count here and is then
 * passed by address to _appendLDMLExtensionAsKeywords, so it appears to act
 * as an in/out flag - confirm against that function.
 */
1823 /* Determine if variants already exist */
1824 if (ultag_getVariantsSize(langtag
)) {
1825 posixVariant
= TRUE
;
1828 n
= ultag_getExtensionsSize(langtag
);
1830 /* resolve locale keywords and reordering keys */
1831 for (i
= 0; i
< n
; i
++) {
1832 key
= ultag_getExtensionKey(langtag
, i
);
1833 type
= ultag_getExtensionValue(langtag
, i
);
/* extensions whose key starts with LDMLEXT are expanded into keywords */
1834 if (*key
== LDMLEXT
) {
1835 _appendLDMLExtensionAsKeywords(type
, &kwdFirst
, kwdBuf
, kwdBufLength
, &posixVariant
, status
);
1836 if (U_FAILURE(*status
)) {
/* presumably the non-LDML branch (the else line is not visible here) */
1840 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1842 *status
= U_MEMORY_ALLOCATION_ERROR
;
1847 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1849 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1855 if (U_SUCCESS(*status
)) {
1856 type
= ultag_getPrivateUse(langtag
);
1857 if ((int32_t)uprv_strlen(type
) > 0) {
1858 /* add private use as a keyword */
1859 kwd
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
1861 *status
= U_MEMORY_ALLOCATION_ERROR
;
1863 kwd
->key
= PRIVATEUSE_KEY
;
1865 if (!_addExtensionToList(&kwdFirst
, kwd
, FALSE
)) {
1867 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1873 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1875 if (U_SUCCESS(*status
) && posixVariant
) {
1876 len
= (int32_t) uprv_strlen(_POSIX
);
1877 if (reslen
< capacity
) {
1878 uprv_memcpy(appendAt
+ reslen
, _POSIX
, uprv_min(len
, capacity
- reslen
));
1883 if (U_SUCCESS(*status
) && kwdFirst
!= NULL
) {
1884 /* write out the sorted keywords */
1885 UBool firstValue
= TRUE
;
1888 if (reslen
< capacity
) {
/* LOCALE_EXT_SEP and LOCALE_KEYWORD_SEP are both written in this section;
   presumably firstValue selects between them - the condition is not visible */
1891 *(appendAt
+ reslen
) = LOCALE_EXT_SEP
;
1895 *(appendAt
+ reslen
) = LOCALE_KEYWORD_SEP
;
/* key, then LOCALE_KEY_TYPE_SEP, then value; each copy is clipped to the
   space left in appendAt */
1901 len
= (int32_t)uprv_strlen(kwd
->key
);
1902 if (reslen
< capacity
) {
1903 uprv_memcpy(appendAt
+ reslen
, kwd
->key
, uprv_min(len
, capacity
- reslen
));
1908 if (reslen
< capacity
) {
1909 *(appendAt
+ reslen
) = LOCALE_KEY_TYPE_SEP
;
1914 len
= (int32_t)uprv_strlen(kwd
->value
);
1915 if (reslen
< capacity
) {
1916 uprv_memcpy(appendAt
+ reslen
, kwd
->value
, uprv_min(len
, capacity
- reslen
));
/* walk the keyword list, saving the next pointer before the current entry
   is handled (the handling itself is not visible in this excerpt) */
1926 while (kwd
!= NULL
) {
1927 ExtensionListEntry
*tmpKwd
= kwd
->next
;
1934 if (U_FAILURE(*status
)) {
1938 return u_terminateChars(appendAt
, capacity
, reslen
, status
);
/*
 * Builds a private use section from the variant field of localeID and
 * copies it to appendAt.
 *
 * The variant is fetched with uloc_getVariant into a local buffer, split at
 * SEP / LOCALE_SEP / end of string, and each piece is lower-cased in place.
 * Pieces are tested with _isPrivateuseValueSubtag and _isVariantSubtag.
 * The output is assembled in tmpAppend: SEP, the first character of
 * PRIVATEUSE_KEY, SEP, PRIVUSE_VARIANT_PREFIX, SEP, then the piece itself.
 *
 * localeID - locale ID whose variant is examined.
 * appendAt - destination for the assembled text.
 * capacity - space available at appendAt.
 * strict   - when TRUE, the failure paths shown set *status to
 *            U_ILLEGAL_ARGUMENT_ERROR.
 * hadPosix - not referenced in the visible lines.
 * status   - in/out error code.
 *
 * NOTE(review): the return statement and several branches are not visible
 * in this excerpt.
 */
1942 _appendPrivateuseToLanguageTag(const char* localeID
, char* appendAt
, int32_t capacity
, UBool strict
, UBool hadPosix
, UErrorCode
* status
) {
1943 char buf
[ULOC_FULLNAME_CAPACITY
];
/*
 * NOTE(review): tmpAppend holds ULOC_FULLNAME_CAPACITY bytes, but the writes
 * below are guarded by reslen < capacity (the caller's capacity), not by
 * sizeof(tmpAppend) - confirm capacity can never exceed the array size.
 */
1944 char tmpAppend
[ULOC_FULLNAME_CAPACITY
];
1945 UErrorCode tmpStatus
= U_ZERO_ERROR
;
1949 if (U_FAILURE(*status
)) {
1953 len
= uloc_getVariant(localeID
, buf
, sizeof(buf
), &tmpStatus
);
/* a variant that does not fit in buf (including its terminator) is rejected */
1954 if (U_FAILURE(tmpStatus
) || tmpStatus
== U_STRING_NOT_TERMINATED_WARNING
) {
1956 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1964 UBool firstValue
= TRUE
;
/* a separator or the terminating NUL ends the current piece */
1971 if (*p
== SEP
|| *p
== LOCALE_SEP
|| *p
== 0) {
1975 *p
= 0; /* terminate */
1977 if (pPriv
!= NULL
) {
1978 /* Private use in the canonical format is lowercase in BCP47 */
1979 for (i
= 0; *(pPriv
+ i
) != 0; i
++) {
1980 *(pPriv
+ i
) = uprv_tolower(*(pPriv
+ i
));
1984 if (_isPrivateuseValueSubtag(pPriv
, -1)) {
1986 if (!_isVariantSubtag(pPriv
, -1)) {
1992 } else if (strict
) {
1993 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
/* separator written ahead of the value */
2000 if (reslen
< capacity
) {
2001 tmpAppend
[reslen
++] = SEP
;
/* prefix: first character of PRIVATEUSE_KEY, SEP, PRIVUSE_VARIANT_PREFIX, SEP
   (presumably emitted only for the first value - the condition is not visible) */
2005 if (reslen
< capacity
) {
2006 tmpAppend
[reslen
++] = *PRIVATEUSE_KEY
;
2009 if (reslen
< capacity
) {
2010 tmpAppend
[reslen
++] = SEP
;
2013 len
= (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX
);
2014 if (reslen
< capacity
) {
2015 uprv_memcpy(tmpAppend
+ reslen
, PRIVUSE_VARIANT_PREFIX
, uprv_min(len
, capacity
- reslen
));
2019 if (reslen
< capacity
) {
2020 tmpAppend
[reslen
++] = SEP
;
/* the lower-cased piece itself */
2026 len
= (int32_t)uprv_strlen(pPriv
);
2027 if (reslen
< capacity
) {
2028 uprv_memcpy(tmpAppend
+ reslen
, pPriv
, uprv_min(len
, capacity
- reslen
));
2033 /* reset private use starting position */
2035 } else if (pPriv
== NULL
) {
2041 if (U_FAILURE(*status
)) {
2046 if (U_SUCCESS(*status
)) {
/*
 * NOTE(review): the destination offset is 0 but the copy length is limited
 * to capacity - reslen rather than capacity; the value of len at this point
 * is not visible here - confirm the intended bound.
 */
2048 if (reslen
< capacity
) {
2049 uprv_memcpy(appendAt
, tmpAppend
, uprv_min(len
, capacity
- reslen
));
2053 u_terminateChars(appendAt
, capacity
, reslen
, status
);
2059 * -------------------------------------------------
2063 * -------------------------------------------------
2066 /* Bit flags used by the parser */
/*
 * Parses a language tag string into a newly allocated ULanguageTag.
 *
 * tag       - input tag text.
 * tagLen    - length of tag; it is recomputed with uprv_strlen(tag) on a
 *             path whose condition is not visible in this excerpt
 *             (presumably when a negative length is passed - confirm).
 * parsedLen - optional, may be NULL. On the exit path shown it receives
 *             grandfatheredLen when that is positive, otherwise the offset
 *             of pLastGoodPosition from the start of t->buf.
 * status    - in/out error code; U_MEMORY_ALLOCATION_ERROR is set when an
 *             allocation fails.
 *
 * The input is copied into a private buffer and subtags are NUL-terminated
 * and case-normalized in place: language, extlang, extension key/value and
 * private use go to lower case; region and variants go to upper case.
 * After each accepted subtag, `next` is set to a mask of subtag kinds
 * (EXTL, SCRT, REGN, VART, EXTS, EXTV, PRIV) - presumably the kinds allowed
 * to follow.
 *
 * NOTE(review): the loop header, several branches and the return statement
 * are not visible in this excerpt.
 */
2076 static ULanguageTag
*
2077 ultag_parse(const char* tag
, int32_t tagLen
, int32_t* parsedLen
, UErrorCode
* status
) {
2081 char *pSubtag
, *pNext
, *pLastGoodPosition
;
2084 ExtensionListEntry
*pExtension
;
2085 char *pExtValueSubtag
, *pExtValueSubtagEnd
;
2087 UBool privateuseVar
= FALSE
;
2088 int32_t grandfatheredLen
= 0;
2090 if (parsedLen
!= NULL
) {
2094 if (U_FAILURE(*status
)) {
2099 tagLen
= (int32_t)uprv_strlen(tag
);
2102 /* copy the entire string */
2103 tagBuf
= (char*)uprv_malloc(tagLen
+ 1);
2104 if (tagBuf
== NULL
) {
2105 *status
= U_MEMORY_ALLOCATION_ERROR
;
2108 uprv_memcpy(tagBuf
, tag
, tagLen
);
2109 *(tagBuf
+ tagLen
) = 0;
2111 /* create a ULanguageTag */
2112 t
= (ULanguageTag
*)uprv_malloc(sizeof(ULanguageTag
));
2115 *status
= U_MEMORY_ALLOCATION_ERROR
;
2118 _initializeULanguageTag(t
);
2121 if (tagLen
< MINLEN
) {
2122 /* the input tag is too short - return empty ULanguageTag */
/* GRANDFATHERED is scanned in pairs: entry i is the grandfathered tag,
   entry i + 1 its preferred replacement; matching is case-insensitive */
2126 /* check if the tag is grandfathered */
2127 for (i
= 0; GRANDFATHERED
[i
] != NULL
; i
+= 2) {
2128 if (uprv_stricmp(GRANDFATHERED
[i
], tagBuf
) == 0) {
2129 int32_t newTagLength
;
2131 grandfatheredLen
= tagLen
; /* back up for output parsedLen */
/* NOTE(review): unlike the other uprv_strlen calls in this function, this
   result is assigned to an int32_t without an explicit cast */
2132 newTagLength
= uprv_strlen(GRANDFATHERED
[i
+1]);
/* the buffer is reallocated when the replacement is longer than the input */
2133 if (tagLen
< newTagLength
) {
2135 tagBuf
= (char*)uprv_malloc(newTagLength
+ 1);
2136 if (tagBuf
== NULL
) {
2137 *status
= U_MEMORY_ALLOCATION_ERROR
;
2141 tagLen
= newTagLength
;
2143 uprv_strcpy(t
->buf
, GRANDFATHERED
[i
+ 1]);
2149 * langtag = language
2158 pNext
= pLastGoodPosition
= tagBuf
;
2161 pExtValueSubtag
= NULL
;
2162 pExtValueSubtagEnd
= NULL
;
2169 /* locate next separator char */
2183 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* language subtag */
2186 if (_isLanguageSubtag(pSubtag
, subtagLen
)) {
2187 *pSep
= 0; /* terminate */
2188 t
->language
= T_CString_toLowerCase(pSubtag
);
2190 pLastGoodPosition
= pSep
;
2191 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* extlang subtag */
2196 if (_isExtlangSubtag(pSubtag
, subtagLen
)) {
2198 t
->extlang
[extlangIdx
++] = T_CString_toLowerCase(pSubtag
);
2200 pLastGoodPosition
= pSep
;
/* NOTE(review): the literal 3 presumably mirrors MAXEXTLANG - confirm */
2201 if (extlangIdx
< 3) {
2202 next
= EXTL
| SCRT
| REGN
| VART
| EXTS
| PRIV
;
2204 next
= SCRT
| REGN
| VART
| EXTS
| PRIV
;
/* script subtag: characters are rewritten in place with uprv_toupper and
   uprv_tolower (presumably title case - the selecting condition is not
   visible here) */
2210 if (_isScriptSubtag(pSubtag
, subtagLen
)) {
2216 *p
= uprv_toupper(*p
);
2219 *p
= uprv_tolower(*p
);
2222 t
->script
= pSubtag
;
2224 pLastGoodPosition
= pSep
;
2225 next
= REGN
| VART
| EXTS
| PRIV
;
/* region subtag */
2230 if (_isRegionSubtag(pSubtag
, subtagLen
)) {
2232 t
->region
= T_CString_toUpperCase(pSubtag
);
2234 pLastGoodPosition
= pSep
;
2235 next
= VART
| EXTS
| PRIV
;
/* variant subtag; once privateuseVar is set, private use variant subtags
   are accepted here as well */
2240 if (_isVariantSubtag(pSubtag
, subtagLen
) ||
2241 (privateuseVar
&& _isPrivateuseVariantSubtag(pSubtag
, subtagLen
))) {
2242 VariantListEntry
*var
;
2245 var
= (VariantListEntry
*)uprv_malloc(sizeof(VariantListEntry
));
2247 *status
= U_MEMORY_ALLOCATION_ERROR
;
2251 var
->variant
= T_CString_toUpperCase(pSubtag
);
2252 isAdded
= _addVariantToList(&(t
->variants
), var
);
2254 /* duplicated variant entry */
2258 pLastGoodPosition
= pSep
;
2259 next
= VART
| EXTS
| PRIV
;
/* extension singleton: finish the pending extension (if any) before
   starting a new one */
2264 if (_isExtensionSingleton(pSubtag
, subtagLen
)) {
2265 if (pExtension
!= NULL
) {
2266 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2267 /* the previous extension is incomplete */
2268 uprv_free(pExtension
);
2273 /* terminate the previous extension value */
2274 *pExtValueSubtagEnd
= 0;
2275 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2277 /* insert the extension to the list */
2278 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2279 pLastGoodPosition
= pExtValueSubtagEnd
;
2281 /* stop parsing here */
2282 uprv_free(pExtension
);
2288 /* create a new extension */
2289 pExtension
= (ExtensionListEntry
*)uprv_malloc(sizeof(ExtensionListEntry
));
2290 if (pExtension
== NULL
) {
2291 *status
= U_MEMORY_ALLOCATION_ERROR
;
2295 pExtension
->key
= T_CString_toLowerCase(pSubtag
);
2296 pExtension
->value
= NULL
; /* will be set later */
2299 * reset the start and the end location of extension value
2300 * subtags for this extension
2302 pExtValueSubtag
= NULL
;
2303 pExtValueSubtagEnd
= NULL
;
/* extension value subtag */
2310 if (_isExtensionSubtag(pSubtag
, subtagLen
)) {
2311 if (pExtValueSubtag
== NULL
) {
2312 /* if the start position of this extension's value is not yet set,
2313 this one is the first value subtag */
2314 pExtValueSubtag
= pSubtag
;
2317 /* Mark the end of this subtag */
2318 pExtValueSubtagEnd
= pSep
;
2319 next
= EXTS
| EXTV
| PRIV
;
/* private use singleton: finish the pending extension (if any), then
   validate the remaining private use value subtags */
2325 if (uprv_tolower(*pSubtag
) == PRIVATEUSE
) {
2328 if (pExtension
!= NULL
) {
2329 /* Process the last extension */
2330 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2331 /* the previous extension is incomplete */
2332 uprv_free(pExtension
);
2336 /* terminate the previous extension value */
2337 *pExtValueSubtagEnd
= 0;
2338 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2340 /* insert the extension to the list */
2341 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2342 pLastGoodPosition
= pExtValueSubtagEnd
;
2345 /* stop parsing here */
2346 uprv_free(pExtension
);
2353 /* The rest of part will be private use value subtags */
2354 if (pNext
== NULL
) {
2355 /* empty private use subtag */
2358 /* back up the private use value start position */
2359 pPrivuseVal
= pNext
;
2361 /* validate private use value subtags */
2377 subtagLen
= (int32_t)(pSep
- pSubtag
);
/* a subtag starting with PRIVUSE_VARIANT_PREFIX turns on privateuseVar */
2379 if (uprv_strncmp(pSubtag
, PRIVUSE_VARIANT_PREFIX
, uprv_strlen(PRIVUSE_VARIANT_PREFIX
)) == 0) {
2382 privateuseVar
= TRUE
;
2384 } else if (_isPrivateuseValueSubtag(pSubtag
, subtagLen
)) {
2385 pLastGoodPosition
= pSep
;
/* store the private use value only when at least one subtag was accepted */
2395 if (pLastGoodPosition
- pPrivuseVal
> 0) {
2396 *pLastGoodPosition
= 0;
2397 t
->privateuse
= T_CString_toLowerCase(pPrivuseVal
);
2399 /* No more subtags, exiting the parse loop */
2405 /* If we fell through here, it means this subtag is illegal - quit parsing */
2409 if (pExtension
!= NULL
) {
2410 /* Process the last extension */
2411 if (pExtValueSubtag
== NULL
|| pExtValueSubtagEnd
== NULL
) {
2412 /* the previous extension is incomplete */
2413 uprv_free(pExtension
);
2415 /* terminate the previous extension value */
2416 *pExtValueSubtagEnd
= 0;
2417 pExtension
->value
= T_CString_toLowerCase(pExtValueSubtag
);
2418 /* insert the extension to the list */
2419 if (_addExtensionToList(&(t
->extensions
), pExtension
, FALSE
)) {
2420 pLastGoodPosition
= pExtValueSubtagEnd
;
2422 uprv_free(pExtension
);
/* for a grandfathered match the original input length is reported instead
   of an offset into the rewritten buffer */
2427 if (parsedLen
!= NULL
) {
2428 *parsedLen
= (grandfatheredLen
> 0) ? grandfatheredLen
: (int32_t)(pLastGoodPosition
- t
->buf
);
/*
 * Releases a ULanguageTag.
 *
 * A NULL langtag is checked for first. langtag->buf is freed with
 * uprv_free, then the variants and extensions lists are walked when
 * present, reading each entry's next pointer into a local.
 * NOTE(review): the per-entry free calls and the release of langtag itself
 * are not visible in this excerpt.
 */
2439 ultag_close(ULanguageTag
* langtag
) {
2441 if (langtag
== NULL
) {
2445 uprv_free(langtag
->buf
);
2447 if (langtag
->variants
) {
2448 VariantListEntry
*curVar
= langtag
->variants
;
2450 VariantListEntry
*nextVar
= curVar
->next
;
2456 if (langtag
->extensions
) {
2457 ExtensionListEntry
*curExt
= langtag
->extensions
;
2459 ExtensionListEntry
*nextExt
= curExt
->next
;
/* Returns the language field of langtag; langtag is dereferenced without a NULL check. */
2469 ultag_getLanguage(const ULanguageTag
* langtag
) {
2470 return langtag
->language
;
/*
 * Returns the language of langtag after a lookup in DEPRECATEDLANGS.
 *
 * DEPRECATEDLANGS is scanned in pairs up to its NULL terminator: when entry
 * i equals langtag->language (compared with uprv_compareInvCharsAsAscii),
 * entry i + 1 is returned. Without a match, langtag->language is returned
 * unchanged.
 */
2475 ultag_getJDKLanguage(const ULanguageTag
* langtag
) {
2477 for (i
= 0; DEPRECATEDLANGS
[i
] != NULL
; i
+= 2) {
2478 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS
[i
], langtag
->language
) == 0) {
2479 return DEPRECATEDLANGS
[i
+ 1];
2482 return langtag
->language
;
/*
 * Returns langtag->extlang[idx] when idx is within [0, MAXEXTLANG).
 * NOTE(review): the value returned for an out-of-range idx is not visible
 * in this excerpt.
 */
2487 ultag_getExtlang(const ULanguageTag
* langtag
, int32_t idx
) {
2488 if (idx
>= 0 && idx
< MAXEXTLANG
) {
2489 return langtag
->extlang
[idx
];
/*
 * Iterates over the MAXEXTLANG slots of langtag->extlang and tests each for
 * a non-NULL entry; presumably counts them - the counter update and the
 * return statement are not visible in this excerpt.
 */
2495 ultag_getExtlangSize(const ULanguageTag
* langtag
) {
2498 for (i
= 0; i
< MAXEXTLANG
; i
++) {
2499 if (langtag
->extlang
[i
]) {
/* Returns the script field of langtag; langtag is dereferenced without a NULL check. */
2507 ultag_getScript(const ULanguageTag
* langtag
) {
2508 return langtag
->script
;
/* Returns the region field of langtag; langtag is dereferenced without a NULL check. */
2512 ultag_getRegion(const ULanguageTag
* langtag
) {
2513 return langtag
->region
;
/*
 * Variant lookup by index. The visible lines set up a NULL result pointer
 * and a cursor at the head of langtag->variants; presumably the list is
 * walked to position idx - the traversal and the return statement are not
 * visible in this excerpt.
 */
2517 ultag_getVariant(const ULanguageTag
* langtag
, int32_t idx
) {
2518 const char *var
= NULL
;
2519 VariantListEntry
*cur
= langtag
->variants
;
/*
 * Starts a cursor at the head of langtag->variants; presumably counts the
 * list entries - the loop and the return statement are not visible in this
 * excerpt.
 */
2533 ultag_getVariantsSize(const ULanguageTag
* langtag
) {
2535 VariantListEntry
*cur
= langtag
->variants
;
/*
 * Extension key lookup by index. The visible lines set up a NULL result
 * pointer and a cursor at the head of langtag->extensions; presumably the
 * list is walked to position idx - the traversal and the return statement
 * are not visible in this excerpt.
 */
2547 ultag_getExtensionKey(const ULanguageTag
* langtag
, int32_t idx
) {
2548 const char *key
= NULL
;
2549 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Extension value lookup by index. The visible lines set up a NULL result
 * pointer and a cursor at the head of langtag->extensions; presumably the
 * list is walked to position idx - the traversal and the return statement
 * are not visible in this excerpt.
 */
2563 ultag_getExtensionValue(const ULanguageTag
* langtag
, int32_t idx
) {
2564 const char *val
= NULL
;
2565 ExtensionListEntry
*cur
= langtag
->extensions
;
/*
 * Starts a cursor at the head of langtag->extensions; presumably counts the
 * list entries - the loop and the return statement are not visible in this
 * excerpt.
 */
2579 ultag_getExtensionsSize(const ULanguageTag
* langtag
) {
2581 ExtensionListEntry
*cur
= langtag
->extensions
;
/* Returns the privateuse field of langtag; langtag is dereferenced without a NULL check. */
2593 ultag_getPrivateUse(const ULanguageTag
* langtag
) {
2594 return langtag
->privateuse
;
/* Returns the grandfathered field of langtag; langtag is dereferenced without a NULL check. */
2599 ultag_getGrandfathered(const ULanguageTag
* langtag
) {
2600 return langtag
->grandfathered
;
2606 * -------------------------------------------------
2608 * Locale/BCP47 conversion APIs, exposed as uloc_*
2610 * -------------------------------------------------
/*
 * Converts a locale ID into a language tag written to langtag.
 *
 * Visible steps:
 *  1. A non-empty localeID is canonicalized into a 256-byte local buffer;
 *     any tmpStatus other than U_ZERO_ERROR (warnings included) sets
 *     *status to U_ILLEGAL_ARGUMENT_ERROR.
 *  2. When the keyword section starts at the very beginning of the
 *     canonical ID, the keywords are enumerated, and a keyword whose key is
 *     the single character PRIVATEUSE is handled as a "private use only"
 *     tag.
 *  3. Otherwise the tag is assembled by appending language, script, region,
 *     variants, keywords and private use in that order; each helper's
 *     return value is added to reslen.
 *
 * NOTE(review): the langtag and strict parameters are used below but their
 * declaration lines are not visible in this excerpt, and neither is the
 * final return statement.
 */
2612 U_CAPI
int32_t U_EXPORT2
2613 uloc_toLanguageTag(const char* localeID
,
2615 int32_t langtagCapacity
,
2617 UErrorCode
* status
) {
2618 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2619 char canonical
[256];
2621 UErrorCode tmpStatus
= U_ZERO_ERROR
;
2622 UBool hadPosix
= FALSE
;
2623 const char* pKeywordStart
;
2625 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2627 if (uprv_strlen(localeID
) > 0) {
2628 uloc_canonicalize(localeID
, canonical
, sizeof(canonical
), &tmpStatus
);
2629 if (tmpStatus
!= U_ZERO_ERROR
) {
2630 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2635 /* For handling special case - private use only tag */
2636 pKeywordStart
= locale_getKeywordsStart(canonical
);
2637 if (pKeywordStart
== canonical
) {
2638 UEnumeration
*kwdEnum
;
2642 kwdEnum
= uloc_openKeywords((const char*)canonical
, &tmpStatus
);
2643 if (kwdEnum
!= NULL
) {
2644 kwdCnt
= uenum_count(kwdEnum
, &tmpStatus
);
2649 key
= uenum_next(kwdEnum
, &len
, &tmpStatus
);
2650 if (len
== 1 && *key
== PRIVATEUSE
) {
2651 char buf
[ULOC_KEYWORD_AND_VALUES_CAPACITY
];
2652 buf
[0] = PRIVATEUSE
;
/* the value is read into buf starting at index 2; buf[0] was set above and
   the assignment of buf[1] is not visible here.
   NOTE(review): this call reads from localeID rather than canonical */
2654 len
= uloc_getKeywordValue(localeID
, key
, &buf
[2], sizeof(buf
) - 2, &tmpStatus
);
2655 if (U_SUCCESS(tmpStatus
)) {
2656 if (_isPrivateuseValueSubtags(&buf
[2], len
)) {
2657 /* return private use only tag */
2659 uprv_memcpy(langtag
, buf
, uprv_min(reslen
, langtagCapacity
));
2660 u_terminateChars(langtag
, langtagCapacity
, reslen
, status
);
2662 } else if (strict
) {
2663 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2666 /* if not strict mode, then "und" will be returned */
2668 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2673 uenum_close(kwdEnum
);
/* general case: each helper writes at langtag + reslen with the remaining
   capacity, and its return value is added to reslen */
2680 reslen
+= _appendLanguageToLanguageTag(canonical
, langtag
, langtagCapacity
, strict
, status
);
2681 reslen
+= _appendScriptToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2682 reslen
+= _appendRegionToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, status
);
2683 reslen
+= _appendVariantsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, &hadPosix
, status
);
2684 reslen
+= _appendKeywordsToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2685 reslen
+= _appendPrivateuseToLanguageTag(canonical
, langtag
+ reslen
, langtagCapacity
- reslen
, strict
, hadPosix
, status
);
2691 U_CAPI
int32_t U_EXPORT2
2692 uloc_forLanguageTag(const char* langtag
,
2694 int32_t localeIDCapacity
,
2695 int32_t* parsedLength
,
2696 UErrorCode
* status
) {
2699 const char *subtag
, *p
;
2702 UBool noRegion
= TRUE
;
2704 lt
= ultag_parse(langtag
, -1, parsedLength
, status
);
2705 if (U_FAILURE(*status
)) {
2710 subtag
= ultag_getExtlangSize(lt
) > 0 ? ultag_getExtlang(lt
, 0) : ultag_getLanguage(lt
);
2711 if (uprv_compareInvCharsAsAscii(subtag
, LANG_UND
) != 0) {
2712 len
= (int32_t)uprv_strlen(subtag
);
2714 if (reslen
< localeIDCapacity
) {
2715 uprv_memcpy(localeID
, subtag
, uprv_min(len
, localeIDCapacity
- reslen
));
2722 subtag
= ultag_getScript(lt
);
2723 len
= (int32_t)uprv_strlen(subtag
);
2725 if (reslen
< localeIDCapacity
) {
2726 *(localeID
+ reslen
) = LOCALE_SEP
;
2730 /* write out the script in title case */
2733 if (reslen
< localeIDCapacity
) {
2735 *(localeID
+ reslen
) = uprv_toupper(*p
);
2737 *(localeID
+ reslen
) = *p
;
2746 subtag
= ultag_getRegion(lt
);
2747 len
= (int32_t)uprv_strlen(subtag
);
2749 if (reslen
< localeIDCapacity
) {
2750 *(localeID
+ reslen
) = LOCALE_SEP
;
2753 /* write out the region in upper case */
2756 if (reslen
< localeIDCapacity
) {
2757 *(localeID
+ reslen
) = uprv_toupper(*p
);
2766 n
= ultag_getVariantsSize(lt
);
2769 if (reslen
< localeIDCapacity
) {
2770 *(localeID
+ reslen
) = LOCALE_SEP
;
2775 for (i
= 0; i
< n
; i
++) {
2776 subtag
= ultag_getVariant(lt
, i
);
2777 if (reslen
< localeIDCapacity
) {
2778 *(localeID
+ reslen
) = LOCALE_SEP
;
2781 /* write out the variant in upper case */
2784 if (reslen
< localeIDCapacity
) {
2785 *(localeID
+ reslen
) = uprv_toupper(*p
);
2794 n
= ultag_getExtensionsSize(lt
);
2795 subtag
= ultag_getPrivateUse(lt
);
2796 if (n
> 0 || uprv_strlen(subtag
) > 0) {
2797 if (reslen
== 0 && n
> 0) {
2798 /* need a language */
2799 if (reslen
< localeIDCapacity
) {
2800 uprv_memcpy(localeID
+ reslen
, LANG_UND
, uprv_min(LANG_UND_LEN
, localeIDCapacity
- reslen
));
2802 reslen
+= LANG_UND_LEN
;
2804 len
= _appendKeywords(lt
, localeID
+ reslen
, localeIDCapacity
- reslen
, status
);
2809 return u_terminateChars(localeID
, localeIDCapacity
, reslen
, status
);