1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
12 LocaleBuilderTest::LocaleBuilderTest()
16 LocaleBuilderTest::~LocaleBuilderTest()
20 void LocaleBuilderTest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
23 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute
);
24 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed
);
25 TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed
);
26 TESTCASE_AUTO(TestLocaleBuilder
);
27 TESTCASE_AUTO(TestLocaleBuilderBasic
);
28 TESTCASE_AUTO(TestPosixCases
);
29 TESTCASE_AUTO(TestSetExtensionOthers
);
30 TESTCASE_AUTO(TestSetExtensionPU
);
31 TESTCASE_AUTO(TestSetExtensionT
);
32 TESTCASE_AUTO(TestSetExtensionU
);
33 TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed
);
34 TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed
);
35 TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed
);
36 TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed
);
37 TESTCASE_AUTO(TestSetExtensionValidateTIllFormed
);
38 TESTCASE_AUTO(TestSetExtensionValidateTWellFormed
);
39 TESTCASE_AUTO(TestSetExtensionValidateUIllFormed
);
40 TESTCASE_AUTO(TestSetExtensionValidateUWellFormed
);
41 TESTCASE_AUTO(TestSetLanguageIllFormed
);
42 TESTCASE_AUTO(TestSetLanguageWellFormed
);
43 TESTCASE_AUTO(TestSetLocale
);
44 TESTCASE_AUTO(TestSetRegionIllFormed
);
45 TESTCASE_AUTO(TestSetRegionWellFormed
);
46 TESTCASE_AUTO(TestSetScriptIllFormed
);
47 TESTCASE_AUTO(TestSetScriptWellFormed
);
48 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey
);
49 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue
);
50 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed
);
51 TESTCASE_AUTO(TestSetVariantIllFormed
);
52 TESTCASE_AUTO(TestSetVariantWellFormed
);
56 void LocaleBuilderTest::Verify(LocaleBuilder
& bld
, const char* expected
, const char* msg
) {
57 UErrorCode status
= U_ZERO_ERROR
;
58 Locale loc
= bld
.build(status
);
59 if (U_FAILURE(status
)) {
60 errln(msg
, u_errorName(status
));
62 std::string tag
= loc
.toLanguageTag
<std::string
>(status
);
63 if (U_FAILURE(status
)) {
64 errln("loc.toLanguageTag() got Error: %s\n",
67 if (tag
!= expected
) {
68 errln("should get \"%s\", but got \"%s\"\n", expected
, tag
.c_str());
72 void LocaleBuilderTest::TestLocaleBuilder() {
73 // The following test data are copied from
74 // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
79 // "K": +1 = Unicode locale key / +2 = Unicode locale type
80 // "A": +1 = Unicode locale attribute
81 // "E": +1 = extension letter / +2 = extension value
82 // "P": +1 = private use
84 // "B": +1 = BCP47 language tag
86 // "N": Clear extensions
87 // "D": +1 = Unicode locale attribute to be removed
88 // "X": indicates an exception must be thrown
89 // "T": +1 = expected language tag / +2 = expected locale string
90 const char* TESTCASES
[][14] = {
91 {"L", "en", "R", "us", "T", "en-US", "en_US"},
92 {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
93 {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
94 {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
96 {"R", "us", "T", "und-US", "_US"},
98 {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
99 {"R", "123", "L", "it", "R", "", "T", "it", "it"},
100 {"R", "123", "L", "en", "T", "en-123", "en_123"},
101 {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
102 {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
103 {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
105 {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
106 {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
107 {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
108 {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
110 {"U", "en_US", "T", "en-US", "en_US"},
111 {"U", "en_US_WIN", "X"},
112 {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
113 "fr-FR-1606nict-u-ca-gregory-x-test",
114 "fr_FR_1606NICT@calendar=gregorian;x=test"},
115 {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
116 {"B", "und-CA", "T", "und-CA", "_CA"},
117 // Blocked by ICU-20327
118 // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
119 // "en_US_VAR@x=test"},
120 {"B", "en-US-VAR", "X"},
121 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
122 "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
123 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
124 "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
125 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
126 "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
127 "ja_JP@attribute=attr1;calendar=gregorian"},
128 {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
129 "en@colnumeric=yes"},
130 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
131 "th_TH@numbers=thai"},
132 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
133 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
134 {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
135 {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
136 {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
137 {"E", "a", "x", "X"},
138 {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
139 // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
140 // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
141 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
142 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
143 // key = alphanum alpha
144 {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
145 "en@0a=yes;attribute=aaa-bbb"},
146 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
147 "fr_FR@x=yoshito-icu"},
148 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
149 "ja_JP@calendar=japanese"},
150 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
151 "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
152 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
153 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
154 "th@calendar=gregorian;numbers=thai"},
155 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
156 "en_US@timezone=America/New_York"},
157 {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
158 "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
159 "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
160 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
161 "en_US@calendar=gregorian"},
162 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
163 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
164 {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
165 "en_US@colnumeric=yes"},
166 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
167 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
168 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
169 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
170 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
171 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
172 {"L", "en", "A", "aa", "X"},
173 {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
175 UErrorCode status
= U_ZERO_ERROR
;
177 for (int tidx
= 0; tidx
< UPRV_LENGTHOF(TESTCASES
); tidx
++) {
178 const char* (&testCase
)[14] = TESTCASES
[tidx
];
180 for (int p
= 0; p
< UPRV_LENGTHOF(testCase
); p
++) {
181 if (testCase
[p
] == nullptr) {
182 actions
+= " (nullptr)";
185 if (p
> 0) actions
+= " ";
186 actions
+= testCase
[p
];
190 status
= U_ZERO_ERROR
;
193 method
= testCase
[i
++];
194 if (strcmp("L", method
) == 0) {
195 bld
.setLanguage(testCase
[i
++]).build(status
);
196 } else if (strcmp("S", method
) == 0) {
197 bld
.setScript(testCase
[i
++]).build(status
);
198 } else if (strcmp("R", method
) == 0) {
199 bld
.setRegion(testCase
[i
++]).build(status
);
200 } else if (strcmp("V", method
) == 0) {
201 bld
.setVariant(testCase
[i
++]).build(status
);
202 } else if (strcmp("K", method
) == 0) {
203 const char* key
= testCase
[i
++];
204 const char* type
= testCase
[i
++];
205 bld
.setUnicodeLocaleKeyword(key
, type
).build(status
);
206 } else if (strcmp("A", method
) == 0) {
207 bld
.addUnicodeLocaleAttribute(testCase
[i
++]).build(status
);
208 } else if (strcmp("E", method
) == 0) {
209 const char* key
= testCase
[i
++];
210 const char* value
= testCase
[i
++];
211 bld
.setExtension(key
[0], value
).build(status
);
212 } else if (strcmp("P", method
) == 0) {
213 bld
.setExtension('x', testCase
[i
++]).build(status
);
214 } else if (strcmp("U", method
) == 0) {
215 bld
.setLocale(Locale(testCase
[i
++])).build(status
);
216 } else if (strcmp("B", method
) == 0) {
217 bld
.setLanguageTag(testCase
[i
++]).build(status
);
220 else if (strcmp("C", method
) == 0) {
221 bld
.clear().build(status
);
222 } else if (strcmp("N", method
) == 0) {
223 bld
.clearExtensions().build(status
);
224 } else if (strcmp("D", method
) == 0) {
225 bld
.removeUnicodeLocaleAttribute(testCase
[i
++]).build(status
);
228 else if (strcmp("X", method
) == 0) {
229 if (U_SUCCESS(status
)) {
230 errln("FAIL: No error return - test case: %s", actions
.c_str());
232 } else if (strcmp("T", method
) == 0) {
233 status
= U_ZERO_ERROR
;
234 Locale loc
= bld
.build(status
);
235 if (U_FAILURE(status
) ||
236 strcmp(loc
.getName(), testCase
[i
+ 1]) != 0) {
237 errln("FAIL: Wrong locale ID - %s %s %s", loc
.getName(),
238 " for test case: ", actions
.c_str());
240 std::string langtag
= loc
.toLanguageTag
<std::string
>(status
);
241 if (U_FAILURE(status
) || langtag
!= testCase
[i
]) {
242 errln("FAIL: Wrong language tag - %s %s %s", langtag
.c_str(),
243 " for test case: ", actions
.c_str());
247 // Unknown test method
248 errln("Unknown test case method: There is an error in the test case data.");
251 if (U_FAILURE(status
)) {
252 if (strcmp("X", testCase
[i
]) == 0) {
253 // This failure is expected
256 errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i
,
257 " in test case: ", actions
.c_str());
261 if (strcmp("T", method
) == 0) {
268 void LocaleBuilderTest::TestLocaleBuilderBasic() {
270 bld
.setLanguage("zh");
271 Verify(bld
, "zh", "setLanguage('zh') got Error: %s\n");
273 bld
.setScript("Hant");
274 Verify(bld
, "zh-Hant", "setScript('Hant') got Error: %s\n");
277 Verify(bld
, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
280 bld
.setScript("Hans");
281 Verify(bld
, "zh-Hans-HK",
282 "setRegion('HK') and setScript('Hans') got Error: %s\n");
284 bld
.setVariant("revised");
285 Verify(bld
, "zh-Hans-HK-revised",
286 "setVariant('revised') got Error: %s\n");
288 bld
.setUnicodeLocaleKeyword("nu", "thai");
289 Verify(bld
, "zh-Hans-HK-revised-u-nu-thai",
290 "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
292 bld
.setUnicodeLocaleKeyword("co", "pinyin");
293 Verify(bld
, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
294 "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
296 bld
.setUnicodeLocaleKeyword("nu", "latn");
297 Verify(bld
, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
298 "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
300 bld
.setUnicodeLocaleKeyword("nu", nullptr);
301 Verify(bld
, "zh-Hans-HK-revised-u-co-pinyin",
302 "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
304 bld
.setUnicodeLocaleKeyword("co", nullptr);
305 Verify(bld
, "zh-Hans-HK-revised",
306 "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
309 Verify(bld
, "zh-HK-revised",
310 "setScript('') got Error: %s\n");
314 "setVariant('') got Error: %s\n");
318 "setRegion('') got Error: %s\n");
321 void LocaleBuilderTest::TestSetLanguageWellFormed() {
322 // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
323 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
324 // ICUTC decided to also support alpha{4}
325 static const char* wellFormedLanguages
[] = {
363 for (const char* lang
: wellFormedLanguages
) {
364 UErrorCode status
= U_ZERO_ERROR
;
366 bld
.setLanguage(lang
);
367 Locale loc
= bld
.build(status
);
368 if (U_FAILURE(status
)) {
369 errln("setLanguage(\"%s\") got Error: %s\n",
370 lang
, u_errorName(status
));
375 void LocaleBuilderTest::TestSetLanguageIllFormed() {
376 static const char* illFormed
[] = {
405 // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
430 for (const char* ill
: illFormed
) {
431 UErrorCode status
= U_ZERO_ERROR
;
433 bld
.setLanguage(ill
);
434 Locale loc
= bld
.build(status
);
435 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
436 errln("setLanguage(\"%s\") should fail but has no Error\n", ill
);
441 void LocaleBuilderTest::TestSetScriptWellFormed() {
442 // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
443 // unicode_script_subtag = alpha{4} ;
444 static const char* wellFormedScripts
[] = {
458 for (const char* script
: wellFormedScripts
) {
459 UErrorCode status
= U_ZERO_ERROR
;
461 bld
.setScript(script
);
462 Locale loc
= bld
.build(status
);
463 if (U_FAILURE(status
)) {
464 errln("setScript(\"%s\") got Error: %s\n",
465 script
, u_errorName(status
));
470 void LocaleBuilderTest::TestSetScriptIllFormed() {
471 static const char* illFormed
[] = {
528 for (const char* ill
: illFormed
) {
529 UErrorCode status
= U_ZERO_ERROR
;
532 Locale loc
= bld
.build(status
);
533 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
534 errln("setScript(\"%s\") should fail but has no Error\n", ill
);
539 void LocaleBuilderTest::TestSetRegionWellFormed() {
540 // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
541 // unicode_region_subtag = (alpha{2} | digit{3})
542 static const char* wellFormedRegions
[] = {
557 for (const char* region
: wellFormedRegions
) {
558 UErrorCode status
= U_ZERO_ERROR
;
560 bld
.setRegion(region
);
561 Locale loc
= bld
.build(status
);
562 if (U_FAILURE(status
)) {
563 errln("setRegion(\"%s\") got Error: %s\n",
564 region
, u_errorName(status
));
569 void LocaleBuilderTest::TestSetRegionIllFormed() {
570 static const char* illFormed
[] = {
627 for (const char* ill
: illFormed
) {
628 UErrorCode status
= U_ZERO_ERROR
;
631 Locale loc
= bld
.build(status
);
632 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
633 errln("setRegion(\"%s\") should fail but has no Error\n", ill
);
638 void LocaleBuilderTest::TestSetVariantWellFormed() {
639 // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
640 // (sep unicode_variant_subtag)*
641 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
642 static const char* wellFormedVariants
[] = {
687 // (sep unicode_variant_subtag)*
691 "9ax3-xByD9-adfk934a",
696 "9ax3_xByD9_adfk934a",
698 "9ax3-xByD9_adfk934a",
699 "9ax3_xByD9-adfk934a",
701 for (const char* variant
: wellFormedVariants
) {
702 UErrorCode status
= U_ZERO_ERROR
;
704 bld
.setVariant(variant
);
705 Locale loc
= bld
.build(status
);
706 if (U_FAILURE(status
)) {
707 errln("setVariant(\"%s\") got Error: %s\n",
708 variant
, u_errorName(status
));
713 void LocaleBuilderTest::TestSetVariantIllFormed() {
714 static const char* illFormed
[] = {
807 for (const char* ill
: illFormed
) {
808 UErrorCode status
= U_ZERO_ERROR
;
811 Locale loc
= bld
.build(status
);
812 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
813 errln("setVariant(\"%s\") should fail but has no Error\n", ill
);
818 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
819 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
820 // keyword = key (sep type)? ;
821 // key = alphanum alpha ;
822 // type = alphanum{3,8} (sep alphanum{3,8})* ;
823 static const char* wellFormed_key_value
[] = {
826 "0Z", "1ZB30zk9-abc",
827 "cZ", "2ck30zfZ-adsf023-234kcZ",
831 for (int i
= 0; i
< UPRV_LENGTHOF(wellFormed_key_value
); i
+= 2) {
832 UErrorCode status
= U_ZERO_ERROR
;
834 bld
.setUnicodeLocaleKeyword(wellFormed_key_value
[i
],
835 wellFormed_key_value
[i
+ 1]);
836 Locale loc
= bld
.build(status
);
837 if (U_FAILURE(status
)) {
838 errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
839 wellFormed_key_value
[i
],
840 wellFormed_key_value
[i
+ 1],
841 u_errorName(status
));
846 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
847 static const char* illFormed
[] = {
855 for (const char* ill
: illFormed
) {
856 UErrorCode status
= U_ZERO_ERROR
;
858 bld
.setUnicodeLocaleKeyword(ill
, "abc");
859 Locale loc
= bld
.build(status
);
860 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
861 errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
867 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
868 static const char* illFormed
[] = {
876 "2ck30zfk9-adsf023-234kcZ",
878 for (const char* ill
: illFormed
) {
879 UErrorCode status
= U_ZERO_ERROR
;
881 bld
.setUnicodeLocaleKeyword("ab", ill
);
882 Locale loc
= bld
.build(status
);
883 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
884 errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
890 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
892 UErrorCode status
= U_ZERO_ERROR
;
893 Locale loc
= bld
.setLanguage("fr")
894 .addUnicodeLocaleAttribute("abc")
895 .addUnicodeLocaleAttribute("aBc")
896 .addUnicodeLocaleAttribute("EFG")
897 .addUnicodeLocaleAttribute("efghi")
898 .addUnicodeLocaleAttribute("efgh")
899 .addUnicodeLocaleAttribute("efGhi")
900 .addUnicodeLocaleAttribute("EFg")
901 .addUnicodeLocaleAttribute("hijk")
902 .addUnicodeLocaleAttribute("EFG")
903 .addUnicodeLocaleAttribute("HiJK")
904 .addUnicodeLocaleAttribute("aBc")
906 if (U_FAILURE(status
)) {
907 errln("addUnicodeLocaleAttribute() got Error: %s\n",
908 u_errorName(status
));
910 std::string
expected("fr-u-abc-efg-efgh-efghi-hijk");
911 std::string actual
= loc
.toLanguageTag
<std::string
>(status
);
912 if (U_FAILURE(status
) || expected
!= actual
) {
913 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
916 // remove "efgh" in the middle with different casing.
917 loc
= bld
.removeUnicodeLocaleAttribute("eFgH").build(status
);
918 if (U_FAILURE(status
)) {
919 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
920 u_errorName(status
));
922 expected
= "fr-u-abc-efg-efghi-hijk";
923 actual
= loc
.toLanguageTag
<std::string
>(status
);
924 if (U_FAILURE(status
) || expected
!= actual
) {
925 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
928 // remove non-existing attributes.
929 loc
= bld
.removeUnicodeLocaleAttribute("efgh").build(status
);
930 if (U_FAILURE(status
)) {
931 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
932 u_errorName(status
));
934 actual
= loc
.toLanguageTag
<std::string
>(status
);
935 if (U_FAILURE(status
) || expected
!= actual
) {
936 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
939 // remove "abc" in the beginning with different casing.
940 loc
= bld
.removeUnicodeLocaleAttribute("ABC").build(status
);
941 if (U_FAILURE(status
)) {
942 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
943 u_errorName(status
));
945 expected
= "fr-u-efg-efghi-hijk";
946 actual
= loc
.toLanguageTag
<std::string
>(status
);
947 if (U_FAILURE(status
) || expected
!= actual
) {
948 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
951 // remove non-existing substring in the end.
952 loc
= bld
.removeUnicodeLocaleAttribute("hij").build(status
);
953 if (U_FAILURE(status
)) {
954 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
955 u_errorName(status
));
957 actual
= loc
.toLanguageTag
<std::string
>(status
);
958 if (U_FAILURE(status
) || expected
!= actual
) {
959 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
962 // remove "hijk" in the end with different casing.
963 loc
= bld
.removeUnicodeLocaleAttribute("hIJK").build(status
);
964 if (U_FAILURE(status
)) {
965 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
966 u_errorName(status
));
968 expected
= "fr-u-efg-efghi";
969 actual
= loc
.toLanguageTag
<std::string
>(status
);
970 if (U_FAILURE(status
) || expected
!= actual
) {
971 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
974 // remove "efghi" in the end with different casing.
975 loc
= bld
.removeUnicodeLocaleAttribute("EFGhi").build(status
);
976 if (U_FAILURE(status
)) {
977 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
978 u_errorName(status
));
980 expected
= "fr-u-efg";
981 actual
= loc
.toLanguageTag
<std::string
>(status
);
982 if (U_FAILURE(status
) || expected
!= actual
) {
983 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
986 // remove "efg" as the only one, with different casing.
987 loc
= bld
.removeUnicodeLocaleAttribute("EFG").build(status
);
988 if (U_FAILURE(status
)) {
989 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
990 u_errorName(status
));
993 actual
= loc
.toLanguageTag
<std::string
>(status
);
994 if (U_FAILURE(status
) || expected
!= actual
) {
995 errln("Should get \"%s\" but get \"%s\"\n", expected
.c_str(), actual
.c_str());
1000 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1001 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1002 // attribute = alphanum{3,8} ;
1003 static const char* wellFormedAttributes
[] = {
1050 for (int i
= 0; i
< UPRV_LENGTHOF(wellFormedAttributes
); i
++) {
1054 UErrorCode status
= U_ZERO_ERROR
;
1055 bld
.addUnicodeLocaleAttribute(wellFormedAttributes
[i
]);
1056 Locale loc
= bld
.build(status
);
1057 if (U_FAILURE(status
)) {
1058 errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1059 wellFormedAttributes
[i
], u_errorName(status
));
1062 bld
.removeUnicodeLocaleAttribute(wellFormedAttributes
[i
- 1]);
1063 loc
= bld
.build(status
);
1064 if (U_FAILURE(status
)) {
1065 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1066 wellFormedAttributes
[i
- 1], u_errorName(status
));
1068 bld
.removeUnicodeLocaleAttribute(wellFormedAttributes
[i
- 3]);
1069 loc
= bld
.build(status
);
1070 if (U_FAILURE(status
)) {
1071 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1072 wellFormedAttributes
[i
- 3], u_errorName(status
));
1078 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1079 static const char* illFormed
[] = {
1089 "2ck30zfk9-adsf023-234kcZ",
1091 for (const char* ill
: illFormed
) {
1092 UErrorCode status
= U_ZERO_ERROR
;
1094 bld
.addUnicodeLocaleAttribute(ill
);
1095 Locale loc
= bld
.build(status
);
1096 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1097 errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1103 void LocaleBuilderTest::TestSetExtensionU() {
1105 bld
.setLanguage("zh");
1107 "setLanguage(\"zh\") got Error: %s\n");
1109 bld
.setExtension('u', "co-stroke");
1110 Verify(bld
, "zh-u-co-stroke",
1111 "setExtension('u', \"co-stroke\") got Error: %s\n");
1113 bld
.setExtension('U', "ca-islamic");
1114 Verify(bld
, "zh-u-ca-islamic",
1115 "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1117 bld
.setExtension('u', "ca-chinese");
1118 Verify(bld
, "zh-u-ca-chinese",
1119 "setExtension('u', \"ca-chinese\") got Error: %s\n");
1121 bld
.setExtension('U', "co-pinyin");
1122 Verify(bld
, "zh-u-co-pinyin",
1123 "setExtension('U', \"co-pinyin\") got Error: %s\n");
1125 bld
.setRegion("TW");
1126 Verify(bld
, "zh-TW-u-co-pinyin",
1127 "setRegion(\"TW\") got Error: %s\n");
1129 bld
.setExtension('U', "");
1130 Verify(bld
, "zh-TW",
1131 "setExtension('U', \"\") got Error: %s\n");
1133 bld
.setExtension('u', "abc-defg-kr-face");
1134 Verify(bld
, "zh-TW-u-abc-defg-kr-face",
1135 "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1137 bld
.setExtension('U', "ca-japanese");
1138 Verify(bld
, "zh-TW-u-ca-japanese",
1139 "setExtension('U', \"ca-japanese\") got Error: %s\n");
1143 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1144 static const char* wellFormedExtensions
[] = {
1146 // keyword = key (sep type)? ;
1147 // key = alphanum alpha ;
1148 // type = alphanum{3,8} (sep alphanum{3,8})* ;
1159 "0z-ZZ-123-cd-efghijkl",
1170 // (sep attribute)+ (sep keyword)*
1172 "K2K-12345678-zz-0z",
1173 "K2K-12345678-9z-AZ-abc",
1174 "K2K-12345678-zz-9A-234",
1175 "K2K-12345678-zk0-abc-efg-zz-9k-234",
1177 for (const char* extension
: wellFormedExtensions
) {
1178 UErrorCode status
= U_ZERO_ERROR
;
1180 bld
.setExtension('u', extension
);
1181 Locale loc
= bld
.build(status
);
1182 if (U_FAILURE(status
)) {
1183 errln("setExtension('u', \"%s\") got Error: %s\n",
1184 extension
, u_errorName(status
));
1189 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1190 static const char* illFormed
[] = {
1212 "abcdefgh-abcdefghi",
1215 "abcdefgh-a2345678z",
1217 for (const char* ill
: illFormed
) {
1218 UErrorCode status
= U_ZERO_ERROR
;
1220 bld
.setExtension('u', ill
);
1221 Locale loc
= bld
.build(status
);
1222 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1223 errln("setExtension('u', \"%s\") should fail but has no Error\n",
1229 void LocaleBuilderTest::TestSetExtensionT() {
1231 bld
.setLanguage("fr");
1233 "setLanguage(\"fr\") got Error: %s\n");
1235 bld
.setExtension('T', "zh");
1236 Verify(bld
, "fr-t-zh",
1237 "setExtension('T', \"zh\") got Error: %s\n");
1239 bld
.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1240 Verify(bld
, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1241 "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1243 bld
.setExtension('T', "a9-123");
1244 Verify(bld
, "fr-t-a9-123",
1245 "setExtension('T', \"a9-123\") got Error: %s\n");
1247 bld
.setRegion("MX");
1248 Verify(bld
, "fr-MX-t-a9-123",
1249 "setRegion(\"MX\") got Error: %s\n");
1251 bld
.setScript("Hans");
1252 Verify(bld
, "fr-Hans-MX-t-a9-123",
1253 "setScript(\"Hans\") got Error: %s\n");
1255 bld
.setVariant("9abc-abcde");
1256 Verify(bld
, "fr-Hans-MX-9abc-abcde-t-a9-123",
1257 "setVariant(\"9abc-abcde\") got Error: %s\n");
1259 bld
.setExtension('T', "");
1260 Verify(bld
, "fr-Hans-MX-9abc-abcde",
1261 "bld.setExtension('T', \"\") got Error: %s\n");
1264 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1265 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1266 static const char* wellFormedExtensions
[] = {
1268 // tlang = unicode_language_subtag (sep unicode_script_subtag)?
1269 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1270 // unicode_language_subtag
1275 // unicode_language_subtag sep unicode_script_subtag
1279 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1285 "ABCDEFGH-Thai-456",
1286 // unicode_language_subtag sep unicode_region_subtag
1293 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1294 // sep (sep unicode_variant_subtag)*
1296 "abc-arab-RU-3abc-abcdef",
1297 "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1298 "en-latn-409-xafsa",
1299 "abc-arab-123-ADASDF",
1300 "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1304 "z9-abcde123-a1-abcde",
1305 // tlang (sep tfield)*
1308 "fr-123-z9-abcde123-a1-abcde",
1309 "fr-Latn-FR-z9-abcde123-a1-abcde",
1310 "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1311 "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1313 for (const char* extension
: wellFormedExtensions
) {
1314 UErrorCode status
= U_ZERO_ERROR
;
1316 bld
.setExtension('t', extension
);
1317 Locale loc
= bld
.build(status
);
1318 if (U_FAILURE(status
)) {
1319 errln("setExtension('t', \"%s\") got Error: %s\n",
1320 extension
, u_errorName(status
));
1325 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1326 static const char* illFormed
[] = {
1333 // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
1347 "gab-Thai-TH-0bde-a1",
1348 "gab-Thai-TH-0bde-3b",
1349 "gab-Thai-TH-0bde-z9-a1",
1350 "gab-Thai-TH-0bde-z9-3b",
1351 "gab-Thai-TH-0bde-z9-abcde123-3b",
1352 "gab-Thai-TH-0bde-z9-abcde123-ab",
1353 "gab-Thai-TH-0bde-z9-abcde123-ab",
1354 "gab-Thai-TH-0bde-z9-abcde123-a1",
1355 "gab-Thai-TH-0bde-z9-abcde123-a1-",
1356 "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1357 "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1359 for (const char* ill
: illFormed
) {
1360 UErrorCode status
= U_ZERO_ERROR
;
1362 bld
.setExtension('t', ill
);
1363 Locale loc
= bld
.build(status
);
1364 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1365 errln("setExtension('t', \"%s\") should fail but has no Error\n",
1371 void LocaleBuilderTest::TestSetExtensionPU() {
1373 bld
.setLanguage("ar");
1375 "setLanguage(\"ar\") got Error: %s\n");
1377 bld
.setExtension('X', "a-b-c-d-e");
1378 Verify(bld
, "ar-x-a-b-c-d-e",
1379 "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1381 bld
.setExtension('x', "0-1-2-3");
1382 Verify(bld
, "ar-x-0-1-2-3",
1383 "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1385 bld
.setExtension('X', "0-12345678-x-x");
1386 Verify(bld
, "ar-x-0-12345678-x-x",
1387 "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1389 bld
.setRegion("TH");
1390 Verify(bld
, "ar-TH-x-0-12345678-x-x",
1391 "setRegion(\"TH\") got Error: %s\n");
1393 bld
.setExtension('X', "");
1394 Verify(bld
, "ar-TH",
1395 "setExtension(\"X\") got Error: %s\n");
1398 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1399 // private use value = alphanum{1,8} (sep alphanum{1,8})*
1400 static const char* wellFormedExtensions
[] = {
1401 "a", // Short subtag
1402 "z", // Short subtag
1403 "0", // Short subtag, digit
1404 "9", // Short subtag, digit
1405 "a-0", // Two short subtag, alpha and digit
1406 "9-z", // Two short subtag, digit and alpha
1409 "abcefghi", // Long subtag
1413 "0a-ab-87654321", // Three subtags
1414 "87654321-ab-00-3A", // Four subtags
1415 "a-9-87654321", // Three subtags with short and long subtags
1418 for (const char* extension
: wellFormedExtensions
) {
1419 UErrorCode status
= U_ZERO_ERROR
;
1421 bld
.setExtension('x', extension
);
1422 Locale loc
= bld
.build(status
);
1423 if (U_FAILURE(status
)) {
1424 errln("setExtension('x', \"%s\") got Error: %s\n",
1425 extension
, u_errorName(status
));
1430 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1431 static const char* illFormed
[] = {
1432 "123456789", // Too long
1433 "abcdefghi", // Too long
1434 "ab-123456789", // Second subtag too long
1435 "abcdefghi-12", // First subtag too long
1436 "a-ab-987654321", // Third subtag too long
1437 "987654321-a-0-3", // First subtag too long
1439 for (const char* ill
: illFormed
) {
1440 UErrorCode status
= U_ZERO_ERROR
;
1442 bld
.setExtension('x', ill
);
1443 Locale loc
= bld
.build(status
);
1444 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1445 errln("setExtension('x', \"%s\") should fail but has no Error\n",
// Exercises setExtension() with the singleton keys 'Z', '0' and 'a'/'A' on a builder whose
// language is "fr", interleaved with setRegion() and setScript(). After each step Verify()
// receives the builder, the expected language tag, and the message to use for a failure.
// Each message names the call that was just made, so a failure points at the right step.
1451 void LocaleBuilderTest::TestSetExtensionOthers() {
1453 bld
.setLanguage("fr");
1455 "setLanguage(\"fr\") got Error: %s\n");
// The key is passed in upper case; the expected tag carries it in lower case ("z").
1457 bld
.setExtension('Z', "ab");
1458 Verify(bld
, "fr-z-ab",
1459 "setExtension('Z', \"ab\") got Error: %s\n");
// The expected tag lists the '0' extension before the earlier 'z' extension.
1461 bld
.setExtension('0', "xyz12345-abcdefg");
1462 Verify(bld
, "fr-0-xyz12345-abcdefg-z-ab",
1463 "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
// The mixed-case value "ABcdef" is expected back in lower case, with 'a' placed
// between the '0' and 'z' extensions.
1465 bld
.setExtension('a', "01-12345678-ABcdef");
1466 Verify(bld
, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1467 "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
// The region is expected ahead of all extensions.
1469 bld
.setRegion("TH");
1470 Verify(bld
, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1471 "setRegion(\"TH\") got Error: %s\n");
// The script is expected between the language and the region.
1473 bld
.setScript("Arab");
1474 Verify(bld
, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1475 "setScript(\"Arab\") got Error: %s\n");
// Setting key 'A' is expected to replace the value stored earlier under 'a'.
1477 bld
.setExtension('A', "97");
1478 Verify(bld
, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1479 "setExtension('A', \"97\") got Error: %s\n");
// An empty value is expected to drop the 'a' extension from the tag.
1481 bld
.setExtension('a', "");
1482 Verify(bld
, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1483 "setExtension('a', \"\") got Error: %s\n");
// Likewise, an empty value is expected to drop the '0' extension, leaving only 'z'.
1485 bld
.setExtension('0', "");
1486 Verify(bld
, "fr-Arab-TH-z-ab",
1487 "setExtension('0', \"\") got Error: %s\n");
// Verifies that well-formed extension values are accepted for extension keys, in two passes:
//  1. every entry of wellFormedExtensions is set under a key ch and build() must succeed;
//  2. every character of someChars is tried as a key, with a value picked from
//     wellFormedExtensions, and the status of build() is checked against what is expected
//     for that kind of character.
1490 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1491 static const char* wellFormedExtensions
[] = {
1499 "87654321-ab-00-3A",
// Lower-case letters used to pick extension keys in the first pass.
1502 const char * aToZ
= "abcdefghijklmnopqrstuvwxyz";
1503 const int32_t aToZLen
= static_cast<int32_t>(uprv_strlen(aToZ
));
// Pass 1: each well-formed value must build without error.
1505 for (const char* extension
: wellFormedExtensions
) {
// Advance i cyclically over the indices of aToZ.
// NOTE(review): ch is presumably aToZ[i] - confirm against the declaration of ch.
1507 i
= (i
+ 1) % aToZLen
;
1508 UErrorCode status
= U_ZERO_ERROR
;
// setExtension() receives no status argument; any error surfaces through build(status).
1510 bld
.setExtension(ch
, extension
);
1511 Locale loc
= bld
.build(status
);
1512 if (U_FAILURE(status
)) {
1513 errln("setExtension('%c', \"%s\") got Error: %s\n",
1514 ch
, extension
, u_errorName(status
));
// Pass 2: candidate keys - ASCII letters in both cases, digits, and punctuation.
1518 const char* someChars
=
1519 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1520 const int32_t someCharsLen
= static_cast<int32_t>(uprv_strlen(someChars
));
1521 for (int32_t i
= 0; i
< someCharsLen
; i
++) {
1522 char ch
= someChars
[i
];
1523 UErrorCode status
= U_ZERO_ERROR
;
// The value is chosen by the key's character code modulo the array length, so
// different keys exercise different well-formed values.
1525 bld
.setExtension(ch
, wellFormedExtensions
[ch
% UPRV_LENGTHOF(wellFormedExtensions
)]);
1526 Locale loc
= bld
.build(status
);
// Letters and digits are the keys for which success may be required.
1527 if (uprv_isASCIILetter(ch
) || ('0' <= ch
&& ch
<= '9')) {
// The keys t/T, u/U and x/X are excluded from the success check.
1528 if (ch
!= 't' && ch
!= 'T' && ch
!= 'u' && ch
!= 'U' && ch
!= 'x' && ch
!= 'X') {
1529 if (U_FAILURE(status
)) {
1530 errln("setExtension('%c', \"%s\") got Error: %s\n",
1531 ch
, wellFormedExtensions
[ch
% UPRV_LENGTHOF(wellFormedExtensions
)], u_errorName(status
));
// NOTE(review): presumably the branch for keys that are neither letters nor digits,
// which must be rejected with U_ILLEGAL_ARGUMENT_ERROR - confirm the enclosing else.
1535 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1536 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1537 ch
, wellFormedExtensions
[ch
% UPRV_LENGTHOF(wellFormedExtensions
)]);
// Verifies that ill-formed extension values are rejected for extension keys.
// Each entry of illFormed is set under a key ch; the build(status) call that follows is
// expected to report U_ILLEGAL_ARGUMENT_ERROR, and any other status is logged through errln().
1544 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1545 static const char* illFormed
[] = {
1548 "123456789", // Single subtag, 9 characters: too long
1549 "abcdefghi", // Single subtag, 9 characters: too long
1550 "ab-123456789", // Second subtag too long
1551 "abcdefghi-12", // First subtag too long
1552 "a-ab-87654321", // Presumably rejected for the one-character subtag "a"; the third subtag is only 8 characters - confirm
1553 "87654321-a-0-3", // Presumably rejected for the one-character subtags; the first subtag is only 8 characters - confirm
// Lower-case letters used to pick the extension key for each entry.
1555 const char * aToZ
= "abcdefghijklmnopqrstuvwxyz";
1556 const int32_t aToZLen
= static_cast<int32_t>(uprv_strlen(aToZ
));
1558 for (const char* ill
: illFormed
) {
// Advance i cyclically over the indices of aToZ.
// NOTE(review): ch is presumably aToZ[i] - confirm against the declaration of ch.
1560 i
= (i
+ 1) % aToZLen
;
1561 UErrorCode status
= U_ZERO_ERROR
;
// setExtension() receives no status argument; the error is observed through build(status).
1563 bld
.setExtension(ch
, ill
);
1564 Locale loc
= bld
.build(status
);
// Exactly U_ILLEGAL_ARGUMENT_ERROR is required; success or any other code is reported.
1565 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1566 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
// Builds a locale l1 from individually set fields (language, variant, Unicode locale
// attributes and keywords), then builds l2 from a builder that was given l1 through
// setLocale(). A failing status or a bogus locale from either build is reported, and the
// final errln() message reports the names of both locales when they are not the same.
1572 void LocaleBuilderTest::TestSetLocale() {
// NOTE(review): bld2 is not used in any statement visible in this function; setLocale()
// is applied to bld1, which already holds the fields that produced l1. Confirm whether
// bld2 was meant to receive the setLocale() call.
1573 LocaleBuilder bld1
, bld2
;
1574 UErrorCode status
= U_ZERO_ERROR
;
// The setters are chained on bld1.
1575 Locale l1
= bld1
.setLanguage("en")
1578 .setVariant("3456-abcde")
1579 .addUnicodeLocaleAttribute("456")
1580 .addUnicodeLocaleAttribute("123")
1581 .setUnicodeLocaleKeyword("nu", "thai")
1582 .setUnicodeLocaleKeyword("co", "stroke")
1583 .setUnicodeLocaleKeyword("ca", "chinese")
1585 if (U_FAILURE(status
) || l1
.isBogus()) {
1586 errln("build got Error: %s\n", u_errorName(status
));
// Reset so that the second build is judged on its own status.
1588 status
= U_ZERO_ERROR
;
1589 Locale l2
= bld1
.setLocale(l1
).build(status
);
1590 if (U_FAILURE(status
) || l2
.isBogus()) {
1591 errln("build got Error: %s\n", u_errorName(status
));
// Mismatch report: prints the names of both locales.
1595 errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1596 l1
.getName(), l2
.getName());
// Checks how the language tag "en-US-u-va-posix" passes through LocaleBuilder::setLocale():
// l1 is parsed from the tag, l2 is built from a builder that was first given unrelated
// fields and then l1 via setLocale(). The two errln() reports below compare l2 with l1
// (printed as language tags) and with Locale("en-US-POSIX") (printed by name).
1600 void LocaleBuilderTest::TestPosixCases() {
1601 UErrorCode status
= U_ZERO_ERROR
;
1602 Locale l1
= Locale::forLanguageTag("en-US-u-va-posix", status
);
1603 if (U_FAILURE(status
) || l1
.isBogus()) {
1604 errln("build got Error: %s\n", u_errorName(status
));
// Pre-populate the builder with fields that differ from l1.
1607 bld
.setLanguage("en")
1610 .setUnicodeLocaleKeyword("nu", "Thai")
1611 .setExtension('x', "1");
1612 // All of the above should be cleared by the setLocale call.
1613 Locale l2
= bld
.setLocale(l1
).build(status
);
1614 if (U_FAILURE(status
) || l2
.isBogus()) {
1615 errln("build got Error: %s\n", u_errorName(status
));
// Mismatch report against l1: both locales are printed as language tags.
1618 errln("The result locale should be the set as the setLocale %s but got %s\n",
1619 l1
.toLanguageTag
<std::string
>(status
).c_str(),
1620 l2
.toLanguageTag
<std::string
>(status
).c_str());
// Reference locale constructed directly from the locale ID "en-US-POSIX".
1622 Locale
posix("en-US-POSIX");
// Mismatch report against the reference locale: both are printed by name.
1624 errln("The result locale should be the set as %s but got %s\n",
1625 posix
.getName(), l2
.getName());