]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/localebuildertest.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / localebuildertest.cpp
CommitLineData
3d1f044b
A
1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include <memory>
5
6#include "cmemory.h"
7#include "cstring.h"
8#include "localebuildertest.h"
9#include "unicode/localebuilder.h"
10#include "unicode/strenum.h"
11
12LocaleBuilderTest::LocaleBuilderTest()
13{
14}
15
16LocaleBuilderTest::~LocaleBuilderTest()
17{
18}
19
20void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
21{
22 TESTCASE_AUTO_BEGIN;
23 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
24 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
25 TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
26 TESTCASE_AUTO(TestLocaleBuilder);
27 TESTCASE_AUTO(TestLocaleBuilderBasic);
28 TESTCASE_AUTO(TestPosixCases);
29 TESTCASE_AUTO(TestSetExtensionOthers);
30 TESTCASE_AUTO(TestSetExtensionPU);
31 TESTCASE_AUTO(TestSetExtensionT);
32 TESTCASE_AUTO(TestSetExtensionU);
33 TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
34 TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
35 TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
36 TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
37 TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
38 TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
39 TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
40 TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
41 TESTCASE_AUTO(TestSetLanguageIllFormed);
42 TESTCASE_AUTO(TestSetLanguageWellFormed);
43 TESTCASE_AUTO(TestSetLocale);
44 TESTCASE_AUTO(TestSetRegionIllFormed);
45 TESTCASE_AUTO(TestSetRegionWellFormed);
46 TESTCASE_AUTO(TestSetScriptIllFormed);
47 TESTCASE_AUTO(TestSetScriptWellFormed);
48 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
49 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
50 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
51 TESTCASE_AUTO(TestSetVariantIllFormed);
52 TESTCASE_AUTO(TestSetVariantWellFormed);
53 TESTCASE_AUTO_END;
54}
55
56void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
57 UErrorCode status = U_ZERO_ERROR;
58 Locale loc = bld.build(status);
59 if (U_FAILURE(status)) {
60 errln(msg, u_errorName(status));
61 }
62 std::string tag = loc.toLanguageTag<std::string>(status);
63 if (U_FAILURE(status)) {
64 errln("loc.toLanguageTag() got Error: %s\n",
65 u_errorName(status));
66 }
67 if (tag != expected) {
68 errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
69 }
70}
71
72void LocaleBuilderTest::TestLocaleBuilder() {
73 // The following test data are copy from
74 // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
75 // "L": +1 = language
76 // "S": +1 = script
77 // "R": +1 = region
78 // "V": +1 = variant
79 // "K": +1 = Unicode locale key / +2 = Unicode locale type
80 // "A": +1 = Unicode locale attribute
81 // "E": +1 = extension letter / +2 = extension value
82 // "P": +1 = private use
83 // "U": +1 = ULocale
84 // "B": +1 = BCP47 language tag
85 // "C": Clear all
86 // "N": Clear extensions
87 // "D": +1 = Unicode locale attribute to be removed
88 // "X": indicates an exception must be thrown
89 // "T": +1 = expected language tag / +2 = expected locale string
90 const char* TESTCASES[][14] = {
91 {"L", "en", "R", "us", "T", "en-US", "en_US"},
92 {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
93 {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
94 {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
95 {"L", "123", "X"},
96 {"R", "us", "T", "und-US", "_US"},
97 {"R", "usa", "X"},
98 {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
99 {"R", "123", "L", "it", "R", "", "T", "it", "it"},
100 {"R", "123", "L", "en", "T", "en-123", "en_123"},
101 {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
102 {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
103 {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
104 {"S", "latin", "X"},
105 {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
106 {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
107 {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
108 {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
109 {"V", "123", "X"},
110 {"U", "en_US", "T", "en-US", "en_US"},
111 {"U", "en_US_WIN", "X"},
112 {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
113 "fr-FR-1606nict-u-ca-gregory-x-test",
114 "fr_FR_1606NICT@calendar=gregorian;x=test"},
115 {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
116 {"B", "und-CA", "T", "und-CA", "_CA"},
117 // Blocked by ICU-20327
118 // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
119 // "en_US_VAR@x=test"},
120 {"B", "en-US-VAR", "X"},
121 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
122 "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
123 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
124 "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
125 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
126 "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
127 "ja_JP@attribute=attr1;calendar=gregorian"},
128 {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
129 "en@colnumeric=yes"},
130 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
131 "th_TH@numbers=thai"},
132 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
133 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
134 {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
135 {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
136 {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
137 {"E", "a", "x", "X"},
138 {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
139 // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
140 // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
141 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
142 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
143 // key = alphanum alpha
144 {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
145 "en@0a=yes;attribute=aaa-bbb"},
146 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
147 "fr_FR@x=yoshito-icu"},
148 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
149 "ja_JP@calendar=japanese"},
150 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
151 "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
152 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
153 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
154 "th@calendar=gregorian;numbers=thai"},
155 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
156 "en_US@timezone=America/New_York"},
157 {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
158 "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
159 "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
160 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
161 "en_US@calendar=gregorian"},
162 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
163 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
164 {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
165 "en_US@colnumeric=yes"},
166 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
167 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
168 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
169 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
170 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
171 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
172 {"L", "en", "A", "aa", "X"},
173 {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
174 };
175 UErrorCode status = U_ZERO_ERROR;
176 LocaleBuilder bld;
177 for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
178 const char* (&testCase)[14] = TESTCASES[tidx];
179 std::string actions;
180 for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
181 if (testCase[p] == nullptr) {
182 actions += " (nullptr)";
183 break;
184 }
185 if (p > 0) actions += " ";
186 actions += testCase[p];
187 }
188 int i = 0;
189 const char* method;
190 status = U_ZERO_ERROR;
191 bld.clear();
192 while (true) {
193 method = testCase[i++];
194 if (strcmp("L", method) == 0) {
195 bld.setLanguage(testCase[i++]).build(status);
196 } else if (strcmp("S", method) == 0) {
197 bld.setScript(testCase[i++]).build(status);
198 } else if (strcmp("R", method) == 0) {
199 bld.setRegion(testCase[i++]).build(status);
200 } else if (strcmp("V", method) == 0) {
201 bld.setVariant(testCase[i++]).build(status);
202 } else if (strcmp("K", method) == 0) {
203 const char* key = testCase[i++];
204 const char* type = testCase[i++];
205 bld.setUnicodeLocaleKeyword(key, type).build(status);
206 } else if (strcmp("A", method) == 0) {
207 bld.addUnicodeLocaleAttribute(testCase[i++]).build(status);
208 } else if (strcmp("E", method) == 0) {
209 const char* key = testCase[i++];
210 const char* value = testCase[i++];
211 bld.setExtension(key[0], value).build(status);
212 } else if (strcmp("P", method) == 0) {
213 bld.setExtension('x', testCase[i++]).build(status);
214 } else if (strcmp("U", method) == 0) {
215 bld.setLocale(Locale(testCase[i++])).build(status);
216 } else if (strcmp("B", method) == 0) {
217 bld.setLanguageTag(testCase[i++]).build(status);
218 }
219 // clear / remove
220 else if (strcmp("C", method) == 0) {
221 bld.clear().build(status);
222 } else if (strcmp("N", method) == 0) {
223 bld.clearExtensions().build(status);
224 } else if (strcmp("D", method) == 0) {
225 bld.removeUnicodeLocaleAttribute(testCase[i++]).build(status);
226 }
227 // result
228 else if (strcmp("X", method) == 0) {
229 if (U_SUCCESS(status)) {
230 errln("FAIL: No error return - test case: %s", actions.c_str());
231 }
232 } else if (strcmp("T", method) == 0) {
233 status = U_ZERO_ERROR;
234 Locale loc = bld.build(status);
235 if (U_FAILURE(status) ||
236 strcmp(loc.getName(), testCase[i + 1]) != 0) {
237 errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
238 " for test case: ", actions.c_str());
239 }
240 std::string langtag = loc.toLanguageTag<std::string>(status);
241 if (U_FAILURE(status) || langtag != testCase[i]) {
242 errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
243 " for test case: ", actions.c_str());
244 }
245 break;
246 } else {
247 // Unknow test method
248 errln("Unknown test case method: There is an error in the test case data.");
249 break;
250 }
251 if (U_FAILURE(status)) {
252 if (strcmp("X", testCase[i]) == 0) {
253 // This failure is expected
254 break;
255 } else {
256 errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
257 " in test case: ", actions.c_str());
258 break;
259 }
260 }
261 if (strcmp("T", method) == 0) {
262 break;
263 }
264 } // while(true)
265 } // for TESTCASES
266}
267
268void LocaleBuilderTest::TestLocaleBuilderBasic() {
269 LocaleBuilder bld;
270 bld.setLanguage("zh");
271 Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
272
273 bld.setScript("Hant");
274 Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
275
276 bld.setRegion("SG");
277 Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
278
279 bld.setRegion("HK");
280 bld.setScript("Hans");
281 Verify(bld, "zh-Hans-HK",
282 "setRegion('HK') and setScript('Hans') got Error: %s\n");
283
284 bld.setVariant("revised");
285 Verify(bld, "zh-Hans-HK-revised",
286 "setVariant('revised') got Error: %s\n");
287
288 bld.setUnicodeLocaleKeyword("nu", "thai");
289 Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
290 "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
291
292 bld.setUnicodeLocaleKeyword("co", "pinyin");
293 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
294 "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
295
296 bld.setUnicodeLocaleKeyword("nu", "latn");
297 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
298 "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
299
300 bld.setUnicodeLocaleKeyword("nu", nullptr);
301 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
302 "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
303
304 bld.setUnicodeLocaleKeyword("co", nullptr);
305 Verify(bld, "zh-Hans-HK-revised",
306 "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
307
308 bld.setScript("");
309 Verify(bld, "zh-HK-revised",
310 "setScript('') got Error: %s\n");
311
312 bld.setVariant("");
313 Verify(bld, "zh-HK",
314 "setVariant('') got Error: %s\n");
315
316 bld.setRegion("");
317 Verify(bld, "zh",
318 "setRegion('') got Error: %s\n");
319}
320
321void LocaleBuilderTest::TestSetLanguageWellFormed() {
322 // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
323 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
324 // ICUTC decided also support alpha{4}
325 static const char* wellFormedLanguages[] = {
326 "",
327
328 // alpha{2}
329 "en",
330 "NE",
331 "eN",
332 "Ne",
333
334 // alpha{3}
335 "aNe",
336 "zzz",
337 "AAA",
338
339 // alpha{4}
340 "ABCD",
341 "abcd",
342
343 // alpha{5}
344 "efgij",
345 "AbCAD",
346 "ZAASD",
347
348 // alpha{6}
349 "efgijk",
350 "AADGFE",
351 "AkDfFz",
352
353 // alpha{7}
354 "asdfads",
355 "ADSFADF",
356 "piSFkDk",
357
358 // alpha{8}
359 "oieradfz",
360 "IADSFJKR",
361 "kkDSFJkR",
362 };
363 for (const char* lang : wellFormedLanguages) {
364 UErrorCode status = U_ZERO_ERROR;
365 LocaleBuilder bld;
366 bld.setLanguage(lang);
367 Locale loc = bld.build(status);
368 if (U_FAILURE(status)) {
369 errln("setLanguage(\"%s\") got Error: %s\n",
370 lang, u_errorName(status));
371 }
372 }
373}
374
375void LocaleBuilderTest::TestSetLanguageIllFormed() {
376 static const char* illFormed[] = {
377 "a",
378 "z",
379 "A",
380 "F",
381 "2",
382 "0",
383 "9"
384 "{",
385 ".",
386 "[",
387 "]",
388 "\\",
389
390 "e1",
391 "N2",
392 "3N",
393 "4e",
394 "e:",
395 "43",
396 "a9",
397
398 "aN0",
399 "z1z",
400 "2zz",
401 "3A3",
402 "456",
403 "af)",
404
405 // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
406 // "latn",
407 // "Arab",
408 // "LATN",
409
410 "e)gij",
411 "Ab3AD",
412 "ZAAS8",
413
414 "efgi[]",
415 "AA9GFE",
416 "7kD3Fz",
417 "as8fads",
418 "0DSFADF",
419 "'iSFkDk",
420
421 "oieradf+",
422 "IADSFJK-",
423 "kkDSFJk0",
424
425 // alpha{9}
426 "oieradfab",
427 "IADSFJKDE",
428 "kkDSFJkzf",
429 };
430 for (const char* ill : illFormed) {
431 UErrorCode status = U_ZERO_ERROR;
432 LocaleBuilder bld;
433 bld.setLanguage(ill);
434 Locale loc = bld.build(status);
435 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
436 errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
437 }
438 }
439}
440
441void LocaleBuilderTest::TestSetScriptWellFormed() {
442 // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
443 // unicode_script_subtag = alpha{4} ;
444 static const char* wellFormedScripts[] = {
445 "",
446
447 "Latn",
448 "latn",
449 "lATN",
450 "laTN",
451 "arBN",
452 "ARbn",
453 "adsf",
454 "aADF",
455 "BSVS",
456 "LATn",
457 };
458 for (const char* script : wellFormedScripts) {
459 UErrorCode status = U_ZERO_ERROR;
460 LocaleBuilder bld;
461 bld.setScript(script);
462 Locale loc = bld.build(status);
463 if (U_FAILURE(status)) {
464 errln("setScript(\"%s\") got Error: %s\n",
465 script, u_errorName(status));
466 }
467 }
468}
469
470void LocaleBuilderTest::TestSetScriptIllFormed() {
471 static const char* illFormed[] = {
472 "a",
473 "z",
474 "A",
475 "F",
476 "2",
477 "0",
478 "9"
479 "{",
480 ".",
481 "[",
482 "]",
483 "\\",
484
485 "e1",
486 "N2",
487 "3N",
488 "4e",
489 "e:",
490 "43",
491 "a9",
492
493 "aN0",
494 "z1z",
495 "2zz",
496 "3A3",
497 "456",
498 "af)",
499
500 "0atn",
501 "l1tn",
502 "lA2N",
503 "la4N",
504 "arB5",
505 "1234",
506
507 "e)gij",
508 "Ab3AD",
509 "ZAAS8",
510
511 "efgi[]",
512 "AA9GFE",
513 "7kD3Fz",
514
515 "as8fads",
516 "0DSFADF",
517 "'iSFkDk",
518
519 "oieradf+",
520 "IADSFJK-",
521 "kkDSFJk0",
522
523 // alpha{9}
524 "oieradfab",
525 "IADSFJKDE",
526 "kkDSFJkzf",
527 };
528 for (const char* ill : illFormed) {
529 UErrorCode status = U_ZERO_ERROR;
530 LocaleBuilder bld;
531 bld.setScript(ill);
532 Locale loc = bld.build(status);
533 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
534 errln("setScript(\"%s\") should fail but has no Error\n", ill);
535 }
536 }
537}
538
539void LocaleBuilderTest::TestSetRegionWellFormed() {
540 // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
541 // unicode_region_subtag = (alpha{2} | digit{3})
542 static const char* wellFormedRegions[] = {
543 "",
544
545 // alpha{2}
546 "en",
547 "NE",
548 "eN",
549 "Ne",
550
551 // digit{3}
552 "000",
553 "999",
554 "123",
555 "987"
556 };
557 for (const char* region : wellFormedRegions) {
558 UErrorCode status = U_ZERO_ERROR;
559 LocaleBuilder bld;
560 bld.setRegion(region);
561 Locale loc = bld.build(status);
562 if (U_FAILURE(status)) {
563 errln("setRegion(\"%s\") got Error: %s\n",
564 region, u_errorName(status));
565 }
566 }
567}
568
569void LocaleBuilderTest::TestSetRegionIllFormed() {
570 static const char* illFormed[] = {
571 "a",
572 "z",
573 "A",
574 "F",
575 "2",
576 "0",
577 "9"
578 "{",
579 ".",
580 "[",
581 "]",
582 "\\",
583
584 "e1",
585 "N2",
586 "3N",
587 "4e",
588 "e:",
589 "43",
590 "a9",
591
592 "aN0",
593 "z1z",
594 "2zz",
595 "3A3",
596 "4.6",
597 "af)",
598
599 "0atn",
600 "l1tn",
601 "lA2N",
602 "la4N",
603 "arB5",
604 "1234",
605
606 "e)gij",
607 "Ab3AD",
608 "ZAAS8",
609
610 "efgi[]",
611 "AA9GFE",
612 "7kD3Fz",
613
614 "as8fads",
615 "0DSFADF",
616 "'iSFkDk",
617
618 "oieradf+",
619 "IADSFJK-",
620 "kkDSFJk0",
621
622 // alpha{9}
623 "oieradfab",
624 "IADSFJKDE",
625 "kkDSFJkzf",
626 };
627 for (const char* ill : illFormed) {
628 UErrorCode status = U_ZERO_ERROR;
629 LocaleBuilder bld;
630 bld.setRegion(ill);
631 Locale loc = bld.build(status);
632 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
633 errln("setRegion(\"%s\") should fail but has no Error\n", ill);
634 }
635 }
636}
637
638void LocaleBuilderTest::TestSetVariantWellFormed() {
639 // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
640 // (sep unicode_variant_subtag)*
641 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
642 static const char* wellFormedVariants[] = {
643 "",
644
645 // alphanum{5}
646 "efgij",
647 "AbCAD",
648 "ZAASD",
649 "0AASD",
650 "A1CAD",
651 "ef2ij",
652 "ads3X",
653 "owqF4",
654
655 // alphanum{6}
656 "efgijk",
657 "AADGFE",
658 "AkDfFz",
659 "0ADGFE",
660 "A9DfFz",
661 "AADG7E",
662
663 // alphanum{7}
664 "asdfads",
665 "ADSFADF",
666 "piSFkDk",
667 "a0dfads",
668 "ADSF3DF",
669 "piSFkD9",
670
671 // alphanum{8}
672 "oieradfz",
673 "IADSFJKR",
674 "kkDSFJkR",
675 "0ADSFJKR",
676 "12345679",
677
678 // digit alphanum{3}
679 "0123",
680 "1abc",
681 "20EF",
682 "30EF",
683 "8A03",
684 "3Ax3",
685 "9Axy",
686
687 // (sep unicode_variant_subtag)*
688 "0123-4567",
689 "0ab3-ABCDE",
690 "9ax3-xByD9",
691 "9ax3-xByD9-adfk934a",
692
693 "0123_4567",
694 "0ab3_ABCDE",
695 "9ax3_xByD9",
696 "9ax3_xByD9_adfk934a",
697
698 "9ax3-xByD9_adfk934a",
699 "9ax3_xByD9-adfk934a",
700 };
701 for (const char* variant : wellFormedVariants) {
702 UErrorCode status = U_ZERO_ERROR;
703 LocaleBuilder bld;
704 bld.setVariant(variant);
705 Locale loc = bld.build(status);
706 if (U_FAILURE(status)) {
707 errln("setVariant(\"%s\") got Error: %s\n",
708 variant, u_errorName(status));
709 }
710 }
711}
712
713void LocaleBuilderTest::TestSetVariantIllFormed() {
714 static const char* illFormed[] = {
715 "a",
716 "z",
717 "A",
718 "F",
719 "2",
720 "0",
721 "9"
722 "{",
723 ".",
724 "[",
725 "]",
726 "\\",
727
728 "e1",
729 "N2",
730 "3N",
731 "4e",
732 "e:",
733 "43",
734 "a9",
735 "en",
736 "NE",
737 "eN",
738 "Ne",
739
740 "aNe",
741 "zzz",
742 "AAA",
743 "aN0",
744 "z1z",
745 "2zz",
746 "3A3",
747 "4.6",
748 "af)",
749 "345",
750 "923",
751
752 "Latn",
753 "latn",
754 "lATN",
755 "laTN",
756 "arBN",
757 "ARbn",
758 "adsf",
759 "aADF",
760 "BSVS",
761 "LATn",
762 "l1tn",
763 "lA2N",
764 "la4N",
765 "arB5",
766 "abc3",
767 "A3BC",
768
769 "e)gij",
770 "A+3AD",
771 "ZAA=8",
772
773 "efgi[]",
774 "AA9]FE",
775 "7k[3Fz",
776
777 "as8f/ds",
778 "0DSFAD{",
779 "'iSFkDk",
780
781 "oieradf+",
782 "IADSFJK-",
783 "k}DSFJk0",
784
785 // alpha{9}
786 "oieradfab",
787 "IADSFJKDE",
788 "kkDSFJkzf",
789 "123456789",
790
791 "-0123",
792 "-0123-4567",
793 "0123-4567-",
794 "-123-4567",
795 "_0123",
796 "_0123_4567",
797 "0123_4567_",
798 "_123_4567",
799
800 "-abcde-figjk",
801 "abcde-figjk-",
802 "-abcde-figjk-",
803 "_abcde_figjk",
804 "abcde_figjk_",
805 "_abcde_figjk_",
806 };
807 for (const char* ill : illFormed) {
808 UErrorCode status = U_ZERO_ERROR;
809 LocaleBuilder bld;
810 bld.setVariant(ill);
811 Locale loc = bld.build(status);
812 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
813 errln("setVariant(\"%s\") should fail but has no Error\n", ill);
814 }
815 }
816}
817
818void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
819 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
820 // keyword = key (sep type)? ;
821 // key = alphanum alpha ;
822 // type = alphanum{3,8} (sep alphanum{3,8})* ;
823 static const char* wellFormed_key_value[] = {
824 "aa", "123",
825 "3b", "zyzbcdef",
826 "0Z", "1ZB30zk9-abc",
827 "cZ", "2ck30zfZ-adsf023-234kcZ",
828 "ZZ", "Lant",
829 "ko", "",
830 };
831 for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
832 UErrorCode status = U_ZERO_ERROR;
833 LocaleBuilder bld;
834 bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
835 wellFormed_key_value[i + 1]);
836 Locale loc = bld.build(status);
837 if (U_FAILURE(status)) {
838 errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
839 wellFormed_key_value[i],
840 wellFormed_key_value[i + 1],
841 u_errorName(status));
842 }
843 }
844}
845
846void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
847 static const char* illFormed[] = {
848 "34",
849 "ab-cde",
850 "123",
851 "b3",
852 "zyzabcdef",
853 "Z0",
854 };
855 for (const char* ill : illFormed) {
856 UErrorCode status = U_ZERO_ERROR;
857 LocaleBuilder bld;
858 bld.setUnicodeLocaleKeyword(ill, "abc");
859 Locale loc = bld.build(status);
860 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
861 errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
862 ill);
863 }
864 }
865}
866
867void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
868 static const char* illFormed[] = {
869 "34",
870 "ab-",
871 "-cd",
872 "-ef-",
873 "zyzabcdef",
874 "ab-abc",
875 "1ZB30zfk9-abc",
876 "2ck30zfk9-adsf023-234kcZ",
877 };
878 for (const char* ill : illFormed) {
879 UErrorCode status = U_ZERO_ERROR;
880 LocaleBuilder bld;
881 bld.setUnicodeLocaleKeyword("ab", ill);
882 Locale loc = bld.build(status);
883 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
884 errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
885 ill);
886 }
887 }
888}
889
890void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
891 LocaleBuilder bld;
892 UErrorCode status = U_ZERO_ERROR;
893 Locale loc = bld.setLanguage("fr")
894 .addUnicodeLocaleAttribute("abc")
895 .addUnicodeLocaleAttribute("aBc")
896 .addUnicodeLocaleAttribute("EFG")
897 .addUnicodeLocaleAttribute("efghi")
898 .addUnicodeLocaleAttribute("efgh")
899 .addUnicodeLocaleAttribute("efGhi")
900 .addUnicodeLocaleAttribute("EFg")
901 .addUnicodeLocaleAttribute("hijk")
902 .addUnicodeLocaleAttribute("EFG")
903 .addUnicodeLocaleAttribute("HiJK")
904 .addUnicodeLocaleAttribute("aBc")
905 .build(status);
906 if (U_FAILURE(status)) {
907 errln("addUnicodeLocaleAttribute() got Error: %s\n",
908 u_errorName(status));
909 }
910 std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
911 std::string actual = loc.toLanguageTag<std::string>(status);
912 if (U_FAILURE(status) || expected != actual) {
913 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
914 }
915
916 // remove "efgh" in the middle with different casing.
917 loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
918 if (U_FAILURE(status)) {
919 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
920 u_errorName(status));
921 }
922 expected = "fr-u-abc-efg-efghi-hijk";
923 actual = loc.toLanguageTag<std::string>(status);
924 if (U_FAILURE(status) || expected != actual) {
925 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
926 }
927
928 // remove non-existing attributes.
929 loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
930 if (U_FAILURE(status)) {
931 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
932 u_errorName(status));
933 }
934 actual = loc.toLanguageTag<std::string>(status);
935 if (U_FAILURE(status) || expected != actual) {
936 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
937 }
938
939 // remove "abc" in the beginning with different casing.
940 loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
941 if (U_FAILURE(status)) {
942 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
943 u_errorName(status));
944 }
945 expected = "fr-u-efg-efghi-hijk";
946 actual = loc.toLanguageTag<std::string>(status);
947 if (U_FAILURE(status) || expected != actual) {
948 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
949 }
950
951 // remove non-existing substring in the end.
952 loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
953 if (U_FAILURE(status)) {
954 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
955 u_errorName(status));
956 }
957 actual = loc.toLanguageTag<std::string>(status);
958 if (U_FAILURE(status) || expected != actual) {
959 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
960 }
961
962 // remove "hijk" in the end with different casing.
963 loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
964 if (U_FAILURE(status)) {
965 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
966 u_errorName(status));
967 }
968 expected = "fr-u-efg-efghi";
969 actual = loc.toLanguageTag<std::string>(status);
970 if (U_FAILURE(status) || expected != actual) {
971 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
972 }
973
974 // remove "efghi" in the end with different casing.
975 loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
976 if (U_FAILURE(status)) {
977 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
978 u_errorName(status));
979 }
980 expected = "fr-u-efg";
981 actual = loc.toLanguageTag<std::string>(status);
982 if (U_FAILURE(status) || expected != actual) {
983 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
984 }
985
986 // remove "efg" in as the only one, with different casing.
987 loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
988 if (U_FAILURE(status)) {
989 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
990 u_errorName(status));
991 }
992 expected = "fr";
993 actual = loc.toLanguageTag<std::string>(status);
994 if (U_FAILURE(status) || expected != actual) {
995 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
996 }
997
998}
999
1000void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1001 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1002 // attribute = alphanum{3,8} ;
1003 static const char* wellFormedAttributes[] = {
1004 // alphanum{3}
1005 "AbC",
1006 "ZAA",
1007 "0AA",
1008 "x3A",
1009 "xa8",
1010
1011 // alphanum{4}
1012 "AbCA",
1013 "ZASD",
1014 "0ASD",
1015 "A3a4",
1016 "zK90",
1017
1018 // alphanum{5}
1019 "efgij",
1020 "AbCAD",
1021 "ZAASD",
1022 "0AASD",
1023 "A1CAD",
1024 "ef2ij",
1025 "ads3X",
1026 "owqF4",
1027
1028 // alphanum{6}
1029 "efgijk",
1030 "AADGFE",
1031 "AkDfFz",
1032 "0ADGFE",
1033 "A9DfFz",
1034 "AADG7E",
1035
1036 // alphanum{7}
1037 "asdfads",
1038 "ADSFADF",
1039 "piSFkDk",
1040 "a0dfads",
1041 "ADSF3DF",
1042 "piSFkD9",
1043
1044 // alphanum{8}
1045 "oieradfz",
1046 "IADSFJKR",
1047 "kkDSFJkR",
1048 };
1049 LocaleBuilder bld;
1050 for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1051 if (i % 5 == 0) {
1052 bld.clear();
1053 }
1054 UErrorCode status = U_ZERO_ERROR;
1055 bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1056 Locale loc = bld.build(status);
1057 if (U_FAILURE(status)) {
1058 errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1059 wellFormedAttributes[i], u_errorName(status));
1060 }
1061 if (i > 2) {
1062 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1063 loc = bld.build(status);
1064 if (U_FAILURE(status)) {
1065 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1066 wellFormedAttributes[i - 1], u_errorName(status));
1067 }
1068 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1069 loc = bld.build(status);
1070 if (U_FAILURE(status)) {
1071 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1072 wellFormedAttributes[i - 3], u_errorName(status));
1073 }
1074 }
1075 }
1076}
1077
1078void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1079 static const char* illFormed[] = {
1080 "aa",
1081 "34",
1082 "ab-",
1083 "-cd",
1084 "-ef-",
1085 "zyzabcdef",
1086 "123456789",
1087 "ab-abc",
1088 "1ZB30zfk9-abc",
1089 "2ck30zfk9-adsf023-234kcZ",
1090 };
1091 for (const char* ill : illFormed) {
1092 UErrorCode status = U_ZERO_ERROR;
1093 LocaleBuilder bld;
1094 bld.addUnicodeLocaleAttribute(ill);
1095 Locale loc = bld.build(status);
1096 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1097 errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1098 ill);
1099 }
1100 }
1101}
1102
1103void LocaleBuilderTest::TestSetExtensionU() {
1104 LocaleBuilder bld;
1105 bld.setLanguage("zh");
1106 Verify(bld, "zh",
1107 "setLanguage(\"zh\") got Error: %s\n");
1108
1109 bld.setExtension('u', "co-stroke");
1110 Verify(bld, "zh-u-co-stroke",
1111 "setExtension('u', \"co-stroke\") got Error: %s\n");
1112
1113 bld.setExtension('U', "ca-islamic");
1114 Verify(bld, "zh-u-ca-islamic",
1115 "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1116
1117 bld.setExtension('u', "ca-chinese");
1118 Verify(bld, "zh-u-ca-chinese",
1119 "setExtension('u', \"ca-chinese\") got Error: %s\n");
1120
1121 bld.setExtension('U', "co-pinyin");
1122 Verify(bld, "zh-u-co-pinyin",
1123 "setExtension('U', \"co-pinyin\") got Error: %s\n");
1124
1125 bld.setRegion("TW");
1126 Verify(bld, "zh-TW-u-co-pinyin",
1127 "setRegion(\"TW\") got Error: %s\n");
1128
1129 bld.setExtension('U', "");
1130 Verify(bld, "zh-TW",
1131 "setExtension('U', \"\") got Error: %s\n");
1132
1133 bld.setExtension('u', "abc-defg-kr-face");
1134 Verify(bld, "zh-TW-u-abc-defg-kr-face",
1135 "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1136
1137 bld.setExtension('U', "ca-japanese");
1138 Verify(bld, "zh-TW-u-ca-japanese",
1139 "setExtension('U', \"ca-japanese\") got Error: %s\n");
1140
1141}
1142
1143void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1144 static const char* wellFormedExtensions[] = {
1145 // keyword
1146 // keyword = key (sep type)? ;
1147 // key = alphanum alpha ;
1148 // type = alphanum{3,8} (sep alphanum{3,8})* ;
1149 "3A",
1150 "ZA",
1151 "az-abc",
1152 "zz-123",
1153 "7z-12345678",
1154 "kb-A234567Z",
1155 // (sep keyword)+
1156 "1z-ZZ",
1157 "2z-ZZ-123",
1158 "3z-ZZ-123-cd",
1159 "0z-ZZ-123-cd-efghijkl",
1160 // attribute
1161 "abc",
1162 "456",
1163 "87654321",
1164 "ZABADFSD",
1165 // (sep attribute)+
1166 "abc-ZABADFSD",
1167 "123-ZABADFSD",
1168 "K2K-12345678",
1169 "K2K-12345678-zzz",
1170 // (sep attribute)+ (sep keyword)*
1171 "K2K-12345678-zz",
1172 "K2K-12345678-zz-0z",
1173 "K2K-12345678-9z-AZ-abc",
1174 "K2K-12345678-zz-9A-234",
1175 "K2K-12345678-zk0-abc-efg-zz-9k-234",
1176 };
1177 for (const char* extension : wellFormedExtensions) {
1178 UErrorCode status = U_ZERO_ERROR;
1179 LocaleBuilder bld;
1180 bld.setExtension('u', extension);
1181 Locale loc = bld.build(status);
1182 if (U_FAILURE(status)) {
1183 errln("setExtension('u', \"%s\") got Error: %s\n",
1184 extension, u_errorName(status));
1185 }
1186 };
1187}
1188
1189void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1190 static const char* illFormed[] = {
1191 // bad key
1192 "-",
1193 "-ab",
1194 "ab-",
1195 "abc-",
1196 "-abc",
1197 "0",
1198 "a",
1199 "A0",
1200 "z9",
1201 "09",
1202 "90",
1203 // bad keyword
1204 "AB-A0",
1205 "AB-efg-A0",
1206 "xy-123456789",
1207 "AB-Aa-",
1208 "AB-Aac-",
1209 // bad attribute
1210 "abcdefghi",
1211 "abcdefgh-",
1212 "abcdefgh-abcdefghi",
1213 "abcdefgh-1",
1214 "abcdefgh-a",
1215 "abcdefgh-a2345678z",
1216 };
1217 for (const char* ill : illFormed) {
1218 UErrorCode status = U_ZERO_ERROR;
1219 LocaleBuilder bld;
1220 bld.setExtension('u', ill);
1221 Locale loc = bld.build(status);
1222 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1223 errln("setExtension('u', \"%s\") should fail but has no Error\n",
1224 ill);
1225 }
1226 }
1227}
1228
1229void LocaleBuilderTest::TestSetExtensionT() {
1230 LocaleBuilder bld;
1231 bld.setLanguage("fr");
1232 Verify(bld, "fr",
1233 "setLanguage(\"fr\") got Error: %s\n");
1234
1235 bld.setExtension('T', "zh");
1236 Verify(bld, "fr-t-zh",
1237 "setExtension('T', \"zh\") got Error: %s\n");
1238
1239 bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1240 Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1241 "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1242
1243 bld.setExtension('T', "a9-123");
1244 Verify(bld, "fr-t-a9-123",
1245 "setExtension('T', \"a9-123\") got Error: %s\n");
1246
1247 bld.setRegion("MX");
1248 Verify(bld, "fr-MX-t-a9-123",
1249 "setRegion(\"MX\") got Error: %s\n");
1250
1251 bld.setScript("Hans");
1252 Verify(bld, "fr-Hans-MX-t-a9-123",
1253 "setScript(\"Hans\") got Error: %s\n");
1254
1255 bld.setVariant("9abc-abcde");
1256 Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1257 "setVariant(\"9abc-abcde\") got Error: %s\n");
1258
1259 bld.setExtension('T', "");
1260 Verify(bld, "fr-Hans-MX-9abc-abcde",
1261 "bld.setExtension('T', \"\") got Error: %s\n");
1262}
1263
1264void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1265 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1266 static const char* wellFormedExtensions[] = {
1267 // tlang
1268 // tlang = unicode_language_subtag (sep unicode_script_subtag)?
1269 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1270 // unicode_language_subtag
1271 "en",
1272 "abc",
1273 "abcde",
1274 "ABCDEFGH",
1275 // unicode_language_subtag sep unicode_script_subtag
1276 "en-latn",
1277 "abc-arab",
1278 "ABCDEFGH-Thai",
1279 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1280 "en-latn-ME",
1281 "abc-arab-RU",
1282 "ABCDEFGH-Thai-TH",
1283 "en-latn-409",
1284 "abc-arab-123",
1285 "ABCDEFGH-Thai-456",
1286 // unicode_language_subtag sep unicode_region_subtag
1287 "en-ME",
1288 "abc-RU",
1289 "ABCDEFGH-TH",
1290 "en-409",
1291 "abc-123",
1292 "ABCDEFGH-456",
1293 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1294 // sep (sep unicode_variant_subtag)*
1295 "en-latn-ME-abcde",
1296 "abc-arab-RU-3abc-abcdef",
1297 "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1298 "en-latn-409-xafsa",
1299 "abc-arab-123-ADASDF",
1300 "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1301 // (sep tfield)+
1302 "A0-abcde",
1303 "z9-abcde123",
1304 "z9-abcde123-a1-abcde",
1305 // tlang (sep tfield)*
1306 "fr-A0-abcde",
1307 "fr-FR-A0-abcde",
1308 "fr-123-z9-abcde123-a1-abcde",
1309 "fr-Latn-FR-z9-abcde123-a1-abcde",
1310 "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1311 "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1312 };
1313 for (const char* extension : wellFormedExtensions) {
1314 UErrorCode status = U_ZERO_ERROR;
1315 LocaleBuilder bld;
1316 bld.setExtension('t', extension);
1317 Locale loc = bld.build(status);
1318 if (U_FAILURE(status)) {
1319 errln("setExtension('t', \"%s\") got Error: %s\n",
1320 extension, u_errorName(status));
1321 }
1322 };
1323}
1324
1325void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1326 static const char* illFormed[] = {
1327 "a",
1328 "a-",
1329 "0",
1330 "9-",
1331 "-9",
1332 "-z",
1333 // "Latn", // Per 2019-01-23 ICUTC, still accept 4alpha. See ICU-20321
1334 "Latn-",
1335 "en-",
1336 "nob-",
1337 "-z9",
1338 "a3",
1339 "a3-",
1340 "3a",
1341 "0z-",
1342 "en-123-a1",
1343 "en-TH-a1",
1344 "gab-TH-a1",
1345 "gab-Thai-a1",
1346 "gab-Thai-TH-a1",
1347 "gab-Thai-TH-0bde-a1",
1348 "gab-Thai-TH-0bde-3b",
1349 "gab-Thai-TH-0bde-z9-a1",
1350 "gab-Thai-TH-0bde-z9-3b",
1351 "gab-Thai-TH-0bde-z9-abcde123-3b",
1352 "gab-Thai-TH-0bde-z9-abcde123-ab",
1353 "gab-Thai-TH-0bde-z9-abcde123-ab",
1354 "gab-Thai-TH-0bde-z9-abcde123-a1",
1355 "gab-Thai-TH-0bde-z9-abcde123-a1-",
1356 "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1357 "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1358 };
1359 for (const char* ill : illFormed) {
1360 UErrorCode status = U_ZERO_ERROR;
1361 LocaleBuilder bld;
1362 bld.setExtension('t', ill);
1363 Locale loc = bld.build(status);
1364 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1365 errln("setExtension('t', \"%s\") should fail but has no Error\n",
1366 ill);
1367 }
1368 }
1369}
1370
1371void LocaleBuilderTest::TestSetExtensionPU() {
1372 LocaleBuilder bld;
1373 bld.setLanguage("ar");
1374 Verify(bld, "ar",
1375 "setLanguage(\"ar\") got Error: %s\n");
1376
1377 bld.setExtension('X', "a-b-c-d-e");
1378 Verify(bld, "ar-x-a-b-c-d-e",
1379 "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1380
1381 bld.setExtension('x', "0-1-2-3");
1382 Verify(bld, "ar-x-0-1-2-3",
1383 "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1384
1385 bld.setExtension('X', "0-12345678-x-x");
1386 Verify(bld, "ar-x-0-12345678-x-x",
1387 "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1388
1389 bld.setRegion("TH");
1390 Verify(bld, "ar-TH-x-0-12345678-x-x",
1391 "setRegion(\"TH\") got Error: %s\n");
1392
1393 bld.setExtension('X', "");
1394 Verify(bld, "ar-TH",
1395 "setExtension(\"X\") got Error: %s\n");
1396}
1397
1398void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1399 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1400 static const char* wellFormedExtensions[] = {
1401 "a", // Short subtag
1402 "z", // Short subtag
1403 "0", // Short subtag, digit
1404 "9", // Short subtag, digit
1405 "a-0", // Two short subtag, alpha and digit
1406 "9-z", // Two short subtag, digit and alpha
1407 "ab",
1408 "abc",
1409 "abcefghi", // Long subtag
1410 "87654321",
1411 "01",
1412 "234",
1413 "0a-ab-87654321", // Three subtags
1414 "87654321-ab-00-3A", // Four subtabs
1415 "a-9-87654321", // Three subtags with short and long subtags
1416 "87654321-ab-0-3A",
1417 };
1418 for (const char* extension : wellFormedExtensions) {
1419 UErrorCode status = U_ZERO_ERROR;
1420 LocaleBuilder bld;
1421 bld.setExtension('x', extension);
1422 Locale loc = bld.build(status);
1423 if (U_FAILURE(status)) {
1424 errln("setExtension('x', \"%s\") got Error: %s\n",
1425 extension, u_errorName(status));
1426 }
1427 };
1428}
1429
1430void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1431 static const char* illFormed[] = {
1432 "123456789", // Too long
1433 "abcdefghi", // Too long
1434 "ab-123456789", // Second subtag too long
1435 "abcdefghi-12", // First subtag too long
1436 "a-ab-987654321", // Third subtag too long
1437 "987654321-a-0-3", // First subtag too long
1438 };
1439 for (const char* ill : illFormed) {
1440 UErrorCode status = U_ZERO_ERROR;
1441 LocaleBuilder bld;
1442 bld.setExtension('x', ill);
1443 Locale loc = bld.build(status);
1444 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1445 errln("setExtension('x', \"%s\") should fail but has no Error\n",
1446 ill);
1447 }
1448 }
1449}
1450
1451void LocaleBuilderTest::TestSetExtensionOthers() {
1452 LocaleBuilder bld;
1453 bld.setLanguage("fr");
1454 Verify(bld, "fr",
1455 "setLanguage(\"fr\") got Error: %s\n");
1456
1457 bld.setExtension('Z', "ab");
1458 Verify(bld, "fr-z-ab",
1459 "setExtension('Z', \"ab\") got Error: %s\n");
1460
1461 bld.setExtension('0', "xyz12345-abcdefg");
1462 Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1463 "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1464
1465 bld.setExtension('a', "01-12345678-ABcdef");
1466 Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1467 "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1468
1469 bld.setRegion("TH");
1470 Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1471 "setRegion(\"TH\") got Error: %s\n");
1472
1473 bld.setScript("Arab");
1474 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1475 "setRegion(\"Arab\") got Error: %s\n");
1476
1477 bld.setExtension('A', "97");
1478 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1479 "setExtension('a', \"97\") got Error: %s\n");
1480
1481 bld.setExtension('a', "");
1482 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1483 "setExtension('a', \"\") got Error: %s\n");
1484
1485 bld.setExtension('0', "");
1486 Verify(bld, "fr-Arab-TH-z-ab",
1487 "setExtension('0', \"\") got Error: %s\n");
1488}
1489
1490void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1491 static const char* wellFormedExtensions[] = {
1492 "ab",
1493 "abc",
1494 "abcefghi",
1495 "01",
1496 "234",
1497 "87654321",
1498 "0a-ab-87654321",
1499 "87654321-ab-00-3A",
1500 };
1501
1502 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1503 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1504 int32_t i = 0;
1505 for (const char* extension : wellFormedExtensions) {
1506 char ch = aToZ[i];
1507 i = (i + 1) % aToZLen;
1508 UErrorCode status = U_ZERO_ERROR;
1509 LocaleBuilder bld;
1510 bld.setExtension(ch, extension);
1511 Locale loc = bld.build(status);
1512 if (U_FAILURE(status)) {
1513 errln("setExtension('%c', \"%s\") got Error: %s\n",
1514 ch, extension, u_errorName(status));
1515 }
1516 };
1517
1518 const char* someChars =
1519 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1520 const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1521 for (int32_t i = 0; i < someCharsLen; i++) {
1522 char ch = someChars[i];
1523 UErrorCode status = U_ZERO_ERROR;
1524 LocaleBuilder bld;
1525 bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1526 Locale loc = bld.build(status);
1527 if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1528 if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1529 if (U_FAILURE(status)) {
1530 errln("setExtension('%c', \"%s\") got Error: %s\n",
1531 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1532 }
1533 }
1534 } else {
1535 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1536 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1537 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1538 }
1539 }
1540
1541 }
1542}
1543
1544void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1545 static const char* illFormed[] = {
1546 "0", // Too short
1547 "a", // Too short
1548 "123456789", // Too long
1549 "abcdefghi", // Too long
1550 "ab-123456789", // Second subtag too long
1551 "abcdefghi-12", // First subtag too long
1552 "a-ab-87654321", // Third subtag too long
1553 "87654321-a-0-3", // First subtag too long
1554 };
1555 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1556 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1557 int32_t i = 0;
1558 for (const char* ill : illFormed) {
1559 char ch = aToZ[i];
1560 i = (i + 1) % aToZLen;
1561 UErrorCode status = U_ZERO_ERROR;
1562 LocaleBuilder bld;
1563 bld.setExtension(ch, ill);
1564 Locale loc = bld.build(status);
1565 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1566 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1567 ch, ill);
1568 }
1569 }
1570}
1571
1572void LocaleBuilderTest::TestSetLocale() {
1573 LocaleBuilder bld1, bld2;
1574 UErrorCode status = U_ZERO_ERROR;
1575 Locale l1 = bld1.setLanguage("en")
1576 .setScript("Latn")
1577 .setRegion("MX")
1578 .setVariant("3456-abcde")
1579 .addUnicodeLocaleAttribute("456")
1580 .addUnicodeLocaleAttribute("123")
1581 .setUnicodeLocaleKeyword("nu", "thai")
1582 .setUnicodeLocaleKeyword("co", "stroke")
1583 .setUnicodeLocaleKeyword("ca", "chinese")
1584 .build(status);
1585 if (U_FAILURE(status) || l1.isBogus()) {
1586 errln("build got Error: %s\n", u_errorName(status));
1587 }
1588 status = U_ZERO_ERROR;
1589 Locale l2 = bld1.setLocale(l1).build(status);
1590 if (U_FAILURE(status) || l2.isBogus()) {
1591 errln("build got Error: %s\n", u_errorName(status));
1592 }
1593
1594 if (l1 != l2) {
1595 errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1596 l1.getName(), l2.getName());
1597 }
1598}
1599
1600void LocaleBuilderTest::TestPosixCases() {
1601 UErrorCode status = U_ZERO_ERROR;
1602 Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1603 if (U_FAILURE(status) || l1.isBogus()) {
1604 errln("build got Error: %s\n", u_errorName(status));
1605 }
1606 LocaleBuilder bld;
1607 bld.setLanguage("en")
1608 .setRegion("MX")
1609 .setScript("Arab")
1610 .setUnicodeLocaleKeyword("nu", "Thai")
1611 .setExtension('x', "1");
1612 // All of above should be cleared by the setLocale call.
1613 Locale l2 = bld.setLocale(l1).build(status);
1614 if (U_FAILURE(status) || l2.isBogus()) {
1615 errln("build got Error: %s\n", u_errorName(status));
1616 }
1617 if (l1 != l2) {
1618 errln("The result locale should be the set as the setLocale %s but got %s\n",
1619 l1.toLanguageTag<std::string>(status).c_str(),
1620 l2.toLanguageTag<std::string>(status).c_str());
1621 }
1622 Locale posix("en-US-POSIX");
1623 if (posix != l2) {
1624 errln("The result locale should be the set as %s but got %s\n",
1625 posix.getName(), l2.getName());
1626 }
1627}