1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
9 #include "unicode/utypes.h"
10 #include "unicode/uscript.h"
11 #include "unicode/uchar.h"
16 static void scriptsToString(const UScriptCode scripts
[], int32_t length
, char s
[]) {
19 strcpy(s
, "(no scripts)");
23 for(i
= 0; i
< length
; ++i
) {
27 strcat(s
, uscript_getShortName(scripts
[i
]));
31 static void assertEqualScripts(const char *msg
,
32 const UScriptCode scripts1
[], int32_t length1
,
33 const UScriptCode scripts2
[], int32_t length2
,
34 UErrorCode errorCode
) {
37 if(U_FAILURE(errorCode
)) {
38 log_err("Failed: %s - %s\n", msg
, u_errorName(errorCode
));
41 scriptsToString(scripts1
, length1
, s1
);
42 scriptsToString(scripts2
, length2
, s2
);
43 if(0!=strcmp(s1
, s2
)) {
44 log_data_err("Failed: %s: expected %s but got %s\n", msg
, s1
, s2
);
48 void TestUScriptCodeAPI(){
52 const char* testNames
[]={
54 "en", "en_US", "sr", "ta" , "te_IN",
57 "Hani", "Hang","Hebr","Hira",
58 "Knda","Kana","Khmr","Lao",
59 "Latn",/*"Latf","Latg",*/
63 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
64 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
65 /* test lower case names */
66 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
67 "oriya", "runic", "sinhala", "syriac","tamil",
68 "telugu", "thaana", "thai", "tibetan",
72 "asfdasd", "5464", "12235",
73 /* test the last index */
77 UScriptCode expected
[] ={
78 /* locales should return */
79 USCRIPT_LATIN
, USCRIPT_LATIN
, USCRIPT_CYRILLIC
, USCRIPT_TAMIL
, USCRIPT_TELUGU
,
80 USCRIPT_DEVANAGARI
, USCRIPT_HEBREW
, USCRIPT_ARABIC
,
81 /* abbr should return */
82 USCRIPT_HAN
, USCRIPT_HANGUL
, USCRIPT_HEBREW
, USCRIPT_HIRAGANA
,
83 USCRIPT_KANNADA
, USCRIPT_KATAKANA
, USCRIPT_KHMER
, USCRIPT_LAO
,
84 USCRIPT_LATIN
,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
85 USCRIPT_MALAYALAM
, USCRIPT_MONGOLIAN
,
86 /* names should return */
87 USCRIPT_CYRILLIC
, USCRIPT_DESERET
, USCRIPT_DEVANAGARI
, USCRIPT_ETHIOPIC
, USCRIPT_GEORGIAN
,
88 USCRIPT_GOTHIC
, USCRIPT_GREEK
, USCRIPT_GUJARATI
, USCRIPT_COMMON
, USCRIPT_INHERITED
,
89 /* lower case names should return */
90 USCRIPT_MALAYALAM
, USCRIPT_MONGOLIAN
, USCRIPT_MYANMAR
, USCRIPT_OGHAM
, USCRIPT_OLD_ITALIC
,
91 USCRIPT_ORIYA
, USCRIPT_RUNIC
, USCRIPT_SINHALA
, USCRIPT_SYRIAC
, USCRIPT_TAMIL
,
92 USCRIPT_TELUGU
, USCRIPT_THAANA
, USCRIPT_THAI
, USCRIPT_TIBETAN
,
94 USCRIPT_TAGBANWA
, USCRIPT_ARABIC
,
95 /* bogus names should return invalid code */
96 USCRIPT_INVALID_CODE
, USCRIPT_INVALID_CODE
, USCRIPT_INVALID_CODE
,
97 USCRIPT_COMMON
, USCRIPT_YI
,
100 UErrorCode err
= U_ZERO_ERROR
;
102 const int32_t capacity
= 10;
104 for( ; testNames
[i
]!=NULL
; i
++){
105 UScriptCode script
[10]={USCRIPT_INVALID_CODE
};
106 uscript_getCode(testNames
[i
],script
,capacity
, &err
);
107 if( script
[0] != expected
[i
]){
108 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
109 script
[0],expected
[i
],testNames
[i
]);
114 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors
);
119 UErrorCode err
= U_ZERO_ERROR
;
122 UScriptCode jaCode
[]={USCRIPT_KATAKANA
, USCRIPT_HIRAGANA
, USCRIPT_HAN
};
123 UScriptCode script
[10]={USCRIPT_INVALID_CODE
};
124 int32_t num
= uscript_getCode("ja",script
,capacity
, &err
);
126 if(err
==U_BUFFER_OVERFLOW_ERROR
){
129 num
= uscript_getCode("ja",script
,capacity
, &err
);
130 if(num
!=UPRV_LENGTHOF(jaCode
)){
131 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
132 num
, UPRV_LENGTHOF(jaCode
));
134 for(j
=0;j
<UPRV_LENGTHOF(jaCode
);j
++) {
135 if(script
[j
]!=jaCode
[j
]) {
136 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j
,
137 script
[j
], uscript_getName(script
[j
]),
138 jaCode
[j
], uscript_getName(jaCode
[j
]));
143 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
144 "U_BUFFER_OVERFLOW_ERROR",
150 static const UScriptCode LATIN
[1] = { USCRIPT_LATIN
};
151 static const UScriptCode CYRILLIC
[1] = { USCRIPT_CYRILLIC
};
152 static const UScriptCode DEVANAGARI
[1] = { USCRIPT_DEVANAGARI
};
153 static const UScriptCode HAN
[1] = { USCRIPT_HAN
};
154 static const UScriptCode JAPANESE
[3] = { USCRIPT_KATAKANA
, USCRIPT_HIRAGANA
, USCRIPT_HAN
};
155 static const UScriptCode KOREAN
[2] = { USCRIPT_HANGUL
, USCRIPT_HAN
};
156 static const UScriptCode HAN_BOPO
[2] = { USCRIPT_HAN
, USCRIPT_BOPOMOFO
};
157 UScriptCode scripts
[5];
161 // Should work regardless of whether we have locale data for the language.
163 num
= uscript_getCode("tg", scripts
, UPRV_LENGTHOF(scripts
), &err
);
164 assertEqualScripts("tg script: Cyrl", CYRILLIC
, 1, scripts
, num
, err
); // Tajik
166 num
= uscript_getCode("xsr", scripts
, UPRV_LENGTHOF(scripts
), &err
);
167 assertEqualScripts("xsr script: Deva", DEVANAGARI
, 1, scripts
, num
, err
); // Sherpa
169 // Multi-script languages.
171 num
= uscript_getCode("ja", scripts
, UPRV_LENGTHOF(scripts
), &err
);
172 assertEqualScripts("ja scripts: Kana Hira Hani",
173 JAPANESE
, UPRV_LENGTHOF(JAPANESE
), scripts
, num
, err
);
175 num
= uscript_getCode("ko", scripts
, UPRV_LENGTHOF(scripts
), &err
);
176 assertEqualScripts("ko scripts: Hang Hani",
177 KOREAN
, UPRV_LENGTHOF(KOREAN
), scripts
, num
, err
);
179 num
= uscript_getCode("zh", scripts
, UPRV_LENGTHOF(scripts
), &err
);
180 assertEqualScripts("zh script: Hani", HAN
, 1, scripts
, num
, err
);
182 num
= uscript_getCode("zh-Hant", scripts
, UPRV_LENGTHOF(scripts
), &err
);
183 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO
, 2, scripts
, num
, err
);
185 num
= uscript_getCode("zh-TW", scripts
, UPRV_LENGTHOF(scripts
), &err
);
186 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO
, 2, scripts
, num
, err
);
188 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
190 num
= uscript_getCode("ro-RO", scripts
, UPRV_LENGTHOF(scripts
), &err
);
191 assertEqualScripts("ro-RO script: Latn", LATIN
, 1, scripts
, num
, err
);
195 UScriptCode testAbbr
[]={
196 /* names should return */
197 USCRIPT_CYRILLIC
, USCRIPT_DESERET
, USCRIPT_DEVANAGARI
, USCRIPT_ETHIOPIC
, USCRIPT_GEORGIAN
,
198 USCRIPT_GOTHIC
, USCRIPT_GREEK
, USCRIPT_GUJARATI
,
201 const char* expectedNames
[]={
204 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
205 "Gothic", "Greek", "Gujarati",
209 while(i
<UPRV_LENGTHOF(testAbbr
)){
210 const char* name
= uscript_getName(testAbbr
[i
]);
212 log_data_err("Couldn't get script name\n");
216 if(strcmp(expectedNames
[i
],name
)!=0){
217 log_err("Error getting abbreviations Got: %s Expected: %s\n",name
,expectedNames
[i
]);
222 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors
);
231 UScriptCode testAbbr
[]={
232 /* abbr should return */
233 USCRIPT_HAN
, USCRIPT_HANGUL
, USCRIPT_HEBREW
, USCRIPT_HIRAGANA
,
234 USCRIPT_KANNADA
, USCRIPT_KATAKANA
, USCRIPT_KHMER
, USCRIPT_LAO
,
236 USCRIPT_MALAYALAM
, USCRIPT_MONGOLIAN
,
239 const char* expectedAbbr
[]={
241 "Hani", "Hang","Hebr","Hira",
242 "Knda","Kana","Khmr","Laoo",
248 while(i
<UPRV_LENGTHOF(testAbbr
)){
249 const char* name
= uscript_getShortName(testAbbr
[i
]);
251 if(strcmp(expectedAbbr
[i
],name
)!=0){
252 log_err("Error getting abbreviations Got: %s Expected: %s\n",name
,expectedAbbr
[i
]);
257 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors
);
264 /* now test uscript_getScript() API */
266 uint32_t codepoints
[] = {
267 0x0000FF9D, /* USCRIPT_KATAKANA*/
268 0x0000FFBE, /* USCRIPT_HANGUL*/
269 0x0000FFC7, /* USCRIPT_HANGUL*/
270 0x0000FFCF, /* USCRIPT_HANGUL*/
271 0x0000FFD7, /* USCRIPT_HANGUL*/
272 0x0000FFDC, /* USCRIPT_HANGUL*/
273 0x00010300, /* USCRIPT_OLD_ITALIC*/
274 0x00010330, /* USCRIPT_GOTHIC*/
275 0x0001034A, /* USCRIPT_GOTHIC*/
276 0x00010400, /* USCRIPT_DESERET*/
277 0x00010428, /* USCRIPT_DESERET*/
278 0x0001D167, /* USCRIPT_INHERITED*/
279 0x0001D17B, /* USCRIPT_INHERITED*/
280 0x0001D185, /* USCRIPT_INHERITED*/
281 0x0001D1AA, /* USCRIPT_INHERITED*/
282 0x00020000, /* USCRIPT_HAN*/
283 0x00000D02, /* USCRIPT_MALAYALAM*/
284 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
285 0x00000000, /* USCRIPT_COMMON*/
286 0x0001D169, /* USCRIPT_INHERITED*/
287 0x0001D182, /* USCRIPT_INHERITED*/
288 0x0001D18B, /* USCRIPT_INHERITED*/
289 0x0001D1AD, /* USCRIPT_INHERITED*/
292 UScriptCode expected
[] = {
317 UScriptCode code
= USCRIPT_INVALID_CODE
;
318 UErrorCode status
= U_ZERO_ERROR
;
321 for(i
=0; i
<UPRV_LENGTHOF(codepoints
); ++i
){
322 code
= uscript_getScript(codepoints
[i
],&status
);
323 if(U_SUCCESS(status
)){
324 if( code
!= expected
[i
] ||
325 code
!= (UScriptCode
)u_getIntPropertyValue(codepoints
[i
], UCHAR_SCRIPT
)
327 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints
[i
]);
331 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
332 codepoints
[i
],u_errorName(status
));
338 log_err("uscript_getScript failed.\n");
342 UScriptCode code
= USCRIPT_INVALID_CODE
;
343 UErrorCode status
= U_ZERO_ERROR
;
344 code
= uscript_getScript(0x001D169,&status
);
345 if(code
!= USCRIPT_INHERITED
){
346 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
350 UScriptCode code
= USCRIPT_INVALID_CODE
;
351 UErrorCode status
= U_ZERO_ERROR
;
354 for(i
= 0; i
<=0x10ffff; i
++){
355 code
= uscript_getScript(i
,&status
);
356 if(code
== USCRIPT_INVALID_CODE
){
358 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i
);
362 log_err("uscript_getScript failed for %d codepoints\n", err
);
366 for(i
=0; (UScriptCode
)i
< USCRIPT_CODE_LIMIT
; i
++){
367 const char* name
= uscript_getName((UScriptCode
)i
);
368 if(name
==NULL
|| strcmp(name
,"")==0){
369 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i
);
376 * These script codes were originally added to ICU pre-3.6, so that ICU would
377 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
378 * These script codes were added with only short names because we don't
379 * want to invent long names ourselves.
380 * Unicode 5 and later encode some of these scripts and give them long names.
381 * Whenever this happens, the long script names here need to be updated.
383 static const char* expectedLong
[] = {
384 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
385 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
386 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
387 "Javanese", "Kayah_Li", "Latf", "Latg",
388 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
389 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
390 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
392 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
393 "Moon", "Meetei_Mayek",
395 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
396 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
397 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
400 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
402 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
403 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
404 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
406 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
408 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
410 "Caucasian_Albanian", "Mahajani",
412 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
414 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
416 "Masaram_Gondi", "Soyombo", "Zanabazar_Square"
418 static const char* expectedShort
[] = {
419 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
420 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
421 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
422 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
424 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
427 "Armi", "Avst", "Cakm", "Kore",
428 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
431 "Bamu", "Lisu", "Nkgb", "Sarb",
433 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
434 "Narb", "Nbat", "Palm", "Sind", "Wara",
436 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
438 "Hluw", "Khoj", "Tirh",
442 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
444 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
446 "Gonm", "Soyo", "Zanb"
449 if(UPRV_LENGTHOF(expectedLong
)!=(USCRIPT_CODE_LIMIT
-USCRIPT_BALINESE
)) {
450 log_err("need to add new script codes in cucdapi.c!\n");
453 for(i
=USCRIPT_BALINESE
; (UScriptCode
)i
<USCRIPT_CODE_LIMIT
; i
++, j
++){
454 const char* name
= uscript_getName((UScriptCode
)i
);
455 if(name
==NULL
|| strcmp(name
,expectedLong
[j
])!=0){
456 log_err("uscript_getName failed for code %i: %s!=%s\n", i
, name
, expectedLong
[j
]);
458 name
= uscript_getShortName((UScriptCode
)i
);
459 if(name
==NULL
|| strcmp(name
,expectedShort
[j
])!=0){
460 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i
, name
, expectedShort
[j
]);
463 for(i
=0; i
<UPRV_LENGTHOF(expectedLong
); i
++){
464 UScriptCode fillIn
[5] = {USCRIPT_INVALID_CODE
};
465 UErrorCode status
= U_ZERO_ERROR
;
467 len
= uscript_getCode(expectedShort
[i
], fillIn
, UPRV_LENGTHOF(fillIn
), &status
);
468 if(U_FAILURE(status
)){
469 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort
[i
], u_errorName(status
));
472 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort
[i
], len
);
474 if(fillIn
[0]!= (UScriptCode
)(USCRIPT_BALINESE
+i
)){
475 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort
[i
], (USCRIPT_BALINESE
+i
), fillIn
[0] );
481 /* test characters which have Script_Extensions */
482 UErrorCode errorCode
=U_ZERO_ERROR
;
484 USCRIPT_COMMON
==uscript_getScript(0x0640, &errorCode
) &&
485 USCRIPT_INHERITED
==uscript_getScript(0x0650, &errorCode
) &&
486 USCRIPT_ARABIC
==uscript_getScript(0xfdf2, &errorCode
)) ||
489 log_err("uscript_getScript(character with Script_Extensions) failed\n");
494 void TestHasScript() {
496 !uscript_hasScript(0x063f, USCRIPT_COMMON
) &&
497 uscript_hasScript(0x063f, USCRIPT_ARABIC
) && /* main Script value */
498 !uscript_hasScript(0x063f, USCRIPT_SYRIAC
) &&
499 !uscript_hasScript(0x063f, USCRIPT_THAANA
))
501 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
504 !uscript_hasScript(0x0640, USCRIPT_COMMON
) && /* main Script value */
505 uscript_hasScript(0x0640, USCRIPT_ARABIC
) &&
506 uscript_hasScript(0x0640, USCRIPT_SYRIAC
) &&
507 !uscript_hasScript(0x0640, USCRIPT_THAANA
))
509 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
512 !uscript_hasScript(0x0650, USCRIPT_INHERITED
) && /* main Script value */
513 uscript_hasScript(0x0650, USCRIPT_ARABIC
) &&
514 uscript_hasScript(0x0650, USCRIPT_SYRIAC
) &&
515 !uscript_hasScript(0x0650, USCRIPT_THAANA
))
517 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
520 !uscript_hasScript(0x0660, USCRIPT_COMMON
) && /* main Script value */
521 uscript_hasScript(0x0660, USCRIPT_ARABIC
) &&
522 !uscript_hasScript(0x0660, USCRIPT_SYRIAC
) &&
523 uscript_hasScript(0x0660, USCRIPT_THAANA
))
525 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
528 !uscript_hasScript(0xfdf2, USCRIPT_COMMON
) &&
529 uscript_hasScript(0xfdf2, USCRIPT_ARABIC
) && /* main Script value */
530 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC
) &&
531 uscript_hasScript(0xfdf2, USCRIPT_THAANA
))
533 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
535 if(uscript_hasScript(0x0640, 0xaffe)) {
536 /* An unguarded implementation might go into an infinite loop. */
537 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
541 static UBool
scriptsContain(UScriptCode scripts
[], int32_t length
, UScriptCode script
) {
544 for(i
=0; i
<length
; ++i
) {
545 int32_t s
=scripts
[i
];
547 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev
, (int)s
);
549 if(s
==script
) { contain
=TRUE
; }
554 void TestGetScriptExtensions() {
555 UScriptCode scripts
[20];
557 UErrorCode errorCode
;
559 /* errors and overflows */
560 errorCode
=U_PARSE_ERROR
;
561 length
=uscript_getScriptExtensions(0x0640, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
562 if(errorCode
!=U_PARSE_ERROR
) {
563 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
564 u_errorName(errorCode
));
566 errorCode
=U_ZERO_ERROR
;
567 length
=uscript_getScriptExtensions(0x0640, NULL
, UPRV_LENGTHOF(scripts
), &errorCode
);
568 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
569 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
570 u_errorName(errorCode
));
572 errorCode
=U_ZERO_ERROR
;
573 length
=uscript_getScriptExtensions(0x0640, scripts
, -1, &errorCode
);
574 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
575 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
576 u_errorName(errorCode
));
578 errorCode
=U_ZERO_ERROR
;
579 length
=uscript_getScriptExtensions(0x0640, scripts
, 0, &errorCode
);
580 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
<3) {
581 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
582 (int)length
, u_errorName(errorCode
));
584 errorCode
=U_ZERO_ERROR
;
585 length
=uscript_getScriptExtensions(0x0640, scripts
, 1, &errorCode
);
586 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
<3) {
587 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
588 (int)length
, u_errorName(errorCode
));
590 /* U+063F has only a Script code, no Script_Extensions. */
591 errorCode
=U_ZERO_ERROR
;
592 length
=uscript_getScriptExtensions(0x063f, scripts
, 0, &errorCode
);
593 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=1) {
594 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
595 (int)length
, u_errorName(errorCode
));
598 /* invalid code points */
599 errorCode
=U_ZERO_ERROR
;
600 length
=uscript_getScriptExtensions(-1, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
601 if(U_FAILURE(errorCode
) || length
!=1 || scripts
[0]!=USCRIPT_UNKNOWN
) {
602 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
603 (int)length
, u_errorName(errorCode
));
605 errorCode
=U_ZERO_ERROR
;
606 length
=uscript_getScriptExtensions(0x110000, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
607 if(U_FAILURE(errorCode
) || length
!=1 || scripts
[0]!=USCRIPT_UNKNOWN
) {
608 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
609 (int)length
, u_errorName(errorCode
));
613 errorCode
=U_ZERO_ERROR
;
614 length
=uscript_getScriptExtensions(0x063f, scripts
, 1, &errorCode
);
615 if(U_FAILURE(errorCode
) || length
!=1 || scripts
[0]!=USCRIPT_ARABIC
) {
616 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
617 (int)length
, u_errorName(errorCode
));
619 errorCode
=U_ZERO_ERROR
;
620 length
=uscript_getScriptExtensions(0x0640, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
621 if(U_FAILURE(errorCode
) || length
<3 ||
622 !scriptsContain(scripts
, length
, USCRIPT_ARABIC
) ||
623 !scriptsContain(scripts
, length
, USCRIPT_SYRIAC
) ||
624 !scriptsContain(scripts
, length
, USCRIPT_MANDAIC
)) {
625 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
626 (int)length
, u_errorName(errorCode
));
628 errorCode
=U_ZERO_ERROR
;
629 length
=uscript_getScriptExtensions(0xfdf2, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
630 if(U_FAILURE(errorCode
) || length
!=2 || scripts
[0]!=USCRIPT_ARABIC
|| scripts
[1]!=USCRIPT_THAANA
) {
631 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
632 (int)length
, u_errorName(errorCode
));
634 errorCode
=U_ZERO_ERROR
;
635 length
=uscript_getScriptExtensions(0xff65, scripts
, UPRV_LENGTHOF(scripts
), &errorCode
);
636 if(U_FAILURE(errorCode
) || length
!=6 || scripts
[0]!=USCRIPT_BOPOMOFO
|| scripts
[5]!=USCRIPT_YI
) {
637 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
638 (int)length
, u_errorName(errorCode
));
642 void TestScriptMetadataAPI() {
643 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
644 UErrorCode errorCode
=U_ZERO_ERROR
;
647 if(uscript_getSampleString(USCRIPT_LATIN
, sample
, UPRV_LENGTHOF(sample
), &errorCode
)!=1 ||
648 U_FAILURE(errorCode
) ||
649 uscript_getScript(sample
[0], &errorCode
)!=USCRIPT_LATIN
||
651 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode
));
654 if(uscript_getSampleString(USCRIPT_LATIN
, sample
, 0, &errorCode
)!=1 ||
655 errorCode
!=U_BUFFER_OVERFLOW_ERROR
||
657 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode
));
659 errorCode
=U_ZERO_ERROR
;
660 if(uscript_getSampleString(USCRIPT_INVALID_CODE
, sample
, UPRV_LENGTHOF(sample
), &errorCode
)!=0 ||
661 U_FAILURE(errorCode
) ||
663 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode
));
666 if(uscript_getSampleString(USCRIPT_CODE_LIMIT
, sample
, 0, &errorCode
)!=0 ||
667 errorCode
!=U_STRING_NOT_TERMINATED_WARNING
||
669 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode
));
672 if(uscript_getUsage(USCRIPT_LATIN
)!=USCRIPT_USAGE_RECOMMENDED
||
673 // Unicode 10 gives up on "aspirational".
674 uscript_getUsage(USCRIPT_YI
)!=USCRIPT_USAGE_LIMITED_USE
||
675 uscript_getUsage(USCRIPT_CHEROKEE
)!=USCRIPT_USAGE_LIMITED_USE
||
676 uscript_getUsage(USCRIPT_COPTIC
)!=USCRIPT_USAGE_EXCLUDED
||
677 uscript_getUsage(USCRIPT_CIRTH
)!=USCRIPT_USAGE_NOT_ENCODED
||
678 uscript_getUsage(USCRIPT_INVALID_CODE
)!=USCRIPT_USAGE_NOT_ENCODED
||
679 uscript_getUsage(USCRIPT_CODE_LIMIT
)!=USCRIPT_USAGE_NOT_ENCODED
) {
680 log_err("uscript_getUsage() failed\n");
683 if(uscript_isRightToLeft(USCRIPT_LATIN
) ||
684 uscript_isRightToLeft(USCRIPT_CIRTH
) ||
685 !uscript_isRightToLeft(USCRIPT_ARABIC
) ||
686 !uscript_isRightToLeft(USCRIPT_HEBREW
)) {
687 log_err("uscript_isRightToLeft() failed\n");
690 if(uscript_breaksBetweenLetters(USCRIPT_LATIN
) ||
691 uscript_breaksBetweenLetters(USCRIPT_CIRTH
) ||
692 !uscript_breaksBetweenLetters(USCRIPT_HAN
) ||
693 !uscript_breaksBetweenLetters(USCRIPT_THAI
)) {
694 log_err("uscript_breaksBetweenLetters() failed\n");
697 if(uscript_isCased(USCRIPT_CIRTH
) ||
698 uscript_isCased(USCRIPT_HAN
) ||
699 !uscript_isCased(USCRIPT_LATIN
) ||
700 !uscript_isCased(USCRIPT_GREEK
)) {
701 log_err("uscript_isCased() failed\n");
705 void TestBinaryValues() {
707 * Unicode 5.1 explicitly defines binary property value aliases.
708 * Verify that they are all recognized.
710 static const char *const falseValues
[]={ "N", "No", "F", "False" };
711 static const char *const trueValues
[]={ "Y", "Yes", "T", "True" };
713 for(i
=0; i
<UPRV_LENGTHOF(falseValues
); ++i
) {
714 if(FALSE
!=u_getPropertyValueEnum(UCHAR_ALPHABETIC
, falseValues
[i
])) {
715 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues
[i
]);
718 for(i
=0; i
<UPRV_LENGTHOF(trueValues
); ++i
) {
719 if(TRUE
!=u_getPropertyValueEnum(UCHAR_ALPHABETIC
, trueValues
[i
])) {
720 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues
[i
]);