]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cucdapi.c
ICU-62135.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdapi.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/********************************************************************
2ca993e8 4 * Copyright (c) 1997-2016, International Business Machines
46f4442e 5 * Corporation and others. All Rights Reserved.
b75a7d8f
A
6 ********************************************************************/
7
8#include <string.h>
9#include "unicode/utypes.h"
10#include "unicode/uscript.h"
11#include "unicode/uchar.h"
12#include "cintltst.h"
374ca955 13#include "cucdapi.h"
b331163b 14#include "cmemory.h"
b75a7d8f 15
b331163b
A
16static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
17 int32_t i;
18 if(length == 0) {
19 strcpy(s, "(no scripts)");
20 return;
21 }
22 s[0] = 0;
23 for(i = 0; i < length; ++i) {
24 if(i > 0) {
25 strcat(s, " ");
26 }
27 strcat(s, uscript_getShortName(scripts[i]));
28 }
29}
30
31static void assertEqualScripts(const char *msg,
32 const UScriptCode scripts1[], int32_t length1,
33 const UScriptCode scripts2[], int32_t length2,
34 UErrorCode errorCode) {
35 char s1[80];
36 char s2[80];
37 if(U_FAILURE(errorCode)) {
38 log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
39 return;
40 }
41 scriptsToString(scripts1, length1, s1);
42 scriptsToString(scripts2, length2, s2);
43 if(0!=strcmp(s1, s2)) {
44 log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
45 }
46}
73c04bcf 47
b75a7d8f
A
48void TestUScriptCodeAPI(){
49 int i =0;
50 int numErrors =0;
51 {
52 const char* testNames[]={
53 /* test locale */
54 "en", "en_US", "sr", "ta" , "te_IN",
55 "hi", "he", "ar",
56 /* test abbr */
57 "Hani", "Hang","Hebr","Hira",
58 "Knda","Kana","Khmr","Lao",
59 "Latn",/*"Latf","Latg",*/
60 "Mlym", "Mong",
61
62 /* test names */
63 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
64 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
65 /* test lower case names */
66 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
67 "oriya", "runic", "sinhala", "syriac","tamil",
68 "telugu", "thaana", "thai", "tibetan",
69 /* test the bounds*/
70 "tagb", "arabic",
71 /* test bogus */
72 "asfdasd", "5464", "12235",
73 /* test the last index */
74 "zyyy", "YI",
57a6839d 75 NULL
b75a7d8f
A
76 };
77 UScriptCode expected[] ={
78 /* locales should return */
79 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
80 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
81 /* abbr should return */
82 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
83 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
84 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
85 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
86 /* names should return */
87 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
88 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
89 /* lower case names should return */
90 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
91 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
92 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
93 /* bounds */
94 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
95 /* bogus names should return invalid code */
96 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
97 USCRIPT_COMMON, USCRIPT_YI,
98 };
99
100 UErrorCode err = U_ZERO_ERROR;
101
102 const int32_t capacity = 10;
103
57a6839d 104 for( ; testNames[i]!=NULL; i++){
b75a7d8f
A
105 UScriptCode script[10]={USCRIPT_INVALID_CODE};
106 uscript_getCode(testNames[i],script,capacity, &err);
107 if( script[0] != expected[i]){
729e4ab9 108 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
b75a7d8f
A
109 script[0],expected[i],testNames[i]);
110 numErrors++;
111 }
112 }
113 if(numErrors >0 ){
114 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
115 }
116 }
117
118 {
119 UErrorCode err = U_ZERO_ERROR;
120 int32_t capacity=0;
46f4442e 121 int32_t j;
73c04bcf 122 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
b75a7d8f
A
123 UScriptCode script[10]={USCRIPT_INVALID_CODE};
124 int32_t num = uscript_getCode("ja",script,capacity, &err);
125 /* preflight */
126 if(err==U_BUFFER_OVERFLOW_ERROR){
127 err = U_ZERO_ERROR;
128 capacity = 10;
129 num = uscript_getCode("ja",script,capacity, &err);
2ca993e8 130 if(num!=UPRV_LENGTHOF(jaCode)){
46f4442e 131 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
2ca993e8 132 num, UPRV_LENGTHOF(jaCode));
46f4442e 133 }
2ca993e8 134 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
46f4442e
A
135 if(script[j]!=jaCode[j]) {
136 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
137 script[j], uscript_getName(script[j]),
138 jaCode[j], uscript_getName(jaCode[j]));
139
140 }
b75a7d8f
A
141 }
142 }else{
143 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
144 "U_BUFFER_OVERFLOW_ERROR",
145 u_errorName(err));
146 }
147
148 }
b331163b
A
149 {
150 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
151 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
152 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
153 static const UScriptCode HAN[1] = { USCRIPT_HAN };
154 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
155 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
156 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
157 UScriptCode scripts[5];
158 UErrorCode err;
159 int32_t num;
160
161 // Should work regardless of whether we have locale data for the language.
162 err = U_ZERO_ERROR;
163 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
164 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
165 err = U_ZERO_ERROR;
166 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
167 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
168
169 // Multi-script languages.
170 err = U_ZERO_ERROR;
171 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
172 assertEqualScripts("ja scripts: Kana Hira Hani",
173 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
174 err = U_ZERO_ERROR;
175 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
176 assertEqualScripts("ko scripts: Hang Hani",
177 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
178 err = U_ZERO_ERROR;
179 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
180 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
181 err = U_ZERO_ERROR;
182 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
183 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
184 err = U_ZERO_ERROR;
185 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
186 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
187
188 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
189 err = U_ZERO_ERROR;
190 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
191 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
192 }
b75a7d8f
A
193
194 {
195 UScriptCode testAbbr[]={
196 /* names should return */
197 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
198 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
199 };
200
201 const char* expectedNames[]={
202
203 /* test names */
204 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
205 "Gothic", "Greek", "Gujarati",
57a6839d 206 NULL
b75a7d8f
A
207 };
208 i=0;
2ca993e8 209 while(i<UPRV_LENGTHOF(testAbbr)){
b75a7d8f
A
210 const char* name = uscript_getName(testAbbr[i]);
211 if(name == NULL) {
212 log_data_err("Couldn't get script name\n");
213 return;
214 }
215 numErrors=0;
216 if(strcmp(expectedNames[i],name)!=0){
217 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
218 numErrors++;
219 }
220 if(numErrors > 0){
221 if(numErrors >0 ){
222 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
223 }
224 }
225 i++;
226 }
227
228 }
229
230 {
231 UScriptCode testAbbr[]={
232 /* abbr should return */
233 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
234 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
235 USCRIPT_LATIN,
236 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
237 };
238
239 const char* expectedAbbr[]={
240 /* test abbr */
241 "Hani", "Hang","Hebr","Hira",
242 "Knda","Kana","Khmr","Laoo",
243 "Latn",
244 "Mlym", "Mong",
57a6839d 245 NULL
b75a7d8f
A
246 };
247 i=0;
2ca993e8 248 while(i<UPRV_LENGTHOF(testAbbr)){
b75a7d8f
A
249 const char* name = uscript_getShortName(testAbbr[i]);
250 numErrors=0;
251 if(strcmp(expectedAbbr[i],name)!=0){
252 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
253 numErrors++;
254 }
255 if(numErrors > 0){
256 if(numErrors >0 ){
257 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
258 }
259 }
260 i++;
261 }
262
263 }
264 /* now test uscript_getScript() API */
265 {
b75a7d8f
A
266 uint32_t codepoints[] = {
267 0x0000FF9D, /* USCRIPT_KATAKANA*/
268 0x0000FFBE, /* USCRIPT_HANGUL*/
269 0x0000FFC7, /* USCRIPT_HANGUL*/
270 0x0000FFCF, /* USCRIPT_HANGUL*/
271 0x0000FFD7, /* USCRIPT_HANGUL*/
272 0x0000FFDC, /* USCRIPT_HANGUL*/
273 0x00010300, /* USCRIPT_OLD_ITALIC*/
274 0x00010330, /* USCRIPT_GOTHIC*/
275 0x0001034A, /* USCRIPT_GOTHIC*/
276 0x00010400, /* USCRIPT_DESERET*/
277 0x00010428, /* USCRIPT_DESERET*/
278 0x0001D167, /* USCRIPT_INHERITED*/
279 0x0001D17B, /* USCRIPT_INHERITED*/
280 0x0001D185, /* USCRIPT_INHERITED*/
281 0x0001D1AA, /* USCRIPT_INHERITED*/
282 0x00020000, /* USCRIPT_HAN*/
283 0x00000D02, /* USCRIPT_MALAYALAM*/
6be67b06 284 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
b75a7d8f
A
285 0x00000000, /* USCRIPT_COMMON*/
286 0x0001D169, /* USCRIPT_INHERITED*/
287 0x0001D182, /* USCRIPT_INHERITED*/
288 0x0001D18B, /* USCRIPT_INHERITED*/
289 0x0001D1AD, /* USCRIPT_INHERITED*/
b75a7d8f
A
290 };
291
292 UScriptCode expected[] = {
293 USCRIPT_KATAKANA ,
294 USCRIPT_HANGUL ,
295 USCRIPT_HANGUL ,
296 USCRIPT_HANGUL ,
297 USCRIPT_HANGUL ,
298 USCRIPT_HANGUL ,
299 USCRIPT_OLD_ITALIC,
300 USCRIPT_GOTHIC ,
301 USCRIPT_GOTHIC ,
302 USCRIPT_DESERET ,
303 USCRIPT_DESERET ,
304 USCRIPT_INHERITED,
305 USCRIPT_INHERITED,
306 USCRIPT_INHERITED,
307 USCRIPT_INHERITED,
308 USCRIPT_HAN ,
309 USCRIPT_MALAYALAM,
73c04bcf 310 USCRIPT_UNKNOWN,
b75a7d8f
A
311 USCRIPT_COMMON,
312 USCRIPT_INHERITED ,
313 USCRIPT_INHERITED ,
314 USCRIPT_INHERITED ,
315 USCRIPT_INHERITED ,
b75a7d8f
A
316 };
317 UScriptCode code = USCRIPT_INVALID_CODE;
318 UErrorCode status = U_ZERO_ERROR;
319 UBool passed = TRUE;
320
b331163b 321 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
b75a7d8f
A
322 code = uscript_getScript(codepoints[i],&status);
323 if(U_SUCCESS(status)){
324 if( code != expected[i] ||
325 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
326 ) {
327 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
328 passed = FALSE;
329 }
330 }else{
331 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
332 codepoints[i],u_errorName(status));
333 break;
334 }
b75a7d8f
A
335 }
336
337 if(passed==FALSE){
338 log_err("uscript_getScript failed.\n");
339 }
340 }
341 {
342 UScriptCode code= USCRIPT_INVALID_CODE;
343 UErrorCode status = U_ZERO_ERROR;
344 code = uscript_getScript(0x001D169,&status);
345 if(code != USCRIPT_INHERITED){
346 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
347 }
348 }
349 {
350 UScriptCode code= USCRIPT_INVALID_CODE;
351 UErrorCode status = U_ZERO_ERROR;
352 int32_t err = 0;
353
354 for(i = 0; i<=0x10ffff; i++){
355 code = uscript_getScript(i,&status);
356 if(code == USCRIPT_INVALID_CODE){
357 err++;
358 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
359 }
360 }
361 if(err>0){
362 log_err("uscript_getScript failed for %d codepoints\n", err);
363 }
364 }
365 {
366 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
367 const char* name = uscript_getName((UScriptCode)i);
368 if(name==NULL || strcmp(name,"")==0){
73c04bcf 369 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
b75a7d8f
A
370 }
371 }
372 }
729e4ab9 373
73c04bcf
A
374 {
375 /*
376 * These script codes were originally added to ICU pre-3.6, so that ICU would
377 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
378 * These script codes were added with only short names because we don't
379 * want to invent long names ourselves.
380 * Unicode 5 and later encode some of these scripts and give them long names.
381 * Whenever this happens, the long script names here need to be updated.
382 */
383 static const char* expectedLong[] = {
b331163b
A
384 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
385 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
2ca993e8 386 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
b331163b
A
387 "Javanese", "Kayah_Li", "Latf", "Latg",
388 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
389 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
4388f060 390 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
73c04bcf 391 "Zxxx", "Unknown",
2ca993e8 392 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
729e4ab9 393 "Moon", "Meetei_Mayek",
46f4442e 394 /* new in ICU 4.0 */
4388f060 395 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
b331163b
A
396 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
397 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
46f4442e 398 "Zmth", "Zsym",
729e4ab9
A
399 /* new in ICU 4.4 */
400 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
401 /* new in ICU 4.6 */
b331163b
A
402 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
403 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
404 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
4388f060 405 /* new in ICU 4.8 */
6be67b06 406 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
4388f060 407 /* new in ICU 49 */
2ca993e8 408 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
57a6839d 409 /* new in ICU 52 */
b331163b
A
410 "Caucasian_Albanian", "Mahajani",
411 /* new in ICU 54 */
f3c0d7a5
A
412 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
413 // new in ICU 58
6be67b06
A
414 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
415 // new in ICU 60
0f5d89e8
A
416 "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
417 // new in ICU 61
418 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
419 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
73c04bcf
A
420 };
421 static const char* expectedShort[] = {
b331163b
A
422 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
423 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
424 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
425 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
73c04bcf 426 "Zxxx", "Zzzz",
46f4442e
A
427 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
428 "Moon", "Mtei",
429 /* new in ICU 4.0 */
430 "Armi", "Avst", "Cakm", "Kore",
431 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
432 "Zmth", "Zsym",
729e4ab9
A
433 /* new in ICU 4.4 */
434 "Bamu", "Lisu", "Nkgb", "Sarb",
435 /* new in ICU 4.6 */
436 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
437 "Narb", "Nbat", "Palm", "Sind", "Wara",
4388f060
A
438 /* new in ICU 4.8 */
439 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
440 /* new in ICU 49 */
441 "Hluw", "Khoj", "Tirh",
57a6839d 442 /* new in ICU 52 */
b331163b
A
443 "Aghb", "Mahj",
444 /* new in ICU 54 */
f3c0d7a5
A
445 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
446 // new in ICU 58
6be67b06
A
447 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
448 // new in ICU 60
0f5d89e8
A
449 "Gonm", "Soyo", "Zanb",
450 // new in ICU 61
451 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
73c04bcf
A
452 };
453 int32_t j = 0;
b331163b 454 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
729e4ab9
A
455 log_err("need to add new script codes in cucdapi.c!\n");
456 return;
457 }
73c04bcf
A
458 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
459 const char* name = uscript_getName((UScriptCode)i);
460 if(name==NULL || strcmp(name,expectedLong[j])!=0){
461 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
462 }
463 name = uscript_getShortName((UScriptCode)i);
464 if(name==NULL || strcmp(name,expectedShort[j])!=0){
465 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
466 }
467 }
b331163b 468 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
73c04bcf
A
469 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
470 UErrorCode status = U_ZERO_ERROR;
471 int32_t len = 0;
b331163b 472 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
73c04bcf
A
473 if(U_FAILURE(status)){
474 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
475 }
476 if(len>1){
477 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
478 }
479 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
480 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
481 }
482 }
483 }
729e4ab9
A
484
485 {
486 /* test characters which have Script_Extensions */
487 UErrorCode errorCode=U_ZERO_ERROR;
488 if(!(
489 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
490 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
491 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
492 U_FAILURE(errorCode)
493 ) {
494 log_err("uscript_getScript(character with Script_Extensions) failed\n");
495 }
496 }
497}
498
499void TestHasScript() {
500 if(!(
501 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
502 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
503 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
504 !uscript_hasScript(0x063f, USCRIPT_THAANA))
505 ) {
506 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
507 }
508 if(!(
4388f060 509 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
729e4ab9
A
510 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
511 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
512 !uscript_hasScript(0x0640, USCRIPT_THAANA))
513 ) {
514 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
515 }
516 if(!(
4388f060 517 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
729e4ab9
A
518 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
519 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
520 !uscript_hasScript(0x0650, USCRIPT_THAANA))
521 ) {
522 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
523 }
524 if(!(
4388f060 525 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
729e4ab9
A
526 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
527 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
528 uscript_hasScript(0x0660, USCRIPT_THAANA))
529 ) {
530 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
531 }
532 if(!(
533 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
534 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
535 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
536 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
537 ) {
538 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
539 }
4388f060
A
540 if(uscript_hasScript(0x0640, 0xaffe)) {
541 /* An unguarded implementation might go into an infinite loop. */
542 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
543 }
729e4ab9
A
544}
545
f3c0d7a5 546static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
b331163b
A
547 UBool contain=FALSE;
548 int32_t prev=-1, i;
549 for(i=0; i<length; ++i) {
550 int32_t s=scripts[i];
551 if(s<=prev) {
552 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
553 }
554 if(s==script) { contain=TRUE; }
555 }
556 return contain;
557}
558
729e4ab9
A
559void TestGetScriptExtensions() {
560 UScriptCode scripts[20];
561 int32_t length;
562 UErrorCode errorCode;
563
564 /* errors and overflows */
565 errorCode=U_PARSE_ERROR;
b331163b 566 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9 567 if(errorCode!=U_PARSE_ERROR) {
4388f060 568 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
729e4ab9
A
569 u_errorName(errorCode));
570 }
571 errorCode=U_ZERO_ERROR;
b331163b 572 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9 573 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
4388f060 574 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
729e4ab9
A
575 u_errorName(errorCode));
576 }
577 errorCode=U_ZERO_ERROR;
578 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
579 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
4388f060 580 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
729e4ab9
A
581 u_errorName(errorCode));
582 }
583 errorCode=U_ZERO_ERROR;
584 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
b331163b
A
585 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
586 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
729e4ab9
A
587 (int)length, u_errorName(errorCode));
588 }
589 errorCode=U_ZERO_ERROR;
590 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
b331163b
A
591 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
592 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
4388f060
A
593 (int)length, u_errorName(errorCode));
594 }
595 /* U+063F has only a Script code, no Script_Extensions. */
596 errorCode=U_ZERO_ERROR;
597 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
598 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
599 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
600 (int)length, u_errorName(errorCode));
601 }
602
603 /* invalid code points */
604 errorCode=U_ZERO_ERROR;
b331163b 605 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
4388f060
A
606 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
607 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
608 (int)length, u_errorName(errorCode));
609 }
610 errorCode=U_ZERO_ERROR;
b331163b 611 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
4388f060
A
612 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
613 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
729e4ab9
A
614 (int)length, u_errorName(errorCode));
615 }
616
617 /* normal usage */
618 errorCode=U_ZERO_ERROR;
4388f060
A
619 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
620 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
621 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
729e4ab9
A
622 (int)length, u_errorName(errorCode));
623 }
4388f060 624 errorCode=U_ZERO_ERROR;
b331163b
A
625 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
626 if(U_FAILURE(errorCode) || length<3 ||
627 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
628 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
629 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
729e4ab9
A
630 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
631 (int)length, u_errorName(errorCode));
632 }
4388f060 633 errorCode=U_ZERO_ERROR;
b331163b 634 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9
A
635 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
636 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
637 (int)length, u_errorName(errorCode));
638 }
4388f060 639 errorCode=U_ZERO_ERROR;
b331163b 640 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9
A
641 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
642 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
643 (int)length, u_errorName(errorCode));
644 }
46f4442e
A
645}
646
51004dcb
A
647void TestScriptMetadataAPI() {
648 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
649 UErrorCode errorCode=U_ZERO_ERROR;
650 UChar sample[8];
651
b331163b 652 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
51004dcb
A
653 U_FAILURE(errorCode) ||
654 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
655 sample[1]!=0) {
656 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
657 }
658 sample[0]=0xfffe;
659 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
660 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
661 sample[0]!=0xfffe) {
662 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
663 }
664 errorCode=U_ZERO_ERROR;
b331163b 665 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
51004dcb
A
666 U_FAILURE(errorCode) ||
667 sample[0]!=0) {
668 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
669 }
670 sample[0]=0xfffe;
671 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
672 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
673 sample[0]!=0xfffe) {
674 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
675 }
676
677 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
6be67b06
A
678 // Unicode 10 gives up on "aspirational".
679 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
51004dcb
A
680 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
681 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
682 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
683 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
684 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
685 log_err("uscript_getUsage() failed\n");
686 }
687
688 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
689 uscript_isRightToLeft(USCRIPT_CIRTH) ||
690 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
691 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
692 log_err("uscript_isRightToLeft() failed\n");
693 }
694
695 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
696 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
697 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
698 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
699 log_err("uscript_breaksBetweenLetters() failed\n");
700 }
701
702 if(uscript_isCased(USCRIPT_CIRTH) ||
703 uscript_isCased(USCRIPT_HAN) ||
704 !uscript_isCased(USCRIPT_LATIN) ||
705 !uscript_isCased(USCRIPT_GREEK)) {
706 log_err("uscript_isCased() failed\n");
707 }
708}
709
46f4442e
A
710void TestBinaryValues() {
711 /*
712 * Unicode 5.1 explicitly defines binary property value aliases.
713 * Verify that they are all recognized.
714 */
715 static const char *const falseValues[]={ "N", "No", "F", "False" };
716 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
717 int32_t i;
b331163b 718 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
46f4442e 719 if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
729e4ab9 720 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
46f4442e
A
721 }
722 }
b331163b 723 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
46f4442e 724 if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
729e4ab9 725 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
46f4442e
A
726 }
727 }
728}