]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f | 3 | /******************************************************************** |
2ca993e8 | 4 | * Copyright (c) 1997-2016, International Business Machines |
46f4442e | 5 | * Corporation and others. All Rights Reserved. |
b75a7d8f A |
6 | ********************************************************************/ |
7 | ||
8 | #include <string.h> | |
9 | #include "unicode/utypes.h" | |
10 | #include "unicode/uscript.h" | |
11 | #include "unicode/uchar.h" | |
12 | #include "cintltst.h" | |
374ca955 | 13 | #include "cucdapi.h" |
b331163b | 14 | #include "cmemory.h" |
b75a7d8f | 15 | |
b331163b A |
16 | static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) { |
17 | int32_t i; | |
18 | if(length == 0) { | |
19 | strcpy(s, "(no scripts)"); | |
20 | return; | |
21 | } | |
22 | s[0] = 0; | |
23 | for(i = 0; i < length; ++i) { | |
24 | if(i > 0) { | |
25 | strcat(s, " "); | |
26 | } | |
27 | strcat(s, uscript_getShortName(scripts[i])); | |
28 | } | |
29 | } | |
30 | ||
31 | static void assertEqualScripts(const char *msg, | |
32 | const UScriptCode scripts1[], int32_t length1, | |
33 | const UScriptCode scripts2[], int32_t length2, | |
34 | UErrorCode errorCode) { | |
35 | char s1[80]; | |
36 | char s2[80]; | |
37 | if(U_FAILURE(errorCode)) { | |
38 | log_err("Failed: %s - %s\n", msg, u_errorName(errorCode)); | |
39 | return; | |
40 | } | |
41 | scriptsToString(scripts1, length1, s1); | |
42 | scriptsToString(scripts2, length2, s2); | |
43 | if(0!=strcmp(s1, s2)) { | |
44 | log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2); | |
45 | } | |
46 | } | |
73c04bcf | 47 | |
b75a7d8f A |
48 | void TestUScriptCodeAPI(){ |
49 | int i =0; | |
50 | int numErrors =0; | |
51 | { | |
52 | const char* testNames[]={ | |
53 | /* test locale */ | |
54 | "en", "en_US", "sr", "ta" , "te_IN", | |
55 | "hi", "he", "ar", | |
56 | /* test abbr */ | |
57 | "Hani", "Hang","Hebr","Hira", | |
58 | "Knda","Kana","Khmr","Lao", | |
59 | "Latn",/*"Latf","Latg",*/ | |
60 | "Mlym", "Mong", | |
61 | ||
62 | /* test names */ | |
63 | "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN", | |
64 | "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED", | |
65 | /* test lower case names */ | |
66 | "malayalam", "mongolian", "myanmar", "ogham", "old-italic", | |
67 | "oriya", "runic", "sinhala", "syriac","tamil", | |
68 | "telugu", "thaana", "thai", "tibetan", | |
69 | /* test the bounds*/ | |
70 | "tagb", "arabic", | |
71 | /* test bogus */ | |
72 | "asfdasd", "5464", "12235", | |
73 | /* test the last index */ | |
74 | "zyyy", "YI", | |
57a6839d | 75 | NULL |
b75a7d8f A |
76 | }; |
77 | UScriptCode expected[] ={ | |
78 | /* locales should return */ | |
79 | USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU, | |
80 | USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC, | |
81 | /* abbr should return */ | |
82 | USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA, | |
83 | USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO, | |
84 | USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/ | |
85 | USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, | |
86 | /* names should return */ | |
87 | USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN, | |
88 | USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED, | |
89 | /* lower case names should return */ | |
90 | USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC, | |
91 | USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL, | |
92 | USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN, | |
93 | /* bounds */ | |
94 | USCRIPT_TAGBANWA, USCRIPT_ARABIC, | |
95 | /* bogus names should return invalid code */ | |
96 | USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, | |
97 | USCRIPT_COMMON, USCRIPT_YI, | |
98 | }; | |
99 | ||
100 | UErrorCode err = U_ZERO_ERROR; | |
101 | ||
102 | const int32_t capacity = 10; | |
103 | ||
57a6839d | 104 | for( ; testNames[i]!=NULL; i++){ |
b75a7d8f A |
105 | UScriptCode script[10]={USCRIPT_INVALID_CODE}; |
106 | uscript_getCode(testNames[i],script,capacity, &err); | |
107 | if( script[0] != expected[i]){ | |
729e4ab9 | 108 | log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n", |
b75a7d8f A |
109 | script[0],expected[i],testNames[i]); |
110 | numErrors++; | |
111 | } | |
112 | } | |
113 | if(numErrors >0 ){ | |
114 | log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors); | |
115 | } | |
116 | } | |
117 | ||
118 | { | |
119 | UErrorCode err = U_ZERO_ERROR; | |
120 | int32_t capacity=0; | |
46f4442e | 121 | int32_t j; |
73c04bcf | 122 | UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; |
b75a7d8f A |
123 | UScriptCode script[10]={USCRIPT_INVALID_CODE}; |
124 | int32_t num = uscript_getCode("ja",script,capacity, &err); | |
125 | /* preflight */ | |
126 | if(err==U_BUFFER_OVERFLOW_ERROR){ | |
127 | err = U_ZERO_ERROR; | |
128 | capacity = 10; | |
129 | num = uscript_getCode("ja",script,capacity, &err); | |
2ca993e8 | 130 | if(num!=UPRV_LENGTHOF(jaCode)){ |
46f4442e | 131 | log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n", |
2ca993e8 | 132 | num, UPRV_LENGTHOF(jaCode)); |
46f4442e | 133 | } |
2ca993e8 | 134 | for(j=0;j<UPRV_LENGTHOF(jaCode);j++) { |
46f4442e A |
135 | if(script[j]!=jaCode[j]) { |
136 | log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j, | |
137 | script[j], uscript_getName(script[j]), | |
138 | jaCode[j], uscript_getName(jaCode[j])); | |
139 | ||
140 | } | |
b75a7d8f A |
141 | } |
142 | }else{ | |
143 | log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n", | |
144 | "U_BUFFER_OVERFLOW_ERROR", | |
145 | u_errorName(err)); | |
146 | } | |
147 | ||
148 | } | |
b331163b A |
149 | { |
150 | static const UScriptCode LATIN[1] = { USCRIPT_LATIN }; | |
151 | static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC }; | |
152 | static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI }; | |
153 | static const UScriptCode HAN[1] = { USCRIPT_HAN }; | |
154 | static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; | |
155 | static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; | |
156 | static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; | |
157 | UScriptCode scripts[5]; | |
158 | UErrorCode err; | |
159 | int32_t num; | |
160 | ||
161 | // Should work regardless of whether we have locale data for the language. | |
162 | err = U_ZERO_ERROR; | |
163 | num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err); | |
164 | assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik | |
165 | err = U_ZERO_ERROR; | |
166 | num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err); | |
167 | assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa | |
168 | ||
169 | // Multi-script languages. | |
170 | err = U_ZERO_ERROR; | |
171 | num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err); | |
172 | assertEqualScripts("ja scripts: Kana Hira Hani", | |
173 | JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err); | |
174 | err = U_ZERO_ERROR; | |
175 | num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err); | |
176 | assertEqualScripts("ko scripts: Hang Hani", | |
177 | KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err); | |
178 | err = U_ZERO_ERROR; | |
179 | num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err); | |
180 | assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err); | |
181 | err = U_ZERO_ERROR; | |
182 | num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err); | |
183 | assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err); | |
184 | err = U_ZERO_ERROR; | |
185 | num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err); | |
186 | assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err); | |
187 | ||
188 | // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro). | |
189 | err = U_ZERO_ERROR; | |
190 | num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err); | |
191 | assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err); | |
192 | } | |
b75a7d8f A |
193 | |
194 | { | |
195 | UScriptCode testAbbr[]={ | |
196 | /* names should return */ | |
197 | USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN, | |
198 | USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, | |
199 | }; | |
200 | ||
201 | const char* expectedNames[]={ | |
202 | ||
203 | /* test names */ | |
204 | "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian", | |
205 | "Gothic", "Greek", "Gujarati", | |
57a6839d | 206 | NULL |
b75a7d8f A |
207 | }; |
208 | i=0; | |
2ca993e8 | 209 | while(i<UPRV_LENGTHOF(testAbbr)){ |
b75a7d8f A |
210 | const char* name = uscript_getName(testAbbr[i]); |
211 | if(name == NULL) { | |
212 | log_data_err("Couldn't get script name\n"); | |
213 | return; | |
214 | } | |
215 | numErrors=0; | |
216 | if(strcmp(expectedNames[i],name)!=0){ | |
217 | log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]); | |
218 | numErrors++; | |
219 | } | |
220 | if(numErrors > 0){ | |
221 | if(numErrors >0 ){ | |
222 | log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors); | |
223 | } | |
224 | } | |
225 | i++; | |
226 | } | |
227 | ||
228 | } | |
229 | ||
230 | { | |
231 | UScriptCode testAbbr[]={ | |
232 | /* abbr should return */ | |
233 | USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA, | |
234 | USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO, | |
235 | USCRIPT_LATIN, | |
236 | USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, | |
237 | }; | |
238 | ||
239 | const char* expectedAbbr[]={ | |
240 | /* test abbr */ | |
241 | "Hani", "Hang","Hebr","Hira", | |
242 | "Knda","Kana","Khmr","Laoo", | |
243 | "Latn", | |
244 | "Mlym", "Mong", | |
57a6839d | 245 | NULL |
b75a7d8f A |
246 | }; |
247 | i=0; | |
2ca993e8 | 248 | while(i<UPRV_LENGTHOF(testAbbr)){ |
b75a7d8f A |
249 | const char* name = uscript_getShortName(testAbbr[i]); |
250 | numErrors=0; | |
251 | if(strcmp(expectedAbbr[i],name)!=0){ | |
252 | log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]); | |
253 | numErrors++; | |
254 | } | |
255 | if(numErrors > 0){ | |
256 | if(numErrors >0 ){ | |
257 | log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors); | |
258 | } | |
259 | } | |
260 | i++; | |
261 | } | |
262 | ||
263 | } | |
264 | /* now test uscript_getScript() API */ | |
265 | { | |
b75a7d8f A |
266 | uint32_t codepoints[] = { |
267 | 0x0000FF9D, /* USCRIPT_KATAKANA*/ | |
268 | 0x0000FFBE, /* USCRIPT_HANGUL*/ | |
269 | 0x0000FFC7, /* USCRIPT_HANGUL*/ | |
270 | 0x0000FFCF, /* USCRIPT_HANGUL*/ | |
271 | 0x0000FFD7, /* USCRIPT_HANGUL*/ | |
272 | 0x0000FFDC, /* USCRIPT_HANGUL*/ | |
273 | 0x00010300, /* USCRIPT_OLD_ITALIC*/ | |
274 | 0x00010330, /* USCRIPT_GOTHIC*/ | |
275 | 0x0001034A, /* USCRIPT_GOTHIC*/ | |
276 | 0x00010400, /* USCRIPT_DESERET*/ | |
277 | 0x00010428, /* USCRIPT_DESERET*/ | |
278 | 0x0001D167, /* USCRIPT_INHERITED*/ | |
279 | 0x0001D17B, /* USCRIPT_INHERITED*/ | |
280 | 0x0001D185, /* USCRIPT_INHERITED*/ | |
281 | 0x0001D1AA, /* USCRIPT_INHERITED*/ | |
282 | 0x00020000, /* USCRIPT_HAN*/ | |
283 | 0x00000D02, /* USCRIPT_MALAYALAM*/ | |
6be67b06 | 284 | 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */ |
b75a7d8f A |
285 | 0x00000000, /* USCRIPT_COMMON*/ |
286 | 0x0001D169, /* USCRIPT_INHERITED*/ | |
287 | 0x0001D182, /* USCRIPT_INHERITED*/ | |
288 | 0x0001D18B, /* USCRIPT_INHERITED*/ | |
289 | 0x0001D1AD, /* USCRIPT_INHERITED*/ | |
b75a7d8f A |
290 | }; |
291 | ||
292 | UScriptCode expected[] = { | |
293 | USCRIPT_KATAKANA , | |
294 | USCRIPT_HANGUL , | |
295 | USCRIPT_HANGUL , | |
296 | USCRIPT_HANGUL , | |
297 | USCRIPT_HANGUL , | |
298 | USCRIPT_HANGUL , | |
299 | USCRIPT_OLD_ITALIC, | |
300 | USCRIPT_GOTHIC , | |
301 | USCRIPT_GOTHIC , | |
302 | USCRIPT_DESERET , | |
303 | USCRIPT_DESERET , | |
304 | USCRIPT_INHERITED, | |
305 | USCRIPT_INHERITED, | |
306 | USCRIPT_INHERITED, | |
307 | USCRIPT_INHERITED, | |
308 | USCRIPT_HAN , | |
309 | USCRIPT_MALAYALAM, | |
73c04bcf | 310 | USCRIPT_UNKNOWN, |
b75a7d8f A |
311 | USCRIPT_COMMON, |
312 | USCRIPT_INHERITED , | |
313 | USCRIPT_INHERITED , | |
314 | USCRIPT_INHERITED , | |
315 | USCRIPT_INHERITED , | |
b75a7d8f A |
316 | }; |
317 | UScriptCode code = USCRIPT_INVALID_CODE; | |
318 | UErrorCode status = U_ZERO_ERROR; | |
319 | UBool passed = TRUE; | |
320 | ||
b331163b | 321 | for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){ |
b75a7d8f A |
322 | code = uscript_getScript(codepoints[i],&status); |
323 | if(U_SUCCESS(status)){ | |
324 | if( code != expected[i] || | |
325 | code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT) | |
326 | ) { | |
327 | log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]); | |
328 | passed = FALSE; | |
329 | } | |
330 | }else{ | |
331 | log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n", | |
332 | codepoints[i],u_errorName(status)); | |
333 | break; | |
334 | } | |
b75a7d8f A |
335 | } |
336 | ||
337 | if(passed==FALSE){ | |
338 | log_err("uscript_getScript failed.\n"); | |
339 | } | |
340 | } | |
341 | { | |
342 | UScriptCode code= USCRIPT_INVALID_CODE; | |
343 | UErrorCode status = U_ZERO_ERROR; | |
344 | code = uscript_getScript(0x001D169,&status); | |
345 | if(code != USCRIPT_INHERITED){ | |
346 | log_err("\\U001D169 is not contained in USCRIPT_INHERITED"); | |
347 | } | |
348 | } | |
349 | { | |
350 | UScriptCode code= USCRIPT_INVALID_CODE; | |
351 | UErrorCode status = U_ZERO_ERROR; | |
352 | int32_t err = 0; | |
353 | ||
354 | for(i = 0; i<=0x10ffff; i++){ | |
355 | code = uscript_getScript(i,&status); | |
356 | if(code == USCRIPT_INVALID_CODE){ | |
357 | err++; | |
358 | log_err("uscript_getScript for codepoint \\U%08X failed.\n", i); | |
359 | } | |
360 | } | |
361 | if(err>0){ | |
362 | log_err("uscript_getScript failed for %d codepoints\n", err); | |
363 | } | |
364 | } | |
365 | { | |
366 | for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){ | |
367 | const char* name = uscript_getName((UScriptCode)i); | |
368 | if(name==NULL || strcmp(name,"")==0){ | |
73c04bcf | 369 | log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i); |
b75a7d8f A |
370 | } |
371 | } | |
372 | } | |
729e4ab9 | 373 | |
73c04bcf A |
374 | { |
375 | /* | |
376 | * These script codes were originally added to ICU pre-3.6, so that ICU would | |
377 | * have all ISO 15924 script codes. ICU was then based on Unicode 4.1. | |
378 | * These script codes were added with only short names because we don't | |
379 | * want to invent long names ourselves. | |
380 | * Unicode 5 and later encode some of these scripts and give them long names. | |
381 | * Whenever this happens, the long script names here need to be updated. | |
382 | */ | |
383 | static const char* expectedLong[] = { | |
b331163b A |
384 | "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", |
385 | "Egyd", "Egyh", "Egyptian_Hieroglyphs", | |
2ca993e8 | 386 | "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds", |
b331163b A |
387 | "Javanese", "Kayah_Li", "Latf", "Latg", |
388 | "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs", | |
389 | "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician", | |
4388f060 | 390 | "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", |
73c04bcf | 391 | "Zxxx", "Unknown", |
2ca993e8 | 392 | "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese", |
729e4ab9 | 393 | "Moon", "Meetei_Mayek", |
46f4442e | 394 | /* new in ICU 4.0 */ |
4388f060 | 395 | "Imperial_Aramaic", "Avestan", "Chakma", "Kore", |
b331163b A |
396 | "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv", |
397 | "Inscriptional_Parthian", "Samaritan", "Tai_Viet", | |
46f4442e | 398 | "Zmth", "Zsym", |
729e4ab9 A |
399 | /* new in ICU 4.4 */ |
400 | "Bamum", "Lisu", "Nkgb", "Old_South_Arabian", | |
401 | /* new in ICU 4.6 */ | |
b331163b A |
402 | "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel", |
403 | "Loma", "Mende_Kikakui", "Meroitic_Cursive", | |
404 | "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi", | |
4388f060 | 405 | /* new in ICU 4.8 */ |
6be67b06 | 406 | "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole", |
4388f060 | 407 | /* new in ICU 49 */ |
2ca993e8 | 408 | "Anatolian_Hieroglyphs", "Khojki", "Tirhuta", |
57a6839d | 409 | /* new in ICU 52 */ |
b331163b A |
410 | "Caucasian_Albanian", "Mahajani", |
411 | /* new in ICU 54 */ | |
f3c0d7a5 A |
412 | "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham", |
413 | // new in ICU 58 | |
6be67b06 A |
414 | "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye", |
415 | // new in ICU 60 | |
0f5d89e8 A |
416 | "Masaram_Gondi", "Soyombo", "Zanabazar_Square", |
417 | // new in ICU 61 | |
418 | "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin", | |
419 | "Hanifi_Rohingya", "Sogdian", "Old_Sogdian", | |
73c04bcf A |
420 | }; |
421 | static const char* expectedShort[] = { | |
b331163b A |
422 | "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", |
423 | "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg", | |
424 | "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx", | |
425 | "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux", | |
73c04bcf | 426 | "Zxxx", "Zzzz", |
46f4442e A |
427 | "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund", |
428 | "Moon", "Mtei", | |
429 | /* new in ICU 4.0 */ | |
430 | "Armi", "Avst", "Cakm", "Kore", | |
431 | "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt", | |
432 | "Zmth", "Zsym", | |
729e4ab9 A |
433 | /* new in ICU 4.4 */ |
434 | "Bamu", "Lisu", "Nkgb", "Sarb", | |
435 | /* new in ICU 4.6 */ | |
436 | "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc", | |
437 | "Narb", "Nbat", "Palm", "Sind", "Wara", | |
4388f060 A |
438 | /* new in ICU 4.8 */ |
439 | "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole", | |
440 | /* new in ICU 49 */ | |
441 | "Hluw", "Khoj", "Tirh", | |
57a6839d | 442 | /* new in ICU 52 */ |
b331163b A |
443 | "Aghb", "Mahj", |
444 | /* new in ICU 54 */ | |
f3c0d7a5 A |
445 | "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd", |
446 | // new in ICU 58 | |
6be67b06 A |
447 | "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye", |
448 | // new in ICU 60 | |
0f5d89e8 A |
449 | "Gonm", "Soyo", "Zanb", |
450 | // new in ICU 61 | |
451 | "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo", | |
73c04bcf A |
452 | }; |
453 | int32_t j = 0; | |
b331163b | 454 | if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) { |
729e4ab9 A |
455 | log_err("need to add new script codes in cucdapi.c!\n"); |
456 | return; | |
457 | } | |
73c04bcf A |
458 | for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){ |
459 | const char* name = uscript_getName((UScriptCode)i); | |
460 | if(name==NULL || strcmp(name,expectedLong[j])!=0){ | |
461 | log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]); | |
462 | } | |
463 | name = uscript_getShortName((UScriptCode)i); | |
464 | if(name==NULL || strcmp(name,expectedShort[j])!=0){ | |
465 | log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]); | |
466 | } | |
467 | } | |
b331163b | 468 | for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){ |
73c04bcf A |
469 | UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE}; |
470 | UErrorCode status = U_ZERO_ERROR; | |
471 | int32_t len = 0; | |
b331163b | 472 | len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status); |
73c04bcf A |
473 | if(U_FAILURE(status)){ |
474 | log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status)); | |
475 | } | |
476 | if(len>1){ | |
477 | log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len); | |
478 | } | |
479 | if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){ | |
480 | log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] ); | |
481 | } | |
482 | } | |
483 | } | |
729e4ab9 A |
484 | |
485 | { | |
486 | /* test characters which have Script_Extensions */ | |
487 | UErrorCode errorCode=U_ZERO_ERROR; | |
488 | if(!( | |
489 | USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) && | |
490 | USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) && | |
491 | USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) || | |
492 | U_FAILURE(errorCode) | |
493 | ) { | |
494 | log_err("uscript_getScript(character with Script_Extensions) failed\n"); | |
495 | } | |
496 | } | |
497 | } | |
498 | ||
499 | void TestHasScript() { | |
500 | if(!( | |
501 | !uscript_hasScript(0x063f, USCRIPT_COMMON) && | |
502 | uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */ | |
503 | !uscript_hasScript(0x063f, USCRIPT_SYRIAC) && | |
504 | !uscript_hasScript(0x063f, USCRIPT_THAANA)) | |
505 | ) { | |
506 | log_err("uscript_hasScript(U+063F, ...) is wrong\n"); | |
507 | } | |
508 | if(!( | |
4388f060 | 509 | !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */ |
729e4ab9 A |
510 | uscript_hasScript(0x0640, USCRIPT_ARABIC) && |
511 | uscript_hasScript(0x0640, USCRIPT_SYRIAC) && | |
512 | !uscript_hasScript(0x0640, USCRIPT_THAANA)) | |
513 | ) { | |
514 | log_err("uscript_hasScript(U+0640, ...) is wrong\n"); | |
515 | } | |
516 | if(!( | |
4388f060 | 517 | !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */ |
729e4ab9 A |
518 | uscript_hasScript(0x0650, USCRIPT_ARABIC) && |
519 | uscript_hasScript(0x0650, USCRIPT_SYRIAC) && | |
520 | !uscript_hasScript(0x0650, USCRIPT_THAANA)) | |
521 | ) { | |
522 | log_err("uscript_hasScript(U+0650, ...) is wrong\n"); | |
523 | } | |
524 | if(!( | |
4388f060 | 525 | !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */ |
729e4ab9 A |
526 | uscript_hasScript(0x0660, USCRIPT_ARABIC) && |
527 | !uscript_hasScript(0x0660, USCRIPT_SYRIAC) && | |
528 | uscript_hasScript(0x0660, USCRIPT_THAANA)) | |
529 | ) { | |
530 | log_err("uscript_hasScript(U+0660, ...) is wrong\n"); | |
531 | } | |
532 | if(!( | |
533 | !uscript_hasScript(0xfdf2, USCRIPT_COMMON) && | |
534 | uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */ | |
535 | !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) && | |
536 | uscript_hasScript(0xfdf2, USCRIPT_THAANA)) | |
537 | ) { | |
538 | log_err("uscript_hasScript(U+FDF2, ...) is wrong\n"); | |
539 | } | |
4388f060 A |
540 | if(uscript_hasScript(0x0640, 0xaffe)) { |
541 | /* An unguarded implementation might go into an infinite loop. */ | |
542 | log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n"); | |
543 | } | |
729e4ab9 A |
544 | } |
545 | ||
f3c0d7a5 | 546 | static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) { |
b331163b A |
547 | UBool contain=FALSE; |
548 | int32_t prev=-1, i; | |
549 | for(i=0; i<length; ++i) { | |
550 | int32_t s=scripts[i]; | |
551 | if(s<=prev) { | |
552 | log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s); | |
553 | } | |
554 | if(s==script) { contain=TRUE; } | |
555 | } | |
556 | return contain; | |
557 | } | |
558 | ||
729e4ab9 A |
559 | void TestGetScriptExtensions() { |
560 | UScriptCode scripts[20]; | |
561 | int32_t length; | |
562 | UErrorCode errorCode; | |
563 | ||
564 | /* errors and overflows */ | |
565 | errorCode=U_PARSE_ERROR; | |
b331163b | 566 | length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
729e4ab9 | 567 | if(errorCode!=U_PARSE_ERROR) { |
4388f060 | 568 | log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n", |
729e4ab9 A |
569 | u_errorName(errorCode)); |
570 | } | |
571 | errorCode=U_ZERO_ERROR; | |
b331163b | 572 | length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode); |
729e4ab9 | 573 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { |
4388f060 | 574 | log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n", |
729e4ab9 A |
575 | u_errorName(errorCode)); |
576 | } | |
577 | errorCode=U_ZERO_ERROR; | |
578 | length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode); | |
579 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
4388f060 | 580 | log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n", |
729e4ab9 A |
581 | u_errorName(errorCode)); |
582 | } | |
583 | errorCode=U_ZERO_ERROR; | |
584 | length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode); | |
b331163b A |
585 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) { |
586 | log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n", | |
729e4ab9 A |
587 | (int)length, u_errorName(errorCode)); |
588 | } | |
589 | errorCode=U_ZERO_ERROR; | |
590 | length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode); | |
b331163b A |
591 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) { |
592 | log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n", | |
4388f060 A |
593 | (int)length, u_errorName(errorCode)); |
594 | } | |
595 | /* U+063F has only a Script code, no Script_Extensions. */ | |
596 | errorCode=U_ZERO_ERROR; | |
597 | length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode); | |
598 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) { | |
599 | log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n", | |
600 | (int)length, u_errorName(errorCode)); | |
601 | } | |
602 | ||
603 | /* invalid code points */ | |
604 | errorCode=U_ZERO_ERROR; | |
b331163b | 605 | length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
4388f060 A |
606 | if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) { |
607 | log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n", | |
608 | (int)length, u_errorName(errorCode)); | |
609 | } | |
610 | errorCode=U_ZERO_ERROR; | |
b331163b | 611 | length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
4388f060 A |
612 | if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) { |
613 | log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n", | |
729e4ab9 A |
614 | (int)length, u_errorName(errorCode)); |
615 | } | |
616 | ||
617 | /* normal usage */ | |
618 | errorCode=U_ZERO_ERROR; | |
4388f060 A |
619 | length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode); |
620 | if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) { | |
621 | log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n", | |
729e4ab9 A |
622 | (int)length, u_errorName(errorCode)); |
623 | } | |
4388f060 | 624 | errorCode=U_ZERO_ERROR; |
b331163b A |
625 | length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
626 | if(U_FAILURE(errorCode) || length<3 || | |
627 | !scriptsContain(scripts, length, USCRIPT_ARABIC) || | |
628 | !scriptsContain(scripts, length, USCRIPT_SYRIAC) || | |
629 | !scriptsContain(scripts, length, USCRIPT_MANDAIC)) { | |
729e4ab9 A |
630 | log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n", |
631 | (int)length, u_errorName(errorCode)); | |
632 | } | |
4388f060 | 633 | errorCode=U_ZERO_ERROR; |
b331163b | 634 | length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
729e4ab9 A |
635 | if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) { |
636 | log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n", | |
637 | (int)length, u_errorName(errorCode)); | |
638 | } | |
4388f060 | 639 | errorCode=U_ZERO_ERROR; |
b331163b | 640 | length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode); |
729e4ab9 A |
641 | if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) { |
642 | log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n", | |
643 | (int)length, u_errorName(errorCode)); | |
644 | } | |
46f4442e A |
645 | } |
646 | ||
51004dcb A |
647 | void TestScriptMetadataAPI() { |
648 | /* API & code coverage. More testing in intltest/ucdtest.cpp. */ | |
649 | UErrorCode errorCode=U_ZERO_ERROR; | |
650 | UChar sample[8]; | |
651 | ||
b331163b | 652 | if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 || |
51004dcb A |
653 | U_FAILURE(errorCode) || |
654 | uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN || | |
655 | sample[1]!=0) { | |
656 | log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode)); | |
657 | } | |
658 | sample[0]=0xfffe; | |
659 | if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 || | |
660 | errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
661 | sample[0]!=0xfffe) { | |
662 | log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode)); | |
663 | } | |
664 | errorCode=U_ZERO_ERROR; | |
b331163b | 665 | if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 || |
51004dcb A |
666 | U_FAILURE(errorCode) || |
667 | sample[0]!=0) { | |
668 | log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode)); | |
669 | } | |
670 | sample[0]=0xfffe; | |
671 | if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 || | |
672 | errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
673 | sample[0]!=0xfffe) { | |
674 | log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode)); | |
675 | } | |
676 | ||
677 | if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED || | |
6be67b06 A |
678 | // Unicode 10 gives up on "aspirational". |
679 | uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE || | |
51004dcb A |
680 | uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE || |
681 | uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED || | |
682 | uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED || | |
683 | uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED || | |
684 | uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) { | |
685 | log_err("uscript_getUsage() failed\n"); | |
686 | } | |
687 | ||
688 | if(uscript_isRightToLeft(USCRIPT_LATIN) || | |
689 | uscript_isRightToLeft(USCRIPT_CIRTH) || | |
690 | !uscript_isRightToLeft(USCRIPT_ARABIC) || | |
691 | !uscript_isRightToLeft(USCRIPT_HEBREW)) { | |
692 | log_err("uscript_isRightToLeft() failed\n"); | |
693 | } | |
694 | ||
695 | if(uscript_breaksBetweenLetters(USCRIPT_LATIN) || | |
696 | uscript_breaksBetweenLetters(USCRIPT_CIRTH) || | |
697 | !uscript_breaksBetweenLetters(USCRIPT_HAN) || | |
698 | !uscript_breaksBetweenLetters(USCRIPT_THAI)) { | |
699 | log_err("uscript_breaksBetweenLetters() failed\n"); | |
700 | } | |
701 | ||
702 | if(uscript_isCased(USCRIPT_CIRTH) || | |
703 | uscript_isCased(USCRIPT_HAN) || | |
704 | !uscript_isCased(USCRIPT_LATIN) || | |
705 | !uscript_isCased(USCRIPT_GREEK)) { | |
706 | log_err("uscript_isCased() failed\n"); | |
707 | } | |
708 | } | |
709 | ||
46f4442e A |
710 | void TestBinaryValues() { |
711 | /* | |
712 | * Unicode 5.1 explicitly defines binary property value aliases. | |
713 | * Verify that they are all recognized. | |
714 | */ | |
715 | static const char *const falseValues[]={ "N", "No", "F", "False" }; | |
716 | static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; | |
717 | int32_t i; | |
b331163b | 718 | for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) { |
46f4442e | 719 | if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) { |
729e4ab9 | 720 | log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]); |
46f4442e A |
721 | } |
722 | } | |
b331163b | 723 | for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) { |
46f4442e | 724 | if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) { |
729e4ab9 | 725 | log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]); |
46f4442e A |
726 | } |
727 | } | |
728 | } |