]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cucdapi.c
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdapi.c
CommitLineData
/********************************************************************
 * Copyright (c) 1997-2016, International Business Machines
 * Corporation and others. All Rights Reserved.
 ********************************************************************/
5
6#include <string.h>
7#include "unicode/utypes.h"
8#include "unicode/uscript.h"
9#include "unicode/uchar.h"
10#include "cintltst.h"
374ca955 11#include "cucdapi.h"
b331163b 12#include "cmemory.h"
b75a7d8f 13
b331163b
A
14static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
15 int32_t i;
16 if(length == 0) {
17 strcpy(s, "(no scripts)");
18 return;
19 }
20 s[0] = 0;
21 for(i = 0; i < length; ++i) {
22 if(i > 0) {
23 strcat(s, " ");
24 }
25 strcat(s, uscript_getShortName(scripts[i]));
26 }
27}
28
29static void assertEqualScripts(const char *msg,
30 const UScriptCode scripts1[], int32_t length1,
31 const UScriptCode scripts2[], int32_t length2,
32 UErrorCode errorCode) {
33 char s1[80];
34 char s2[80];
35 if(U_FAILURE(errorCode)) {
36 log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
37 return;
38 }
39 scriptsToString(scripts1, length1, s1);
40 scriptsToString(scripts2, length2, s2);
41 if(0!=strcmp(s1, s2)) {
42 log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
43 }
44}
73c04bcf 45
b75a7d8f
A
46void TestUScriptCodeAPI(){
47 int i =0;
48 int numErrors =0;
49 {
50 const char* testNames[]={
51 /* test locale */
52 "en", "en_US", "sr", "ta" , "te_IN",
53 "hi", "he", "ar",
54 /* test abbr */
55 "Hani", "Hang","Hebr","Hira",
56 "Knda","Kana","Khmr","Lao",
57 "Latn",/*"Latf","Latg",*/
58 "Mlym", "Mong",
59
60 /* test names */
61 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
62 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
63 /* test lower case names */
64 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
65 "oriya", "runic", "sinhala", "syriac","tamil",
66 "telugu", "thaana", "thai", "tibetan",
67 /* test the bounds*/
68 "tagb", "arabic",
69 /* test bogus */
70 "asfdasd", "5464", "12235",
71 /* test the last index */
72 "zyyy", "YI",
57a6839d 73 NULL
b75a7d8f
A
74 };
75 UScriptCode expected[] ={
76 /* locales should return */
77 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
78 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
79 /* abbr should return */
80 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
81 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
82 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
83 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
84 /* names should return */
85 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
86 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
87 /* lower case names should return */
88 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
89 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
90 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
91 /* bounds */
92 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
93 /* bogus names should return invalid code */
94 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
95 USCRIPT_COMMON, USCRIPT_YI,
96 };
97
98 UErrorCode err = U_ZERO_ERROR;
99
100 const int32_t capacity = 10;
101
57a6839d 102 for( ; testNames[i]!=NULL; i++){
b75a7d8f
A
103 UScriptCode script[10]={USCRIPT_INVALID_CODE};
104 uscript_getCode(testNames[i],script,capacity, &err);
105 if( script[0] != expected[i]){
729e4ab9 106 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
b75a7d8f
A
107 script[0],expected[i],testNames[i]);
108 numErrors++;
109 }
110 }
111 if(numErrors >0 ){
112 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
113 }
114 }
115
116 {
117 UErrorCode err = U_ZERO_ERROR;
118 int32_t capacity=0;
46f4442e 119 int32_t j;
73c04bcf 120 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
b75a7d8f
A
121 UScriptCode script[10]={USCRIPT_INVALID_CODE};
122 int32_t num = uscript_getCode("ja",script,capacity, &err);
123 /* preflight */
124 if(err==U_BUFFER_OVERFLOW_ERROR){
125 err = U_ZERO_ERROR;
126 capacity = 10;
127 num = uscript_getCode("ja",script,capacity, &err);
2ca993e8 128 if(num!=UPRV_LENGTHOF(jaCode)){
46f4442e 129 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
2ca993e8 130 num, UPRV_LENGTHOF(jaCode));
46f4442e 131 }
2ca993e8 132 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
46f4442e
A
133 if(script[j]!=jaCode[j]) {
134 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
135 script[j], uscript_getName(script[j]),
136 jaCode[j], uscript_getName(jaCode[j]));
137
138 }
b75a7d8f
A
139 }
140 }else{
141 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
142 "U_BUFFER_OVERFLOW_ERROR",
143 u_errorName(err));
144 }
145
146 }
b331163b
A
147 {
148 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
149 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
150 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
151 static const UScriptCode HAN[1] = { USCRIPT_HAN };
152 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
153 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
154 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
155 UScriptCode scripts[5];
156 UErrorCode err;
157 int32_t num;
158
159 // Should work regardless of whether we have locale data for the language.
160 err = U_ZERO_ERROR;
161 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
162 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
163 err = U_ZERO_ERROR;
164 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
165 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
166
167 // Multi-script languages.
168 err = U_ZERO_ERROR;
169 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
170 assertEqualScripts("ja scripts: Kana Hira Hani",
171 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
172 err = U_ZERO_ERROR;
173 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
174 assertEqualScripts("ko scripts: Hang Hani",
175 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
176 err = U_ZERO_ERROR;
177 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
178 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
179 err = U_ZERO_ERROR;
180 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
181 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
182 err = U_ZERO_ERROR;
183 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
184 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
185
186 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
187 err = U_ZERO_ERROR;
188 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
189 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
190 }
b75a7d8f
A
191
192 {
193 UScriptCode testAbbr[]={
194 /* names should return */
195 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
196 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
197 };
198
199 const char* expectedNames[]={
200
201 /* test names */
202 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
203 "Gothic", "Greek", "Gujarati",
57a6839d 204 NULL
b75a7d8f
A
205 };
206 i=0;
2ca993e8 207 while(i<UPRV_LENGTHOF(testAbbr)){
b75a7d8f
A
208 const char* name = uscript_getName(testAbbr[i]);
209 if(name == NULL) {
210 log_data_err("Couldn't get script name\n");
211 return;
212 }
213 numErrors=0;
214 if(strcmp(expectedNames[i],name)!=0){
215 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
216 numErrors++;
217 }
218 if(numErrors > 0){
219 if(numErrors >0 ){
220 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
221 }
222 }
223 i++;
224 }
225
226 }
227
228 {
229 UScriptCode testAbbr[]={
230 /* abbr should return */
231 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
232 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
233 USCRIPT_LATIN,
234 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
235 };
236
237 const char* expectedAbbr[]={
238 /* test abbr */
239 "Hani", "Hang","Hebr","Hira",
240 "Knda","Kana","Khmr","Laoo",
241 "Latn",
242 "Mlym", "Mong",
57a6839d 243 NULL
b75a7d8f
A
244 };
245 i=0;
2ca993e8 246 while(i<UPRV_LENGTHOF(testAbbr)){
b75a7d8f
A
247 const char* name = uscript_getShortName(testAbbr[i]);
248 numErrors=0;
249 if(strcmp(expectedAbbr[i],name)!=0){
250 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
251 numErrors++;
252 }
253 if(numErrors > 0){
254 if(numErrors >0 ){
255 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
256 }
257 }
258 i++;
259 }
260
261 }
262 /* now test uscript_getScript() API */
263 {
b75a7d8f
A
264 uint32_t codepoints[] = {
265 0x0000FF9D, /* USCRIPT_KATAKANA*/
266 0x0000FFBE, /* USCRIPT_HANGUL*/
267 0x0000FFC7, /* USCRIPT_HANGUL*/
268 0x0000FFCF, /* USCRIPT_HANGUL*/
269 0x0000FFD7, /* USCRIPT_HANGUL*/
270 0x0000FFDC, /* USCRIPT_HANGUL*/
271 0x00010300, /* USCRIPT_OLD_ITALIC*/
272 0x00010330, /* USCRIPT_GOTHIC*/
273 0x0001034A, /* USCRIPT_GOTHIC*/
274 0x00010400, /* USCRIPT_DESERET*/
275 0x00010428, /* USCRIPT_DESERET*/
276 0x0001D167, /* USCRIPT_INHERITED*/
277 0x0001D17B, /* USCRIPT_INHERITED*/
278 0x0001D185, /* USCRIPT_INHERITED*/
279 0x0001D1AA, /* USCRIPT_INHERITED*/
280 0x00020000, /* USCRIPT_HAN*/
281 0x00000D02, /* USCRIPT_MALAYALAM*/
73c04bcf 282 0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
b75a7d8f
A
283 0x00000000, /* USCRIPT_COMMON*/
284 0x0001D169, /* USCRIPT_INHERITED*/
285 0x0001D182, /* USCRIPT_INHERITED*/
286 0x0001D18B, /* USCRIPT_INHERITED*/
287 0x0001D1AD, /* USCRIPT_INHERITED*/
b75a7d8f
A
288 };
289
290 UScriptCode expected[] = {
291 USCRIPT_KATAKANA ,
292 USCRIPT_HANGUL ,
293 USCRIPT_HANGUL ,
294 USCRIPT_HANGUL ,
295 USCRIPT_HANGUL ,
296 USCRIPT_HANGUL ,
297 USCRIPT_OLD_ITALIC,
298 USCRIPT_GOTHIC ,
299 USCRIPT_GOTHIC ,
300 USCRIPT_DESERET ,
301 USCRIPT_DESERET ,
302 USCRIPT_INHERITED,
303 USCRIPT_INHERITED,
304 USCRIPT_INHERITED,
305 USCRIPT_INHERITED,
306 USCRIPT_HAN ,
307 USCRIPT_MALAYALAM,
73c04bcf 308 USCRIPT_UNKNOWN,
b75a7d8f
A
309 USCRIPT_COMMON,
310 USCRIPT_INHERITED ,
311 USCRIPT_INHERITED ,
312 USCRIPT_INHERITED ,
313 USCRIPT_INHERITED ,
b75a7d8f
A
314 };
315 UScriptCode code = USCRIPT_INVALID_CODE;
316 UErrorCode status = U_ZERO_ERROR;
317 UBool passed = TRUE;
318
b331163b 319 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
b75a7d8f
A
320 code = uscript_getScript(codepoints[i],&status);
321 if(U_SUCCESS(status)){
322 if( code != expected[i] ||
323 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
324 ) {
325 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
326 passed = FALSE;
327 }
328 }else{
329 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
330 codepoints[i],u_errorName(status));
331 break;
332 }
b75a7d8f
A
333 }
334
335 if(passed==FALSE){
336 log_err("uscript_getScript failed.\n");
337 }
338 }
339 {
340 UScriptCode code= USCRIPT_INVALID_CODE;
341 UErrorCode status = U_ZERO_ERROR;
342 code = uscript_getScript(0x001D169,&status);
343 if(code != USCRIPT_INHERITED){
344 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
345 }
346 }
347 {
348 UScriptCode code= USCRIPT_INVALID_CODE;
349 UErrorCode status = U_ZERO_ERROR;
350 int32_t err = 0;
351
352 for(i = 0; i<=0x10ffff; i++){
353 code = uscript_getScript(i,&status);
354 if(code == USCRIPT_INVALID_CODE){
355 err++;
356 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
357 }
358 }
359 if(err>0){
360 log_err("uscript_getScript failed for %d codepoints\n", err);
361 }
362 }
363 {
364 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
365 const char* name = uscript_getName((UScriptCode)i);
366 if(name==NULL || strcmp(name,"")==0){
73c04bcf 367 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
b75a7d8f
A
368 }
369 }
370 }
729e4ab9 371
73c04bcf
A
372 {
373 /*
374 * These script codes were originally added to ICU pre-3.6, so that ICU would
375 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
376 * These script codes were added with only short names because we don't
377 * want to invent long names ourselves.
378 * Unicode 5 and later encode some of these scripts and give them long names.
379 * Whenever this happens, the long script names here need to be updated.
380 */
381 static const char* expectedLong[] = {
b331163b
A
382 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
383 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
2ca993e8 384 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
b331163b
A
385 "Javanese", "Kayah_Li", "Latf", "Latg",
386 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
387 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
4388f060 388 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
73c04bcf 389 "Zxxx", "Unknown",
2ca993e8 390 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
729e4ab9 391 "Moon", "Meetei_Mayek",
46f4442e 392 /* new in ICU 4.0 */
4388f060 393 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
b331163b
A
394 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
395 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
46f4442e 396 "Zmth", "Zsym",
729e4ab9
A
397 /* new in ICU 4.4 */
398 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
399 /* new in ICU 4.6 */
b331163b
A
400 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
401 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
402 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
4388f060 403 /* new in ICU 4.8 */
b331163b 404 "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
4388f060 405 /* new in ICU 49 */
2ca993e8 406 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
57a6839d 407 /* new in ICU 52 */
b331163b
A
408 "Caucasian_Albanian", "Mahajani",
409 /* new in ICU 54 */
2ca993e8 410 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham"
73c04bcf
A
411 };
412 static const char* expectedShort[] = {
b331163b
A
413 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
414 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
415 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
416 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
73c04bcf 417 "Zxxx", "Zzzz",
46f4442e
A
418 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
419 "Moon", "Mtei",
420 /* new in ICU 4.0 */
421 "Armi", "Avst", "Cakm", "Kore",
422 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
423 "Zmth", "Zsym",
729e4ab9
A
424 /* new in ICU 4.4 */
425 "Bamu", "Lisu", "Nkgb", "Sarb",
426 /* new in ICU 4.6 */
427 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
428 "Narb", "Nbat", "Palm", "Sind", "Wara",
4388f060
A
429 /* new in ICU 4.8 */
430 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
431 /* new in ICU 49 */
432 "Hluw", "Khoj", "Tirh",
57a6839d 433 /* new in ICU 52 */
b331163b
A
434 "Aghb", "Mahj",
435 /* new in ICU 54 */
436 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd"
73c04bcf
A
437 };
438 int32_t j = 0;
b331163b 439 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
729e4ab9
A
440 log_err("need to add new script codes in cucdapi.c!\n");
441 return;
442 }
73c04bcf
A
443 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
444 const char* name = uscript_getName((UScriptCode)i);
445 if(name==NULL || strcmp(name,expectedLong[j])!=0){
446 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
447 }
448 name = uscript_getShortName((UScriptCode)i);
449 if(name==NULL || strcmp(name,expectedShort[j])!=0){
450 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
451 }
452 }
b331163b 453 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
73c04bcf
A
454 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
455 UErrorCode status = U_ZERO_ERROR;
456 int32_t len = 0;
b331163b 457 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
73c04bcf
A
458 if(U_FAILURE(status)){
459 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
460 }
461 if(len>1){
462 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
463 }
464 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
465 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
466 }
467 }
468 }
729e4ab9
A
469
470 {
471 /* test characters which have Script_Extensions */
472 UErrorCode errorCode=U_ZERO_ERROR;
473 if(!(
474 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
475 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
476 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
477 U_FAILURE(errorCode)
478 ) {
479 log_err("uscript_getScript(character with Script_Extensions) failed\n");
480 }
481 }
482}
483
484void TestHasScript() {
485 if(!(
486 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
487 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
488 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
489 !uscript_hasScript(0x063f, USCRIPT_THAANA))
490 ) {
491 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
492 }
493 if(!(
4388f060 494 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
729e4ab9
A
495 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
496 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
497 !uscript_hasScript(0x0640, USCRIPT_THAANA))
498 ) {
499 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
500 }
501 if(!(
4388f060 502 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
729e4ab9
A
503 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
504 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
505 !uscript_hasScript(0x0650, USCRIPT_THAANA))
506 ) {
507 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
508 }
509 if(!(
4388f060 510 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
729e4ab9
A
511 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
512 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
513 uscript_hasScript(0x0660, USCRIPT_THAANA))
514 ) {
515 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
516 }
517 if(!(
518 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
519 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
520 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
521 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
522 ) {
523 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
524 }
4388f060
A
525 if(uscript_hasScript(0x0640, 0xaffe)) {
526 /* An unguarded implementation might go into an infinite loop. */
527 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
528 }
729e4ab9
A
529}
530
b331163b
A
531static UBool scriptsContain(int32_t scripts[], int32_t length, int32_t script) {
532 UBool contain=FALSE;
533 int32_t prev=-1, i;
534 for(i=0; i<length; ++i) {
535 int32_t s=scripts[i];
536 if(s<=prev) {
537 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
538 }
539 if(s==script) { contain=TRUE; }
540 }
541 return contain;
542}
543
729e4ab9
A
544void TestGetScriptExtensions() {
545 UScriptCode scripts[20];
546 int32_t length;
547 UErrorCode errorCode;
548
549 /* errors and overflows */
550 errorCode=U_PARSE_ERROR;
b331163b 551 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9 552 if(errorCode!=U_PARSE_ERROR) {
4388f060 553 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
729e4ab9
A
554 u_errorName(errorCode));
555 }
556 errorCode=U_ZERO_ERROR;
b331163b 557 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9 558 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
4388f060 559 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
729e4ab9
A
560 u_errorName(errorCode));
561 }
562 errorCode=U_ZERO_ERROR;
563 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
564 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
4388f060 565 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
729e4ab9
A
566 u_errorName(errorCode));
567 }
568 errorCode=U_ZERO_ERROR;
569 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
b331163b
A
570 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
571 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
729e4ab9
A
572 (int)length, u_errorName(errorCode));
573 }
574 errorCode=U_ZERO_ERROR;
575 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
b331163b
A
576 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
577 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
4388f060
A
578 (int)length, u_errorName(errorCode));
579 }
580 /* U+063F has only a Script code, no Script_Extensions. */
581 errorCode=U_ZERO_ERROR;
582 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
583 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
584 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
585 (int)length, u_errorName(errorCode));
586 }
587
588 /* invalid code points */
589 errorCode=U_ZERO_ERROR;
b331163b 590 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
4388f060
A
591 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
592 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
593 (int)length, u_errorName(errorCode));
594 }
595 errorCode=U_ZERO_ERROR;
b331163b 596 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
4388f060
A
597 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
598 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
729e4ab9
A
599 (int)length, u_errorName(errorCode));
600 }
601
602 /* normal usage */
603 errorCode=U_ZERO_ERROR;
4388f060
A
604 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
605 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
606 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
729e4ab9
A
607 (int)length, u_errorName(errorCode));
608 }
4388f060 609 errorCode=U_ZERO_ERROR;
b331163b
A
610 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
611 if(U_FAILURE(errorCode) || length<3 ||
612 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
613 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
614 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
729e4ab9
A
615 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
616 (int)length, u_errorName(errorCode));
617 }
4388f060 618 errorCode=U_ZERO_ERROR;
b331163b 619 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9
A
620 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
621 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
622 (int)length, u_errorName(errorCode));
623 }
4388f060 624 errorCode=U_ZERO_ERROR;
b331163b 625 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
729e4ab9
A
626 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
627 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
628 (int)length, u_errorName(errorCode));
629 }
46f4442e
A
630}
631
51004dcb
A
632void TestScriptMetadataAPI() {
633 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
634 UErrorCode errorCode=U_ZERO_ERROR;
635 UChar sample[8];
636
b331163b 637 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
51004dcb
A
638 U_FAILURE(errorCode) ||
639 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
640 sample[1]!=0) {
641 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
642 }
643 sample[0]=0xfffe;
644 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
645 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
646 sample[0]!=0xfffe) {
647 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
648 }
649 errorCode=U_ZERO_ERROR;
b331163b 650 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
51004dcb
A
651 U_FAILURE(errorCode) ||
652 sample[0]!=0) {
653 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
654 }
655 sample[0]=0xfffe;
656 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
657 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
658 sample[0]!=0xfffe) {
659 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
660 }
661
662 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
663 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
664 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
665 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
666 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
667 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
668 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
669 log_err("uscript_getUsage() failed\n");
670 }
671
672 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
673 uscript_isRightToLeft(USCRIPT_CIRTH) ||
674 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
675 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
676 log_err("uscript_isRightToLeft() failed\n");
677 }
678
679 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
680 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
681 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
682 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
683 log_err("uscript_breaksBetweenLetters() failed\n");
684 }
685
686 if(uscript_isCased(USCRIPT_CIRTH) ||
687 uscript_isCased(USCRIPT_HAN) ||
688 !uscript_isCased(USCRIPT_LATIN) ||
689 !uscript_isCased(USCRIPT_GREEK)) {
690 log_err("uscript_isCased() failed\n");
691 }
692}
693
46f4442e
A
694void TestBinaryValues() {
695 /*
696 * Unicode 5.1 explicitly defines binary property value aliases.
697 * Verify that they are all recognized.
698 */
699 static const char *const falseValues[]={ "N", "No", "F", "False" };
700 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
701 int32_t i;
b331163b 702 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
46f4442e 703 if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
729e4ab9 704 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
46f4442e
A
705 }
706 }
b331163b 707 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
46f4442e 708 if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
729e4ab9 709 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
46f4442e
A
710 }
711 }
712}