]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cucdtst.c
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File CUCDTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API, added tests for string functions
13 ********************************************************************************
14 */
15
16 #include <string.h>
17 #include <math.h>
18 #include <stdlib.h>
19
20 #include "unicode/utypes.h"
21 #include "unicode/uchar.h"
22 #include "unicode/putil.h"
23 #include "unicode/ustring.h"
24 #include "unicode/uloc.h"
25 #include "unicode/unorm2.h"
26
27 #include "cintltst.h"
28 #include "putilimp.h"
29 #include "uparse.h"
30 #include "ucase.h"
31 #include "ubidi_props.h"
32 #include "uprops.h"
33 #include "uset_imp.h"
34 #include "usc_impl.h"
35 #include "udatamem.h" /* for testing ucase_openBinary() */
36 #include "cucdapi.h"
37
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a
 * single primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
39
40 /* prototypes --------------------------------------------------------------- */
41
/* basic character classification and case mapping */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestCaseFolding(void);

/* code unit / code point macros */
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);

/* Unicode character database properties */
static void TestUnicodeData(void);
static void TestCharNames(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);

/* low-level property data */
static void TestUCase(void);
static void TestUBiDiProps(void);

/* internal helpers that take a property abbreviation string and return an int32_t */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);
67
68 /* helpers ------------------------------------------------------------------ */
69
70 static void
71 parseUCDFile(const char *filename,
72 char *fields[][2], int32_t fieldCount,
73 UParseLineFn *lineFn, void *context,
74 UErrorCode *pErrorCode) {
75 char path[256];
76 char backupPath[256];
77
78 if(U_FAILURE(*pErrorCode)) {
79 return;
80 }
81
82 /* Look inside ICU_DATA first */
83 strcpy(path, u_getDataDirectory());
84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85 strcat(path, filename);
86
87 /* As a fallback, try to guess where the source data was located
88 * at the time ICU was built, and look there.
89 */
90 strcpy(backupPath, ctest_dataSrcDir());
91 strcat(backupPath, U_FILE_SEP_STRING);
92 strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93 strcat(backupPath, filename);
94
95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96 if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97 *pErrorCode=U_ZERO_ERROR;
98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99 }
100 if(U_FAILURE(*pErrorCode)) {
101 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102 }
103 }
104
105 /* test data ---------------------------------------------------------------- */
106
107 static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
108 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
109 static const int32_t tagValues[] =
110 {
111 /* Mn */ U_NON_SPACING_MARK,
112 /* Mc */ U_COMBINING_SPACING_MARK,
113 /* Me */ U_ENCLOSING_MARK,
114 /* Nd */ U_DECIMAL_DIGIT_NUMBER,
115 /* Nl */ U_LETTER_NUMBER,
116 /* No */ U_OTHER_NUMBER,
117 /* Zs */ U_SPACE_SEPARATOR,
118 /* Zl */ U_LINE_SEPARATOR,
119 /* Zp */ U_PARAGRAPH_SEPARATOR,
120 /* Cc */ U_CONTROL_CHAR,
121 /* Cf */ U_FORMAT_CHAR,
122 /* Cs */ U_SURROGATE,
123 /* Co */ U_PRIVATE_USE_CHAR,
124 /* Cn */ U_UNASSIGNED,
125 /* Lu */ U_UPPERCASE_LETTER,
126 /* Ll */ U_LOWERCASE_LETTER,
127 /* Lt */ U_TITLECASE_LETTER,
128 /* Lm */ U_MODIFIER_LETTER,
129 /* Lo */ U_OTHER_LETTER,
130 /* Pc */ U_CONNECTOR_PUNCTUATION,
131 /* Pd */ U_DASH_PUNCTUATION,
132 /* Ps */ U_START_PUNCTUATION,
133 /* Pe */ U_END_PUNCTUATION,
134 /* Po */ U_OTHER_PUNCTUATION,
135 /* Sm */ U_MATH_SYMBOL,
136 /* Sc */ U_CURRENCY_SYMBOL,
137 /* Sk */ U_MODIFIER_SYMBOL,
138 /* So */ U_OTHER_SYMBOL,
139 /* Pi */ U_INITIAL_PUNCTUATION,
140 /* Pf */ U_FINAL_PUNCTUATION
141 };
142
/* bidirectional class abbreviations; each entry is at most 3 characters plus NUL */
static const char dirStrings[][5] = {
    /*  0.. 4 */ "L",   "R",   "EN",  "ES",  "ET",
    /*  5.. 9 */ "AN",  "CS",  "B",   "S",   "WS",
    /* 10..14 */ "ON",  "LRE", "LRO", "AL",  "RLE",
    /* 15..18 */ "RLO", "PDF", "NSM", "BN"
};
164
165 void addUnicodeTest(TestNode** root);
166
167 void addUnicodeTest(TestNode** root)
168 {
169 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
170 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
171 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
172 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
173 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
174 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
175 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
176 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
177 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
178 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
179 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
180 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
181 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
182 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
183 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
184 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
185 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
186 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
187 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
188 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
189 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
190 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
191 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
192 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
193 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
194 }
195
196 /*==================================================== */
197 /* test u_toupper() and u_tolower() */
198 /*==================================================== */
199 static void TestUpperLower()
200 {
201 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
202 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
203 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
204 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
205 int32_t i;
206
207 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
208 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
209
210 /*
211 Checks LetterLike Symbols which were previously a source of confusion
212 [Bertrand A. D. 02/04/98]
213 */
214 for (i=0x2100;i<0x2138;i++)
215 {
216 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
217 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
218 {
219 if (i != (int)u_tolower(i)) /* itself */
220 log_err("Failed case conversion with itself: U+%04x\n", i);
221 if (i != (int)u_toupper(i))
222 log_err("Failed case conversion with itself: U+%04x\n", i);
223 }
224 }
225
226 for(i=0; i < u_strlen(upper); i++){
227 if(u_tolower(upper[i]) != lower[i]){
228 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
229 }
230 }
231
232 log_verbose("testing upper lower\n");
233 for (i = 0; i < 21; i++) {
234
235 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
236 {
237 log_err("Failed isLowerCase test at %c\n", upperTest[i]);
238 }
239 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
240 {
241 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
242 }
243 else if (upperTest[i] != u_tolower(lowerTest[i]))
244 {
245 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]);
246 }
247 else if (lowerTest[i] != u_toupper(upperTest[i]))
248 {
249 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
250 }
251 else if (upperTest[i] != u_tolower(upperTest[i]))
252 {
253 log_err("Failed case conversion with itself: %c\n", upperTest[i]);
254 }
255 else if (lowerTest[i] != u_toupper(lowerTest[i]))
256 {
257 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
258 }
259 }
260 log_verbose("done testing upper lower\n");
261
262 log_verbose("testing u_istitle\n");
263 {
264 static const UChar expected[] = {
265 0x1F88,
266 0x1F89,
267 0x1F8A,
268 0x1F8B,
269 0x1F8C,
270 0x1F8D,
271 0x1F8E,
272 0x1F8F,
273 0x1F88,
274 0x1F89,
275 0x1F8A,
276 0x1F8B,
277 0x1F8C,
278 0x1F8D,
279 0x1F8E,
280 0x1F8F,
281 0x1F98,
282 0x1F99,
283 0x1F9A,
284 0x1F9B,
285 0x1F9C,
286 0x1F9D,
287 0x1F9E,
288 0x1F9F,
289 0x1F98,
290 0x1F99,
291 0x1F9A,
292 0x1F9B,
293 0x1F9C,
294 0x1F9D,
295 0x1F9E,
296 0x1F9F,
297 0x1FA8,
298 0x1FA9,
299 0x1FAA,
300 0x1FAB,
301 0x1FAC,
302 0x1FAD,
303 0x1FAE,
304 0x1FAF,
305 0x1FA8,
306 0x1FA9,
307 0x1FAA,
308 0x1FAB,
309 0x1FAC,
310 0x1FAD,
311 0x1FAE,
312 0x1FAF,
313 0x1FBC,
314 0x1FBC,
315 0x1FCC,
316 0x1FCC,
317 0x1FFC,
318 0x1FFC,
319 };
320 int32_t num = sizeof(expected)/sizeof(expected[0]);
321 for(i=0; i<num; i++){
322 if(!u_istitle(expected[i])){
323 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
324 }
325 }
326
327 }
328 }
329
330 /* compare two sets and verify that their difference or intersection is empty */
331 static UBool
332 showADiffB(const USet *a, const USet *b,
333 const char *a_name, const char *b_name,
334 UBool expect, UBool diffIsError) {
335 USet *aa;
336 int32_t i, start, end, length;
337 UErrorCode errorCode;
338
339 /*
340 * expect:
341 * TRUE -> a-b should be empty, that is, b should contain all of a
342 * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
343 */
344 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
345 return TRUE;
346 }
347
348 /* clone a to aa because a is const */
349 aa=uset_open(1, 0);
350 if(aa==NULL) {
351 /* unusual problem - out of memory? */
352 return FALSE;
353 }
354 uset_addAll(aa, a);
355
356 /* compute the set in question */
357 if(expect) {
358 /* a-b */
359 uset_removeAll(aa, b);
360 } else {
361 /* a&b */
362 uset_retainAll(aa, b);
363 }
364
365 /* aa is not empty because of the initial tests above; show its contents */
366 errorCode=U_ZERO_ERROR;
367 i=0;
368 for(;;) {
369 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
370 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
371 break; /* done */
372 }
373 if(U_FAILURE(errorCode)) {
374 log_err("error comparing %s with %s at difference item %d: %s\n",
375 a_name, b_name, i, u_errorName(errorCode));
376 break;
377 }
378 if(length!=0) {
379 break; /* done with code points, got a string or -1 */
380 }
381
382 if(diffIsError) {
383 if(expect) {
384 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
385 } else {
386 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
387 }
388 } else {
389 if(expect) {
390 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
391 } else {
392 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
393 }
394 }
395
396 ++i;
397 }
398
399 uset_close(aa);
400 return FALSE;
401 }
402
403 static UBool
404 showAMinusB(const USet *a, const USet *b,
405 const char *a_name, const char *b_name,
406 UBool diffIsError) {
407 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
408 }
409
410 static UBool
411 showAIntersectB(const USet *a, const USet *b,
412 const char *a_name, const char *b_name,
413 UBool diffIsError) {
414 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
415 }
416
417 static UBool
418 compareUSets(const USet *a, const USet *b,
419 const char *a_name, const char *b_name,
420 UBool diffIsError) {
421 /*
422 * Use an arithmetic & not a logical && so that both branches
423 * are always taken and all differences are shown.
424 */
425 return
426 showAMinusB(a, b, a_name, b_name, diffIsError) &
427 showAMinusB(b, a, b_name, a_name, diffIsError);
428 }
429
430 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
431 static void TestLetterNumber()
432 {
433 UChar i = 0x0000;
434
435 log_verbose("Testing for isalpha\n");
436 for (i = 0x0041; i < 0x005B; i++) {
437 if (!u_isalpha(i))
438 {
439 log_err("Failed isLetter test at %.4X\n", i);
440 }
441 }
442 for (i = 0x0660; i < 0x066A; i++) {
443 if (u_isalpha(i))
444 {
445 log_err("Failed isLetter test with numbers at %.4X\n", i);
446 }
447 }
448
449 log_verbose("Testing for isdigit\n");
450 for (i = 0x0660; i < 0x066A; i++) {
451 if (!u_isdigit(i))
452 {
453 log_verbose("Failed isNumber test at %.4X\n", i);
454 }
455 }
456
457 log_verbose("Testing for isalnum\n");
458 for (i = 0x0041; i < 0x005B; i++) {
459 if (!u_isalnum(i))
460 {
461 log_err("Failed isAlNum test at %.4X\n", i);
462 }
463 }
464 for (i = 0x0660; i < 0x066A; i++) {
465 if (!u_isalnum(i))
466 {
467 log_err("Failed isAlNum test at %.4X\n", i);
468 }
469 }
470
471 {
472 /*
473 * The following checks work only starting from Unicode 4.0.
474 * Check the version number here.
475 */
476 static UVersionInfo u401={ 4, 0, 1, 0 };
477 UVersionInfo version;
478 u_getUnicodeVersion(version);
479 if(version[0]<4 || 0==memcmp(version, u401, 4)) {
480 return;
481 }
482 }
483
484 {
485 /*
486 * Sanity check:
487 * Verify that exactly the digit characters have decimal digit values.
488 * This assumption is used in the implementation of u_digit()
489 * (which checks nt=de)
490 * compared with the parallel java.lang.Character.digit()
491 * (which checks Nd).
492 *
493 * This was not true in Unicode 3.2 and earlier.
494 * Unicode 4.0 fixed discrepancies.
495 * Unicode 4.0.1 re-introduced problems in this area due to an
496 * unintentionally incomplete last-minute change.
497 */
498 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
499 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
500
501 USet *digits, *decimalValues;
502 UErrorCode errorCode;
503
504 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
505 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
506 errorCode=U_ZERO_ERROR;
507 digits=uset_openPattern(digitsPattern, 6, &errorCode);
508 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
509
510 if(U_SUCCESS(errorCode)) {
511 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
512 }
513
514 uset_close(digits);
515 uset_close(decimalValues);
516 }
517 }
518
519 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
520 const UChar32 *sampleChars, int32_t sampleCharsLength,
521 UBool expected) {
522 int32_t i;
523 for (i = 0; i < sampleCharsLength; ++i) {
524 UBool result = propFn(sampleChars[i]);
525 if (result != expected) {
526 log_err("error: character property function %s(U+%04x)=%d is wrong\n",
527 propName, sampleChars[i], result);
528 }
529 }
530 }
531
532 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
533 static void TestMisc()
534 {
535 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
536 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
537 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
538 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
539 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
540 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
541 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
542 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
543 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
544 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
545 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
546
547 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
548
549 uint32_t mask;
550
551 int32_t i;
552 char icuVersion[U_MAX_VERSION_STRING_LENGTH];
553 UVersionInfo realVersion;
554
555 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
556
557 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
558 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
559
560 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
561 sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
562 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
563 sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
564
565 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
566 sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
567 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
568 sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
569
570 testSampleCharProps(u_isdefined, "u_isdefined",
571 sampleDefined, LENGTHOF(sampleDefined), TRUE);
572 testSampleCharProps(u_isdefined, "u_isdefined",
573 sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
574
575 testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
576 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
577
578 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
579 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
580
581 for (i = 0; i < LENGTHOF(sampleDigits); i++) {
582 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
583 log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
584 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
585 }
586 }
587
588 /* Tests the ICU version #*/
589 u_getVersion(realVersion);
590 u_versionToString(realVersion, icuVersion);
591 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
592 {
593 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
594 }
595 #if defined(ICU_VERSION)
596 /* test only happens where we have configure.in with VERSION - sanity check. */
597 if(strcmp(U_ICU_VERSION, ICU_VERSION))
598 {
599 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION);
600 }
601 #endif
602
603 /* test U_GC_... */
604 if(
605 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
606 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
607 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
608 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
609 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
610 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
611 ) {
612 log_err("error: U_GET_GC_MASK does not work properly\n");
613 }
614
615 mask=0;
616 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
617
618 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
619 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
620 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
621 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
622 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
623
624 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
625 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
626 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
627
628 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
629 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
630 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
631
632 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
633 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
634 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
635
636 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
637 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
638 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
639 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
640
641 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
642 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
643 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
644 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
645 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
646
647 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
648 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
649 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
650 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
651
652 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
653 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
654
655 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
656 log_err("error: problems with U_GC_XX_MASK constants\n");
657 }
658
659 mask=0;
660 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
661 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
662 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
663 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
664 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
665 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
666 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
667
668 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
669 log_err("error: problems with U_GC_Y_MASK constants\n");
670 }
671 {
672 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
673 for(i=0; i<10; i++){
674 if(digit[i]!=u_forDigit(i,10)){
675 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
676 }
677 }
678 }
679
680 /* test u_digit() */
681 {
682 static const struct {
683 UChar32 c;
684 int8_t radix, value;
685 } data[]={
686 /* base 16 */
687 { 0x0031, 16, 1 },
688 { 0x0038, 16, 8 },
689 { 0x0043, 16, 12 },
690 { 0x0066, 16, 15 },
691 { 0x00e4, 16, -1 },
692 { 0x0662, 16, 2 },
693 { 0x06f5, 16, 5 },
694 { 0xff13, 16, 3 },
695 { 0xff41, 16, 10 },
696
697 /* base 8 */
698 { 0x0031, 8, 1 },
699 { 0x0038, 8, -1 },
700 { 0x0043, 8, -1 },
701 { 0x0066, 8, -1 },
702 { 0x00e4, 8, -1 },
703 { 0x0662, 8, 2 },
704 { 0x06f5, 8, 5 },
705 { 0xff13, 8, 3 },
706 { 0xff41, 8, -1 },
707
708 /* base 36 */
709 { 0x5a, 36, 35 },
710 { 0x7a, 36, 35 },
711 { 0xff3a, 36, 35 },
712 { 0xff5a, 36, 35 },
713
714 /* wrong radix values */
715 { 0x0031, 1, -1 },
716 { 0xff3a, 37, -1 }
717 };
718
719 for(i=0; i<LENGTHOF(data); ++i) {
720 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
721 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
722 data[i].c,
723 data[i].radix,
724 u_digit(data[i].c, data[i].radix),
725 data[i].value);
726 }
727 }
728 }
729 }
730
731 /* test C/POSIX-style functions --------------------------------------------- */
732
/* bit flags, one per C/POSIX-style character class */
enum {
    ISAL = 1,       /* alpha */
    ISLO = 2,       /* lower */
    ISUP = 4,       /* upper */

    ISDI = 8,       /* digit */
    ISXD = 0x10,    /* xdigit */

    ISAN = 0x20,    /* alnum */

    ISPU = 0x40,    /* punct */
    ISGR = 0x80,    /* graph */
    ISPR = 0x100,   /* print */

    ISSP = 0x200,   /* space */
    ISBL = 0x400,   /* blank */
    ISCN = 0x800    /* cntrl */
};
750
751 /* C/POSIX-style functions, in the same order as the bit flags */
752 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
753
754 static const struct {
755 IsPOSIXClass *fn;
756 const char *name;
757 } posixClasses[]={
758 { u_isalpha, "isalpha" },
759 { u_islower, "islower" },
760 { u_isupper, "isupper" },
761 { u_isdigit, "isdigit" },
762 { u_isxdigit, "isxdigit" },
763 { u_isalnum, "isalnum" },
764 { u_ispunct, "ispunct" },
765 { u_isgraph, "isgraph" },
766 { u_isprint, "isprint" },
767 { u_isspace, "isspace" },
768 { u_isblank, "isblank" },
769 { u_iscntrl, "iscntrl" }
770 };
771
772 static const struct {
773 UChar32 c;
774 uint32_t posixResults;
775 } posixData[]={
776 { 0x0008, ISCN }, /* backspace */
777 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */
778 { 0x000a, ISSP| ISCN }, /* LF */
779 { 0x000c, ISSP| ISCN }, /* FF */
780 { 0x000d, ISSP| ISCN }, /* CR */
781 { 0x0020, ISPR|ISSP|ISBL }, /* space */
782 { 0x0021, ISPU|ISGR|ISPR }, /* ! */
783 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */
784 { 0x0040, ISPU|ISGR|ISPR }, /* @ */
785 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */
786 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */
787 { 0x007b, ISPU|ISGR|ISPR }, /* { */
788 { 0x0085, ISSP| ISCN }, /* NEL */
789 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */
790 { 0x00a4, ISGR|ISPR }, /* currency sign */
791 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */
792 { 0x0300, ISGR|ISPR }, /* combining grave */
793 { 0x0600, ISCN }, /* arabic number sign */
794 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */
795 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */
796 { 0x2002, ISPR|ISSP|ISBL }, /* en space */
797 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */
798 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */
799 { 0x200b, ISCN }, /* ZWSP */
800 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
801 { 0x200e, ISCN }, /* LRM */
802 { 0x2028, ISPR|ISSP| ISCN }, /* LS */
803 { 0x2029, ISPR|ISSP| ISCN }, /* PS */
804 { 0x20ac, ISGR|ISPR }, /* Euro */
805 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */
806 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */
807 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */
808 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */
809 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */
810 };
811
812 static void
813 TestPOSIX() {
814 uint32_t mask;
815 int32_t cl, i;
816 UBool expect;
817
818 mask=1;
819 for(cl=0; cl<12; ++cl) {
820 for(i=0; i<LENGTHOF(posixData); ++i) {
821 expect=(UBool)((posixData[i].posixResults&mask)!=0);
822 if(posixClasses[cl].fn(posixData[i].c)!=expect) {
823 log_err("u_%s(U+%04x)=%s is wrong\n",
824 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
825 }
826 }
827 mask<<=1;
828 }
829 }
830
831 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
832 static void TestControlPrint()
833 {
834 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
835 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
836 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
837 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
838 UChar32 c;
839
840 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
841 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
842
843 testSampleCharProps(u_isprint, "u_isprint",
844 samplePrintable, LENGTHOF(samplePrintable), TRUE);
845 testSampleCharProps(u_isprint, "u_isprint",
846 sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
847
848 /* test all ISO 8 controls */
849 for(c=0; c<=0x9f; ++c) {
850 if(c==0x20) {
851 /* skip ASCII graphic characters and continue with DEL */
852 c=0x7f;
853 }
854 if(!u_iscntrl(c)) {
855 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
856 }
857 if(!u_isISOControl(c)) {
858 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
859 }
860 if(u_isprint(c)) {
861 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
862 }
863 }
864
865 /* test all Latin-1 graphic characters */
866 for(c=0x20; c<=0xff; ++c) {
867 if(c==0x7f) {
868 c=0xa0;
869 } else if(c==0xad) {
870 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
871 ++c;
872 }
873 if(!u_isprint(c)) {
874 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
875 }
876 }
877 }
878
879 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
880 static void TestIdentifier()
881 {
882 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
883 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
884 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
885 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
886 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
887 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
888 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
889 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
890 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
891 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
892
893 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
894 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
895 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
896 sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
897
898 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
899 sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
900 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
901 sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
902
903 /* IDPart should imply IDStart */
904 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
905 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
906
907 testSampleCharProps(u_isIDStart, "u_isIDStart",
908 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
909 testSampleCharProps(u_isIDStart, "u_isIDStart",
910 sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
911
912 testSampleCharProps(u_isIDPart, "u_isIDPart",
913 sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
914 testSampleCharProps(u_isIDPart, "u_isIDPart",
915 sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
916
917 /* IDPart should imply IDStart */
918 testSampleCharProps(u_isIDPart, "u_isIDPart",
919 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
920
921 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
922 sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
923 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
924 sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
925 }
926
927 /* for each line of UnicodeData.txt, check some of the properties */
928 typedef struct UnicodeDataContext {
929 #if UCONFIG_NO_NORMALIZATION
930 const void *dummy;
931 #else
932 const UNormalizer2 *nfc;
933 const UNormalizer2 *nfkc;
934 #endif
935 } UnicodeDataContext;
936
937 /*
938 * ### TODO
939 * This test fails incorrectly if the First or Last code point of a repetitive area
940 * is overridden, which is allowed and is encouraged for the PUAs.
941 * Currently, this means that both area First/Last and override lines are
942 * tested against the properties from the API,
943 * and the area boundary will not match and cause an error.
944 *
945 * This function should detect area boundaries and skip them for the test of individual
946 * code points' properties.
947 * Then it should check that the areas contain all the same properties except where overridden.
948 * For this, it would have had to set a flag for which code points were listed explicitly.
949 */
950 static void U_CALLCONV
951 unicodeDataLineFn(void *context,
952 char *fields[][2], int32_t fieldCount,
953 UErrorCode *pErrorCode)
954 {
955 char buffer[100];
956 const char *d;
957 char *end;
958 uint32_t value;
959 UChar32 c;
960 int32_t i;
961 int8_t type;
962 int32_t dt;
963 UChar dm[32], s[32];
964 int32_t dmLength, length;
965
966 #if !UCONFIG_NO_NORMALIZATION
967 const UNormalizer2 *nfc, *nfkc;
968 #endif
969
970 /* get the character code, field 0 */
971 c=strtoul(fields[0][0], &end, 16);
972 if(end<=fields[0][0] || end!=fields[0][1]) {
973 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
974 return;
975 }
976 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
977 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
978 return;
979 }
980
981 /* get general category, field 2 */
982 *fields[2][1]=0;
983 type = (int8_t)tagValues[MakeProp(fields[2][0])];
984 if(u_charType(c)!=type) {
985 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
986 }
987 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
988 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
989 }
990
991 /* get canonical combining class, field 3 */
992 value=strtoul(fields[3][0], &end, 10);
993 if(end<=fields[3][0] || end!=fields[3][1]) {
994 log_err("error: syntax error in field 3 at code 0x%lx\n", c);
995 return;
996 }
997 if(value>255) {
998 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
999 return;
1000 }
1001 #if !UCONFIG_NO_NORMALIZATION
1002 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1003 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1004 }
1005 nfkc=((UnicodeDataContext *)context)->nfkc;
1006 if(value!=unorm2_getCombiningClass(nfkc, c)) {
1007 log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1008 }
1009 #endif
1010
1011 /* get BiDi category, field 4 */
1012 *fields[4][1]=0;
1013 i=MakeDir(fields[4][0]);
1014 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1015 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1016 }
1017
1018 /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1019 d=NULL;
1020 if(fields[5][0]==fields[5][1]) {
1021 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1022 if(c==0xac00 || c==0xd7a3) {
1023 dt=U_DT_CANONICAL;
1024 } else {
1025 dt=U_DT_NONE;
1026 }
1027 } else {
1028 d=fields[5][0];
1029 *fields[5][1]=0;
1030 dt=UCHAR_INVALID_CODE;
1031 if(*d=='<') {
1032 end=strchr(++d, '>');
1033 if(end!=NULL) {
1034 *end=0;
1035 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1036 d=u_skipWhitespace(end+1);
1037 }
1038 } else {
1039 dt=U_DT_CANONICAL;
1040 }
1041 }
1042 if(dt>U_DT_NONE) {
1043 if(c==0xac00) {
1044 dm[0]=0x1100;
1045 dm[1]=0x1161;
1046 dm[2]=0;
1047 dmLength=2;
1048 } else if(c==0xd7a3) {
1049 dm[0]=0xd788;
1050 dm[1]=0x11c2;
1051 dm[2]=0;
1052 dmLength=2;
1053 } else {
1054 dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1055 }
1056 } else {
1057 dmLength=-1;
1058 }
1059 if(dt<0 || U_FAILURE(*pErrorCode)) {
1060 log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1061 return;
1062 }
1063 #if !UCONFIG_NO_NORMALIZATION
1064 i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1065 if(i!=dt) {
1066 log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1067 }
1068 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1069 length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1070 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1071 log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1072 "or the Decomposition_Mapping is different (%s)\n",
1073 c, length, dmLength, u_errorName(*pErrorCode));
1074 return;
1075 }
1076 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1077 if(dt!=U_DT_CANONICAL) {
1078 dmLength=-1;
1079 }
1080 nfc=((UnicodeDataContext *)context)->nfc;
1081 length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1082 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1083 log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1084 "or the Decomposition_Mapping is different (%s)\n",
1085 c, length, dmLength, u_errorName(*pErrorCode));
1086 return;
1087 }
1088 /* recompose */
1089 if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1090 UChar32 a, b, composite;
1091 i=0;
1092 U16_NEXT(dm, i, dmLength, a);
1093 U16_NEXT(dm, i, dmLength, b);
1094 /* i==dmLength */
1095 composite=unorm2_composePair(nfc, a, b);
1096 if(composite!=c) {
1097 log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1098 (long)c, (long)a, (long)b, (long)composite);
1099 }
1100 /*
1101 * Note: NFKC has fewer round-trip mappings than NFC,
1102 * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1103 */
1104 }
1105 #endif
1106
1107 /* get ISO Comment, field 11 */
1108 *fields[11][1]=0;
1109 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1110 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1111 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1112 c, u_errorName(*pErrorCode),
1113 U_FAILURE(*pErrorCode) ? buffer : "[error]",
1114 fields[11][0]);
1115 }
1116
1117 /* get uppercase mapping, field 12 */
1118 if(fields[12][0]!=fields[12][1]) {
1119 value=strtoul(fields[12][0], &end, 16);
1120 if(end!=fields[12][1]) {
1121 log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1122 return;
1123 }
1124 if((UChar32)value!=u_toupper(c)) {
1125 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1126 }
1127 } else {
1128 /* no case mapping: the API must map the code point to itself */
1129 if(c!=u_toupper(c)) {
1130 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1131 }
1132 }
1133
1134 /* get lowercase mapping, field 13 */
1135 if(fields[13][0]!=fields[13][1]) {
1136 value=strtoul(fields[13][0], &end, 16);
1137 if(end!=fields[13][1]) {
1138 log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1139 return;
1140 }
1141 if((UChar32)value!=u_tolower(c)) {
1142 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1143 }
1144 } else {
1145 /* no case mapping: the API must map the code point to itself */
1146 if(c!=u_tolower(c)) {
1147 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1148 }
1149 }
1150
1151 /* get titlecase mapping, field 14 */
1152 if(fields[14][0]!=fields[14][1]) {
1153 value=strtoul(fields[14][0], &end, 16);
1154 if(end!=fields[14][1]) {
1155 log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1156 return;
1157 }
1158 if((UChar32)value!=u_totitle(c)) {
1159 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1160 }
1161 } else {
1162 /* no case mapping: the API must map the code point to itself */
1163 if(c!=u_totitle(c)) {
1164 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1165 }
1166 }
1167 }
1168
1169 static UBool U_CALLCONV
1170 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1171 static const UChar32 test[][2]={
1172 {0x41, U_UPPERCASE_LETTER},
1173 {0x308, U_NON_SPACING_MARK},
1174 {0xfffe, U_GENERAL_OTHER_TYPES},
1175 {0xe0041, U_FORMAT_CHAR},
1176 {0xeffff, U_UNASSIGNED}
1177 };
1178
1179 int32_t i, count;
1180
1181 if(0!=strcmp((const char *)context, "a1")) {
1182 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1183 return FALSE;
1184 }
1185
1186 count=LENGTHOF(test);
1187 for(i=0; i<count; ++i) {
1188 if(start<=test[i][0] && test[i][0]<limit) {
1189 if(type!=(UCharCategory)test[i][1]) {
1190 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1191 start, limit, (long)type, test[i][0], test[i][1]);
1192 }
1193 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1194 return i==(count-1) ? FALSE : TRUE;
1195 }
1196 }
1197
1198 if(start>test[count-1][0]) {
1199 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1200 start, limit, (long)type);
1201 return FALSE;
1202 }
1203
1204 return TRUE;
1205 }
1206
1207 static UBool U_CALLCONV
1208 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1209 /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1210 static const int32_t defaultBidi[][2]={ /* { limit, class } */
1211 { 0x0590, U_LEFT_TO_RIGHT },
1212 { 0x0600, U_RIGHT_TO_LEFT },
1213 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1214 { 0x08A0, U_RIGHT_TO_LEFT },
1215 { 0x0900, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1216 { 0xFB1D, U_LEFT_TO_RIGHT },
1217 { 0xFB50, U_RIGHT_TO_LEFT },
1218 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1219 { 0xFE70, U_LEFT_TO_RIGHT },
1220 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1221 { 0x10800, U_LEFT_TO_RIGHT },
1222 { 0x11000, U_RIGHT_TO_LEFT },
1223 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1224 { 0x1EE00, U_RIGHT_TO_LEFT },
1225 { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1226 { 0x1F000, U_RIGHT_TO_LEFT },
1227 { 0x110000, U_LEFT_TO_RIGHT }
1228 };
1229
1230 UChar32 c;
1231 int32_t i;
1232 UCharDirection shouldBeDir;
1233
1234 /*
1235 * LineBreak.txt specifies:
1236 * # - Assigned characters that are not listed explicitly are given the value
1237 * # "AL".
1238 * # - Unassigned characters are given the value "XX".
1239 *
1240 * PUA characters are listed explicitly with "XX".
1241 * Verify that no assigned character has "XX".
1242 */
1243 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1244 c=start;
1245 while(c<limit) {
1246 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1247 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1248 }
1249 ++c;
1250 }
1251 }
1252
1253 /*
1254 * Verify default Bidi classes.
1255 * For recent Unicode versions, see UCD.html.
1256 *
1257 * For older Unicode versions:
1258 * See table 3-7 "Bidirectional Character Types" in UAX #9.
1259 * http://www.unicode.org/reports/tr9/
1260 *
1261 * See also DerivedBidiClass.txt for Cn code points!
1262 *
1263 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1264 * changed some default values.
1265 * In particular, non-characters and unassigned Default Ignorable Code Points
1266 * change from L to BN.
1267 *
1268 * UCD.html version 4.0.1 does not yet reflect these changes.
1269 */
1270 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1271 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1272 c=start;
1273 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1274 if((int32_t)c<defaultBidi[i][0]) {
1275 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1276 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1277 shouldBeDir=U_BOUNDARY_NEUTRAL;
1278 } else {
1279 shouldBeDir=(UCharDirection)defaultBidi[i][1];
1280 }
1281
1282 if( u_charDirection(c)!=shouldBeDir ||
1283 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1284 ) {
1285 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1286 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1287 }
1288 ++c;
1289 }
1290 }
1291 }
1292 }
1293
1294 return TRUE;
1295 }
1296
1297 /* tests for several properties */
1298 static void TestUnicodeData()
1299 {
1300 UVersionInfo expectVersionArray;
1301 UVersionInfo versionArray;
1302 char *fields[15][2];
1303 UErrorCode errorCode;
1304 UChar32 c;
1305 int8_t type;
1306
1307 UnicodeDataContext context;
1308
1309 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1310 u_getUnicodeVersion(versionArray);
1311 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1312 {
1313 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1314 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1315 }
1316
1317 #if defined(ICU_UNICODE_VERSION)
1318 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1319 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1320 {
1321 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1322 }
1323 #endif
1324
1325 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1326 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1327 }
1328
1329 errorCode=U_ZERO_ERROR;
1330 #if !UCONFIG_NO_NORMALIZATION
1331 context.nfc=unorm2_getNFCInstance(&errorCode);
1332 context.nfkc=unorm2_getNFKCInstance(&errorCode);
1333 if(U_FAILURE(errorCode)) {
1334 log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1335 return;
1336 }
1337 #endif
1338 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1339 if(U_FAILURE(errorCode)) {
1340 return; /* if we couldn't parse UnicodeData.txt, we should return */
1341 }
1342
1343 /* sanity check on repeated properties */
1344 for(c=0xfffe; c<=0x10ffff;) {
1345 type=u_charType(c);
1346 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1347 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1348 }
1349 if(type!=U_UNASSIGNED) {
1350 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1351 }
1352 if((c&0xffff)==0xfffe) {
1353 ++c;
1354 } else {
1355 c+=0xffff;
1356 }
1357 }
1358
1359 /* test that PUA is not "unassigned" */
1360 for(c=0xe000; c<=0x10fffd;) {
1361 type=u_charType(c);
1362 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1363 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1364 }
1365 if(type==U_UNASSIGNED) {
1366 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1367 } else if(type!=U_PRIVATE_USE_CHAR) {
1368 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1369 }
1370 if(c==0xf8ff) {
1371 c=0xf0000;
1372 } else if(c==0xffffd) {
1373 c=0x100000;
1374 } else {
1375 ++c;
1376 }
1377 }
1378
1379 /* test u_enumCharTypes() */
1380 u_enumCharTypes(enumTypeRange, "a1");
1381
1382 /* check default properties */
1383 u_enumCharTypes(enumDefaultsRange, NULL);
1384 }
1385
1386 static void TestCodeUnit(){
1387 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1388
1389 int32_t i;
1390
1391 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1392 UChar c=codeunit[i];
1393 if(i<4){
1394 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1395 log_err("ERROR: U+%04x is a single", c);
1396 }
1397
1398 }
1399 if(i >= 4 && i< 8){
1400 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1401 log_err("ERROR: U+%04x is a first surrogate", c);
1402 }
1403 }
1404 if(i >= 8 && i< 12){
1405 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1406 log_err("ERROR: U+%04x is a second surrogate", c);
1407 }
1408 }
1409 }
1410
1411 }
1412
1413 static void TestCodePoint(){
1414 const UChar32 codePoint[]={
1415 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1416 0xd800,
1417 0xdbff,
1418 0xdc00,
1419 0xdfff,
1420 0xdc04,
1421 0xd821,
1422 /*not a surrogate, valid, isUnicodeChar , not Error*/
1423 0x20ac,
1424 0xd7ff,
1425 0xe000,
1426 0xe123,
1427 0x0061,
1428 0xe065,
1429 0x20402,
1430 0x24506,
1431 0x23456,
1432 0x20402,
1433 0x10402,
1434 0x23456,
1435 /*not a surrogate, not valid, isUnicodeChar, isError */
1436 0x0015,
1437 0x009f,
1438 /*not a surrogate, not valid, not isUnicodeChar, isError */
1439 0xffff,
1440 0xfffe,
1441 };
1442 int32_t i;
1443 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1444 UChar32 c=codePoint[i];
1445 if(i<6){
1446 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1447 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1448 }
1449 if(UTF_IS_VALID(c)){
1450 log_err("ERROR: isValid() failed for U+%04x\n", c);
1451 }
1452 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1453 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1454 }
1455 if(UTF_IS_ERROR(c)){
1456 log_err("ERROR: isError() failed for U+%04x\n", c);
1457 }
1458 }else if(i >=6 && i<18){
1459 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1460 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1461 }
1462 if(!UTF_IS_VALID(c)){
1463 log_err("ERROR: isValid() failed for U+%04x\n", c);
1464 }
1465 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1466 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1467 }
1468 if(UTF_IS_ERROR(c)){
1469 log_err("ERROR: isError() failed for U+%04x\n", c);
1470 }
1471 }else if(i >=18 && i<20){
1472 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1473 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1474 }
1475 if(UTF_IS_VALID(c)){
1476 log_err("ERROR: isValid() failed for U+%04x\n", c);
1477 }
1478 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1479 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1480 }
1481 if(!UTF_IS_ERROR(c)){
1482 log_err("ERROR: isError() failed for U+%04x\n", c);
1483 }
1484 }
1485 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1486 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1487 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1488 }
1489 if(UTF_IS_VALID(c)){
1490 log_err("ERROR: isValid() failed for U+%04x\n", c);
1491 }
1492 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1493 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1494 }
1495 if(!UTF_IS_ERROR(c)){
1496 log_err("ERROR: isError() failed for U+%04x\n", c);
1497 }
1498 }
1499 }
1500
1501 if(
1502 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1503 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1504 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1505 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1506 ) {
1507 log_err("error with U_IS_BMP()\n");
1508 }
1509
1510 if(
1511 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1512 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1513 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1514 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1515 ) {
1516 log_err("error with U_IS_SUPPLEMENTARY()\n");
1517 }
1518 }
1519
1520 static void TestCharLength()
1521 {
1522 const int32_t codepoint[]={
1523 1, 0x0061,
1524 1, 0xe065,
1525 1, 0x20ac,
1526 2, 0x20402,
1527 2, 0x23456,
1528 2, 0x24506,
1529 2, 0x20402,
1530 2, 0x10402,
1531 1, 0xd7ff,
1532 1, 0xe000
1533 };
1534
1535 int32_t i;
1536 UBool multiple;
1537 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1538 UChar32 c=codepoint[i+1];
1539 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1540 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1541 }
1542 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1543 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1544 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1545 }
1546 }
1547 }
1548
1549 /*internal functions ----*/
1550 static int32_t MakeProp(char* str)
1551 {
1552 int32_t result = 0;
1553 char* matchPosition =0;
1554
1555 matchPosition = strstr(tagStrings, str);
1556 if (matchPosition == 0)
1557 {
1558 log_err("unrecognized type letter ");
1559 log_err(str);
1560 }
1561 else
1562 result = (int32_t)((matchPosition - tagStrings) / 2);
1563 return result;
1564 }
1565
1566 static int32_t MakeDir(char* str)
1567 {
1568 int32_t pos = 0;
1569 for (pos = 0; pos < 19; pos++) {
1570 if (strcmp(str, dirStrings[pos]) == 0) {
1571 return pos;
1572 }
1573 }
1574 return -1;
1575 }
1576
1577 /* test u_charName() -------------------------------------------------------- */
1578
/*
 * Sample code points with their expected names:
 * name = modern Unicode name, oldName = Unicode 1.0 name,
 * extName = extended name, alias = name alias (NULL if there is none).
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A", NULL},
    {0x01a2, "LATIN CAPITAL LETTER OI", "", "LATIN CAPITAL LETTER OI", "LATIN CAPITAL LETTER GHA"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", NULL},
    {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL},
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL},
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL},
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL},
    {0xd800, "", "", "<lead surrogate-D800>", NULL},
    {0xdc00, "", "", "<trail surrogate-DC00>", NULL},
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS", NULL},
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL},
    {0xffff, "", "", "<noncharacter-FFFF>", NULL},
    {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL}
};
1606
1607 static UBool
1608 enumCharNamesFn(void *context,
1609 UChar32 code, UCharNameChoice nameChoice,
1610 const char *name, int32_t length) {
1611 int32_t *pCount=(int32_t *)context;
1612 const char *expected;
1613 int i;
1614
1615 if(length<=0 || length!=(int32_t)strlen(name)) {
1616 /* should not be called with an empty string or invalid length */
1617 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1618 return TRUE;
1619 }
1620
1621 ++*pCount;
1622 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1623 if(code==(UChar32)names[i].code) {
1624 switch (nameChoice) {
1625 case U_EXTENDED_CHAR_NAME:
1626 if(0!=strcmp(name, names[i].extName)) {
1627 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1628 }
1629 break;
1630 case U_UNICODE_CHAR_NAME:
1631 if(0!=strcmp(name, names[i].name)) {
1632 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1633 }
1634 break;
1635 case U_UNICODE_10_CHAR_NAME:
1636 expected=names[i].oldName;
1637 if(expected[0]==0 || 0!=strcmp(name, expected)) {
1638 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1639 }
1640 break;
1641 case U_CHAR_NAME_ALIAS:
1642 expected=names[i].alias;
1643 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1644 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1645 }
1646 break;
1647 case U_CHAR_NAME_CHOICE_COUNT:
1648 break;
1649 }
1650 break;
1651 }
1652 }
1653 return TRUE;
1654 }
1655
1656 struct enumExtCharNamesContext {
1657 uint32_t length;
1658 int32_t last;
1659 };
1660
1661 static UBool
1662 enumExtCharNamesFn(void *context,
1663 UChar32 code, UCharNameChoice nameChoice,
1664 const char *name, int32_t length) {
1665 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1666
1667 if (ecncp->last != (int32_t) code - 1) {
1668 if (ecncp->last < 0) {
1669 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1670 } else {
1671 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1672 }
1673 }
1674 ecncp->last = (int32_t) code;
1675
1676 if (!*name) {
1677 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1678 }
1679
1680 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1681 }
1682
1683 /**
1684 * This can be made more efficient by moving it into putil.c and having
1685 * it directly access the ebcdic translation tables.
1686 * TODO: If we get this method in putil.c, then delete it from here.
1687 */
1688 static UChar
1689 u_charToUChar(char c) {
1690 UChar uc;
1691 u_charsToUChars(&c, &uc, 1);
1692 return uc;
1693 }
1694
1695 static void
1696 TestCharNames() {
1697 static char name[80];
1698 UErrorCode errorCode=U_ZERO_ERROR;
1699 struct enumExtCharNamesContext extContext;
1700 const char *expected;
1701 int32_t length;
1702 UChar32 c;
1703 int32_t i;
1704
1705 log_verbose("Testing uprv_getMaxCharNameLength()\n");
1706 length=uprv_getMaxCharNameLength();
1707 if(length==0) {
1708 /* no names data available */
1709 return;
1710 }
1711 if(length<83) { /* Unicode 3.2 max char name length */
1712 log_err("uprv_getMaxCharNameLength()=%d is too short");
1713 }
1714 /* ### TODO same tests for max ISO comment length as for max name length */
1715
1716 log_verbose("Testing u_charName()\n");
1717 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1718 /* modern Unicode character name */
1719 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1720 if(U_FAILURE(errorCode)) {
1721 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1722 return;
1723 }
1724 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1725 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1726 }
1727
1728 /* find the modern name */
1729 if (*names[i].name) {
1730 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1731 if(U_FAILURE(errorCode)) {
1732 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1733 return;
1734 }
1735 if(c!=(UChar32)names[i].code) {
1736 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1737 }
1738 }
1739
1740 /* Unicode 1.0 character name */
1741 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1742 if(U_FAILURE(errorCode)) {
1743 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1744 return;
1745 }
1746 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1747 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1748 }
1749
1750 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1751 if(names[i].oldName[0]!=0 /* && length>0 */) {
1752 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1753 if(U_FAILURE(errorCode)) {
1754 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1755 return;
1756 }
1757 if(c!=(UChar32)names[i].code) {
1758 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1759 }
1760 }
1761
1762 /* Unicode character name alias */
1763 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1764 if(U_FAILURE(errorCode)) {
1765 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1766 return;
1767 }
1768 expected=names[i].alias;
1769 if(expected==NULL) {
1770 expected="";
1771 }
1772 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1773 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1774 names[i].code, name, length, expected);
1775 }
1776
1777 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1778 if(expected[0]!=0 /* && length>0 */) {
1779 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1780 if(U_FAILURE(errorCode)) {
1781 log_err("u_charFromName(%s - alias) error %s\n",
1782 expected, u_errorName(errorCode));
1783 return;
1784 }
1785 if(c!=(UChar32)names[i].code) {
1786 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1787 expected, c, names[i].code);
1788 }
1789 }
1790 }
1791
1792 /* test u_enumCharNames() */
1793 length=0;
1794 errorCode=U_ZERO_ERROR;
1795 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1796 if(U_FAILURE(errorCode) || length<94140) {
1797 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1798 }
1799
1800 extContext.length = 0;
1801 extContext.last = -1;
1802 errorCode=U_ZERO_ERROR;
1803 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1804 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1805 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1806 }
1807
1808 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1809 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1810 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1811 }
1812
1813 /* Test getCharNameCharacters */
1814 if(!getTestOption(QUICK_OPTION)) {
1815 enum { BUFSIZE = 256 };
1816 UErrorCode ec = U_ZERO_ERROR;
1817 char buf[BUFSIZE];
1818 int32_t maxLength;
1819 UChar32 cp;
1820 UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1821 int32_t l1, l2;
1822 UBool map[256];
1823 UBool ok;
1824
1825 USet* set = uset_open(1, 0); /* empty set */
1826 USet* dumb = uset_open(1, 0); /* empty set */
1827
1828 /*
1829 * uprv_getCharNameCharacters() will likely return more lowercase
1830 * letters than actual character names contain because
1831 * it includes all the characters in lowercased names of
1832 * general categories, for the full possible set of extended names.
1833 */
1834 {
1835 USetAdder sa={
1836 NULL,
1837 uset_add,
1838 uset_addRange,
1839 uset_addString,
1840 NULL /* don't need remove() */
1841 };
1842 sa.set=set;
1843 uprv_getCharNameCharacters(&sa);
1844 }
1845
1846 /* build set the dumb (but sure-fire) way */
1847 for (i=0; i<256; ++i) {
1848 map[i] = FALSE;
1849 }
1850
1851 maxLength=0;
1852 for (cp=0; cp<0x110000; ++cp) {
1853 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1854 buf, BUFSIZE, &ec);
1855 if (U_FAILURE(ec)) {
1856 log_err("FAIL: u_charName failed when it shouldn't\n");
1857 uset_close(set);
1858 uset_close(dumb);
1859 return;
1860 }
1861 if(len>maxLength) {
1862 maxLength=len;
1863 }
1864
1865 for (i=0; i<len; ++i) {
1866 if (!map[(uint8_t) buf[i]]) {
1867 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1868 map[(uint8_t) buf[i]] = TRUE;
1869 }
1870 }
1871
1872 /* test for leading/trailing whitespace */
1873 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1874 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1875 }
1876 }
1877
1878 if(map[(uint8_t)'\t']) {
1879 log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1880 }
1881
1882 length=uprv_getMaxCharNameLength();
1883 if(length!=maxLength) {
1884 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1885 length, maxLength);
1886 }
1887
1888 /* compare the sets. Where is my uset_equals?!! */
1889 ok=TRUE;
1890 for(i=0; i<256; ++i) {
1891 if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1892 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1893 /* ignore lowercase a-z that are in set but not in dumb */
1894 ok=TRUE;
1895 } else {
1896 ok=FALSE;
1897 break;
1898 }
1899 }
1900 }
1901
1902 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1903 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1904 if (U_FAILURE(ec)) {
1905 log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1906 uset_close(set);
1907 uset_close(dumb);
1908 return;
1909 }
1910
1911 if (l1 >= BUFSIZE) {
1912 l1 = BUFSIZE-1;
1913 pat[l1] = 0;
1914 }
1915 if (l2 >= BUFSIZE) {
1916 l2 = BUFSIZE-1;
1917 dumbPat[l2] = 0;
1918 }
1919
1920 if (!ok) {
1921 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1922 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1923 } else if(getTestOption(VERBOSITY_OPTION)) {
1924 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1925 }
1926
1927 uset_close(set);
1928 uset_close(dumb);
1929 }
1930
1931 /* ### TODO: test error cases and other interesting things */
1932 }
1933
1934 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
1935
1936 static void
1937 TestMirroring() {
1938 USet *set;
1939 UErrorCode errorCode;
1940
1941 UChar32 start, end, c2, c3;
1942 int32_t i;
1943
1944 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1945
1946 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1947
1948 log_verbose("Testing u_isMirrored()\n");
1949 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1950 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1951 )
1952 ) {
1953 log_err("u_isMirrored() does not work correctly\n");
1954 }
1955
1956 log_verbose("Testing u_charMirror()\n");
1957 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1958 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1959 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1960 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1961 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1962 )
1963 ) {
1964 log_err("u_charMirror() does not work correctly\n");
1965 }
1966
1967 /* verify that Bidi_Mirroring_Glyph roundtrips */
1968 errorCode=U_ZERO_ERROR;
1969 set=uset_openPattern(mirroredPattern, 17, &errorCode);
1970
1971 if (U_FAILURE(errorCode)) {
1972 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1973 } else {
1974 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1975 do {
1976 c2=u_charMirror(start);
1977 c3=u_charMirror(c2);
1978 if(c3!=start) {
1979 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1980 }
1981 } while(++start<=end);
1982 }
1983 }
1984
1985 uset_close(set);
1986 }
1987
1988
1989 struct RunTestData
1990 {
1991 const char *runText;
1992 UScriptCode runCode;
1993 };
1994
1995 typedef struct RunTestData RunTestData;
1996
1997 static void
1998 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1999 const char *prefix)
2000 {
2001 int32_t run, runStart, runLimit;
2002 UScriptCode runCode;
2003
2004 /* iterate over all the runs */
2005 run = 0;
2006 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2007 if (runStart != runStarts[run]) {
2008 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2009 prefix, run, runStarts[run], runStart);
2010 }
2011
2012 if (runLimit != runStarts[run + 1]) {
2013 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2014 prefix, run, runStarts[run + 1], runLimit);
2015 }
2016
2017 if (runCode != testData[run].runCode) {
2018 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2019 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2020 }
2021
2022 run += 1;
2023
2024 /* stop when we've seen all the runs we expect to see */
2025 if (run >= nRuns) {
2026 break;
2027 }
2028 }
2029
2030 /* Complain if we didn't see then number of runs we expected */
2031 if (run != nRuns) {
2032 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2033 }
2034 }
2035
2036 static void
2037 TestUScriptRunAPI()
2038 {
2039 static const RunTestData testData1[] = {
2040 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2041 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2042 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2043 {"English (", USCRIPT_LATIN},
2044 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2045 {") ", USCRIPT_LATIN},
2046 {"\\u6F22\\u5B75", USCRIPT_HAN},
2047 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2048 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2049 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2050 };
2051
2052 static const RunTestData testData2[] = {
2053 {"((((((((((abc))))))))))", USCRIPT_LATIN}
2054 };
2055
2056 static const struct {
2057 const RunTestData *testData;
2058 int32_t nRuns;
2059 } testDataEntries[] = {
2060 {testData1, LENGTHOF(testData1)},
2061 {testData2, LENGTHOF(testData2)}
2062 };
2063
2064 static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2065 int32_t testEntry;
2066
2067 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2068 UChar testString[1024];
2069 int32_t runStarts[256];
2070 int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2071 const RunTestData *testData = testDataEntries[testEntry].testData;
2072
2073 int32_t run, stringLimit;
2074 UScriptRun *scriptRun = NULL;
2075 UErrorCode err;
2076
2077 /*
2078 * Fill in the test string and the runStarts array.
2079 */
2080 stringLimit = 0;
2081 for (run = 0; run < nTestRuns; run += 1) {
2082 runStarts[run] = stringLimit;
2083 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2084 /*stringLimit -= 1;*/
2085 }
2086
2087 /* The limit of the last run */
2088 runStarts[nTestRuns] = stringLimit;
2089
2090 /*
2091 * Make sure that calling uscript_OpenRun with a NULL text pointer
2092 * and a non-zero text length returns the correct error.
2093 */
2094 err = U_ZERO_ERROR;
2095 scriptRun = uscript_openRun(NULL, stringLimit, &err);
2096
2097 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2098 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2099 }
2100
2101 if (scriptRun != NULL) {
2102 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2103 uscript_closeRun(scriptRun);
2104 }
2105
2106 /*
2107 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2108 * and a zero text length returns the correct error.
2109 */
2110 err = U_ZERO_ERROR;
2111 scriptRun = uscript_openRun(testString, 0, &err);
2112
2113 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2114 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2115 }
2116
2117 if (scriptRun != NULL) {
2118 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2119 uscript_closeRun(scriptRun);
2120 }
2121
2122 /*
2123 * Make sure that calling uscript_openRun with a NULL text pointer
2124 * and a zero text length doesn't return an error.
2125 */
2126 err = U_ZERO_ERROR;
2127 scriptRun = uscript_openRun(NULL, 0, &err);
2128
2129 if (U_FAILURE(err)) {
2130 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2131 }
2132
2133 /* Make sure that the empty iterator doesn't find any runs */
2134 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2135 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2136 }
2137
2138 /*
2139 * Make sure that calling uscript_setRunText with a NULL text pointer
2140 * and a non-zero text length returns the correct error.
2141 */
2142 err = U_ZERO_ERROR;
2143 uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2144
2145 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2146 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2147 }
2148
2149 /*
2150 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2151 * and a zero text length returns the correct error.
2152 */
2153 err = U_ZERO_ERROR;
2154 uscript_setRunText(scriptRun, testString, 0, &err);
2155
2156 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2157 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2158 }
2159
2160 /*
2161 * Now call uscript_setRunText on the empty iterator
2162 * and make sure that it works.
2163 */
2164 err = U_ZERO_ERROR;
2165 uscript_setRunText(scriptRun, testString, stringLimit, &err);
2166
2167 if (U_FAILURE(err)) {
2168 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2169 } else {
2170 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2171 }
2172
2173 uscript_closeRun(scriptRun);
2174
2175 /*
2176 * Now open an interator over the testString
2177 * using uscript_openRun and make sure that it works
2178 */
2179 scriptRun = uscript_openRun(testString, stringLimit, &err);
2180
2181 if (U_FAILURE(err)) {
2182 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2183 } else {
2184 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2185 }
2186
2187 /* Now reset the iterator, and make sure
2188 * that it still works.
2189 */
2190 uscript_resetRun(scriptRun);
2191
2192 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2193
2194 /* Close the iterator */
2195 uscript_closeRun(scriptRun);
2196 }
2197 }
2198
2199 /* test additional, non-core properties */
2200 static void
2201 TestAdditionalProperties() {
2202 /* test data for u_charAge() */
2203 static const struct {
2204 UChar32 c;
2205 UVersionInfo version;
2206 } charAges[]={
2207 {0x41, { 1, 1, 0, 0 }},
2208 {0xffff, { 1, 1, 0, 0 }},
2209 {0x20ab, { 2, 0, 0, 0 }},
2210 {0x2fffe, { 2, 0, 0, 0 }},
2211 {0x20ac, { 2, 1, 0, 0 }},
2212 {0xfb1d, { 3, 0, 0, 0 }},
2213 {0x3f4, { 3, 1, 0, 0 }},
2214 {0x10300, { 3, 1, 0, 0 }},
2215 {0x220, { 3, 2, 0, 0 }},
2216 {0xff60, { 3, 2, 0, 0 }}
2217 };
2218
2219 /* test data for u_hasBinaryProperty() */
2220 static const int32_t
2221 props[][3]={ /* code point, property, value */
2222 { 0x0627, UCHAR_ALPHABETIC, TRUE },
2223 { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2224 { 0x2028, UCHAR_ALPHABETIC, FALSE },
2225
2226 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2227 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2228
2229 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2230 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2231
2232 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2233 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2234
2235 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2236 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2237 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2238 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2239 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2240
2241 { 0x058a, UCHAR_DASH, TRUE },
2242 { 0x007e, UCHAR_DASH, FALSE },
2243
2244 { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2245 { 0x3000, UCHAR_DIACRITIC, FALSE },
2246
2247 { 0x0e46, UCHAR_EXTENDER, TRUE },
2248 { 0x0020, UCHAR_EXTENDER, FALSE },
2249
2250 #if !UCONFIG_NO_NORMALIZATION
2251 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2252 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2253 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2254
2255 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */
2256 { 0x0308, UCHAR_NFD_INERT, FALSE },
2257
2258 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */
2259 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */
2260
2261 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */
2262 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */
2263 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */
2264 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */
2265 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */
2266 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */
2267
2268 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */
2269 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */
2270
2271 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2272 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2273 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2274 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2275 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2276 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2277 #endif
2278
2279 { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2280 { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2281 { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2282
2283 { 0x30fb, UCHAR_HYPHEN, TRUE },
2284 { 0xfe58, UCHAR_HYPHEN, FALSE },
2285
2286 { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2287 { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2288 { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2289
2290 { 0x2172, UCHAR_ID_START, TRUE },
2291 { 0x007a, UCHAR_ID_START, TRUE },
2292 { 0x0039, UCHAR_ID_START, FALSE },
2293
2294 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2295 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2296 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2297
2298 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2299 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2300
2301 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2302 { 0x0345, UCHAR_LOWERCASE, TRUE },
2303 { 0x0030, UCHAR_LOWERCASE, FALSE },
2304
2305 { 0x1d7a9, UCHAR_MATH, TRUE },
2306 { 0x2135, UCHAR_MATH, TRUE },
2307 { 0x0062, UCHAR_MATH, FALSE },
2308
2309 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2310 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2311 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2312
2313 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2314 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2315 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2316
2317 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2318 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2319
2320 { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2321 { 0x2162, UCHAR_UPPERCASE, TRUE },
2322 { 0x0345, UCHAR_UPPERCASE, FALSE },
2323
2324 { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2325 { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2326 { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2327
2328 { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2329 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2330 { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2331
2332 { 0x16ee, UCHAR_XID_START, TRUE },
2333 { 0x23456, UCHAR_XID_START, TRUE },
2334 { 0x1d1aa, UCHAR_XID_START, FALSE },
2335
2336 /*
2337 * Version break:
2338 * The following properties are only supported starting with the
2339 * Unicode version indicated in the second field.
2340 */
2341 { -1, 0x320, 0 },
2342
2343 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2344 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2345 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2346
2347 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */
2348 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */
2349 { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */
2350 { 0xe0100, UCHAR_DEPRECATED, FALSE },
2351
2352 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2353 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2354 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2355 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2356
2357 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2358 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2359 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2360 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2361
2362 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2363 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2364
2365 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2366 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2367
2368 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2369 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2370
2371 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2372 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2373
2374 { 0x2e9b, UCHAR_RADICAL, TRUE },
2375 { 0x4e00, UCHAR_RADICAL, FALSE },
2376
2377 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2378 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2379
2380 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2381 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2382
2383 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2384
2385 { 0x002e, UCHAR_S_TERM, TRUE },
2386 { 0x0061, UCHAR_S_TERM, FALSE },
2387
2388 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2389 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2390 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2391 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2392
2393 /* enum/integer type properties */
2394
2395 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2396 /* test default Bidi classes for unassigned code points */
2397 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2398 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2399 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2400 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2401 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2402 { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2403 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2404 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2405 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2406 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2407 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2408
2409 { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2410 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2411 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2412 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2413 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2414 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2415 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2416 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2417
2418 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2419 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2420 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2421 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2422 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2423 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2424 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2425 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2426 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2427 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2428 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2429
2430 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2431 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2432
2433 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2434 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2435 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2436 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2437 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2438 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2439 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2440 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2441 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2442
2443 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2444 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2445 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2446 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2447 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2448 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2449 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2450 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2451 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2452 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2453 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2454 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2455 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2456 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2457 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2458 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2459 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2460
2461 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2462 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2463 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */
2464
2465 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2466 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2467 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2468 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2469 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2470
2471 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2472 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2473 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2474 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2475 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2476 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2477 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2478 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2479
2480 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2481 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2482 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2483 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2484 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2485 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2486 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2487 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2488 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2489 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2490 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2491 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2492 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2493 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2494 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2495 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2496
2497 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2498
2499 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2500
2501 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2502 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2503 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2504 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2505 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2506 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2507 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2508
2509 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2510 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2511 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2512 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2513
2514 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2515 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2516 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2517 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2518 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2519 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2520
2521 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2522 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2523 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2524 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2525
2526 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2527 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2528 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2529 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2530 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2531 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2532 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2533
2534 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2535 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2536 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2537 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2538
2539 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2540 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2541 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2542 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2543
2544 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2545 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2546 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2547 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2548 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2549
2550 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2551
2552 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2553
2554 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2555 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2556 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2557
2558 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2559 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2560 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2561 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2562 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2563
2564 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2565 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC },
2566 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2567
2568 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE },
2569 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC },
2570 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2571 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2572
2573 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2574 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2575 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2576 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2577 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2578 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2579
2580 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2581 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2582 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2583 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2584
2585 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2586 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2587 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2588 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2589
2590 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2591 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2592 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2593 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2594
2595 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2596
2597 /* unassigned code points in new default Bidi R blocks */
2598 { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2599 { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2600
2601 /* test some script codes >127 */
2602 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM },
2603 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },
2604 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2605
2606 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2607
2608 /* value changed in Unicode 6.0 */
2609 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2610
2611 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2612
2613 /* unassigned code points in new/changed default Bidi AL blocks */
2614 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2615 { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2616
2617 /* undefined UProperty values */
2618 { 0x61, 0x4a7, 0 },
2619 { 0x234bc, 0x15ed, 0 }
2620 };
2621
2622 UVersionInfo version;
2623 UChar32 c;
2624 int32_t i, result, uVersion;
2625 UProperty which;
2626
2627 /* what is our Unicode version? */
2628 u_getUnicodeVersion(version);
2629 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2630
2631 u_charAge(0x20, version);
2632 if(version[0]==0) {
2633 /* no additional properties available */
2634 log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2635 return;
2636 }
2637
2638 /* test u_charAge() */
2639 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2640 u_charAge(charAges[i].c, version);
2641 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2642 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2643 charAges[i].c,
2644 version[0], version[1], version[2], version[3],
2645 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2646 }
2647 }
2648
2649 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2650 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2651 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */
2652 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2653 u_getIntPropertyMinValue(0x2345)!=0
2654 ) {
2655 log_err("error: u_getIntPropertyMinValue() wrong\n");
2656 }
2657 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2658 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2659 }
2660 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2661 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2662 }
2663 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2664 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2665 }
2666 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2667 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2668 }
2669 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2670 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2671 }
2672 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2673 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2674 }
2675 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2676 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2677 }
2678 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2679 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2680 }
2681 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2682 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2683 }
2684 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2685 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2686 }
2687 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2688 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2689 }
2690 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2691 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2692 }
2693 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2694 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2695 }
2696 /*JB#2410*/
2697 if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2698 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2699 }
2700 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2701 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2702 }
2703 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) {
2704 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2705 }
2706 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2707 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2708 }
2709 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2710 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2711 }
2712
2713 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2714 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2715 const char *whichName;
2716
2717 if(props[i][0]<0) {
2718 /* Unicode version break */
2719 if(uVersion<props[i][1]) {
2720 break; /* do not test properties that are not yet supported */
2721 } else {
2722 continue; /* skip this row */
2723 }
2724 }
2725
2726 c=(UChar32)props[i][0];
2727 which=(UProperty)props[i][1];
2728 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2729
2730 if(which<UCHAR_INT_START) {
2731 result=u_hasBinaryProperty(c, which);
2732 if(result!=props[i][2]) {
2733 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2734 c, whichName, result, i);
2735 }
2736 }
2737
2738 result=u_getIntPropertyValue(c, which);
2739 if(result!=props[i][2]) {
2740 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2741 c, whichName, result, props[i][2], i);
2742 }
2743
2744 /* test separate functions, too */
2745 switch((UProperty)props[i][1]) {
2746 case UCHAR_ALPHABETIC:
2747 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2748 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2749 props[i][0], result, i);
2750 }
2751 break;
2752 case UCHAR_LOWERCASE:
2753 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2754 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2755 props[i][0], result, i);
2756 }
2757 break;
2758 case UCHAR_UPPERCASE:
2759 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2760 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2761 props[i][0], result, i);
2762 }
2763 break;
2764 case UCHAR_WHITE_SPACE:
2765 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2766 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2767 props[i][0], result, i);
2768 }
2769 break;
2770 default:
2771 break;
2772 }
2773 }
2774 }
2775
2776 static void
2777 TestNumericProperties(void) {
2778 /* see UnicodeData.txt, DerivedNumericValues.txt */
2779 static const struct {
2780 UChar32 c;
2781 int32_t type;
2782 double numValue;
2783 } values[]={
2784 { 0x0F33, U_NT_NUMERIC, -1./2. },
2785 { 0x0C66, U_NT_DECIMAL, 0 },
2786 { 0x96f6, U_NT_NUMERIC, 0 },
2787 { 0xa833, U_NT_NUMERIC, 1./16. },
2788 { 0x2152, U_NT_NUMERIC, 1./10. },
2789 { 0x2151, U_NT_NUMERIC, 1./9. },
2790 { 0x1245f, U_NT_NUMERIC, 1./8. },
2791 { 0x2150, U_NT_NUMERIC, 1./7. },
2792 { 0x2159, U_NT_NUMERIC, 1./6. },
2793 { 0x09f6, U_NT_NUMERIC, 3./16. },
2794 { 0x2155, U_NT_NUMERIC, 1./5. },
2795 { 0x00BD, U_NT_NUMERIC, 1./2. },
2796 { 0x0031, U_NT_DECIMAL, 1. },
2797 { 0x4e00, U_NT_NUMERIC, 1. },
2798 { 0x58f1, U_NT_NUMERIC, 1. },
2799 { 0x10320, U_NT_NUMERIC, 1. },
2800 { 0x0F2B, U_NT_NUMERIC, 3./2. },
2801 { 0x00B2, U_NT_DIGIT, 2. },
2802 { 0x5f10, U_NT_NUMERIC, 2. },
2803 { 0x1813, U_NT_DECIMAL, 3. },
2804 { 0x5f0e, U_NT_NUMERIC, 3. },
2805 { 0x2173, U_NT_NUMERIC, 4. },
2806 { 0x8086, U_NT_NUMERIC, 4. },
2807 { 0x278E, U_NT_DIGIT, 5. },
2808 { 0x1D7F2, U_NT_DECIMAL, 6. },
2809 { 0x247A, U_NT_DIGIT, 7. },
2810 { 0x7396, U_NT_NUMERIC, 9. },
2811 { 0x1372, U_NT_NUMERIC, 10. },
2812 { 0x216B, U_NT_NUMERIC, 12. },
2813 { 0x16EE, U_NT_NUMERIC, 17. },
2814 { 0x249A, U_NT_NUMERIC, 19. },
2815 { 0x303A, U_NT_NUMERIC, 30. },
2816 { 0x5345, U_NT_NUMERIC, 30. },
2817 { 0x32B2, U_NT_NUMERIC, 37. },
2818 { 0x1375, U_NT_NUMERIC, 40. },
2819 { 0x10323, U_NT_NUMERIC, 50. },
2820 { 0x0BF1, U_NT_NUMERIC, 100. },
2821 { 0x964c, U_NT_NUMERIC, 100. },
2822 { 0x217E, U_NT_NUMERIC, 500. },
2823 { 0x2180, U_NT_NUMERIC, 1000. },
2824 { 0x4edf, U_NT_NUMERIC, 1000. },
2825 { 0x2181, U_NT_NUMERIC, 5000. },
2826 { 0x137C, U_NT_NUMERIC, 10000. },
2827 { 0x4e07, U_NT_NUMERIC, 10000. },
2828 { 0x4ebf, U_NT_NUMERIC, 100000000. },
2829 { 0x5146, U_NT_NUMERIC, 1000000000000. },
2830 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2831 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2832 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2833 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2834 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2835 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2836 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2837 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2838 };
2839
2840 double nv;
2841 UChar32 c;
2842 int32_t i, type;
2843
2844 for(i=0; i<LENGTHOF(values); ++i) {
2845 c=values[i].c;
2846 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2847 nv=u_getNumericValue(c);
2848
2849 if(type!=values[i].type) {
2850 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2851 }
2852 if(0.000001 <= fabs(nv - values[i].numValue)) {
2853 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2854 }
2855 }
2856 }
2857
2858 /**
2859 * Test the property names and property value names API.
2860 */
2861 static void
2862 TestPropertyNames(void) {
2863 int32_t p, v, choice=0, rev;
2864 UBool atLeastSomething = FALSE;
2865
2866 for (p=0; ; ++p) {
2867 UProperty propEnum = (UProperty)p;
2868 UBool sawProp = FALSE;
2869 if(p > 10 && !atLeastSomething) {
2870 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2871 return;
2872 }
2873
2874 for (choice=0; ; ++choice) {
2875 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2876 if (name) {
2877 if (!sawProp)
2878 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2879 log_verbose("%d=\"%s\"", choice, name);
2880 sawProp = TRUE;
2881 atLeastSomething = TRUE;
2882
2883 /* test reverse mapping */
2884 rev = u_getPropertyEnum(name);
2885 if (rev != p) {
2886 log_err("Property round-trip failure: %d -> %s -> %d\n",
2887 p, name, rev);
2888 }
2889 }
2890 if (!name && choice>0) break;
2891 }
2892 if (sawProp) {
2893 /* looks like a valid property; check the values */
2894 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2895 int32_t max = 0;
2896 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2897 max = 255;
2898 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2899 /* it's far too slow to iterate all the way up to
2900 the real max, U_GC_P_MASK */
2901 max = U_GC_NL_MASK;
2902 } else if (p == UCHAR_BLOCK) {
2903 /* UBlockCodes, unlike other values, start at 1 */
2904 max = 1;
2905 }
2906 log_verbose("\n");
2907 for (v=-1; ; ++v) {
2908 UBool sawValue = FALSE;
2909 for (choice=0; ; ++choice) {
2910 const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2911 if (vname) {
2912 if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2913 log_verbose("%d=\"%s\"", choice, vname);
2914 sawValue = TRUE;
2915
2916 /* test reverse mapping */
2917 rev = u_getPropertyValueEnum(propEnum, vname);
2918 if (rev != v) {
2919 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2920 pname, v, vname, rev);
2921 }
2922 }
2923 if (!vname && choice>0) break;
2924 }
2925 if (sawValue) {
2926 log_verbose("\n");
2927 }
2928 if (!sawValue && v>=max) break;
2929 }
2930 }
2931 if (!sawProp) {
2932 if (p>=UCHAR_STRING_LIMIT) {
2933 break;
2934 } else if (p>=UCHAR_DOUBLE_LIMIT) {
2935 p = UCHAR_STRING_START - 1;
2936 } else if (p>=UCHAR_MASK_LIMIT) {
2937 p = UCHAR_DOUBLE_START - 1;
2938 } else if (p>=UCHAR_INT_LIMIT) {
2939 p = UCHAR_MASK_START - 1;
2940 } else if (p>=UCHAR_BINARY_LIMIT) {
2941 p = UCHAR_INT_START - 1;
2942 }
2943 }
2944 }
2945 }
2946
2947 /**
2948 * Test the property values API. See JB#2410.
2949 */
2950 static void
2951 TestPropertyValues(void) {
2952 int32_t i, p, min, max;
2953 UErrorCode ec;
2954
2955 /* Min should be 0 for everything. */
2956 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2957 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2958 UProperty propEnum = (UProperty)p;
2959 min = u_getIntPropertyMinValue(propEnum);
2960 if (min != 0) {
2961 if (p == UCHAR_BLOCK) {
2962 /* This is okay...for now. See JB#2487.
2963 TODO Update this for JB#2487. */
2964 } else {
2965 const char* name;
2966 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2967 if (name == NULL)
2968 name = "<ERROR>";
2969 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2970 name, min);
2971 }
2972 }
2973 }
2974
2975 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2976 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2977 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2978 }
2979
2980 /* Max should be -1 for invalid properties. */
2981 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
2982 if (max != -1) {
2983 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2984 max);
2985 }
2986
2987 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
2988 for (i=0; i<2; ++i) {
2989 int32_t script;
2990 const char* desc;
2991 ec = U_ZERO_ERROR;
2992 switch (i) {
2993 case 0:
2994 script = uscript_getScript(-1, &ec);
2995 desc = "uscript_getScript(-1)";
2996 break;
2997 case 1:
2998 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2999 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3000 break;
3001 default:
3002 log_err("Internal test error. Too many scripts\n");
3003 return;
3004 }
3005 /* We don't explicitly test ec. It should be U_FAILURE but it
3006 isn't documented as such. */
3007 if (script != (int32_t)USCRIPT_INVALID_CODE) {
3008 log_err("FAIL: %s = %d, exp. 0\n",
3009 desc, script);
3010 }
3011 }
3012 }
3013
3014 /* various tests for consistency of UCD data and API behavior */
3015 static void
3016 TestConsistency() {
3017 char buffer[300];
3018 USet *set1, *set2, *set3, *set4;
3019 UErrorCode errorCode;
3020
3021 UChar32 start, end;
3022 int32_t i, length;
3023
3024 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3025 U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3026 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3027 U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3028 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3029
3030 U_STRING_DECL(mathBlocksPattern,
3031 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3032 1+32+46+46+45+43+1+1); /* +1 for NUL */
3033 U_STRING_DECL(mathPattern, "[:Math:]", 8);
3034 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3035 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3036 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3037
3038 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3039 U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3040 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3041 U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3042 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3043
3044 U_STRING_INIT(mathBlocksPattern,
3045 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3046 1+32+46+46+45+43+1+1); /* +1 for NUL */
3047 U_STRING_INIT(mathPattern, "[:Math:]", 8);
3048 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3049 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3050 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3051
3052 /*
3053 * It used to be that UCD.html and its precursors said
3054 * "Those dashes used to mark connections between pieces of words,
3055 * plus the Katakana middle dot."
3056 *
3057 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3058 * but not from Hyphen.
3059 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3060 * Therefore, do not show errors when testing the Hyphen property.
3061 */
3062 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3063 "known to the UTC and not considered errors.\n");
3064
3065 errorCode=U_ZERO_ERROR;
3066 set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3067 set2=uset_openPattern(dashPattern, 8, &errorCode);
3068 if(U_SUCCESS(errorCode)) {
3069 /* remove the Katakana middle dot(s) from set1 */
3070 uset_remove(set1, 0x30fb);
3071 uset_remove(set1, 0xff65); /* halfwidth variant */
3072 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3073 } else {
3074 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3075 }
3076
3077 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3078 set3=uset_openPattern(formatPattern, 6, &errorCode);
3079 set4=uset_openPattern(alphaPattern, 14, &errorCode);
3080 if(U_SUCCESS(errorCode)) {
3081 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3082 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3083 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3084 } else {
3085 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3086 }
3087
3088 uset_close(set1);
3089 uset_close(set2);
3090 uset_close(set3);
3091 uset_close(set4);
3092
3093 /*
3094 * Check that each lowercase character has "small" in its name
3095 * and not "capital".
3096 * There are some such characters, some of which seem odd.
3097 * Use the verbose flag to see these notices.
3098 */
3099 errorCode=U_ZERO_ERROR;
3100 set1=uset_openPattern(lowerPattern, 13, &errorCode);
3101 if(U_SUCCESS(errorCode)) {
3102 for(i=0;; ++i) {
3103 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3104 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3105 break; /* done */
3106 }
3107 if(U_FAILURE(errorCode)) {
3108 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3109 i, u_errorName(errorCode));
3110 break;
3111 }
3112 if(length!=0) {
3113 break; /* done with code points, got a string or -1 */
3114 }
3115
3116 while(start<=end) {
3117 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3118 if(U_FAILURE(errorCode)) {
3119 log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3120 errorCode=U_ZERO_ERROR;
3121 }
3122 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3123 strstr(buffer, "SMALL CAPITAL")==NULL
3124 ) {
3125 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3126 }
3127 ++start;
3128 }
3129 }
3130 } else {
3131 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3132 }
3133 uset_close(set1);
3134
3135 /* verify that all assigned characters in Math blocks are exactly Math characters */
3136 errorCode=U_ZERO_ERROR;
3137 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3138 set2=uset_openPattern(mathPattern, 8, &errorCode);
3139 set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3140 if(U_SUCCESS(errorCode)) {
3141 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3142 uset_complement(set3); /* assigned characters */
3143 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3144 compareUSets(set1, set2,
3145 "[assigned Math block chars]", "[math blocks]&[:Math:]",
3146 TRUE);
3147 } else {
3148 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3149 }
3150 uset_close(set1);
3151 uset_close(set2);
3152 uset_close(set3);
3153
3154 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3155 errorCode=U_ZERO_ERROR;
3156 set1=uset_openPattern(unknownPattern, 14, &errorCode);
3157 set2=uset_openPattern(reservedPattern, 20, &errorCode);
3158 if(U_SUCCESS(errorCode)) {
3159 compareUSets(set1, set2,
3160 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3161 TRUE);
3162 } else {
3163 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3164 }
3165 uset_close(set1);
3166 uset_close(set2);
3167 }
3168
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
3179 #define HARDCODED_DATA_4497 1
3180
3181 /* API coverage for ucase.c */
3182 static void TestUCase() {
3183 #if !HARDCODED_DATA_4497
3184 UDataMemory *pData;
3185 UCaseProps *csp;
3186 const UCaseProps *ccsp;
3187 UErrorCode errorCode;
3188
3189 /* coverage for ucase_openBinary() */
3190 errorCode=U_ZERO_ERROR;
3191 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3192 if(U_FAILURE(errorCode)) {
3193 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3194 u_errorName(errorCode));
3195 return;
3196 }
3197
3198 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3199 if(U_FAILURE(errorCode)) {
3200 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3201 u_errorName(errorCode));
3202 udata_close(pData);
3203 return;
3204 }
3205
3206 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3207 log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3208 }
3209
3210 ucase_close(csp);
3211 udata_close(pData);
3212
3213 /* coverage for ucase_getDummy() */
3214 errorCode=U_ZERO_ERROR;
3215 ccsp=ucase_getDummy(&errorCode);
3216 if(ucase_tolower(ccsp, 0x41)!=0x41) {
3217 log_err("ucase_tolower(dummy, A)!=A\n");
3218 }
3219 #endif
3220 }
3221
3222 /* API coverage for ubidi_props.c */
3223 static void TestUBiDiProps() {
3224 #if !HARDCODED_DATA_4497
3225 UDataMemory *pData;
3226 UBiDiProps *bdp;
3227 const UBiDiProps *cbdp;
3228 UErrorCode errorCode;
3229
3230 /* coverage for ubidi_openBinary() */
3231 errorCode=U_ZERO_ERROR;
3232 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3233 if(U_FAILURE(errorCode)) {
3234 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3235 u_errorName(errorCode));
3236 return;
3237 }
3238
3239 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3240 if(U_FAILURE(errorCode)) {
3241 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3242 u_errorName(errorCode));
3243 udata_close(pData);
3244 return;
3245 }
3246
3247 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3248 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3249 }
3250
3251 ubidi_closeProps(bdp);
3252 udata_close(pData);
3253
3254 /* coverage for ubidi_getDummy() */
3255 errorCode=U_ZERO_ERROR;
3256 cbdp=ubidi_getDummy(&errorCode);
3257 if(ubidi_getClass(cbdp, 0x20)!=0) {
3258 log_err("ubidi_getClass(dummy, space)!=0\n");
3259 }
3260 #endif
3261 }
3262
/* test case folding, compare return values with CaseFolding.txt ------------ */

/*
 * Bit flags naming the kinds of case folding; a combination of them records
 * which foldings of a character have been tested already.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    /* all of the above */
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3272
3273 static void
3274 testFold(UChar32 c, int which,
3275 UChar32 simple, UChar32 turkic,
3276 const UChar *full, int32_t fullLength,
3277 const UChar *turkicFull, int32_t turkicFullLength) {
3278 UChar s[2], t[32];
3279 UChar32 c2;
3280 int32_t length, length2;
3281
3282 UErrorCode errorCode=U_ZERO_ERROR;
3283
3284 length=0;
3285 U16_APPEND_UNSAFE(s, length, c);
3286
3287 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3288 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3289 }
3290 if((which&CF_FULL)!=0) {
3291 length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3292 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3293 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3294 }
3295 }
3296 if((which&CF_TURKIC)!=0) {
3297 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3298 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3299 }
3300
3301 length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3302 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3303 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3304 }
3305 }
3306 }
3307
3308 /* test that c case-folds to itself */
3309 static void
3310 testFoldToSelf(UChar32 c, int which) {
3311 UChar s[2];
3312 int32_t length;
3313
3314 length=0;
3315 U16_APPEND_UNSAFE(s, length, c);
3316 testFold(c, which, c, c, s, length, s, length);
3317 }
3318
3319 struct CaseFoldingData {
3320 USet *notSeen;
3321 UChar32 prev, prevSimple;
3322 UChar prevFull[32];
3323 int32_t prevFullLength;
3324 int which;
3325 };
3326 typedef struct CaseFoldingData CaseFoldingData;
3327
3328 static void U_CALLCONV
3329 caseFoldingLineFn(void *context,
3330 char *fields[][2], int32_t fieldCount,
3331 UErrorCode *pErrorCode) {
3332 CaseFoldingData *pData=(CaseFoldingData *)context;
3333 char *end;
3334 UChar full[32];
3335 UChar32 c, prev, simple;
3336 int32_t count;
3337 int which;
3338 char status;
3339
3340 /* get code point */
3341 const char *s=u_skipWhitespace(fields[0][0]);
3342 if(0==strncmp(s, "0000..10FFFF", 12)) {
3343 /*
3344 * Ignore the line
3345 * # @missing: 0000..10FFFF; C; <code point>
3346 * because maps-to-self is already our default, and this line breaks this parser.
3347 */
3348 return;
3349 }
3350 c=(UChar32)strtoul(s, &end, 16);
3351 end=(char *)u_skipWhitespace(end);
3352 if(end<=fields[0][0] || end!=fields[0][1]) {
3353 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3354 *pErrorCode=U_PARSE_ERROR;
3355 return;
3356 }
3357
3358 /* get the status of this mapping */
3359 status=*u_skipWhitespace(fields[1][0]);
3360 if(status!='C' && status!='S' && status!='F' && status!='T') {
3361 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3362 *pErrorCode=U_PARSE_ERROR;
3363 return;
3364 }
3365
3366 /* get the mapping */
3367 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3368 if(U_FAILURE(*pErrorCode)) {
3369 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3370 return;
3371 }
3372
3373 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3374 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3375 simple=c;
3376 }
3377
3378 if(c!=(prev=pData->prev)) {
3379 /*
3380 * Test remaining mappings for the previous code point.
3381 * If a turkic folding was not mentioned, then it should fold the same
3382 * as the regular simple case folding.
3383 */
3384 UChar prevString[2];
3385 int32_t length;
3386
3387 length=0;
3388 U16_APPEND_UNSAFE(prevString, length, prev);
3389 testFold(prev, (~pData->which)&CF_ALL,
3390 prev, pData->prevSimple,
3391 prevString, length,
3392 pData->prevFull, pData->prevFullLength);
3393 pData->prev=pData->prevSimple=c;
3394 length=0;
3395 U16_APPEND_UNSAFE(pData->prevFull, length, c);
3396 pData->prevFullLength=length;
3397 pData->which=0;
3398 }
3399
3400 /*
3401 * Turn the status into a bit set of case foldings to test.
3402 * Remember non-Turkic case foldings as defaults for Turkic mode.
3403 */
3404 switch(status) {
3405 case 'C':
3406 which=CF_SIMPLE|CF_FULL;
3407 pData->prevSimple=simple;
3408 u_memcpy(pData->prevFull, full, count);
3409 pData->prevFullLength=count;
3410 break;
3411 case 'S':
3412 which=CF_SIMPLE;
3413 pData->prevSimple=simple;
3414 break;
3415 case 'F':
3416 which=CF_FULL;
3417 u_memcpy(pData->prevFull, full, count);
3418 pData->prevFullLength=count;
3419 break;
3420 case 'T':
3421 which=CF_TURKIC;
3422 break;
3423 default:
3424 which=0;
3425 break; /* won't happen because of test above */
3426 }
3427
3428 testFold(c, which, simple, simple, full, count, full, count);
3429
3430 /* remember which case foldings of c have been tested */
3431 pData->which|=which;
3432
3433 /* remove c from the set of ones not mentioned in CaseFolding.txt */
3434 uset_remove(pData->notSeen, c);
3435 }
3436
3437 static void
3438 TestCaseFolding() {
3439 CaseFoldingData data={ NULL };
3440 char *fields[3][2];
3441 UErrorCode errorCode;
3442
3443 static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3444
3445 errorCode=U_ZERO_ERROR;
3446 /* test BMP & plane 1 - nothing interesting above */
3447 data.notSeen=uset_open(0, 0x1ffff);
3448 data.prevFullLength=1; /* length of full case folding of U+0000 */
3449
3450 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3451 if(U_SUCCESS(errorCode)) {
3452 int32_t i, start, end;
3453
3454 /* add a pseudo-last line to finish testing of the actual last one */
3455 fields[0][0]=lastLine;
3456 fields[0][1]=lastLine+6;
3457 fields[1][0]=lastLine+7;
3458 fields[1][1]=lastLine+9;
3459 fields[2][0]=lastLine+10;
3460 fields[2][1]=lastLine+17;
3461 caseFoldingLineFn(&data, fields, 3, &errorCode);
3462
3463 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3464 for(i=0;
3465 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3466 U_SUCCESS(errorCode);
3467 ++i
3468 ) {
3469 do {
3470 testFoldToSelf(start, CF_ALL);
3471 } while(++start<=end);
3472 }
3473 }
3474
3475 uset_close(data.notSeen);
3476 }