]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cucdtst.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
729e4ab9 3 * Copyright (c) 1997-2010, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
46f4442e 6/*******************************************************************************
b75a7d8f
A
7*
8* File CUCDTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API, added tests for string functions
46f4442e 13********************************************************************************
b75a7d8f
A
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
729e4ab9 25#include "unicode/unorm2.h"
b75a7d8f
A
26
27#include "cintltst.h"
374ca955 28#include "putilimp.h"
b75a7d8f 29#include "uparse.h"
374ca955 30#include "ucase.h"
73c04bcf 31#include "ubidi_props.h"
b75a7d8f 32#include "uprops.h"
374ca955 33#include "uset_imp.h"
b75a7d8f 34#include "usc_impl.h"
374ca955
A
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
b75a7d8f 37
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot bind
 * unexpectedly when the macro is used inside a larger expression.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
b75a7d8f
A
39
40/* prototypes --------------------------------------------------------------- */
41
42static void TestUpperLower(void);
43static void TestLetterNumber(void);
44static void TestMisc(void);
45static void TestPOSIX(void);
46static void TestControlPrint(void);
47static void TestIdentifier(void);
48static void TestUnicodeData(void);
49static void TestCodeUnit(void);
50static void TestCodePoint(void);
51static void TestCharLength(void);
52static void TestCharNames(void);
53static void TestMirroring(void);
b75a7d8f
A
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
374ca955 60static void TestUCase(void);
73c04bcf
A
61static void TestUBiDiProps(void);
62static void TestCaseFolding(void);
b75a7d8f
A
63
64/* internal methods used */
65static int32_t MakeProp(char* str);
66static int32_t MakeDir(char* str);
67
73c04bcf
A
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72 char *fields[][2], int32_t fieldCount,
73 UParseLineFn *lineFn, void *context,
74 UErrorCode *pErrorCode) {
75 char path[256];
76 char backupPath[256];
77
78 if(U_FAILURE(*pErrorCode)) {
79 return;
80 }
81
82 /* Look inside ICU_DATA first */
83 strcpy(path, u_getDataDirectory());
84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85 strcat(path, filename);
86
87 /* As a fallback, try to guess where the source data was located
88 * at the time ICU was built, and look there.
89 */
90 strcpy(backupPath, ctest_dataSrcDir());
91 strcat(backupPath, U_FILE_SEP_STRING);
92 strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93 strcat(backupPath, filename);
94
95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96 if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97 *pErrorCode=U_ZERO_ERROR;
98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99 }
100 if(U_FAILURE(*pErrorCode)) {
729e4ab9 101 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
73c04bcf
A
102 }
103}
104
b75a7d8f
A
105/* test data ---------------------------------------------------------------- */
106
107static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
108static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
109static const int32_t tagValues[] =
110 {
111 /* Mn */ U_NON_SPACING_MARK,
112 /* Mc */ U_COMBINING_SPACING_MARK,
113 /* Me */ U_ENCLOSING_MARK,
114 /* Nd */ U_DECIMAL_DIGIT_NUMBER,
115 /* Nl */ U_LETTER_NUMBER,
116 /* No */ U_OTHER_NUMBER,
117 /* Zs */ U_SPACE_SEPARATOR,
118 /* Zl */ U_LINE_SEPARATOR,
119 /* Zp */ U_PARAGRAPH_SEPARATOR,
120 /* Cc */ U_CONTROL_CHAR,
121 /* Cf */ U_FORMAT_CHAR,
122 /* Cs */ U_SURROGATE,
123 /* Co */ U_PRIVATE_USE_CHAR,
124 /* Cn */ U_UNASSIGNED,
125 /* Lu */ U_UPPERCASE_LETTER,
126 /* Ll */ U_LOWERCASE_LETTER,
127 /* Lt */ U_TITLECASE_LETTER,
128 /* Lm */ U_MODIFIER_LETTER,
129 /* Lo */ U_OTHER_LETTER,
130 /* Pc */ U_CONNECTOR_PUNCTUATION,
131 /* Pd */ U_DASH_PUNCTUATION,
132 /* Ps */ U_START_PUNCTUATION,
133 /* Pe */ U_END_PUNCTUATION,
134 /* Po */ U_OTHER_PUNCTUATION,
135 /* Sm */ U_MATH_SYMBOL,
136 /* Sc */ U_CURRENCY_SYMBOL,
137 /* Sk */ U_MODIFIER_SYMBOL,
138 /* So */ U_OTHER_SYMBOL,
139 /* Pi */ U_INITIAL_PUNCTUATION,
140 /* Pf */ U_FINAL_PUNCTUATION
141 };
142
/*
 * Bidi class abbreviations as they appear in UnicodeData.txt.
 * Each entry holds at most 4 characters plus the terminating NUL.
 */
static const char dirStrings[][5] = {
    "L",   "R",   "EN",  "ES",  "ET",
    "AN",  "CS",  "B",   "S",   "WS",
    "ON",  "LRE", "LRO", "AL",  "RLE",
    "RLO", "PDF", "NSM", "BN"
};
164
165void addUnicodeTest(TestNode** root);
166
167void addUnicodeTest(TestNode** root)
168{
b75a7d8f
A
169 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
170 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
171 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
46f4442e
A
172 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
173 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
b75a7d8f
A
174 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
175 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
176 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
177 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
178 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
179 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
180 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
181 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
182 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
183 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
184 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
729e4ab9
A
185 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
186 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
b75a7d8f
A
187 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
188 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
189 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
190 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
374ca955 191 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
73c04bcf
A
192 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
193 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
b75a7d8f
A
194}
195
196/*==================================================== */
197/* test u_toupper() and u_tolower() */
198/*==================================================== */
199static void TestUpperLower()
200{
201 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
202 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
203 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
204 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
205 int32_t i;
206
207 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
208 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
209
210/*
211Checks LetterLike Symbols which were previously a source of confusion
212[Bertrand A. D. 02/04/98]
213*/
214 for (i=0x2100;i<0x2138;i++)
215 {
73c04bcf
A
216 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
217 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
b75a7d8f
A
218 {
219 if (i != (int)u_tolower(i)) /* itself */
220 log_err("Failed case conversion with itself: U+%04x\n", i);
221 if (i != (int)u_toupper(i))
222 log_err("Failed case conversion with itself: U+%04x\n", i);
223 }
224 }
225
226 for(i=0; i < u_strlen(upper); i++){
227 if(u_tolower(upper[i]) != lower[i]){
228 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
229 }
230 }
231
232 log_verbose("testing upper lower\n");
233 for (i = 0; i < 21; i++) {
234
235 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
236 {
237 log_err("Failed isLowerCase test at %c\n", upperTest[i]);
238 }
239 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
240 {
241 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
242 }
243 else if (upperTest[i] != u_tolower(lowerTest[i]))
244 {
245 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]);
246 }
247 else if (lowerTest[i] != u_toupper(upperTest[i]))
248 {
249 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
250 }
251 else if (upperTest[i] != u_tolower(upperTest[i]))
252 {
253 log_err("Failed case conversion with itself: %c\n", upperTest[i]);
254 }
255 else if (lowerTest[i] != u_toupper(lowerTest[i]))
256 {
257 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
258 }
259 }
260 log_verbose("done testing upper lower\n");
261
262 log_verbose("testing u_istitle\n");
263 {
264 static const UChar expected[] = {
265 0x1F88,
266 0x1F89,
267 0x1F8A,
268 0x1F8B,
269 0x1F8C,
270 0x1F8D,
271 0x1F8E,
272 0x1F8F,
273 0x1F88,
274 0x1F89,
275 0x1F8A,
276 0x1F8B,
277 0x1F8C,
278 0x1F8D,
279 0x1F8E,
280 0x1F8F,
281 0x1F98,
282 0x1F99,
283 0x1F9A,
284 0x1F9B,
285 0x1F9C,
286 0x1F9D,
287 0x1F9E,
288 0x1F9F,
289 0x1F98,
290 0x1F99,
291 0x1F9A,
292 0x1F9B,
293 0x1F9C,
294 0x1F9D,
295 0x1F9E,
296 0x1F9F,
297 0x1FA8,
298 0x1FA9,
299 0x1FAA,
300 0x1FAB,
301 0x1FAC,
302 0x1FAD,
303 0x1FAE,
304 0x1FAF,
305 0x1FA8,
306 0x1FA9,
307 0x1FAA,
308 0x1FAB,
309 0x1FAC,
310 0x1FAD,
311 0x1FAE,
312 0x1FAF,
313 0x1FBC,
314 0x1FBC,
315 0x1FCC,
316 0x1FCC,
317 0x1FFC,
318 0x1FFC,
319 };
320 int32_t num = sizeof(expected)/sizeof(expected[0]);
321 for(i=0; i<num; i++){
322 if(!u_istitle(expected[i])){
323 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
324 }
325 }
326
327 }
328}
329
73c04bcf 330/* compare two sets and verify that their difference or intersection is empty */
b75a7d8f
A
331static UBool
332showADiffB(const USet *a, const USet *b,
333 const char *a_name, const char *b_name,
334 UBool expect, UBool diffIsError) {
73c04bcf 335 USet *aa;
b75a7d8f 336 int32_t i, start, end, length;
b75a7d8f
A
337 UErrorCode errorCode;
338
73c04bcf
A
339 /*
340 * expect:
341 * TRUE -> a-b should be empty, that is, b should contain all of a
342 * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
343 */
344 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
345 return TRUE;
346 }
347
348 /* clone a to aa because a is const */
349 aa=uset_open(1, 0);
350 if(aa==NULL) {
351 /* unusual problem - out of memory? */
352 return FALSE;
353 }
354 uset_addAll(aa, a);
355
356 /* compute the set in question */
357 if(expect) {
358 /* a-b */
359 uset_removeAll(aa, b);
360 } else {
361 /* a&b */
362 uset_retainAll(aa, b);
363 }
364
365 /* aa is not empty because of the initial tests above; show its contents */
b75a7d8f 366 errorCode=U_ZERO_ERROR;
b75a7d8f
A
367 i=0;
368 for(;;) {
73c04bcf 369 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
b75a7d8f 370 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
73c04bcf 371 break; /* done */
b75a7d8f
A
372 }
373 if(U_FAILURE(errorCode)) {
73c04bcf 374 log_err("error comparing %s with %s at difference item %d: %s\n",
b75a7d8f 375 a_name, b_name, i, u_errorName(errorCode));
73c04bcf 376 break;
b75a7d8f
A
377 }
378 if(length!=0) {
73c04bcf 379 break; /* done with code points, got a string or -1 */
b75a7d8f
A
380 }
381
73c04bcf
A
382 if(diffIsError) {
383 if(expect) {
384 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
385 } else {
386 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
387 }
388 } else {
389 if(expect) {
390 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
391 } else {
392 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
b75a7d8f
A
393 }
394 }
395
396 ++i;
397 }
73c04bcf
A
398
399 uset_close(aa);
400 return FALSE;
b75a7d8f
A
401}
402
403static UBool
404showAMinusB(const USet *a, const USet *b,
405 const char *a_name, const char *b_name,
406 UBool diffIsError) {
407 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
408}
409
410static UBool
411showAIntersectB(const USet *a, const USet *b,
412 const char *a_name, const char *b_name,
413 UBool diffIsError) {
414 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
415}
416
417static UBool
418compareUSets(const USet *a, const USet *b,
419 const char *a_name, const char *b_name,
420 UBool diffIsError) {
73c04bcf
A
421 /*
422 * Use an arithmetic & not a logical && so that both branches
423 * are always taken and all differences are shown.
424 */
b75a7d8f 425 return
73c04bcf 426 showAMinusB(a, b, a_name, b_name, diffIsError) &
b75a7d8f
A
427 showAMinusB(b, a, b_name, a_name, diffIsError);
428}
429
430/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
431static void TestLetterNumber()
432{
433 UChar i = 0x0000;
434
435 log_verbose("Testing for isalpha\n");
436 for (i = 0x0041; i < 0x005B; i++) {
437 if (!u_isalpha(i))
438 {
439 log_err("Failed isLetter test at %.4X\n", i);
440 }
441 }
442 for (i = 0x0660; i < 0x066A; i++) {
443 if (u_isalpha(i))
444 {
445 log_err("Failed isLetter test with numbers at %.4X\n", i);
446 }
447 }
448
449 log_verbose("Testing for isdigit\n");
450 for (i = 0x0660; i < 0x066A; i++) {
451 if (!u_isdigit(i))
452 {
453 log_verbose("Failed isNumber test at %.4X\n", i);
454 }
455 }
456
457 log_verbose("Testing for isalnum\n");
458 for (i = 0x0041; i < 0x005B; i++) {
459 if (!u_isalnum(i))
460 {
461 log_err("Failed isAlNum test at %.4X\n", i);
462 }
463 }
464 for (i = 0x0660; i < 0x066A; i++) {
465 if (!u_isalnum(i))
466 {
467 log_err("Failed isAlNum test at %.4X\n", i);
468 }
469 }
470
471 {
472 /*
473 * The following checks work only starting from Unicode 4.0.
474 * Check the version number here.
475 */
374ca955 476 static UVersionInfo u401={ 4, 0, 1, 0 };
b75a7d8f
A
477 UVersionInfo version;
478 u_getUnicodeVersion(version);
374ca955 479 if(version[0]<4 || 0==memcmp(version, u401, 4)) {
b75a7d8f
A
480 return;
481 }
482 }
483
484 {
485 /*
486 * Sanity check:
487 * Verify that exactly the digit characters have decimal digit values.
488 * This assumption is used in the implementation of u_digit()
489 * (which checks nt=de)
490 * compared with the parallel java.lang.Character.digit()
491 * (which checks Nd).
492 *
493 * This was not true in Unicode 3.2 and earlier.
374ca955
A
494 * Unicode 4.0 fixed discrepancies.
495 * Unicode 4.0.1 re-introduced problems in this area due to an
496 * unintentionally incomplete last-minute change.
b75a7d8f
A
497 */
498 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
499 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
500
501 USet *digits, *decimalValues;
502 UErrorCode errorCode;
503
504 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
505 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
506 errorCode=U_ZERO_ERROR;
507 digits=uset_openPattern(digitsPattern, 6, &errorCode);
508 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
509
510 if(U_SUCCESS(errorCode)) {
511 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
512 }
513
514 uset_close(digits);
515 uset_close(decimalValues);
516 }
517}
518
729e4ab9
A
519static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
520 const UChar32 *sampleChars, int32_t sampleCharsLength,
521 UBool expected) {
522 int32_t i;
523 for (i = 0; i < sampleCharsLength; ++i) {
524 UBool result = propFn(sampleChars[i]);
525 if (result != expected) {
526 log_err("error: character property function %s(U+%04x)=%d is wrong\n",
527 propName, sampleChars[i], result);
528 }
529 }
530}
531
b75a7d8f
A
532/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
533static void TestMisc()
534{
729e4ab9
A
535 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
536 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
537 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
538 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
539 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
540 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
b75a7d8f 541/* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
729e4ab9
A
542 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
543 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
544 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
545 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
b75a7d8f
A
546
547 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
548
549 uint32_t mask;
550
551 int32_t i;
552 char icuVersion[U_MAX_VERSION_STRING_LENGTH];
553 UVersionInfo realVersion;
554
555 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
556
729e4ab9
A
557 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
558 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
b75a7d8f 559
729e4ab9
A
560 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
561 sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
562 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
563 sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
b75a7d8f 564
729e4ab9
A
565 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
566 sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
567 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
568 sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
b75a7d8f 569
729e4ab9
A
570 testSampleCharProps(u_isdefined, "u_isdefined",
571 sampleDefined, LENGTHOF(sampleDefined), TRUE);
572 testSampleCharProps(u_isdefined, "u_isdefined",
573 sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
574
575 testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
576 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
b75a7d8f 577
729e4ab9
A
578 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
579 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
580
581 for (i = 0; i < LENGTHOF(sampleDigits); i++) {
582 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
583 log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
584 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
b75a7d8f
A
585 }
586 }
587
588 /* Tests the ICU version #*/
589 u_getVersion(realVersion);
590 u_versionToString(realVersion, icuVersion);
374ca955 591 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
b75a7d8f
A
592 {
593 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
594 }
595#if defined(ICU_VERSION)
596 /* test only happens where we have configure.in with VERSION - sanity check. */
597 if(strcmp(U_ICU_VERSION, ICU_VERSION))
598 {
599 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION);
600 }
601#endif
602
603 /* test U_GC_... */
604 if(
605 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
606 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
607 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
608 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
609 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
610 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
611 ) {
612 log_err("error: U_GET_GC_MASK does not work properly\n");
613 }
614
615 mask=0;
616 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
617
618 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
619 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
620 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
621 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
622 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
623
624 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
625 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
626 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
627
628 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
629 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
630 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
631
632 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
633 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
634 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
635
636 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
637 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
638 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
639 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
640
641 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
642 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
643 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
644 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
645 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
646
647 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
648 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
649 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
650 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
651
652 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
653 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
654
655 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
656 log_err("error: problems with U_GC_XX_MASK constants\n");
657 }
658
659 mask=0;
660 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
661 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
662 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
663 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
664 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
665 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
666 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
667
668 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
669 log_err("error: problems with U_GC_Y_MASK constants\n");
670 }
671 {
672 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
673 for(i=0; i<10; i++){
674 if(digit[i]!=u_forDigit(i,10)){
675 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
676 }
677 }
678 }
679
680 /* test u_digit() */
681 {
682 static const struct {
683 UChar32 c;
684 int8_t radix, value;
685 } data[]={
686 /* base 16 */
687 { 0x0031, 16, 1 },
688 { 0x0038, 16, 8 },
689 { 0x0043, 16, 12 },
690 { 0x0066, 16, 15 },
691 { 0x00e4, 16, -1 },
692 { 0x0662, 16, 2 },
693 { 0x06f5, 16, 5 },
694 { 0xff13, 16, 3 },
695 { 0xff41, 16, 10 },
696
697 /* base 8 */
698 { 0x0031, 8, 1 },
699 { 0x0038, 8, -1 },
700 { 0x0043, 8, -1 },
701 { 0x0066, 8, -1 },
702 { 0x00e4, 8, -1 },
703 { 0x0662, 8, 2 },
704 { 0x06f5, 8, 5 },
705 { 0xff13, 8, 3 },
706 { 0xff41, 8, -1 },
707
708 /* base 36 */
709 { 0x5a, 36, 35 },
710 { 0x7a, 36, 35 },
711 { 0xff3a, 36, 35 },
712 { 0xff5a, 36, 35 },
713
714 /* wrong radix values */
715 { 0x0031, 1, -1 },
716 { 0xff3a, 37, -1 }
717 };
718
719 for(i=0; i<LENGTHOF(data); ++i) {
720 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
721 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
722 data[i].c,
723 data[i].radix,
724 u_digit(data[i].c, data[i].radix),
725 data[i].value);
726 }
727 }
728 }
729}
730
731/* test C/POSIX-style functions --------------------------------------------- */
732
/*
 * Bit flags, one per C/POSIX-style character class.
 * Each flag occupies its own bit so that flags can be combined with |.
 */
#define ISAL 0x001
#define ISLO 0x002
#define ISUP 0x004

#define ISDI 0x008
#define ISXD 0x010

#define ISAN 0x020

#define ISPU 0x040
#define ISGR 0x080
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800
750
751/* C/POSIX-style functions, in the same order as the bit flags */
374ca955 752typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
b75a7d8f
A
753
754static const struct {
755 IsPOSIXClass *fn;
756 const char *name;
757} posixClasses[]={
758 { u_isalpha, "isalpha" },
759 { u_islower, "islower" },
760 { u_isupper, "isupper" },
761 { u_isdigit, "isdigit" },
762 { u_isxdigit, "isxdigit" },
763 { u_isalnum, "isalnum" },
764 { u_ispunct, "ispunct" },
765 { u_isgraph, "isgraph" },
766 { u_isprint, "isprint" },
767 { u_isspace, "isspace" },
768 { u_isblank, "isblank" },
769 { u_iscntrl, "iscntrl" }
770};
771
772static const struct {
773 UChar32 c;
774 uint32_t posixResults;
775} posixData[]={
776 { 0x0008, ISCN }, /* backspace */
777 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */
778 { 0x000a, ISSP| ISCN }, /* LF */
779 { 0x000c, ISSP| ISCN }, /* FF */
780 { 0x000d, ISSP| ISCN }, /* CR */
781 { 0x0020, ISPR|ISSP|ISBL }, /* space */
782 { 0x0021, ISPU|ISGR|ISPR }, /* ! */
783 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */
784 { 0x0040, ISPU|ISGR|ISPR }, /* @ */
785 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */
786 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */
787 { 0x007b, ISPU|ISGR|ISPR }, /* { */
788 { 0x0085, ISSP| ISCN }, /* NEL */
789 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */
790 { 0x00a4, ISGR|ISPR }, /* currency sign */
791 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */
792 { 0x0300, ISGR|ISPR }, /* combining grave */
793 { 0x0600, ISCN }, /* arabic number sign */
794 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */
795 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */
796 { 0x2002, ISPR|ISSP|ISBL }, /* en space */
797 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */
798 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */
374ca955
A
799 { 0x200b, ISCN }, /* ZWSP */
800 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
b75a7d8f
A
801 { 0x200e, ISCN }, /* LRM */
802 { 0x2028, ISPR|ISSP| ISCN }, /* LS */
803 { 0x2029, ISPR|ISSP| ISCN }, /* PS */
804 { 0x20ac, ISGR|ISPR }, /* Euro */
805 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */
806 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */
807 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */
808 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */
809 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */
810};
811
812static void
813TestPOSIX() {
814 uint32_t mask;
815 int32_t cl, i;
816 UBool expect;
817
818 mask=1;
819 for(cl=0; cl<12; ++cl) {
820 for(i=0; i<LENGTHOF(posixData); ++i) {
821 expect=(UBool)((posixData[i].posixResults&mask)!=0);
822 if(posixClasses[cl].fn(posixData[i].c)!=expect) {
823 log_err("u_%s(U+%04x)=%s is wrong\n",
824 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
825 }
826 }
827 mask<<=1;
828 }
829}
830
831/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
832static void TestControlPrint()
833{
729e4ab9
A
834 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
835 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
836 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
837 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
b75a7d8f 838 UChar32 c;
b75a7d8f 839
729e4ab9
A
840 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
841 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
b75a7d8f 842
729e4ab9
A
843 testSampleCharProps(u_isprint, "u_isprint",
844 samplePrintable, LENGTHOF(samplePrintable), TRUE);
845 testSampleCharProps(u_isprint, "u_isprint",
846 sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
b75a7d8f
A
847
848 /* test all ISO 8 controls */
849 for(c=0; c<=0x9f; ++c) {
850 if(c==0x20) {
851 /* skip ASCII graphic characters and continue with DEL */
852 c=0x7f;
853 }
854 if(!u_iscntrl(c)) {
855 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
856 }
857 if(!u_isISOControl(c)) {
858 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
859 }
860 if(u_isprint(c)) {
861 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
862 }
863 }
864
865 /* test all Latin-1 graphic characters */
866 for(c=0x20; c<=0xff; ++c) {
867 if(c==0x7f) {
868 c=0xa0;
869 } else if(c==0xad) {
870 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
871 ++c;
872 }
873 if(!u_isprint(c)) {
874 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
875 }
876 }
877}
878
879/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
880static void TestIdentifier()
881{
729e4ab9
A
882 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
883 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
884 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
885 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
886 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
887 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
888 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
889 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
890 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
891 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
892
893 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
894 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
895 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
896 sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
897
898 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
899 sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
900 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
901 sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
902
903 /* IDPart should imply IDStart */
904 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
905 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
906
907 testSampleCharProps(u_isIDStart, "u_isIDStart",
908 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
909 testSampleCharProps(u_isIDStart, "u_isIDStart",
910 sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
911
912 testSampleCharProps(u_isIDPart, "u_isIDPart",
913 sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
914 testSampleCharProps(u_isIDPart, "u_isIDPart",
915 sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
916
917 /* IDPart should imply IDStart */
918 testSampleCharProps(u_isIDPart, "u_isIDPart",
919 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
920
921 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
922 sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
923 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
924 sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
b75a7d8f
A
925}
926
927/* for each line of UnicodeData.txt, check some of the properties */
928/*
929 * ### TODO
930 * This test fails incorrectly if the First or Last code point of a repetitive area
931 * is overridden, which is allowed and is encouraged for the PUAs.
932 * Currently, this means that both area First/Last and override lines are
933 * tested against the properties from the API,
934 * and the area boundary will not match and cause an error.
935 *
936 * This function should detect area boundaries and skip them for the test of individual
937 * code points' properties.
938 * Then it should check that the areas contain all the same properties except where overridden.
939 * For this, it would have had to set a flag for which code points were listed explicitly.
940 */
941static void U_CALLCONV
942unicodeDataLineFn(void *context,
943 char *fields[][2], int32_t fieldCount,
944 UErrorCode *pErrorCode)
945{
946 char buffer[100];
947 char *end;
948 uint32_t value;
949 UChar32 c;
950 int32_t i;
951 int8_t type;
952
953 /* get the character code, field 0 */
954 c=strtoul(fields[0][0], &end, 16);
955 if(end<=fields[0][0] || end!=fields[0][1]) {
956 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
957 return;
958 }
959 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
960 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
961 return;
962 }
963
964 /* get general category, field 2 */
965 *fields[2][1]=0;
966 type = (int8_t)tagValues[MakeProp(fields[2][0])];
967 if(u_charType(c)!=type) {
968 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
969 }
970 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
971 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
972 }
973
974 /* get canonical combining class, field 3 */
975 value=strtoul(fields[3][0], &end, 10);
976 if(end<=fields[3][0] || end!=fields[3][1]) {
977 log_err("error: syntax error in field 3 at code 0x%lx\n", c);
978 return;
979 }
980 if(value>255) {
981 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
982 return;
983 }
984#if !UCONFIG_NO_NORMALIZATION
985 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
986 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
987 }
988#endif
989
990 /* get BiDi category, field 4 */
991 *fields[4][1]=0;
992 i=MakeDir(fields[4][0]);
993 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
994 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
995 }
996
997 /* get ISO Comment, field 11 */
998 *fields[11][1]=0;
999 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1000 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
729e4ab9 1001 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
b75a7d8f
A
1002 c, u_errorName(*pErrorCode),
1003 U_FAILURE(*pErrorCode) ? buffer : "[error]",
1004 fields[11][0]);
1005 }
1006
1007 /* get uppercase mapping, field 12 */
1008 if(fields[12][0]!=fields[12][1]) {
1009 value=strtoul(fields[12][0], &end, 16);
1010 if(end!=fields[12][1]) {
1011 log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1012 return;
1013 }
1014 if((UChar32)value!=u_toupper(c)) {
1015 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1016 }
1017 } else {
1018 /* no case mapping: the API must map the code point to itself */
1019 if(c!=u_toupper(c)) {
1020 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1021 }
1022 }
1023
1024 /* get lowercase mapping, field 13 */
1025 if(fields[13][0]!=fields[13][1]) {
1026 value=strtoul(fields[13][0], &end, 16);
1027 if(end!=fields[13][1]) {
1028 log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1029 return;
1030 }
1031 if((UChar32)value!=u_tolower(c)) {
1032 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1033 }
1034 } else {
1035 /* no case mapping: the API must map the code point to itself */
1036 if(c!=u_tolower(c)) {
1037 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1038 }
1039 }
1040
1041 /* get titlecase mapping, field 14 */
1042 if(fields[14][0]!=fields[14][1]) {
1043 value=strtoul(fields[14][0], &end, 16);
1044 if(end!=fields[14][1]) {
1045 log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1046 return;
1047 }
1048 if((UChar32)value!=u_totitle(c)) {
1049 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1050 }
1051 } else {
1052 /* no case mapping: the API must map the code point to itself */
1053 if(c!=u_totitle(c)) {
1054 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1055 }
1056 }
1057}
1058
1059static UBool U_CALLCONV
1060enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1061 static const UChar32 test[][2]={
1062 {0x41, U_UPPERCASE_LETTER},
1063 {0x308, U_NON_SPACING_MARK},
1064 {0xfffe, U_GENERAL_OTHER_TYPES},
1065 {0xe0041, U_FORMAT_CHAR},
1066 {0xeffff, U_UNASSIGNED}
1067 };
1068
374ca955 1069 int32_t i, count;
b75a7d8f
A
1070
1071 if(0!=strcmp((const char *)context, "a1")) {
1072 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1073 return FALSE;
1074 }
1075
374ca955 1076 count=LENGTHOF(test);
b75a7d8f
A
1077 for(i=0; i<count; ++i) {
1078 if(start<=test[i][0] && test[i][0]<limit) {
1079 if(type!=(UCharCategory)test[i][1]) {
1080 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1081 start, limit, (long)type, test[i][0], test[i][1]);
1082 }
374ca955 1083 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
b75a7d8f
A
1084 return i==(count-1) ? FALSE : TRUE;
1085 }
1086 }
1087
1088 if(start>test[count-1][0]) {
1089 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1090 start, limit, (long)type);
1091 return FALSE;
1092 }
1093
374ca955
A
1094 return TRUE;
1095}
1096
1097static UBool U_CALLCONV
1098enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1099 /* default Bidi classes for unassigned code points */
1100 static const int32_t defaultBidi[][2]={ /* { limit, class } */
1101 { 0x0590, U_LEFT_TO_RIGHT },
1102 { 0x0600, U_RIGHT_TO_LEFT },
1103 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1104 { 0x0900, U_RIGHT_TO_LEFT },
1105 { 0xFB1D, U_LEFT_TO_RIGHT },
1106 { 0xFB50, U_RIGHT_TO_LEFT },
1107 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1108 { 0xFE70, U_LEFT_TO_RIGHT },
1109 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1110 { 0x10800, U_LEFT_TO_RIGHT },
1111 { 0x11000, U_RIGHT_TO_LEFT },
729e4ab9
A
1112 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1113 { 0x1F000, U_RIGHT_TO_LEFT },
374ca955
A
1114 { 0x110000, U_LEFT_TO_RIGHT }
1115 };
1116
1117 UChar32 c;
1118 int32_t i;
1119 UCharDirection shouldBeDir;
1120
b75a7d8f
A
1121 /*
1122 * LineBreak.txt specifies:
1123 * # - Assigned characters that are not listed explicitly are given the value
1124 * # "AL".
1125 * # - Unassigned characters are given the value "XX".
1126 *
1127 * PUA characters are listed explicitly with "XX".
1128 * Verify that no assigned character has "XX".
1129 */
1130 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1131 c=start;
1132 while(c<limit) {
1133 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1134 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1135 }
1136 ++c;
1137 }
1138 }
1139
1140 /*
1141 * Verify default Bidi classes.
374ca955
A
1142 * For recent Unicode versions, see UCD.html.
1143 *
1144 * For older Unicode versions:
b75a7d8f
A
1145 * See table 3-7 "Bidirectional Character Types" in UAX #9.
1146 * http://www.unicode.org/reports/tr9/
1147 *
1148 * See also DerivedBidiClass.txt for Cn code points!
374ca955
A
1149 *
1150 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1151 * changed some default values.
1152 * In particular, non-characters and unassigned Default Ignorable Code Points
1153 * change from L to BN.
1154 *
1155 * UCD.html version 4.0.1 does not yet reflect these changes.
b75a7d8f
A
1156 */
1157 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1158 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1159 c=start;
1160 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1161 if((int32_t)c<defaultBidi[i][0]) {
1162 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
374ca955
A
1163 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1164 shouldBeDir=U_BOUNDARY_NEUTRAL;
1165 } else {
1166 shouldBeDir=(UCharDirection)defaultBidi[i][1];
1167 }
1168
1169 if( u_charDirection(c)!=shouldBeDir ||
1170 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
b75a7d8f
A
1171 ) {
1172 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
374ca955 1173 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
b75a7d8f
A
1174 }
1175 ++c;
1176 }
1177 }
1178 }
1179 }
1180
1181 return TRUE;
1182}
1183
1184/* tests for several properties */
1185static void TestUnicodeData()
1186{
b75a7d8f
A
1187 UVersionInfo expectVersionArray;
1188 UVersionInfo versionArray;
1189 char *fields[15][2];
1190 UErrorCode errorCode;
1191 UChar32 c;
1192 int8_t type;
1193
b75a7d8f
A
1194 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1195 u_getUnicodeVersion(versionArray);
1196 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1197 {
1198 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1199 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1200 }
1201
1202#if defined(ICU_UNICODE_VERSION)
1203 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1204 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1205 {
1206 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1207 }
1208#endif
1209
1210 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1211 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1212 }
1213
1214 errorCode=U_ZERO_ERROR;
73c04bcf 1215 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
b75a7d8f 1216 if(U_FAILURE(errorCode)) {
b75a7d8f
A
1217 return; /* if we couldn't parse UnicodeData.txt, we should return */
1218 }
1219
1220 /* sanity check on repeated properties */
1221 for(c=0xfffe; c<=0x10ffff;) {
1222 type=u_charType(c);
1223 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1224 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1225 }
1226 if(type!=U_UNASSIGNED) {
1227 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1228 }
1229 if((c&0xffff)==0xfffe) {
1230 ++c;
1231 } else {
1232 c+=0xffff;
1233 }
1234 }
1235
1236 /* test that PUA is not "unassigned" */
1237 for(c=0xe000; c<=0x10fffd;) {
1238 type=u_charType(c);
1239 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1240 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1241 }
1242 if(type==U_UNASSIGNED) {
1243 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1244 } else if(type!=U_PRIVATE_USE_CHAR) {
1245 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1246 }
1247 if(c==0xf8ff) {
1248 c=0xf0000;
1249 } else if(c==0xffffd) {
1250 c=0x100000;
1251 } else {
1252 ++c;
1253 }
1254 }
1255
1256 /* test u_enumCharTypes() */
1257 u_enumCharTypes(enumTypeRange, "a1");
374ca955
A
1258
1259 /* check default properties */
1260 u_enumCharTypes(enumDefaultsRange, NULL);
b75a7d8f
A
1261}
1262
1263static void TestCodeUnit(){
1264 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1265
1266 int32_t i;
1267
1268 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1269 UChar c=codeunit[i];
1270 if(i<4){
1271 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1272 log_err("ERROR: U+%04x is a single", c);
1273 }
1274
1275 }
1276 if(i >= 4 && i< 8){
1277 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1278 log_err("ERROR: U+%04x is a first surrogate", c);
1279 }
1280 }
1281 if(i >= 8 && i< 12){
1282 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1283 log_err("ERROR: U+%04x is a second surrogate", c);
1284 }
1285 }
1286 }
1287
1288}
1289
1290static void TestCodePoint(){
1291 const UChar32 codePoint[]={
1292 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1293 0xd800,
1294 0xdbff,
1295 0xdc00,
1296 0xdfff,
1297 0xdc04,
1298 0xd821,
1299 /*not a surrogate, valid, isUnicodeChar , not Error*/
1300 0x20ac,
1301 0xd7ff,
1302 0xe000,
1303 0xe123,
1304 0x0061,
1305 0xe065,
1306 0x20402,
1307 0x24506,
1308 0x23456,
1309 0x20402,
1310 0x10402,
1311 0x23456,
1312 /*not a surrogate, not valid, isUnicodeChar, isError */
1313 0x0015,
1314 0x009f,
1315 /*not a surrogate, not valid, not isUnicodeChar, isError */
1316 0xffff,
1317 0xfffe,
1318 };
1319 int32_t i;
1320 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1321 UChar32 c=codePoint[i];
1322 if(i<6){
1323 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1324 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1325 }
1326 if(UTF_IS_VALID(c)){
1327 log_err("ERROR: isValid() failed for U+%04x\n", c);
1328 }
1329 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1330 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1331 }
1332 if(UTF_IS_ERROR(c)){
1333 log_err("ERROR: isError() failed for U+%04x\n", c);
1334 }
1335 }else if(i >=6 && i<18){
1336 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1337 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1338 }
1339 if(!UTF_IS_VALID(c)){
1340 log_err("ERROR: isValid() failed for U+%04x\n", c);
1341 }
1342 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1343 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1344 }
1345 if(UTF_IS_ERROR(c)){
1346 log_err("ERROR: isError() failed for U+%04x\n", c);
1347 }
1348 }else if(i >=18 && i<20){
1349 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1350 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1351 }
1352 if(UTF_IS_VALID(c)){
1353 log_err("ERROR: isValid() failed for U+%04x\n", c);
1354 }
1355 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1356 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1357 }
1358 if(!UTF_IS_ERROR(c)){
1359 log_err("ERROR: isError() failed for U+%04x\n", c);
1360 }
1361 }
1362 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1363 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1364 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1365 }
1366 if(UTF_IS_VALID(c)){
1367 log_err("ERROR: isValid() failed for U+%04x\n", c);
1368 }
1369 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1370 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1371 }
1372 if(!UTF_IS_ERROR(c)){
1373 log_err("ERROR: isError() failed for U+%04x\n", c);
1374 }
1375 }
1376 }
1377
374ca955
A
1378 if(
1379 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1380 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1381 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1382 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1383 ) {
1384 log_err("error with U_IS_BMP()\n");
1385 }
1386
1387 if(
1388 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1389 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1390 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1391 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1392 ) {
1393 log_err("error with U_IS_SUPPLEMENTARY()\n");
1394 }
b75a7d8f
A
1395}
1396
1397static void TestCharLength()
1398{
1399 const int32_t codepoint[]={
1400 1, 0x0061,
1401 1, 0xe065,
1402 1, 0x20ac,
1403 2, 0x20402,
1404 2, 0x23456,
1405 2, 0x24506,
1406 2, 0x20402,
1407 2, 0x10402,
1408 1, 0xd7ff,
1409 1, 0xe000
1410 };
1411
1412 int32_t i;
1413 UBool multiple;
1414 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1415 UChar32 c=codepoint[i+1];
1416 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1417 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
1418 }
1419 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1420 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1421 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1422 }
1423 }
1424}
1425
1426/*internal functions ----*/
1427static int32_t MakeProp(char* str)
1428{
1429 int32_t result = 0;
1430 char* matchPosition =0;
1431
1432 matchPosition = strstr(tagStrings, str);
1433 if (matchPosition == 0)
1434 {
1435 log_err("unrecognized type letter ");
1436 log_err(str);
1437 }
374ca955
A
1438 else
1439 result = (int32_t)((matchPosition - tagStrings) / 2);
b75a7d8f
A
1440 return result;
1441}
1442
1443static int32_t MakeDir(char* str)
1444{
1445 int32_t pos = 0;
1446 for (pos = 0; pos < 19; pos++) {
1447 if (strcmp(str, dirStrings[pos]) == 0) {
1448 return pos;
1449 }
1450 }
1451 return -1;
1452}
1453
1454/* test u_charName() -------------------------------------------------------- */
1455
/*
 * Sample code points with their expected names:
 *   name     modern Unicode name (empty if none)
 *   oldName  Unicode 1.0 name (empty if none)
 *   extName  extended name
 *   alias    name alias, or NULL if none
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {
        0x0061,
        "LATIN SMALL LETTER A",
        "",
        "LATIN SMALL LETTER A",
        NULL
    },
    {
        0x01a2,
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER O I",
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER GHA"
    },
    {
        0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        "LATIN SMALL LETTER DOTLESS J BAR HOOK",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        NULL
    },
    {
        0x0fd0,
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "",
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "TIBETAN MARK BKA- SHOG GI MGO RGYAN"
    },
    { 0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL },
    { 0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL },
    { 0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL },
    { 0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL },
    { 0xd800, "", "", "<lead surrogate-D800>", NULL },
    { 0xdc00, "", "", "<trail surrogate-DC00>", NULL },
    {
        0xff08,
        "FULLWIDTH LEFT PARENTHESIS",
        "FULLWIDTH OPENING PARENTHESIS",
        "FULLWIDTH LEFT PARENTHESIS",
        NULL
    },
    { 0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL },
    { 0xffff, "", "", "<noncharacter-FFFF>", NULL },
    {
        0x1d0c5,
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "",
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"
    },
    { 0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL }
};
1485
1486static UBool
1487enumCharNamesFn(void *context,
1488 UChar32 code, UCharNameChoice nameChoice,
1489 const char *name, int32_t length) {
1490 int32_t *pCount=(int32_t *)context;
729e4ab9 1491 const char *expected;
b75a7d8f
A
1492 int i;
1493
1494 if(length<=0 || length!=(int32_t)strlen(name)) {
1495 /* should not be called with an empty string or invalid length */
1496 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1497 return TRUE;
1498 }
1499
1500 ++*pCount;
1501 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1502 if(code==(UChar32)names[i].code) {
1503 switch (nameChoice) {
1504 case U_EXTENDED_CHAR_NAME:
1505 if(0!=strcmp(name, names[i].extName)) {
1506 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1507 }
1508 break;
1509 case U_UNICODE_CHAR_NAME:
1510 if(0!=strcmp(name, names[i].name)) {
1511 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1512 }
1513 break;
1514 case U_UNICODE_10_CHAR_NAME:
729e4ab9
A
1515 expected=names[i].oldName;
1516 if(expected[0]==0 || 0!=strcmp(name, expected)) {
1517 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1518 }
1519 break;
1520 case U_CHAR_NAME_ALIAS:
1521 expected=names[i].alias;
1522 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1523 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
b75a7d8f
A
1524 }
1525 break;
1526 case U_CHAR_NAME_CHOICE_COUNT:
1527 break;
1528 }
1529 break;
1530 }
1531 }
1532 return TRUE;
1533}
1534
1535struct enumExtCharNamesContext {
1536 uint32_t length;
1537 int32_t last;
1538};
1539
1540static UBool
1541enumExtCharNamesFn(void *context,
1542 UChar32 code, UCharNameChoice nameChoice,
1543 const char *name, int32_t length) {
1544 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1545
1546 if (ecncp->last != (int32_t) code - 1) {
1547 if (ecncp->last < 0) {
1548 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1549 } else {
1550 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1551 }
1552 }
1553 ecncp->last = (int32_t) code;
1554
1555 if (!*name) {
1556 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1557 }
1558
1559 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1560}
1561
1562/**
1563 * This can be made more efficient by moving it into putil.c and having
1564 * it directly access the ebcdic translation tables.
1565 * TODO: If we get this method in putil.c, then delete it from here.
1566 */
1567static UChar
1568u_charToUChar(char c) {
1569 UChar uc;
1570 u_charsToUChars(&c, &uc, 1);
1571 return uc;
1572}
1573
1574static void
1575TestCharNames() {
1576 static char name[80];
1577 UErrorCode errorCode=U_ZERO_ERROR;
1578 struct enumExtCharNamesContext extContext;
729e4ab9 1579 const char *expected;
b75a7d8f
A
1580 int32_t length;
1581 UChar32 c;
1582 int32_t i;
1583
1584 log_verbose("Testing uprv_getMaxCharNameLength()\n");
1585 length=uprv_getMaxCharNameLength();
1586 if(length==0) {
1587 /* no names data available */
1588 return;
1589 }
1590 if(length<83) { /* Unicode 3.2 max char name length */
1591 log_err("uprv_getMaxCharNameLength()=%d is too short");
1592 }
1593 /* ### TODO same tests for max ISO comment length as for max name length */
1594
1595 log_verbose("Testing u_charName()\n");
1596 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1597 /* modern Unicode character name */
1598 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1599 if(U_FAILURE(errorCode)) {
1600 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1601 return;
1602 }
1603 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1604 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1605 }
1606
1607 /* find the modern name */
1608 if (*names[i].name) {
1609 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1610 if(U_FAILURE(errorCode)) {
1611 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1612 return;
1613 }
1614 if(c!=(UChar32)names[i].code) {
1615 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1616 }
1617 }
1618
1619 /* Unicode 1.0 character name */
1620 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1621 if(U_FAILURE(errorCode)) {
1622 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1623 return;
1624 }
1625 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1626 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1627 }
1628
1629 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1630 if(names[i].oldName[0]!=0 /* && length>0 */) {
1631 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1632 if(U_FAILURE(errorCode)) {
1633 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1634 return;
1635 }
1636 if(c!=(UChar32)names[i].code) {
1637 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1638 }
1639 }
729e4ab9
A
1640
1641 /* Unicode character name alias */
1642 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1643 if(U_FAILURE(errorCode)) {
1644 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1645 return;
1646 }
1647 expected=names[i].alias;
1648 if(expected==NULL) {
1649 expected="";
1650 }
1651 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1652 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1653 names[i].code, name, length, expected);
1654 }
1655
1656 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1657 if(expected[0]!=0 /* && length>0 */) {
1658 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1659 if(U_FAILURE(errorCode)) {
1660 log_err("u_charFromName(%s - alias) error %s\n",
1661 expected, u_errorName(errorCode));
1662 return;
1663 }
1664 if(c!=(UChar32)names[i].code) {
1665 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1666 expected, c, names[i].code);
1667 }
1668 }
b75a7d8f
A
1669 }
1670
1671 /* test u_enumCharNames() */
1672 length=0;
1673 errorCode=U_ZERO_ERROR;
1674 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1675 if(U_FAILURE(errorCode) || length<94140) {
1676 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1677 }
1678
1679 extContext.length = 0;
1680 extContext.last = -1;
1681 errorCode=U_ZERO_ERROR;
1682 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1683 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1684 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1685 }
1686
1687 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1688 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1689 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1690 }
1691
1692 /* Test getCharNameCharacters */
729e4ab9 1693 if(!getTestOption(QUICK_OPTION)) {
b75a7d8f
A
1694 enum { BUFSIZE = 256 };
1695 UErrorCode ec = U_ZERO_ERROR;
1696 char buf[BUFSIZE];
1697 int32_t maxLength;
1698 UChar32 cp;
1699 UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1700 int32_t l1, l2;
1701 UBool map[256];
1702 UBool ok;
1703
1704 USet* set = uset_open(1, 0); /* empty set */
1705 USet* dumb = uset_open(1, 0); /* empty set */
1706
1707 /*
1708 * uprv_getCharNameCharacters() will likely return more lowercase
1709 * letters than actual character names contain because
1710 * it includes all the characters in lowercased names of
1711 * general categories, for the full possible set of extended names.
1712 */
374ca955
A
1713 {
1714 USetAdder sa={
1715 NULL,
1716 uset_add,
1717 uset_addRange,
73c04bcf
A
1718 uset_addString,
1719 NULL /* don't need remove() */
374ca955
A
1720 };
1721 sa.set=set;
1722 uprv_getCharNameCharacters(&sa);
1723 }
b75a7d8f
A
1724
1725 /* build set the dumb (but sure-fire) way */
374ca955 1726 for (i=0; i<256; ++i) {
b75a7d8f 1727 map[i] = FALSE;
374ca955 1728 }
b75a7d8f
A
1729
1730 maxLength=0;
1731 for (cp=0; cp<0x110000; ++cp) {
1732 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1733 buf, BUFSIZE, &ec);
1734 if (U_FAILURE(ec)) {
1735 log_err("FAIL: u_charName failed when it shouldn't\n");
1736 uset_close(set);
1737 uset_close(dumb);
1738 return;
1739 }
1740 if(len>maxLength) {
1741 maxLength=len;
1742 }
1743
1744 for (i=0; i<len; ++i) {
1745 if (!map[(uint8_t) buf[i]]) {
1746 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1747 map[(uint8_t) buf[i]] = TRUE;
1748 }
1749 }
374ca955
A
1750
1751 /* test for leading/trailing whitespace */
1752 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1753 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1754 }
1755 }
1756
1757 if(map[(uint8_t)'\t']) {
1758 log_err("u_charName() returned a name with a TAB for some code point\n", cp);
b75a7d8f
A
1759 }
1760
1761 length=uprv_getMaxCharNameLength();
1762 if(length!=maxLength) {
1763 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1764 length, maxLength);
1765 }
1766
1767 /* compare the sets. Where is my uset_equals?!! */
1768 ok=TRUE;
1769 for(i=0; i<256; ++i) {
1770 if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1771 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1772 /* ignore lowercase a-z that are in set but not in dumb */
1773 ok=TRUE;
1774 } else {
1775 ok=FALSE;
1776 break;
1777 }
1778 }
1779 }
1780
1781 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1782 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1783 if (U_FAILURE(ec)) {
1784 log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1785 uset_close(set);
1786 uset_close(dumb);
1787 return;
1788 }
1789
1790 if (l1 >= BUFSIZE) {
1791 l1 = BUFSIZE-1;
1792 pat[l1] = 0;
1793 }
1794 if (l2 >= BUFSIZE) {
1795 l2 = BUFSIZE-1;
1796 dumbPat[l2] = 0;
1797 }
1798
1799 if (!ok) {
b75a7d8f 1800 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
374ca955 1801 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
729e4ab9 1802 } else if(getTestOption(VERBOSITY_OPTION)) {
374ca955 1803 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
b75a7d8f
A
1804 }
1805
1806 uset_close(set);
1807 uset_close(dumb);
1808 }
1809
1810 /* ### TODO: test error cases and other interesting things */
1811}
1812
1813/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1814
1815static void
1816TestMirroring() {
73c04bcf
A
1817 USet *set;
1818 UErrorCode errorCode;
1819
1820 UChar32 start, end, c2, c3;
1821 int32_t i;
1822
1823 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1824
1825 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1826
b75a7d8f
A
1827 log_verbose("Testing u_isMirrored()\n");
1828 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1829 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1830 )
1831 ) {
1832 log_err("u_isMirrored() does not work correctly\n");
1833 }
1834
1835 log_verbose("Testing u_charMirror()\n");
1836 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
73c04bcf 1837 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
46f4442e
A
1838 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1839 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1840 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
b75a7d8f
A
1841 )
1842 ) {
1843 log_err("u_charMirror() does not work correctly\n");
1844 }
73c04bcf
A
1845
1846 /* verify that Bidi_Mirroring_Glyph roundtrips */
1847 errorCode=U_ZERO_ERROR;
1848 set=uset_openPattern(mirroredPattern, 17, &errorCode);
1849
1850 if (U_FAILURE(errorCode)) {
729e4ab9 1851 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
73c04bcf
A
1852 } else {
1853 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1854 do {
1855 c2=u_charMirror(start);
1856 c3=u_charMirror(c2);
1857 if(c3!=start) {
1858 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1859 }
1860 } while(++start<=end);
1861 }
1862 }
1863
1864 uset_close(set);
b75a7d8f
A
1865}
1866
1867
1868struct RunTestData
1869{
1870 const char *runText;
1871 UScriptCode runCode;
1872};
1873
1874typedef struct RunTestData RunTestData;
1875
1876static void
1877CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1878 const char *prefix)
1879{
1880 int32_t run, runStart, runLimit;
1881 UScriptCode runCode;
1882
1883 /* iterate over all the runs */
1884 run = 0;
1885 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
1886 if (runStart != runStarts[run]) {
1887 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
1888 prefix, run, runStarts[run], runStart);
1889 }
1890
1891 if (runLimit != runStarts[run + 1]) {
1892 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
1893 prefix, run, runStarts[run + 1], runLimit);
1894 }
1895
1896 if (runCode != testData[run].runCode) {
1897 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
1898 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
1899 }
1900
1901 run += 1;
1902
1903 /* stop when we've seen all the runs we expect to see */
1904 if (run >= nRuns) {
1905 break;
1906 }
1907 }
1908
1909 /* Complain if we didn't see then number of runs we expected */
1910 if (run != nRuns) {
1911 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
1912 }
1913}
1914
1915static void
1916TestUScriptRunAPI()
1917{
374ca955 1918 static const RunTestData testData1[] = {
b75a7d8f
A
1919 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
1920 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
1921 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
1922 {"English (", USCRIPT_LATIN},
1923 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
1924 {") ", USCRIPT_LATIN},
1925 {"\\u6F22\\u5B75", USCRIPT_HAN},
1926 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
1927 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
1928 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
1929 };
374ca955
A
1930
1931 static const RunTestData testData2[] = {
1932 {"((((((((((abc))))))))))", USCRIPT_LATIN}
1933 };
1934
1935 static const struct {
1936 const RunTestData *testData;
1937 int32_t nRuns;
1938 } testDataEntries[] = {
1939 {testData1, LENGTHOF(testData1)},
1940 {testData2, LENGTHOF(testData2)}
1941 };
1942
1943 static const int32_t nTestEntries = LENGTHOF(testDataEntries);
1944 int32_t testEntry;
1945
1946 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
1947 UChar testString[1024];
1948 int32_t runStarts[256];
1949 int32_t nTestRuns = testDataEntries[testEntry].nRuns;
1950 const RunTestData *testData = testDataEntries[testEntry].testData;
1951
1952 int32_t run, stringLimit;
1953 UScriptRun *scriptRun = NULL;
1954 UErrorCode err;
1955
1956 /*
1957 * Fill in the test string and the runStarts array.
1958 */
1959 stringLimit = 0;
1960 for (run = 0; run < nTestRuns; run += 1) {
1961 runStarts[run] = stringLimit;
1962 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
1963 /*stringLimit -= 1;*/
1964 }
1965
1966 /* The limit of the last run */
1967 runStarts[nTestRuns] = stringLimit;
1968
1969 /*
1970 * Make sure that calling uscript_OpenRun with a NULL text pointer
1971 * and a non-zero text length returns the correct error.
1972 */
1973 err = U_ZERO_ERROR;
1974 scriptRun = uscript_openRun(NULL, stringLimit, &err);
1975
1976 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1977 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1978 }
1979
1980 if (scriptRun != NULL) {
1981 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
1982 uscript_closeRun(scriptRun);
1983 }
1984
1985 /*
1986 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1987 * and a zero text length returns the correct error.
1988 */
1989 err = U_ZERO_ERROR;
1990 scriptRun = uscript_openRun(testString, 0, &err);
1991
1992 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1993 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1994 }
1995
1996 if (scriptRun != NULL) {
1997 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
1998 uscript_closeRun(scriptRun);
1999 }
2000
2001 /*
2002 * Make sure that calling uscript_openRun with a NULL text pointer
2003 * and a zero text length doesn't return an error.
2004 */
2005 err = U_ZERO_ERROR;
2006 scriptRun = uscript_openRun(NULL, 0, &err);
2007
2008 if (U_FAILURE(err)) {
2009 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2010 }
2011
2012 /* Make sure that the empty iterator doesn't find any runs */
2013 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2014 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2015 }
2016
2017 /*
2018 * Make sure that calling uscript_setRunText with a NULL text pointer
2019 * and a non-zero text length returns the correct error.
2020 */
2021 err = U_ZERO_ERROR;
2022 uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2023
2024 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2025 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2026 }
2027
2028 /*
2029 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2030 * and a zero text length returns the correct error.
2031 */
2032 err = U_ZERO_ERROR;
2033 uscript_setRunText(scriptRun, testString, 0, &err);
2034
2035 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2036 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2037 }
2038
2039 /*
2040 * Now call uscript_setRunText on the empty iterator
2041 * and make sure that it works.
2042 */
2043 err = U_ZERO_ERROR;
2044 uscript_setRunText(scriptRun, testString, stringLimit, &err);
2045
2046 if (U_FAILURE(err)) {
2047 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2048 } else {
2049 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2050 }
2051
b75a7d8f 2052 uscript_closeRun(scriptRun);
374ca955
A
2053
2054 /*
2055 * Now open an interator over the testString
2056 * using uscript_openRun and make sure that it works
2057 */
2058 scriptRun = uscript_openRun(testString, stringLimit, &err);
2059
2060 if (U_FAILURE(err)) {
2061 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2062 } else {
2063 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2064 }
2065
2066 /* Now reset the iterator, and make sure
2067 * that it still works.
2068 */
2069 uscript_resetRun(scriptRun);
2070
2071 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2072
2073 /* Close the iterator */
b75a7d8f
A
2074 uscript_closeRun(scriptRun);
2075 }
b75a7d8f
A
2076}
2077
2078/* test additional, non-core properties */
2079static void
2080TestAdditionalProperties() {
2081 /* test data for u_charAge() */
2082 static const struct {
2083 UChar32 c;
2084 UVersionInfo version;
2085 } charAges[]={
2086 {0x41, { 1, 1, 0, 0 }},
2087 {0xffff, { 1, 1, 0, 0 }},
2088 {0x20ab, { 2, 0, 0, 0 }},
2089 {0x2fffe, { 2, 0, 0, 0 }},
2090 {0x20ac, { 2, 1, 0, 0 }},
2091 {0xfb1d, { 3, 0, 0, 0 }},
2092 {0x3f4, { 3, 1, 0, 0 }},
2093 {0x10300, { 3, 1, 0, 0 }},
2094 {0x220, { 3, 2, 0, 0 }},
2095 {0xff60, { 3, 2, 0, 0 }}
2096 };
2097
2098 /* test data for u_hasBinaryProperty() */
46f4442e 2099 static const int32_t
b75a7d8f
A
2100 props[][3]={ /* code point, property, value */
2101 { 0x0627, UCHAR_ALPHABETIC, TRUE },
2102 { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2103 { 0x2028, UCHAR_ALPHABETIC, FALSE },
2104
2105 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2106 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2107
2108 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2109 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2110
2111 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2112 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2113
46f4442e
A
2114 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2115 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2116 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2117 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2118 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2119
b75a7d8f
A
2120 { 0x058a, UCHAR_DASH, TRUE },
2121 { 0x007e, UCHAR_DASH, FALSE },
2122
2123 { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2124 { 0x3000, UCHAR_DIACRITIC, FALSE },
2125
2126 { 0x0e46, UCHAR_EXTENDER, TRUE },
2127 { 0x0020, UCHAR_EXTENDER, FALSE },
2128
2129#if !UCONFIG_NO_NORMALIZATION
2130 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2131 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2132 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
374ca955
A
2133
2134 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */
2135 { 0x0308, UCHAR_NFD_INERT, FALSE },
2136
2137 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */
2138 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */
2139
2140 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */
2141 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */
2142 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */
2143 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */
2144 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */
2145 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */
2146
2147 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */
2148 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */
2149
2150 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2151 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2152 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2153 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2154 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2155 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
b75a7d8f
A
2156#endif
2157
2158 { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2159 { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2160 { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2161
2162 { 0x30fb, UCHAR_HYPHEN, TRUE },
2163 { 0xfe58, UCHAR_HYPHEN, FALSE },
2164
2165 { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2166 { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2167 { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2168
2169 { 0x2172, UCHAR_ID_START, TRUE },
2170 { 0x007a, UCHAR_ID_START, TRUE },
2171 { 0x0039, UCHAR_ID_START, FALSE },
2172
2173 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2174 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2175 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2176
2177 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2178 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2179
2180 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2181 { 0x0345, UCHAR_LOWERCASE, TRUE },
2182 { 0x0030, UCHAR_LOWERCASE, FALSE },
2183
2184 { 0x1d7a9, UCHAR_MATH, TRUE },
2185 { 0x2135, UCHAR_MATH, TRUE },
2186 { 0x0062, UCHAR_MATH, FALSE },
2187
2188 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2189 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2190 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2191
2192 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2193 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2194 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2195
2196 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2197 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2198
2199 { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2200 { 0x2162, UCHAR_UPPERCASE, TRUE },
2201 { 0x0345, UCHAR_UPPERCASE, FALSE },
2202
2203 { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2204 { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2205 { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2206
2207 { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2208 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2209 { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2210
2211 { 0x16ee, UCHAR_XID_START, TRUE },
2212 { 0x23456, UCHAR_XID_START, TRUE },
2213 { 0x1d1aa, UCHAR_XID_START, FALSE },
2214
2215 /*
2216 * Version break:
2217 * The following properties are only supported starting with the
2218 * Unicode version indicated in the second field.
2219 */
374ca955 2220 { -1, 0x320, 0 },
b75a7d8f
A
2221
2222 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2223 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2224 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2225
729e4ab9
A
2226 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */
2227 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */
46f4442e
A
2228 { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */
2229 { 0xe0100, UCHAR_DEPRECATED, FALSE },
b75a7d8f
A
2230
2231 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2232 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
46f4442e
A
2233 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2234 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
b75a7d8f
A
2235
2236 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
46f4442e
A
2237 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2238 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
b75a7d8f
A
2239 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2240
2241 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2242 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2243
2244 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2245 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2246
2247 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2248 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2249
2250 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2251 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2252
2253 { 0x2e9b, UCHAR_RADICAL, TRUE },
2254 { 0x4e00, UCHAR_RADICAL, FALSE },
2255
2256 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2257 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2258
2259 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2260 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2261
73c04bcf 2262 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
374ca955
A
2263
2264 { 0x002e, UCHAR_S_TERM, TRUE },
2265 { 0x0061, UCHAR_S_TERM, FALSE },
2266
2267 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2268 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2269 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2270 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2271
b75a7d8f
A
2272 /* enum/integer type properties */
2273
2274 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2275 /* test default Bidi classes for unassigned code points */
2276 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
73c04bcf 2277 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
b75a7d8f 2278 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
73c04bcf
A
2279 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2280 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
b75a7d8f
A
2281 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2282 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2283 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2284 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2285 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2286 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2287
46f4442e 2288 { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
b75a7d8f
A
2289 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2290 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2291 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2292 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2293 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2294 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2295 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2296
2297 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2298 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2299 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2300 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
374ca955 2301 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
b75a7d8f
A
2302 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2303 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2304 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
374ca955 2305 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
b75a7d8f 2306 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
374ca955 2307 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
b75a7d8f
A
2308
2309 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2310 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2311
2312 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2313 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2314 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2315 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2316 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2317 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2318 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2319 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2320 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2321
2322 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2323 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2324 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2325 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2326 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2327 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2328 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2329 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2330 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2331 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2332 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2333 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2334 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2335 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2336 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2337 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2338 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2339
2340 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
729e4ab9
A
2341 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2342 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */
b75a7d8f
A
2343
2344 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2345 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2346 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2347 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2348 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
b75a7d8f
A
2349
2350 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2351 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2352 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2353 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2354 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2355 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2356 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2357 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2358
2359 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2360 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2361 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2362 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2363 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2364 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2365 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
b75a7d8f
A
2366 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2367 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2368 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2369 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2370 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2371 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2372 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2373 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2374 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2375
2376 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2377
2378 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2379
729e4ab9 2380 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2381 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2382 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2383 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
729e4ab9
A
2384 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2385 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
b75a7d8f
A
2386 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2387
729e4ab9
A
2388 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2389 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2390 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2391 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2392
b75a7d8f
A
2393 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2394 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2395 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2396 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
729e4ab9
A
2397 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2398 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2399
2400 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2401 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2402 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2403 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2404
2405 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2406 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2407 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2408 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
729e4ab9
A
2409 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2410 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2411 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f 2412
729e4ab9
A
2413 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2414 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2415 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2416 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2417
2418 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2419 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2420 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2421 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2422
2423 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2424 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2425 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2426 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2427 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2428
2429 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2430
73c04bcf
A
2431 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2432
2433 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2434 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2435 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2436
2437 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2438 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2439 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2440 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2441 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2442
2443 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2444 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC },
2445 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2446
2447 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE },
2448 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC },
2449 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2450 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2451
2452 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2453 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2454 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2455 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2456 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2457 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2458
2459 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2460 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2461 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2462 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2463
2464 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2465 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2466 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2467 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2468
2469 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2470 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2471 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2472 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2473
729e4ab9
A
2474 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2475
2476 /* test some script codes >127 */
2477 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM },
2478 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },
2479 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2480
2481 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2482
2483 /* value changed in Unicode 6.0 */
2484 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2485
b75a7d8f
A
2486 /* undefined UProperty values */
2487 { 0x61, 0x4a7, 0 },
2488 { 0x234bc, 0x15ed, 0 }
2489 };
2490
2491 UVersionInfo version;
2492 UChar32 c;
2493 int32_t i, result, uVersion;
2494 UProperty which;
2495
2496 /* what is our Unicode version? */
2497 u_getUnicodeVersion(version);
374ca955 2498 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
b75a7d8f
A
2499
2500 u_charAge(0x20, version);
2501 if(version[0]==0) {
2502 /* no additional properties available */
2503 log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2504 return;
2505 }
2506
2507 /* test u_charAge() */
2508 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2509 u_charAge(charAges[i].c, version);
2510 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2511 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2512 charAges[i].c,
2513 version[0], version[1], version[2], version[3],
2514 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2515 }
2516 }
2517
2518 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2519 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2520 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */
2521 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2522 u_getIntPropertyMinValue(0x2345)!=0
2523 ) {
2524 log_err("error: u_getIntPropertyMinValue() wrong\n");
2525 }
73c04bcf
A
2526 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2527 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2528 }
2529 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2530 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2531 }
46f4442e 2532 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
73c04bcf
A
2533 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2534 }
2535 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2536 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2537 }
2538 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2539 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2540 }
2541 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2542 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2543 }
2544 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2545 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2546 }
2547 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2548 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2549 }
2550 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2551 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2552 }
2553 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2554 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2555 }
2556 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2557 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2558 }
2559 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2560 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2561 }
2562 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2563 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2564 }
2565 /*JB#2410*/
2566 if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2567 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2568 }
2569 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2570 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2571 }
2572 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) {
2573 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2574 }
2575 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2576 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2577 }
2578 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2579 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
b75a7d8f
A
2580 }
2581
2582 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2583 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
729e4ab9
A
2584 const char *whichName;
2585
b75a7d8f
A
2586 if(props[i][0]<0) {
2587 /* Unicode version break */
2588 if(uVersion<props[i][1]) {
2589 break; /* do not test properties that are not yet supported */
2590 } else {
2591 continue; /* skip this row */
2592 }
2593 }
2594
2595 c=(UChar32)props[i][0];
2596 which=(UProperty)props[i][1];
729e4ab9 2597 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
b75a7d8f
A
2598
2599 if(which<UCHAR_INT_START) {
2600 result=u_hasBinaryProperty(c, which);
2601 if(result!=props[i][2]) {
729e4ab9
A
2602 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2603 c, whichName, result, i);
b75a7d8f
A
2604 }
2605 }
2606
2607 result=u_getIntPropertyValue(c, which);
2608 if(result!=props[i][2]) {
729e4ab9
A
2609 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2610 c, whichName, result, props[i][2], i);
b75a7d8f
A
2611 }
2612
2613 /* test separate functions, too */
2614 switch((UProperty)props[i][1]) {
2615 case UCHAR_ALPHABETIC:
2616 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2617 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2618 props[i][0], result, i);
2619 }
2620 break;
2621 case UCHAR_LOWERCASE:
2622 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2623 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2624 props[i][0], result, i);
2625 }
2626 break;
2627 case UCHAR_UPPERCASE:
2628 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2629 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2630 props[i][0], result, i);
2631 }
2632 break;
2633 case UCHAR_WHITE_SPACE:
2634 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2635 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2636 props[i][0], result, i);
2637 }
2638 break;
2639 default:
2640 break;
2641 }
2642 }
2643}
2644
2645static void
2646TestNumericProperties(void) {
2647 /* see UnicodeData.txt, DerivedNumericValues.txt */
2648 static const struct {
2649 UChar32 c;
2650 int32_t type;
2651 double numValue;
2652 } values[]={
2653 { 0x0F33, U_NT_NUMERIC, -1./2. },
2654 { 0x0C66, U_NT_DECIMAL, 0 },
2655 { 0x96f6, U_NT_NUMERIC, 0 },
729e4ab9
A
2656 { 0xa833, U_NT_NUMERIC, 1./16. },
2657 { 0x2152, U_NT_NUMERIC, 1./10. },
2658 { 0x2151, U_NT_NUMERIC, 1./9. },
2659 { 0x1245f, U_NT_NUMERIC, 1./8. },
2660 { 0x2150, U_NT_NUMERIC, 1./7. },
b75a7d8f 2661 { 0x2159, U_NT_NUMERIC, 1./6. },
729e4ab9
A
2662 { 0x09f6, U_NT_NUMERIC, 3./16. },
2663 { 0x2155, U_NT_NUMERIC, 1./5. },
b75a7d8f
A
2664 { 0x00BD, U_NT_NUMERIC, 1./2. },
2665 { 0x0031, U_NT_DECIMAL, 1. },
2666 { 0x4e00, U_NT_NUMERIC, 1. },
2667 { 0x58f1, U_NT_NUMERIC, 1. },
2668 { 0x10320, U_NT_NUMERIC, 1. },
2669 { 0x0F2B, U_NT_NUMERIC, 3./2. },
2670 { 0x00B2, U_NT_DIGIT, 2. },
2671 { 0x5f10, U_NT_NUMERIC, 2. },
2672 { 0x1813, U_NT_DECIMAL, 3. },
2673 { 0x5f0e, U_NT_NUMERIC, 3. },
2674 { 0x2173, U_NT_NUMERIC, 4. },
2675 { 0x8086, U_NT_NUMERIC, 4. },
2676 { 0x278E, U_NT_DIGIT, 5. },
2677 { 0x1D7F2, U_NT_DECIMAL, 6. },
2678 { 0x247A, U_NT_DIGIT, 7. },
2679 { 0x7396, U_NT_NUMERIC, 9. },
2680 { 0x1372, U_NT_NUMERIC, 10. },
2681 { 0x216B, U_NT_NUMERIC, 12. },
2682 { 0x16EE, U_NT_NUMERIC, 17. },
2683 { 0x249A, U_NT_NUMERIC, 19. },
2684 { 0x303A, U_NT_NUMERIC, 30. },
2685 { 0x5345, U_NT_NUMERIC, 30. },
2686 { 0x32B2, U_NT_NUMERIC, 37. },
2687 { 0x1375, U_NT_NUMERIC, 40. },
2688 { 0x10323, U_NT_NUMERIC, 50. },
2689 { 0x0BF1, U_NT_NUMERIC, 100. },
2690 { 0x964c, U_NT_NUMERIC, 100. },
2691 { 0x217E, U_NT_NUMERIC, 500. },
2692 { 0x2180, U_NT_NUMERIC, 1000. },
2693 { 0x4edf, U_NT_NUMERIC, 1000. },
2694 { 0x2181, U_NT_NUMERIC, 5000. },
2695 { 0x137C, U_NT_NUMERIC, 10000. },
2696 { 0x4e07, U_NT_NUMERIC, 10000. },
2697 { 0x4ebf, U_NT_NUMERIC, 100000000. },
2698 { 0x5146, U_NT_NUMERIC, 1000000000000. },
729e4ab9 2699 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
b75a7d8f
A
2700 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2701 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2702 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2703 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2704 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
729e4ab9
A
2705 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2706 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
b75a7d8f
A
2707 };
2708
2709 double nv;
2710 UChar32 c;
2711 int32_t i, type;
2712
2713 for(i=0; i<LENGTHOF(values); ++i) {
2714 c=values[i].c;
2715 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2716 nv=u_getNumericValue(c);
2717
2718 if(type!=values[i].type) {
2719 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2720 }
2721 if(0.000001 <= fabs(nv - values[i].numValue)) {
2722 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2723 }
2724 }
2725}
2726
2727/**
2728 * Test the property names and property value names API.
2729 */
2730static void
2731TestPropertyNames(void) {
2732 int32_t p, v, choice=0, rev;
2733 UBool atLeastSomething = FALSE;
2734
2735 for (p=0; ; ++p) {
46f4442e 2736 UProperty propEnum = (UProperty)p;
b75a7d8f
A
2737 UBool sawProp = FALSE;
2738 if(p > 10 && !atLeastSomething) {
2739 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2740 return;
2741 }
2742
2743 for (choice=0; ; ++choice) {
46f4442e 2744 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
b75a7d8f 2745 if (name) {
46f4442e
A
2746 if (!sawProp)
2747 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
b75a7d8f
A
2748 log_verbose("%d=\"%s\"", choice, name);
2749 sawProp = TRUE;
2750 atLeastSomething = TRUE;
2751
2752 /* test reverse mapping */
2753 rev = u_getPropertyEnum(name);
2754 if (rev != p) {
2755 log_err("Property round-trip failure: %d -> %s -> %d\n",
2756 p, name, rev);
2757 }
2758 }
2759 if (!name && choice>0) break;
2760 }
2761 if (sawProp) {
2762 /* looks like a valid property; check the values */
46f4442e 2763 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
b75a7d8f
A
2764 int32_t max = 0;
2765 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2766 max = 255;
2767 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2768 /* it's far too slow to iterate all the way up to
2769 the real max, U_GC_P_MASK */
2770 max = U_GC_NL_MASK;
2771 } else if (p == UCHAR_BLOCK) {
2772 /* UBlockCodes, unlike other values, start at 1 */
2773 max = 1;
2774 }
2775 log_verbose("\n");
2776 for (v=-1; ; ++v) {
2777 UBool sawValue = FALSE;
2778 for (choice=0; ; ++choice) {
46f4442e 2779 const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
b75a7d8f
A
2780 if (vname) {
2781 if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2782 log_verbose("%d=\"%s\"", choice, vname);
2783 sawValue = TRUE;
2784
2785 /* test reverse mapping */
46f4442e 2786 rev = u_getPropertyValueEnum(propEnum, vname);
b75a7d8f
A
2787 if (rev != v) {
2788 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2789 pname, v, vname, rev);
2790 }
2791 }
2792 if (!vname && choice>0) break;
2793 }
2794 if (sawValue) {
2795 log_verbose("\n");
2796 }
2797 if (!sawValue && v>=max) break;
2798 }
2799 }
2800 if (!sawProp) {
2801 if (p>=UCHAR_STRING_LIMIT) {
2802 break;
2803 } else if (p>=UCHAR_DOUBLE_LIMIT) {
2804 p = UCHAR_STRING_START - 1;
2805 } else if (p>=UCHAR_MASK_LIMIT) {
2806 p = UCHAR_DOUBLE_START - 1;
2807 } else if (p>=UCHAR_INT_LIMIT) {
2808 p = UCHAR_MASK_START - 1;
2809 } else if (p>=UCHAR_BINARY_LIMIT) {
2810 p = UCHAR_INT_START - 1;
2811 }
2812 }
2813 }
2814}
2815
2816/**
2817 * Test the property values API. See JB#2410.
2818 */
2819static void
2820TestPropertyValues(void) {
2821 int32_t i, p, min, max;
2822 UErrorCode ec;
2823
2824 /* Min should be 0 for everything. */
2825 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2826 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
46f4442e
A
2827 UProperty propEnum = (UProperty)p;
2828 min = u_getIntPropertyMinValue(propEnum);
b75a7d8f
A
2829 if (min != 0) {
2830 if (p == UCHAR_BLOCK) {
2831 /* This is okay...for now. See JB#2487.
2832 TODO Update this for JB#2487. */
2833 } else {
2834 const char* name;
46f4442e
A
2835 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2836 if (name == NULL)
2837 name = "<ERROR>";
b75a7d8f
A
2838 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2839 name, min);
2840 }
2841 }
2842 }
2843
2844 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2845 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2846 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2847 }
2848
2849 /* Max should be -1 for invalid properties. */
46f4442e 2850 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
b75a7d8f
A
2851 if (max != -1) {
2852 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2853 max);
2854 }
2855
73c04bcf 2856 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
b75a7d8f
A
2857 for (i=0; i<2; ++i) {
2858 int32_t script;
2859 const char* desc;
2860 ec = U_ZERO_ERROR;
2861 switch (i) {
2862 case 0:
2863 script = uscript_getScript(-1, &ec);
2864 desc = "uscript_getScript(-1)";
2865 break;
2866 case 1:
2867 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2868 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
2869 break;
2870 default:
2871 log_err("Internal test error. Too many scripts\n");
2872 return;
2873 }
2874 /* We don't explicitly test ec. It should be U_FAILURE but it
2875 isn't documented as such. */
73c04bcf 2876 if (script != (int32_t)USCRIPT_INVALID_CODE) {
b75a7d8f
A
2877 log_err("FAIL: %s = %d, exp. 0\n",
2878 desc, script);
2879 }
2880 }
2881}
2882
b75a7d8f
A
2883/* various tests for consistency of UCD data and API behavior */
2884static void
2885TestConsistency() {
b75a7d8f
A
2886 char buffer[300];
2887 USet *set1, *set2, *set3, *set4;
2888 UErrorCode errorCode;
2889
b75a7d8f
A
2890 UChar32 start, end;
2891 int32_t i, length;
2892
2893 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
2894 U_STRING_DECL(dashPattern, "[:Dash:]", 8);
2895 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
2896 U_STRING_DECL(formatPattern, "[:Cf:]", 6);
2897 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
2898
73c04bcf
A
2899 U_STRING_DECL(mathBlocksPattern,
2900 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
2901 1+32+46+46+45+43+1+1); /* +1 for NUL */
2902 U_STRING_DECL(mathPattern, "[:Math:]", 8);
2903 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
2904 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
2905 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
2906
b75a7d8f
A
2907 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
2908 U_STRING_INIT(dashPattern, "[:Dash:]", 8);
2909 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
2910 U_STRING_INIT(formatPattern, "[:Cf:]", 6);
2911 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
2912
73c04bcf
A
2913 U_STRING_INIT(mathBlocksPattern,
2914 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
2915 1+32+46+46+45+43+1+1); /* +1 for NUL */
2916 U_STRING_INIT(mathPattern, "[:Math:]", 8);
2917 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
2918 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
2919 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
2920
b75a7d8f
A
2921 /*
2922 * It used to be that UCD.html and its precursors said
2923 * "Those dashes used to mark connections between pieces of words,
2924 * plus the Katakana middle dot."
2925 *
2926 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2927 * but not from Hyphen.
729e4ab9 2928 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
b75a7d8f
A
2929 * Therefore, do not show errors when testing the Hyphen property.
2930 */
2931 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2932 "known to the UTC and not considered errors.\n");
2933
2934 errorCode=U_ZERO_ERROR;
2935 set1=uset_openPattern(hyphenPattern, 10, &errorCode);
2936 set2=uset_openPattern(dashPattern, 8, &errorCode);
2937 if(U_SUCCESS(errorCode)) {
2938 /* remove the Katakana middle dot(s) from set1 */
2939 uset_remove(set1, 0x30fb);
2940 uset_remove(set1, 0xff65); /* halfwidth variant */
2941 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
2942 } else {
729e4ab9 2943 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
2944 }
2945
2946 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2947 set3=uset_openPattern(formatPattern, 6, &errorCode);
2948 set4=uset_openPattern(alphaPattern, 14, &errorCode);
2949 if(U_SUCCESS(errorCode)) {
2950 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
2951 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
2952 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
2953 } else {
729e4ab9 2954 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
2955 }
2956
2957 uset_close(set1);
2958 uset_close(set2);
2959 uset_close(set3);
2960 uset_close(set4);
2961
2962 /*
2963 * Check that each lowercase character has "small" in its name
2964 * and not "capital".
2965 * There are some such characters, some of which seem odd.
2966 * Use the verbose flag to see these notices.
2967 */
2968 errorCode=U_ZERO_ERROR;
2969 set1=uset_openPattern(lowerPattern, 13, &errorCode);
2970 if(U_SUCCESS(errorCode)) {
2971 for(i=0;; ++i) {
2972 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2973 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2974 break; /* done */
2975 }
2976 if(U_FAILURE(errorCode)) {
2977 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
2978 i, u_errorName(errorCode));
2979 break;
2980 }
2981 if(length!=0) {
2982 break; /* done with code points, got a string or -1 */
2983 }
2984
2985 while(start<=end) {
2986 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
2987 if(U_FAILURE(errorCode)) {
2988 log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
2989 errorCode=U_ZERO_ERROR;
2990 continue;
2991 }
2992 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
2993 strstr(buffer, "SMALL CAPITAL")==NULL
2994 ) {
2995 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
2996 }
2997 ++start;
2998 }
2999 }
3000 } else {
729e4ab9 3001 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f 3002 }
b75a7d8f 3003 uset_close(set1);
73c04bcf
A
3004
3005 /* verify that all assigned characters in Math blocks are exactly Math characters */
3006 errorCode=U_ZERO_ERROR;
3007 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3008 set2=uset_openPattern(mathPattern, 8, &errorCode);
3009 set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3010 if(U_SUCCESS(errorCode)) {
3011 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3012 uset_complement(set3); /* assigned characters */
3013 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3014 compareUSets(set1, set2,
3015 "[assigned Math block chars]", "[math blocks]&[:Math:]",
3016 TRUE);
3017 } else {
729e4ab9 3018 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
73c04bcf
A
3019 }
3020 uset_close(set1);
3021 uset_close(set2);
3022 uset_close(set3);
3023
3024 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3025 errorCode=U_ZERO_ERROR;
3026 set1=uset_openPattern(unknownPattern, 14, &errorCode);
3027 set2=uset_openPattern(reservedPattern, 20, &errorCode);
3028 if(U_SUCCESS(errorCode)) {
3029 compareUSets(set1, set2,
3030 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3031 TRUE);
3032 } else {
729e4ab9 3033 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
73c04bcf
A
3034 }
3035 uset_close(set1);
3036 uset_close(set2);
b75a7d8f 3037}
374ca955 3038
73c04bcf
A
3039/*
3040 * Starting with ICU4C 3.4, the core Unicode properties files
3041 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
3042 * are hardcoded in the common DLL and therefore not included
3043 * in the data package any more.
3044 * Tests requiring these files are disabled so that
3045 * we need not jump through hoops (like adding snapshots of these files
3046 * to testdata).
3047 * See Jitterbug 4497.
3048 */
3049#define HARDCODED_DATA_4497 1
3050
374ca955
A
3051/* API coverage for ucase.c */
3052static void TestUCase() {
73c04bcf 3053#if !HARDCODED_DATA_4497
374ca955
A
3054 UDataMemory *pData;
3055 UCaseProps *csp;
73c04bcf 3056 const UCaseProps *ccsp;
374ca955
A
3057 UErrorCode errorCode;
3058
3059 /* coverage for ucase_openBinary() */
3060 errorCode=U_ZERO_ERROR;
3061 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3062 if(U_FAILURE(errorCode)) {
3063 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3064 u_errorName(errorCode));
3065 return;
3066 }
3067
3068 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3069 if(U_FAILURE(errorCode)) {
3070 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3071 u_errorName(errorCode));
3072 udata_close(pData);
3073 return;
3074 }
3075
3076 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3077 log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3078 }
3079
3080 ucase_close(csp);
3081 udata_close(pData);
73c04bcf
A
3082
3083 /* coverage for ucase_getDummy() */
3084 errorCode=U_ZERO_ERROR;
3085 ccsp=ucase_getDummy(&errorCode);
3086 if(ucase_tolower(ccsp, 0x41)!=0x41) {
3087 log_err("ucase_tolower(dummy, A)!=A\n");
3088 }
46f4442e 3089#endif
73c04bcf
A
3090}
3091
3092/* API coverage for ubidi_props.c */
3093static void TestUBiDiProps() {
3094#if !HARDCODED_DATA_4497
3095 UDataMemory *pData;
3096 UBiDiProps *bdp;
73c04bcf
A
3097 const UBiDiProps *cbdp;
3098 UErrorCode errorCode;
3099
73c04bcf
A
3100 /* coverage for ubidi_openBinary() */
3101 errorCode=U_ZERO_ERROR;
3102 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3103 if(U_FAILURE(errorCode)) {
3104 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3105 u_errorName(errorCode));
3106 return;
3107 }
3108
3109 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3110 if(U_FAILURE(errorCode)) {
3111 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3112 u_errorName(errorCode));
3113 udata_close(pData);
3114 return;
3115 }
3116
3117 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3118 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3119 }
3120
3121 ubidi_closeProps(bdp);
3122 udata_close(pData);
73c04bcf
A
3123
3124 /* coverage for ubidi_getDummy() */
3125 errorCode=U_ZERO_ERROR;
3126 cbdp=ubidi_getDummy(&errorCode);
3127 if(ubidi_getClass(cbdp, 0x20)!=0) {
3128 log_err("ubidi_getClass(dummy, space)!=0\n");
3129 }
46f4442e 3130#endif
73c04bcf
A
3131}
3132
3133/* test case folding, compare return values with CaseFolding.txt ------------ */
3134
/*
 * Bit flags naming the kinds of case folding that can be tested for a
 * code point; they are OR-ed together to record which ones were covered.
 */
enum {
    CF_SIMPLE=1<<0,                         /* simple (single code point) folding */
    CF_FULL=1<<1,                           /* full (string) folding */
    CF_TURKIC=1<<2,                         /* folding with the Turkic option */
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC      /* all of the above */
};
3142
3143static void
3144testFold(UChar32 c, int which,
3145 UChar32 simple, UChar32 turkic,
3146 const UChar *full, int32_t fullLength,
3147 const UChar *turkicFull, int32_t turkicFullLength) {
3148 UChar s[2], t[32];
3149 UChar32 c2;
3150 int32_t length, length2;
3151
3152 UErrorCode errorCode=U_ZERO_ERROR;
3153
3154 length=0;
3155 U16_APPEND_UNSAFE(s, length, c);
3156
3157 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3158 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3159 }
3160 if((which&CF_FULL)!=0) {
3161 length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3162 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3163 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3164 }
3165 }
3166 if((which&CF_TURKIC)!=0) {
3167 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3168 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3169 }
3170
3171 length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3172 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3173 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3174 }
3175 }
3176}
3177
3178/* test that c case-folds to itself */
3179static void
3180testFoldToSelf(UChar32 c, int which) {
3181 UChar s[2];
3182 int32_t length;
3183
3184 length=0;
3185 U16_APPEND_UNSAFE(s, length, c);
3186 testFold(c, which, c, c, s, length, s, length);
3187}
3188
3189struct CaseFoldingData {
3190 USet *notSeen;
3191 UChar32 prev, prevSimple;
3192 UChar prevFull[32];
3193 int32_t prevFullLength;
3194 int which;
3195};
3196typedef struct CaseFoldingData CaseFoldingData;
3197
3198static void U_CALLCONV
3199caseFoldingLineFn(void *context,
3200 char *fields[][2], int32_t fieldCount,
3201 UErrorCode *pErrorCode) {
3202 CaseFoldingData *pData=(CaseFoldingData *)context;
3203 char *end;
3204 UChar full[32];
3205 UChar32 c, prev, simple;
3206 int32_t count;
3207 int which;
3208 char status;
3209
3210 /* get code point */
3211 c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
3212 end=(char *)u_skipWhitespace(end);
3213 if(end<=fields[0][0] || end!=fields[0][1]) {
3214 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3215 *pErrorCode=U_PARSE_ERROR;
3216 return;
3217 }
3218
3219 /* get the status of this mapping */
3220 status=*u_skipWhitespace(fields[1][0]);
3221 if(status!='C' && status!='S' && status!='F' && status!='T') {
3222 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3223 *pErrorCode=U_PARSE_ERROR;
3224 return;
3225 }
3226
3227 /* get the mapping */
3228 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3229 if(U_FAILURE(*pErrorCode)) {
3230 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3231 return;
3232 }
3233
3234 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3235 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3236 simple=c;
3237 }
3238
3239 if(c!=(prev=pData->prev)) {
3240 /*
3241 * Test remaining mappings for the previous code point.
3242 * If a turkic folding was not mentioned, then it should fold the same
3243 * as the regular simple case folding.
3244 */
3245 UChar s[2];
3246 int32_t length;
3247
3248 length=0;
3249 U16_APPEND_UNSAFE(s, length, prev);
3250 testFold(prev, (~pData->which)&CF_ALL,
3251 prev, pData->prevSimple,
3252 s, length,
3253 pData->prevFull, pData->prevFullLength);
3254 pData->prev=pData->prevSimple=c;
3255 length=0;
3256 U16_APPEND_UNSAFE(pData->prevFull, length, c);
3257 pData->prevFullLength=length;
3258 pData->which=0;
3259 }
3260
3261 /*
3262 * Turn the status into a bit set of case foldings to test.
3263 * Remember non-Turkic case foldings as defaults for Turkic mode.
3264 */
3265 switch(status) {
3266 case 'C':
3267 which=CF_SIMPLE|CF_FULL;
3268 pData->prevSimple=simple;
3269 u_memcpy(pData->prevFull, full, count);
3270 pData->prevFullLength=count;
3271 break;
3272 case 'S':
3273 which=CF_SIMPLE;
3274 pData->prevSimple=simple;
3275 break;
3276 case 'F':
3277 which=CF_FULL;
3278 u_memcpy(pData->prevFull, full, count);
3279 pData->prevFullLength=count;
3280 break;
3281 case 'T':
3282 which=CF_TURKIC;
3283 break;
3284 default:
3285 which=0;
3286 break; /* won't happen because of test above */
3287 }
3288
3289 testFold(c, which, simple, simple, full, count, full, count);
3290
3291 /* remember which case foldings of c have been tested */
3292 pData->which|=which;
3293
3294 /* remove c from the set of ones not mentioned in CaseFolding.txt */
3295 uset_remove(pData->notSeen, c);
3296}
3297
3298static void
3299TestCaseFolding() {
3300 CaseFoldingData data={ NULL };
3301 char *fields[3][2];
3302 UErrorCode errorCode;
3303
3304 static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3305
3306 errorCode=U_ZERO_ERROR;
3307 /* test BMP & plane 1 - nothing interesting above */
3308 data.notSeen=uset_open(0, 0x1ffff);
3309 data.prevFullLength=1; /* length of full case folding of U+0000 */
3310
3311 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3312 if(U_SUCCESS(errorCode)) {
3313 int32_t i, start, end;
3314
3315 /* add a pseudo-last line to finish testing of the actual last one */
3316 fields[0][0]=lastLine;
3317 fields[0][1]=lastLine+6;
3318 fields[1][0]=lastLine+7;
3319 fields[1][1]=lastLine+9;
3320 fields[2][0]=lastLine+10;
3321 fields[2][1]=lastLine+17;
3322 caseFoldingLineFn(&data, fields, 3, &errorCode);
3323
3324 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3325 for(i=0;
3326 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3327 U_SUCCESS(errorCode);
3328 ++i
3329 ) {
3330 do {
3331 testFoldToSelf(start, CF_ALL);
3332 } while(++start<=end);
3333 }
3334 }
3335
3336 uset_close(data.notSeen);
374ca955 3337}