]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cucdtst.c
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
57a6839d 3 * Copyright (c) 1997-2014, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
46f4442e 6/*******************************************************************************
b75a7d8f
A
7*
8* File CUCDTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API, added tests for string functions
46f4442e 13********************************************************************************
b75a7d8f
A
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
729e4ab9 25#include "unicode/unorm2.h"
b75a7d8f
A
26
27#include "cintltst.h"
374ca955 28#include "putilimp.h"
b75a7d8f 29#include "uparse.h"
374ca955 30#include "ucase.h"
73c04bcf 31#include "ubidi_props.h"
b75a7d8f 32#include "uprops.h"
374ca955 33#include "uset_imp.h"
b75a7d8f 34#include "usc_impl.h"
374ca955
A
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
b75a7d8f 37
374ca955 38#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
b75a7d8f
A
39
40/* prototypes --------------------------------------------------------------- */
41
42static void TestUpperLower(void);
43static void TestLetterNumber(void);
44static void TestMisc(void);
45static void TestPOSIX(void);
46static void TestControlPrint(void);
47static void TestIdentifier(void);
48static void TestUnicodeData(void);
49static void TestCodeUnit(void);
50static void TestCodePoint(void);
51static void TestCharLength(void);
52static void TestCharNames(void);
53static void TestMirroring(void);
b75a7d8f
A
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
374ca955 60static void TestUCase(void);
73c04bcf
A
61static void TestUBiDiProps(void);
62static void TestCaseFolding(void);
b75a7d8f
A
63
64/* internal methods used */
65static int32_t MakeProp(char* str);
66static int32_t MakeDir(char* str);
67
73c04bcf
A
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72 char *fields[][2], int32_t fieldCount,
73 UParseLineFn *lineFn, void *context,
74 UErrorCode *pErrorCode) {
75 char path[256];
76 char backupPath[256];
77
78 if(U_FAILURE(*pErrorCode)) {
79 return;
80 }
81
82 /* Look inside ICU_DATA first */
83 strcpy(path, u_getDataDirectory());
84 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85 strcat(path, filename);
86
87 /* As a fallback, try to guess where the source data was located
88 * at the time ICU was built, and look there.
89 */
90 strcpy(backupPath, ctest_dataSrcDir());
91 strcat(backupPath, U_FILE_SEP_STRING);
92 strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93 strcat(backupPath, filename);
94
95 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96 if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97 *pErrorCode=U_ZERO_ERROR;
98 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99 }
100 if(U_FAILURE(*pErrorCode)) {
729e4ab9 101 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
73c04bcf
A
102 }
103}
104
b75a7d8f
A
105/* test data ---------------------------------------------------------------- */
106
b75a7d8f
A
107static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
108static const int32_t tagValues[] =
109 {
110 /* Mn */ U_NON_SPACING_MARK,
111 /* Mc */ U_COMBINING_SPACING_MARK,
112 /* Me */ U_ENCLOSING_MARK,
113 /* Nd */ U_DECIMAL_DIGIT_NUMBER,
114 /* Nl */ U_LETTER_NUMBER,
115 /* No */ U_OTHER_NUMBER,
116 /* Zs */ U_SPACE_SEPARATOR,
117 /* Zl */ U_LINE_SEPARATOR,
118 /* Zp */ U_PARAGRAPH_SEPARATOR,
119 /* Cc */ U_CONTROL_CHAR,
120 /* Cf */ U_FORMAT_CHAR,
121 /* Cs */ U_SURROGATE,
122 /* Co */ U_PRIVATE_USE_CHAR,
123 /* Cn */ U_UNASSIGNED,
124 /* Lu */ U_UPPERCASE_LETTER,
125 /* Ll */ U_LOWERCASE_LETTER,
126 /* Lt */ U_TITLECASE_LETTER,
127 /* Lm */ U_MODIFIER_LETTER,
128 /* Lo */ U_OTHER_LETTER,
129 /* Pc */ U_CONNECTOR_PUNCTUATION,
130 /* Pd */ U_DASH_PUNCTUATION,
131 /* Ps */ U_START_PUNCTUATION,
132 /* Pe */ U_END_PUNCTUATION,
133 /* Po */ U_OTHER_PUNCTUATION,
134 /* Sm */ U_MATH_SYMBOL,
135 /* Sc */ U_CURRENCY_SYMBOL,
136 /* Sk */ U_MODIFIER_SYMBOL,
137 /* So */ U_OTHER_SYMBOL,
138 /* Pi */ U_INITIAL_PUNCTUATION,
139 /* Pf */ U_FINAL_PUNCTUATION
140 };
141
/* Short names of the bidirectional classes; each fits in 5 chars including the NUL. */
static const char dirStrings[][5] = {
    "L",   "R",   "EN",  "ES",  "ET",
    "AN",  "CS",  "B",   "S",   "WS",
    "ON",  "LRE", "LRO", "AL",  "RLE",
    "RLO", "PDF", "NSM", "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI", "LRI", "RLI", "PDI"
};
168
169void addUnicodeTest(TestNode** root);
170
171void addUnicodeTest(TestNode** root)
172{
b75a7d8f
A
173 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
174 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
175 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
46f4442e
A
176 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
177 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
b75a7d8f
A
178 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
179 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
180 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
181 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
182 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
183 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
184 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
185 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
186 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
187 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
188 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
729e4ab9
A
189 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
190 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
51004dcb 191 addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
b75a7d8f
A
192 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
193 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
194 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
195 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
374ca955 196 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
73c04bcf
A
197 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
198 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
b75a7d8f
A
199}
200
201/*==================================================== */
202/* test u_toupper() and u_tolower() */
203/*==================================================== */
204static void TestUpperLower()
205{
206 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
207 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
208 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
209 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
210 int32_t i;
211
212 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
213 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
214
215/*
216Checks LetterLike Symbols which were previously a source of confusion
217[Bertrand A. D. 02/04/98]
218*/
219 for (i=0x2100;i<0x2138;i++)
220 {
73c04bcf
A
221 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
222 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
b75a7d8f
A
223 {
224 if (i != (int)u_tolower(i)) /* itself */
225 log_err("Failed case conversion with itself: U+%04x\n", i);
226 if (i != (int)u_toupper(i))
227 log_err("Failed case conversion with itself: U+%04x\n", i);
228 }
229 }
230
231 for(i=0; i < u_strlen(upper); i++){
232 if(u_tolower(upper[i]) != lower[i]){
233 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
234 }
235 }
236
237 log_verbose("testing upper lower\n");
238 for (i = 0; i < 21; i++) {
239
240 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
241 {
242 log_err("Failed isLowerCase test at %c\n", upperTest[i]);
243 }
244 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
245 {
246 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
247 }
248 else if (upperTest[i] != u_tolower(lowerTest[i]))
249 {
250 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]);
251 }
252 else if (lowerTest[i] != u_toupper(upperTest[i]))
253 {
254 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
255 }
256 else if (upperTest[i] != u_tolower(upperTest[i]))
257 {
258 log_err("Failed case conversion with itself: %c\n", upperTest[i]);
259 }
260 else if (lowerTest[i] != u_toupper(lowerTest[i]))
261 {
262 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
263 }
264 }
265 log_verbose("done testing upper lower\n");
266
267 log_verbose("testing u_istitle\n");
268 {
269 static const UChar expected[] = {
270 0x1F88,
271 0x1F89,
272 0x1F8A,
273 0x1F8B,
274 0x1F8C,
275 0x1F8D,
276 0x1F8E,
277 0x1F8F,
278 0x1F88,
279 0x1F89,
280 0x1F8A,
281 0x1F8B,
282 0x1F8C,
283 0x1F8D,
284 0x1F8E,
285 0x1F8F,
286 0x1F98,
287 0x1F99,
288 0x1F9A,
289 0x1F9B,
290 0x1F9C,
291 0x1F9D,
292 0x1F9E,
293 0x1F9F,
294 0x1F98,
295 0x1F99,
296 0x1F9A,
297 0x1F9B,
298 0x1F9C,
299 0x1F9D,
300 0x1F9E,
301 0x1F9F,
302 0x1FA8,
303 0x1FA9,
304 0x1FAA,
305 0x1FAB,
306 0x1FAC,
307 0x1FAD,
308 0x1FAE,
309 0x1FAF,
310 0x1FA8,
311 0x1FA9,
312 0x1FAA,
313 0x1FAB,
314 0x1FAC,
315 0x1FAD,
316 0x1FAE,
317 0x1FAF,
318 0x1FBC,
319 0x1FBC,
320 0x1FCC,
321 0x1FCC,
322 0x1FFC,
323 0x1FFC,
324 };
325 int32_t num = sizeof(expected)/sizeof(expected[0]);
326 for(i=0; i<num; i++){
327 if(!u_istitle(expected[i])){
328 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
329 }
330 }
331
332 }
333}
334
73c04bcf 335/* compare two sets and verify that their difference or intersection is empty */
b75a7d8f
A
336static UBool
337showADiffB(const USet *a, const USet *b,
338 const char *a_name, const char *b_name,
339 UBool expect, UBool diffIsError) {
73c04bcf 340 USet *aa;
b75a7d8f 341 int32_t i, start, end, length;
b75a7d8f
A
342 UErrorCode errorCode;
343
73c04bcf
A
344 /*
345 * expect:
346 * TRUE -> a-b should be empty, that is, b should contain all of a
347 * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
348 */
349 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
350 return TRUE;
351 }
352
353 /* clone a to aa because a is const */
354 aa=uset_open(1, 0);
355 if(aa==NULL) {
356 /* unusual problem - out of memory? */
357 return FALSE;
358 }
359 uset_addAll(aa, a);
360
361 /* compute the set in question */
362 if(expect) {
363 /* a-b */
364 uset_removeAll(aa, b);
365 } else {
366 /* a&b */
367 uset_retainAll(aa, b);
368 }
369
370 /* aa is not empty because of the initial tests above; show its contents */
b75a7d8f 371 errorCode=U_ZERO_ERROR;
b75a7d8f
A
372 i=0;
373 for(;;) {
73c04bcf 374 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
b75a7d8f 375 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
73c04bcf 376 break; /* done */
b75a7d8f
A
377 }
378 if(U_FAILURE(errorCode)) {
73c04bcf 379 log_err("error comparing %s with %s at difference item %d: %s\n",
b75a7d8f 380 a_name, b_name, i, u_errorName(errorCode));
73c04bcf 381 break;
b75a7d8f
A
382 }
383 if(length!=0) {
73c04bcf 384 break; /* done with code points, got a string or -1 */
b75a7d8f
A
385 }
386
73c04bcf
A
387 if(diffIsError) {
388 if(expect) {
389 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
390 } else {
391 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
392 }
393 } else {
394 if(expect) {
395 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
396 } else {
397 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
b75a7d8f
A
398 }
399 }
400
401 ++i;
402 }
73c04bcf
A
403
404 uset_close(aa);
405 return FALSE;
b75a7d8f
A
406}
407
408static UBool
409showAMinusB(const USet *a, const USet *b,
410 const char *a_name, const char *b_name,
411 UBool diffIsError) {
412 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
413}
414
415static UBool
416showAIntersectB(const USet *a, const USet *b,
417 const char *a_name, const char *b_name,
418 UBool diffIsError) {
419 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
420}
421
422static UBool
423compareUSets(const USet *a, const USet *b,
424 const char *a_name, const char *b_name,
425 UBool diffIsError) {
73c04bcf
A
426 /*
427 * Use an arithmetic & not a logical && so that both branches
428 * are always taken and all differences are shown.
429 */
b75a7d8f 430 return
73c04bcf 431 showAMinusB(a, b, a_name, b_name, diffIsError) &
b75a7d8f
A
432 showAMinusB(b, a, b_name, a_name, diffIsError);
433}
434
435/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
436static void TestLetterNumber()
437{
438 UChar i = 0x0000;
439
440 log_verbose("Testing for isalpha\n");
441 for (i = 0x0041; i < 0x005B; i++) {
442 if (!u_isalpha(i))
443 {
444 log_err("Failed isLetter test at %.4X\n", i);
445 }
446 }
447 for (i = 0x0660; i < 0x066A; i++) {
448 if (u_isalpha(i))
449 {
450 log_err("Failed isLetter test with numbers at %.4X\n", i);
451 }
452 }
453
454 log_verbose("Testing for isdigit\n");
455 for (i = 0x0660; i < 0x066A; i++) {
456 if (!u_isdigit(i))
457 {
458 log_verbose("Failed isNumber test at %.4X\n", i);
459 }
460 }
461
462 log_verbose("Testing for isalnum\n");
463 for (i = 0x0041; i < 0x005B; i++) {
464 if (!u_isalnum(i))
465 {
466 log_err("Failed isAlNum test at %.4X\n", i);
467 }
468 }
469 for (i = 0x0660; i < 0x066A; i++) {
470 if (!u_isalnum(i))
471 {
472 log_err("Failed isAlNum test at %.4X\n", i);
473 }
474 }
475
476 {
477 /*
478 * The following checks work only starting from Unicode 4.0.
479 * Check the version number here.
480 */
374ca955 481 static UVersionInfo u401={ 4, 0, 1, 0 };
b75a7d8f
A
482 UVersionInfo version;
483 u_getUnicodeVersion(version);
374ca955 484 if(version[0]<4 || 0==memcmp(version, u401, 4)) {
b75a7d8f
A
485 return;
486 }
487 }
488
489 {
490 /*
491 * Sanity check:
492 * Verify that exactly the digit characters have decimal digit values.
493 * This assumption is used in the implementation of u_digit()
494 * (which checks nt=de)
495 * compared with the parallel java.lang.Character.digit()
496 * (which checks Nd).
497 *
498 * This was not true in Unicode 3.2 and earlier.
374ca955
A
499 * Unicode 4.0 fixed discrepancies.
500 * Unicode 4.0.1 re-introduced problems in this area due to an
501 * unintentionally incomplete last-minute change.
b75a7d8f
A
502 */
503 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
504 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
505
506 USet *digits, *decimalValues;
507 UErrorCode errorCode;
508
509 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
510 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
511 errorCode=U_ZERO_ERROR;
512 digits=uset_openPattern(digitsPattern, 6, &errorCode);
513 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
514
515 if(U_SUCCESS(errorCode)) {
516 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
517 }
518
519 uset_close(digits);
520 uset_close(decimalValues);
521 }
522}
523
729e4ab9
A
524static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
525 const UChar32 *sampleChars, int32_t sampleCharsLength,
526 UBool expected) {
527 int32_t i;
528 for (i = 0; i < sampleCharsLength; ++i) {
529 UBool result = propFn(sampleChars[i]);
530 if (result != expected) {
531 log_err("error: character property function %s(U+%04x)=%d is wrong\n",
532 propName, sampleChars[i], result);
533 }
534 }
535}
536
b75a7d8f
A
537/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
538static void TestMisc()
539{
729e4ab9
A
540 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
541 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
542 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
543 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
544 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
545 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
b75a7d8f 546/* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
729e4ab9
A
547 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
548 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
549 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
550 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
b75a7d8f
A
551
552 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
553
554 uint32_t mask;
555
556 int32_t i;
557 char icuVersion[U_MAX_VERSION_STRING_LENGTH];
558 UVersionInfo realVersion;
559
560 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
561
729e4ab9
A
562 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
563 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
b75a7d8f 564
729e4ab9
A
565 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
566 sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
567 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
568 sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
b75a7d8f 569
729e4ab9
A
570 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
571 sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
572 testSampleCharProps(u_isWhitespace, "u_isWhitespace",
573 sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
b75a7d8f 574
729e4ab9
A
575 testSampleCharProps(u_isdefined, "u_isdefined",
576 sampleDefined, LENGTHOF(sampleDefined), TRUE);
577 testSampleCharProps(u_isdefined, "u_isdefined",
578 sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
579
580 testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
581 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
b75a7d8f 582
729e4ab9
A
583 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
584 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
585
586 for (i = 0; i < LENGTHOF(sampleDigits); i++) {
587 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
588 log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
589 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
b75a7d8f
A
590 }
591 }
592
593 /* Tests the ICU version #*/
594 u_getVersion(realVersion);
595 u_versionToString(realVersion, icuVersion);
374ca955 596 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
b75a7d8f
A
597 {
598 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
599 }
600#if defined(ICU_VERSION)
601 /* test only happens where we have configure.in with VERSION - sanity check. */
602 if(strcmp(U_ICU_VERSION, ICU_VERSION))
603 {
604 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION);
605 }
606#endif
607
608 /* test U_GC_... */
609 if(
610 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
611 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
612 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
613 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
614 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
615 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
616 ) {
617 log_err("error: U_GET_GC_MASK does not work properly\n");
618 }
619
620 mask=0;
621 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
622
623 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
624 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
625 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
626 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
627 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
628
629 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
630 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
631 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
632
633 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
634 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
635 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
636
637 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
638 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
639 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
640
641 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
642 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
643 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
644 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
645
646 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
647 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
648 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
649 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
650 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
651
652 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
653 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
654 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
655 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
656
657 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
658 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
659
660 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
661 log_err("error: problems with U_GC_XX_MASK constants\n");
662 }
663
664 mask=0;
665 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
666 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
667 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
668 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
669 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
670 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
671 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
672
673 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
674 log_err("error: problems with U_GC_Y_MASK constants\n");
675 }
676 {
677 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
678 for(i=0; i<10; i++){
679 if(digit[i]!=u_forDigit(i,10)){
680 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
681 }
682 }
683 }
684
685 /* test u_digit() */
686 {
687 static const struct {
688 UChar32 c;
689 int8_t radix, value;
690 } data[]={
691 /* base 16 */
692 { 0x0031, 16, 1 },
693 { 0x0038, 16, 8 },
694 { 0x0043, 16, 12 },
695 { 0x0066, 16, 15 },
696 { 0x00e4, 16, -1 },
697 { 0x0662, 16, 2 },
698 { 0x06f5, 16, 5 },
699 { 0xff13, 16, 3 },
700 { 0xff41, 16, 10 },
701
702 /* base 8 */
703 { 0x0031, 8, 1 },
704 { 0x0038, 8, -1 },
705 { 0x0043, 8, -1 },
706 { 0x0066, 8, -1 },
707 { 0x00e4, 8, -1 },
708 { 0x0662, 8, 2 },
709 { 0x06f5, 8, 5 },
710 { 0xff13, 8, 3 },
711 { 0xff41, 8, -1 },
712
713 /* base 36 */
714 { 0x5a, 36, 35 },
715 { 0x7a, 36, 35 },
716 { 0xff3a, 36, 35 },
717 { 0xff5a, 36, 35 },
718
719 /* wrong radix values */
720 { 0x0031, 1, -1 },
721 { 0xff3a, 37, -1 }
722 };
723
724 for(i=0; i<LENGTHOF(data); ++i) {
725 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
726 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
727 data[i].c,
728 data[i].radix,
729 u_digit(data[i].c, data[i].radix),
730 data[i].value);
731 }
732 }
733 }
734}
735
/* test C/POSIX-style functions --------------------------------------------- */

/*
 * Bit flags, one per C/POSIX-style character class.
 * Bit n belongs to entry n of the posixClasses[] table.
 */
#define ISAL 0x001  /* isalpha */
#define ISLO 0x002  /* islower */
#define ISUP 0x004  /* isupper */

#define ISDI 0x008  /* isdigit */
#define ISXD 0x010  /* isxdigit */

#define ISAN 0x020  /* isalnum */

#define ISPU 0x040  /* ispunct */
#define ISGR 0x080  /* isgraph */
#define ISPR 0x100  /* isprint */

#define ISSP 0x200  /* isspace */
#define ISBL 0x400  /* isblank */
#define ISCN 0x800  /* iscntrl */
755
756/* C/POSIX-style functions, in the same order as the bit flags */
374ca955 757typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
b75a7d8f
A
758
759static const struct {
760 IsPOSIXClass *fn;
761 const char *name;
762} posixClasses[]={
763 { u_isalpha, "isalpha" },
764 { u_islower, "islower" },
765 { u_isupper, "isupper" },
766 { u_isdigit, "isdigit" },
767 { u_isxdigit, "isxdigit" },
768 { u_isalnum, "isalnum" },
769 { u_ispunct, "ispunct" },
770 { u_isgraph, "isgraph" },
771 { u_isprint, "isprint" },
772 { u_isspace, "isspace" },
773 { u_isblank, "isblank" },
774 { u_iscntrl, "iscntrl" }
775};
776
777static const struct {
778 UChar32 c;
779 uint32_t posixResults;
780} posixData[]={
781 { 0x0008, ISCN }, /* backspace */
782 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */
783 { 0x000a, ISSP| ISCN }, /* LF */
784 { 0x000c, ISSP| ISCN }, /* FF */
785 { 0x000d, ISSP| ISCN }, /* CR */
786 { 0x0020, ISPR|ISSP|ISBL }, /* space */
787 { 0x0021, ISPU|ISGR|ISPR }, /* ! */
788 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */
789 { 0x0040, ISPU|ISGR|ISPR }, /* @ */
790 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */
791 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */
792 { 0x007b, ISPU|ISGR|ISPR }, /* { */
793 { 0x0085, ISSP| ISCN }, /* NEL */
794 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */
795 { 0x00a4, ISGR|ISPR }, /* currency sign */
796 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */
797 { 0x0300, ISGR|ISPR }, /* combining grave */
798 { 0x0600, ISCN }, /* arabic number sign */
799 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */
800 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */
801 { 0x2002, ISPR|ISSP|ISBL }, /* en space */
802 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */
803 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */
374ca955
A
804 { 0x200b, ISCN }, /* ZWSP */
805 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
b75a7d8f
A
806 { 0x200e, ISCN }, /* LRM */
807 { 0x2028, ISPR|ISSP| ISCN }, /* LS */
808 { 0x2029, ISPR|ISSP| ISCN }, /* PS */
809 { 0x20ac, ISGR|ISPR }, /* Euro */
810 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */
811 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */
812 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */
813 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */
814 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */
815};
816
817static void
818TestPOSIX() {
819 uint32_t mask;
820 int32_t cl, i;
821 UBool expect;
822
823 mask=1;
824 for(cl=0; cl<12; ++cl) {
825 for(i=0; i<LENGTHOF(posixData); ++i) {
826 expect=(UBool)((posixData[i].posixResults&mask)!=0);
827 if(posixClasses[cl].fn(posixData[i].c)!=expect) {
828 log_err("u_%s(U+%04x)=%s is wrong\n",
829 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
830 }
831 }
832 mask<<=1;
833 }
834}
835
836/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
837static void TestControlPrint()
838{
729e4ab9
A
839 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
840 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
841 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
842 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
b75a7d8f 843 UChar32 c;
b75a7d8f 844
729e4ab9
A
845 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
846 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
b75a7d8f 847
729e4ab9
A
848 testSampleCharProps(u_isprint, "u_isprint",
849 samplePrintable, LENGTHOF(samplePrintable), TRUE);
850 testSampleCharProps(u_isprint, "u_isprint",
851 sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
b75a7d8f
A
852
853 /* test all ISO 8 controls */
854 for(c=0; c<=0x9f; ++c) {
855 if(c==0x20) {
856 /* skip ASCII graphic characters and continue with DEL */
857 c=0x7f;
858 }
859 if(!u_iscntrl(c)) {
860 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
861 }
862 if(!u_isISOControl(c)) {
863 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
864 }
865 if(u_isprint(c)) {
866 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
867 }
868 }
869
870 /* test all Latin-1 graphic characters */
871 for(c=0x20; c<=0xff; ++c) {
872 if(c==0x7f) {
873 c=0xa0;
874 } else if(c==0xad) {
875 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
876 ++c;
877 }
878 if(!u_isprint(c)) {
879 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
880 }
881 }
882}
883
884/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
885static void TestIdentifier()
886{
729e4ab9
A
887 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
888 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
889 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
890 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
891 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
892 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
893 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
894 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
895 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
896 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
897
898 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
899 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
900 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
901 sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
902
903 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
904 sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
905 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
906 sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
907
908 /* IDPart should imply IDStart */
909 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
910 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
911
912 testSampleCharProps(u_isIDStart, "u_isIDStart",
913 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
914 testSampleCharProps(u_isIDStart, "u_isIDStart",
915 sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
916
917 testSampleCharProps(u_isIDPart, "u_isIDPart",
918 sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
919 testSampleCharProps(u_isIDPart, "u_isIDPart",
920 sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
921
922 /* IDPart should imply IDStart */
923 testSampleCharProps(u_isIDPart, "u_isIDPart",
924 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
925
926 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
927 sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
928 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
929 sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
b75a7d8f
A
930}
931
932/* for each line of UnicodeData.txt, check some of the properties */
4388f060
A
933typedef struct UnicodeDataContext {
934#if UCONFIG_NO_NORMALIZATION
935 const void *dummy;
936#else
937 const UNormalizer2 *nfc;
938 const UNormalizer2 *nfkc;
939#endif
940} UnicodeDataContext;
941
b75a7d8f
A
942/*
943 * ### TODO
944 * This test fails incorrectly if the First or Last code point of a repetitive area
945 * is overridden, which is allowed and is encouraged for the PUAs.
946 * Currently, this means that both area First/Last and override lines are
947 * tested against the properties from the API,
948 * and the area boundary will not match and cause an error.
949 *
950 * This function should detect area boundaries and skip them for the test of individual
951 * code points' properties.
952 * Then it should check that the areas contain all the same properties except where overridden.
953 * For this, it would have had to set a flag for which code points were listed explicitly.
954 */
955static void U_CALLCONV
956unicodeDataLineFn(void *context,
957 char *fields[][2], int32_t fieldCount,
958 UErrorCode *pErrorCode)
959{
960 char buffer[100];
4388f060 961 const char *d;
b75a7d8f
A
962 char *end;
963 uint32_t value;
964 UChar32 c;
965 int32_t i;
966 int8_t type;
4388f060
A
967 int32_t dt;
968 UChar dm[32], s[32];
969 int32_t dmLength, length;
970
971#if !UCONFIG_NO_NORMALIZATION
972 const UNormalizer2 *nfc, *nfkc;
973#endif
b75a7d8f
A
974
975 /* get the character code, field 0 */
976 c=strtoul(fields[0][0], &end, 16);
977 if(end<=fields[0][0] || end!=fields[0][1]) {
978 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
979 return;
980 }
981 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
982 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
983 return;
984 }
985
986 /* get general category, field 2 */
987 *fields[2][1]=0;
988 type = (int8_t)tagValues[MakeProp(fields[2][0])];
989 if(u_charType(c)!=type) {
990 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
991 }
992 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
993 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
994 }
995
996 /* get canonical combining class, field 3 */
997 value=strtoul(fields[3][0], &end, 10);
998 if(end<=fields[3][0] || end!=fields[3][1]) {
999 log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1000 return;
1001 }
1002 if(value>255) {
1003 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1004 return;
1005 }
1006#if !UCONFIG_NO_NORMALIZATION
1007 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1008 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1009 }
4388f060
A
1010 nfkc=((UnicodeDataContext *)context)->nfkc;
1011 if(value!=unorm2_getCombiningClass(nfkc, c)) {
1012 log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1013 }
b75a7d8f
A
1014#endif
1015
1016 /* get BiDi category, field 4 */
1017 *fields[4][1]=0;
1018 i=MakeDir(fields[4][0]);
1019 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1020 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1021 }
1022
4388f060
A
1023 /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1024 d=NULL;
1025 if(fields[5][0]==fields[5][1]) {
1026 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1027 if(c==0xac00 || c==0xd7a3) {
1028 dt=U_DT_CANONICAL;
1029 } else {
1030 dt=U_DT_NONE;
1031 }
1032 } else {
1033 d=fields[5][0];
1034 *fields[5][1]=0;
1035 dt=UCHAR_INVALID_CODE;
1036 if(*d=='<') {
1037 end=strchr(++d, '>');
1038 if(end!=NULL) {
1039 *end=0;
1040 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1041 d=u_skipWhitespace(end+1);
1042 }
1043 } else {
1044 dt=U_DT_CANONICAL;
1045 }
1046 }
1047 if(dt>U_DT_NONE) {
1048 if(c==0xac00) {
1049 dm[0]=0x1100;
1050 dm[1]=0x1161;
1051 dm[2]=0;
1052 dmLength=2;
1053 } else if(c==0xd7a3) {
1054 dm[0]=0xd788;
1055 dm[1]=0x11c2;
1056 dm[2]=0;
1057 dmLength=2;
1058 } else {
1059 dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1060 }
1061 } else {
1062 dmLength=-1;
1063 }
1064 if(dt<0 || U_FAILURE(*pErrorCode)) {
1065 log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1066 return;
1067 }
1068#if !UCONFIG_NO_NORMALIZATION
1069 i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1070 if(i!=dt) {
1071 log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1072 }
1073 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1074 length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1075 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1076 log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1077 "or the Decomposition_Mapping is different (%s)\n",
1078 c, length, dmLength, u_errorName(*pErrorCode));
1079 return;
1080 }
1081 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1082 if(dt!=U_DT_CANONICAL) {
1083 dmLength=-1;
1084 }
1085 nfc=((UnicodeDataContext *)context)->nfc;
1086 length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1087 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1088 log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1089 "or the Decomposition_Mapping is different (%s)\n",
1090 c, length, dmLength, u_errorName(*pErrorCode));
1091 return;
1092 }
1093 /* recompose */
1094 if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1095 UChar32 a, b, composite;
1096 i=0;
1097 U16_NEXT(dm, i, dmLength, a);
1098 U16_NEXT(dm, i, dmLength, b);
1099 /* i==dmLength */
1100 composite=unorm2_composePair(nfc, a, b);
1101 if(composite!=c) {
1102 log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1103 (long)c, (long)a, (long)b, (long)composite);
1104 }
1105 /*
1106 * Note: NFKC has fewer round-trip mappings than NFC,
1107 * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1108 */
1109 }
1110#endif
1111
b75a7d8f
A
1112 /* get ISO Comment, field 11 */
1113 *fields[11][1]=0;
1114 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1115 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
729e4ab9 1116 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
b75a7d8f
A
1117 c, u_errorName(*pErrorCode),
1118 U_FAILURE(*pErrorCode) ? buffer : "[error]",
1119 fields[11][0]);
1120 }
1121
1122 /* get uppercase mapping, field 12 */
1123 if(fields[12][0]!=fields[12][1]) {
1124 value=strtoul(fields[12][0], &end, 16);
1125 if(end!=fields[12][1]) {
1126 log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1127 return;
1128 }
1129 if((UChar32)value!=u_toupper(c)) {
1130 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1131 }
1132 } else {
1133 /* no case mapping: the API must map the code point to itself */
1134 if(c!=u_toupper(c)) {
1135 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1136 }
1137 }
1138
1139 /* get lowercase mapping, field 13 */
1140 if(fields[13][0]!=fields[13][1]) {
1141 value=strtoul(fields[13][0], &end, 16);
1142 if(end!=fields[13][1]) {
1143 log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1144 return;
1145 }
1146 if((UChar32)value!=u_tolower(c)) {
1147 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1148 }
1149 } else {
1150 /* no case mapping: the API must map the code point to itself */
1151 if(c!=u_tolower(c)) {
1152 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1153 }
1154 }
1155
1156 /* get titlecase mapping, field 14 */
1157 if(fields[14][0]!=fields[14][1]) {
1158 value=strtoul(fields[14][0], &end, 16);
1159 if(end!=fields[14][1]) {
1160 log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1161 return;
1162 }
1163 if((UChar32)value!=u_totitle(c)) {
1164 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1165 }
1166 } else {
1167 /* no case mapping: the API must map the code point to itself */
1168 if(c!=u_totitle(c)) {
1169 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1170 }
1171 }
1172}
1173
1174static UBool U_CALLCONV
1175enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1176 static const UChar32 test[][2]={
1177 {0x41, U_UPPERCASE_LETTER},
1178 {0x308, U_NON_SPACING_MARK},
1179 {0xfffe, U_GENERAL_OTHER_TYPES},
1180 {0xe0041, U_FORMAT_CHAR},
1181 {0xeffff, U_UNASSIGNED}
1182 };
1183
374ca955 1184 int32_t i, count;
b75a7d8f
A
1185
1186 if(0!=strcmp((const char *)context, "a1")) {
1187 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1188 return FALSE;
1189 }
1190
374ca955 1191 count=LENGTHOF(test);
b75a7d8f
A
1192 for(i=0; i<count; ++i) {
1193 if(start<=test[i][0] && test[i][0]<limit) {
1194 if(type!=(UCharCategory)test[i][1]) {
1195 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1196 start, limit, (long)type, test[i][0], test[i][1]);
1197 }
374ca955 1198 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
b75a7d8f
A
1199 return i==(count-1) ? FALSE : TRUE;
1200 }
1201 }
1202
1203 if(start>test[count-1][0]) {
1204 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1205 start, limit, (long)type);
1206 return FALSE;
1207 }
1208
374ca955
A
1209 return TRUE;
1210}
1211
1212static UBool U_CALLCONV
1213enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
4388f060 1214 /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
374ca955
A
1215 static const int32_t defaultBidi[][2]={ /* { limit, class } */
1216 { 0x0590, U_LEFT_TO_RIGHT },
1217 { 0x0600, U_RIGHT_TO_LEFT },
1218 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
4388f060
A
1219 { 0x08A0, U_RIGHT_TO_LEFT },
1220 { 0x0900, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
57a6839d
A
1221 { 0x20A0, U_LEFT_TO_RIGHT },
1222 { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
374ca955
A
1223 { 0xFB1D, U_LEFT_TO_RIGHT },
1224 { 0xFB50, U_RIGHT_TO_LEFT },
1225 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1226 { 0xFE70, U_LEFT_TO_RIGHT },
1227 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1228 { 0x10800, U_LEFT_TO_RIGHT },
1229 { 0x11000, U_RIGHT_TO_LEFT },
729e4ab9 1230 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
4388f060
A
1231 { 0x1EE00, U_RIGHT_TO_LEFT },
1232 { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
729e4ab9 1233 { 0x1F000, U_RIGHT_TO_LEFT },
374ca955
A
1234 { 0x110000, U_LEFT_TO_RIGHT }
1235 };
1236
1237 UChar32 c;
1238 int32_t i;
1239 UCharDirection shouldBeDir;
1240
b75a7d8f
A
1241 /*
1242 * LineBreak.txt specifies:
1243 * # - Assigned characters that are not listed explicitly are given the value
1244 * # "AL".
1245 * # - Unassigned characters are given the value "XX".
1246 *
1247 * PUA characters are listed explicitly with "XX".
1248 * Verify that no assigned character has "XX".
1249 */
1250 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1251 c=start;
1252 while(c<limit) {
1253 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1254 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1255 }
1256 ++c;
1257 }
1258 }
1259
1260 /*
1261 * Verify default Bidi classes.
374ca955
A
1262 * For recent Unicode versions, see UCD.html.
1263 *
1264 * For older Unicode versions:
b75a7d8f
A
1265 * See table 3-7 "Bidirectional Character Types" in UAX #9.
1266 * http://www.unicode.org/reports/tr9/
1267 *
1268 * See also DerivedBidiClass.txt for Cn code points!
374ca955
A
1269 *
1270 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1271 * changed some default values.
1272 * In particular, non-characters and unassigned Default Ignorable Code Points
1273 * change from L to BN.
1274 *
1275 * UCD.html version 4.0.1 does not yet reflect these changes.
b75a7d8f
A
1276 */
1277 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1278 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1279 c=start;
1280 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1281 if((int32_t)c<defaultBidi[i][0]) {
1282 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
374ca955
A
1283 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1284 shouldBeDir=U_BOUNDARY_NEUTRAL;
1285 } else {
1286 shouldBeDir=(UCharDirection)defaultBidi[i][1];
1287 }
1288
1289 if( u_charDirection(c)!=shouldBeDir ||
1290 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
b75a7d8f
A
1291 ) {
1292 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
374ca955 1293 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
b75a7d8f
A
1294 }
1295 ++c;
1296 }
1297 }
1298 }
1299 }
1300
1301 return TRUE;
1302}
1303
1304/* tests for several properties */
1305static void TestUnicodeData()
1306{
b75a7d8f
A
1307 UVersionInfo expectVersionArray;
1308 UVersionInfo versionArray;
1309 char *fields[15][2];
1310 UErrorCode errorCode;
1311 UChar32 c;
1312 int8_t type;
1313
4388f060
A
1314 UnicodeDataContext context;
1315
b75a7d8f
A
1316 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1317 u_getUnicodeVersion(versionArray);
1318 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1319 {
1320 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1321 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1322 }
1323
1324#if defined(ICU_UNICODE_VERSION)
1325 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1326 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1327 {
1328 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1329 }
1330#endif
1331
1332 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1333 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1334 }
1335
1336 errorCode=U_ZERO_ERROR;
4388f060
A
1337#if !UCONFIG_NO_NORMALIZATION
1338 context.nfc=unorm2_getNFCInstance(&errorCode);
1339 context.nfkc=unorm2_getNFKCInstance(&errorCode);
1340 if(U_FAILURE(errorCode)) {
1341 log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1342 return;
1343 }
1344#endif
1345 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
b75a7d8f 1346 if(U_FAILURE(errorCode)) {
b75a7d8f
A
1347 return; /* if we couldn't parse UnicodeData.txt, we should return */
1348 }
1349
1350 /* sanity check on repeated properties */
1351 for(c=0xfffe; c<=0x10ffff;) {
1352 type=u_charType(c);
1353 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1354 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1355 }
1356 if(type!=U_UNASSIGNED) {
1357 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1358 }
1359 if((c&0xffff)==0xfffe) {
1360 ++c;
1361 } else {
1362 c+=0xffff;
1363 }
1364 }
1365
1366 /* test that PUA is not "unassigned" */
1367 for(c=0xe000; c<=0x10fffd;) {
1368 type=u_charType(c);
1369 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1370 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1371 }
1372 if(type==U_UNASSIGNED) {
1373 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1374 } else if(type!=U_PRIVATE_USE_CHAR) {
1375 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1376 }
1377 if(c==0xf8ff) {
1378 c=0xf0000;
1379 } else if(c==0xffffd) {
1380 c=0x100000;
1381 } else {
1382 ++c;
1383 }
1384 }
1385
1386 /* test u_enumCharTypes() */
1387 u_enumCharTypes(enumTypeRange, "a1");
374ca955
A
1388
1389 /* check default properties */
1390 u_enumCharTypes(enumDefaultsRange, NULL);
b75a7d8f
A
1391}
1392
1393static void TestCodeUnit(){
1394 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1395
1396 int32_t i;
1397
1398 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1399 UChar c=codeunit[i];
1400 if(i<4){
1401 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1402 log_err("ERROR: U+%04x is a single", c);
1403 }
1404
1405 }
1406 if(i >= 4 && i< 8){
1407 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1408 log_err("ERROR: U+%04x is a first surrogate", c);
1409 }
1410 }
1411 if(i >= 8 && i< 12){
1412 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1413 log_err("ERROR: U+%04x is a second surrogate", c);
1414 }
1415 }
1416 }
1417
1418}
1419
1420static void TestCodePoint(){
1421 const UChar32 codePoint[]={
1422 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1423 0xd800,
1424 0xdbff,
1425 0xdc00,
1426 0xdfff,
1427 0xdc04,
1428 0xd821,
1429 /*not a surrogate, valid, isUnicodeChar , not Error*/
1430 0x20ac,
1431 0xd7ff,
1432 0xe000,
1433 0xe123,
1434 0x0061,
1435 0xe065,
1436 0x20402,
1437 0x24506,
1438 0x23456,
1439 0x20402,
1440 0x10402,
1441 0x23456,
1442 /*not a surrogate, not valid, isUnicodeChar, isError */
1443 0x0015,
1444 0x009f,
1445 /*not a surrogate, not valid, not isUnicodeChar, isError */
1446 0xffff,
1447 0xfffe,
1448 };
1449 int32_t i;
1450 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1451 UChar32 c=codePoint[i];
1452 if(i<6){
1453 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1454 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1455 }
1456 if(UTF_IS_VALID(c)){
1457 log_err("ERROR: isValid() failed for U+%04x\n", c);
1458 }
1459 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1460 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1461 }
1462 if(UTF_IS_ERROR(c)){
1463 log_err("ERROR: isError() failed for U+%04x\n", c);
1464 }
1465 }else if(i >=6 && i<18){
1466 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1467 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1468 }
1469 if(!UTF_IS_VALID(c)){
1470 log_err("ERROR: isValid() failed for U+%04x\n", c);
1471 }
1472 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1473 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1474 }
1475 if(UTF_IS_ERROR(c)){
1476 log_err("ERROR: isError() failed for U+%04x\n", c);
1477 }
1478 }else if(i >=18 && i<20){
1479 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1480 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1481 }
1482 if(UTF_IS_VALID(c)){
1483 log_err("ERROR: isValid() failed for U+%04x\n", c);
1484 }
1485 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1486 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1487 }
1488 if(!UTF_IS_ERROR(c)){
1489 log_err("ERROR: isError() failed for U+%04x\n", c);
1490 }
1491 }
1492 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1493 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1494 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1495 }
1496 if(UTF_IS_VALID(c)){
1497 log_err("ERROR: isValid() failed for U+%04x\n", c);
1498 }
1499 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1500 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1501 }
1502 if(!UTF_IS_ERROR(c)){
1503 log_err("ERROR: isError() failed for U+%04x\n", c);
1504 }
1505 }
1506 }
1507
374ca955
A
1508 if(
1509 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1510 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1511 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1512 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1513 ) {
1514 log_err("error with U_IS_BMP()\n");
1515 }
1516
1517 if(
1518 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1519 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1520 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1521 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1522 ) {
1523 log_err("error with U_IS_SUPPLEMENTARY()\n");
1524 }
b75a7d8f
A
1525}
1526
1527static void TestCharLength()
1528{
1529 const int32_t codepoint[]={
1530 1, 0x0061,
1531 1, 0xe065,
1532 1, 0x20ac,
1533 2, 0x20402,
1534 2, 0x23456,
1535 2, 0x24506,
1536 2, 0x20402,
1537 2, 0x10402,
1538 1, 0xd7ff,
1539 1, 0xe000
1540 };
1541
1542 int32_t i;
1543 UBool multiple;
1544 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1545 UChar32 c=codepoint[i+1];
1546 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
4388f060 1547 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
b75a7d8f
A
1548 }
1549 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1550 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1551 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1552 }
1553 }
1554}
1555
1556/*internal functions ----*/
1557static int32_t MakeProp(char* str)
1558{
1559 int32_t result = 0;
1560 char* matchPosition =0;
1561
1562 matchPosition = strstr(tagStrings, str);
1563 if (matchPosition == 0)
1564 {
1565 log_err("unrecognized type letter ");
1566 log_err(str);
1567 }
374ca955
A
1568 else
1569 result = (int32_t)((matchPosition - tagStrings) / 2);
b75a7d8f
A
1570 return result;
1571}
1572
1573static int32_t MakeDir(char* str)
1574{
1575 int32_t pos = 0;
57a6839d 1576 for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
b75a7d8f
A
1577 if (strcmp(str, dirStrings[pos]) == 0) {
1578 return pos;
1579 }
1580 }
1581 return -1;
1582}
1583
1584/* test u_charName() -------------------------------------------------------- */
1585
/*
 * Sample code points with their expected names for each name choice:
 * modern name, Unicode 1.0 name, extended name, and name alias
 * (NULL where the entry has no alias).
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061,
        "LATIN SMALL LETTER A", "",
        "LATIN SMALL LETTER A",
        NULL},
    {0x01a2,
        "LATIN CAPITAL LETTER OI", "",
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER GHA"},
    {0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        NULL},
    {0x0fd0,
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401,
        "CJK UNIFIED IDEOGRAPH-3401", "",
        "CJK UNIFIED IDEOGRAPH-3401",
        NULL},
    {0x7fed,
        "CJK UNIFIED IDEOGRAPH-7FED", "",
        "CJK UNIFIED IDEOGRAPH-7FED",
        NULL},
    {0xac00,
        "HANGUL SYLLABLE GA", "",
        "HANGUL SYLLABLE GA",
        NULL},
    {0xd7a3,
        "HANGUL SYLLABLE HIH", "",
        "HANGUL SYLLABLE HIH",
        NULL},
    {0xd800,
        "", "",
        "<lead surrogate-D800>",
        NULL},
    {0xdc00,
        "", "",
        "<trail surrogate-DC00>",
        NULL},
    {0xff08,
        "FULLWIDTH LEFT PARENTHESIS", "",
        "FULLWIDTH LEFT PARENTHESIS",
        NULL},
    {0xffe5,
        "FULLWIDTH YEN SIGN", "",
        "FULLWIDTH YEN SIGN",
        NULL},
    {0xffff,
        "", "",
        "<noncharacter-FFFF>",
        NULL},
    {0x1d0c5,
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456,
        "CJK UNIFIED IDEOGRAPH-23456", "",
        "CJK UNIFIED IDEOGRAPH-23456",
        NULL}
};
1613
1614static UBool
1615enumCharNamesFn(void *context,
1616 UChar32 code, UCharNameChoice nameChoice,
1617 const char *name, int32_t length) {
1618 int32_t *pCount=(int32_t *)context;
729e4ab9 1619 const char *expected;
b75a7d8f
A
1620 int i;
1621
1622 if(length<=0 || length!=(int32_t)strlen(name)) {
1623 /* should not be called with an empty string or invalid length */
1624 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1625 return TRUE;
1626 }
1627
1628 ++*pCount;
1629 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1630 if(code==(UChar32)names[i].code) {
1631 switch (nameChoice) {
1632 case U_EXTENDED_CHAR_NAME:
1633 if(0!=strcmp(name, names[i].extName)) {
1634 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1635 }
1636 break;
1637 case U_UNICODE_CHAR_NAME:
1638 if(0!=strcmp(name, names[i].name)) {
1639 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1640 }
1641 break;
1642 case U_UNICODE_10_CHAR_NAME:
729e4ab9
A
1643 expected=names[i].oldName;
1644 if(expected[0]==0 || 0!=strcmp(name, expected)) {
1645 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1646 }
1647 break;
1648 case U_CHAR_NAME_ALIAS:
1649 expected=names[i].alias;
1650 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1651 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
b75a7d8f
A
1652 }
1653 break;
1654 case U_CHAR_NAME_CHOICE_COUNT:
1655 break;
1656 }
1657 break;
1658 }
1659 }
1660 return TRUE;
1661}
1662
1663struct enumExtCharNamesContext {
1664 uint32_t length;
1665 int32_t last;
1666};
1667
1668static UBool
1669enumExtCharNamesFn(void *context,
1670 UChar32 code, UCharNameChoice nameChoice,
1671 const char *name, int32_t length) {
1672 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1673
1674 if (ecncp->last != (int32_t) code - 1) {
1675 if (ecncp->last < 0) {
1676 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1677 } else {
1678 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1679 }
1680 }
1681 ecncp->last = (int32_t) code;
1682
1683 if (!*name) {
1684 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1685 }
1686
1687 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1688}
1689
1690/**
1691 * This can be made more efficient by moving it into putil.c and having
1692 * it directly access the ebcdic translation tables.
1693 * TODO: If we get this method in putil.c, then delete it from here.
1694 */
1695static UChar
1696u_charToUChar(char c) {
1697 UChar uc;
1698 u_charsToUChars(&c, &uc, 1);
1699 return uc;
1700}
1701
1702static void
1703TestCharNames() {
1704 static char name[80];
1705 UErrorCode errorCode=U_ZERO_ERROR;
1706 struct enumExtCharNamesContext extContext;
729e4ab9 1707 const char *expected;
b75a7d8f
A
1708 int32_t length;
1709 UChar32 c;
1710 int32_t i;
1711
1712 log_verbose("Testing uprv_getMaxCharNameLength()\n");
1713 length=uprv_getMaxCharNameLength();
1714 if(length==0) {
1715 /* no names data available */
1716 return;
1717 }
1718 if(length<83) { /* Unicode 3.2 max char name length */
1719 log_err("uprv_getMaxCharNameLength()=%d is too short");
1720 }
1721 /* ### TODO same tests for max ISO comment length as for max name length */
1722
1723 log_verbose("Testing u_charName()\n");
1724 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1725 /* modern Unicode character name */
1726 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1727 if(U_FAILURE(errorCode)) {
1728 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1729 return;
1730 }
1731 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1732 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1733 }
1734
1735 /* find the modern name */
1736 if (*names[i].name) {
1737 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1738 if(U_FAILURE(errorCode)) {
1739 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1740 return;
1741 }
1742 if(c!=(UChar32)names[i].code) {
1743 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1744 }
1745 }
1746
1747 /* Unicode 1.0 character name */
1748 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1749 if(U_FAILURE(errorCode)) {
1750 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1751 return;
1752 }
1753 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1754 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1755 }
1756
1757 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1758 if(names[i].oldName[0]!=0 /* && length>0 */) {
1759 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1760 if(U_FAILURE(errorCode)) {
1761 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1762 return;
1763 }
1764 if(c!=(UChar32)names[i].code) {
1765 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1766 }
1767 }
729e4ab9
A
1768
1769 /* Unicode character name alias */
1770 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1771 if(U_FAILURE(errorCode)) {
1772 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1773 return;
1774 }
1775 expected=names[i].alias;
1776 if(expected==NULL) {
1777 expected="";
1778 }
1779 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1780 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1781 names[i].code, name, length, expected);
1782 }
1783
1784 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1785 if(expected[0]!=0 /* && length>0 */) {
1786 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1787 if(U_FAILURE(errorCode)) {
1788 log_err("u_charFromName(%s - alias) error %s\n",
1789 expected, u_errorName(errorCode));
1790 return;
1791 }
1792 if(c!=(UChar32)names[i].code) {
1793 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1794 expected, c, names[i].code);
1795 }
1796 }
b75a7d8f
A
1797 }
1798
1799 /* test u_enumCharNames() */
1800 length=0;
1801 errorCode=U_ZERO_ERROR;
1802 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1803 if(U_FAILURE(errorCode) || length<94140) {
1804 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1805 }
1806
1807 extContext.length = 0;
1808 extContext.last = -1;
1809 errorCode=U_ZERO_ERROR;
1810 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1811 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1812 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1813 }
1814
1815 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1816 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1817 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1818 }
1819
1820 /* Test getCharNameCharacters */
729e4ab9 1821 if(!getTestOption(QUICK_OPTION)) {
b75a7d8f
A
1822 enum { BUFSIZE = 256 };
1823 UErrorCode ec = U_ZERO_ERROR;
1824 char buf[BUFSIZE];
1825 int32_t maxLength;
1826 UChar32 cp;
1827 UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1828 int32_t l1, l2;
1829 UBool map[256];
1830 UBool ok;
1831
1832 USet* set = uset_open(1, 0); /* empty set */
1833 USet* dumb = uset_open(1, 0); /* empty set */
1834
1835 /*
1836 * uprv_getCharNameCharacters() will likely return more lowercase
1837 * letters than actual character names contain because
1838 * it includes all the characters in lowercased names of
1839 * general categories, for the full possible set of extended names.
1840 */
374ca955
A
1841 {
1842 USetAdder sa={
1843 NULL,
1844 uset_add,
1845 uset_addRange,
73c04bcf
A
1846 uset_addString,
1847 NULL /* don't need remove() */
374ca955
A
1848 };
1849 sa.set=set;
1850 uprv_getCharNameCharacters(&sa);
1851 }
b75a7d8f
A
1852
1853 /* build set the dumb (but sure-fire) way */
374ca955 1854 for (i=0; i<256; ++i) {
b75a7d8f 1855 map[i] = FALSE;
374ca955 1856 }
b75a7d8f
A
1857
1858 maxLength=0;
1859 for (cp=0; cp<0x110000; ++cp) {
1860 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1861 buf, BUFSIZE, &ec);
1862 if (U_FAILURE(ec)) {
1863 log_err("FAIL: u_charName failed when it shouldn't\n");
1864 uset_close(set);
1865 uset_close(dumb);
1866 return;
1867 }
1868 if(len>maxLength) {
1869 maxLength=len;
1870 }
1871
1872 for (i=0; i<len; ++i) {
1873 if (!map[(uint8_t) buf[i]]) {
1874 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1875 map[(uint8_t) buf[i]] = TRUE;
1876 }
1877 }
374ca955
A
1878
1879 /* test for leading/trailing whitespace */
1880 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1881 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1882 }
1883 }
1884
1885 if(map[(uint8_t)'\t']) {
1886 log_err("u_charName() returned a name with a TAB for some code point\n", cp);
b75a7d8f
A
1887 }
1888
1889 length=uprv_getMaxCharNameLength();
1890 if(length!=maxLength) {
1891 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1892 length, maxLength);
1893 }
1894
1895 /* compare the sets. Where is my uset_equals?!! */
1896 ok=TRUE;
1897 for(i=0; i<256; ++i) {
1898 if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1899 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1900 /* ignore lowercase a-z that are in set but not in dumb */
1901 ok=TRUE;
1902 } else {
1903 ok=FALSE;
1904 break;
1905 }
1906 }
1907 }
1908
1909 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1910 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1911 if (U_FAILURE(ec)) {
1912 log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1913 uset_close(set);
1914 uset_close(dumb);
1915 return;
1916 }
1917
1918 if (l1 >= BUFSIZE) {
1919 l1 = BUFSIZE-1;
1920 pat[l1] = 0;
1921 }
1922 if (l2 >= BUFSIZE) {
1923 l2 = BUFSIZE-1;
1924 dumbPat[l2] = 0;
1925 }
1926
1927 if (!ok) {
b75a7d8f 1928 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
374ca955 1929 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
729e4ab9 1930 } else if(getTestOption(VERBOSITY_OPTION)) {
374ca955 1931 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
b75a7d8f
A
1932 }
1933
1934 uset_close(set);
1935 uset_close(dumb);
1936 }
1937
1938 /* ### TODO: test error cases and other interesting things */
1939}
1940
1941/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1942
1943static void
1944TestMirroring() {
73c04bcf
A
1945 USet *set;
1946 UErrorCode errorCode;
1947
1948 UChar32 start, end, c2, c3;
1949 int32_t i;
1950
1951 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1952
1953 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1954
b75a7d8f
A
1955 log_verbose("Testing u_isMirrored()\n");
1956 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1957 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1958 )
1959 ) {
1960 log_err("u_isMirrored() does not work correctly\n");
1961 }
1962
1963 log_verbose("Testing u_charMirror()\n");
1964 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
73c04bcf 1965 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
46f4442e
A
1966 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1967 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1968 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
b75a7d8f
A
1969 )
1970 ) {
1971 log_err("u_charMirror() does not work correctly\n");
1972 }
73c04bcf
A
1973
1974 /* verify that Bidi_Mirroring_Glyph roundtrips */
1975 errorCode=U_ZERO_ERROR;
1976 set=uset_openPattern(mirroredPattern, 17, &errorCode);
1977
1978 if (U_FAILURE(errorCode)) {
729e4ab9 1979 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
73c04bcf
A
1980 } else {
1981 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1982 do {
1983 c2=u_charMirror(start);
1984 c3=u_charMirror(c2);
1985 if(c3!=start) {
1986 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1987 }
57a6839d
A
1988 c3=u_getBidiPairedBracket(start);
1989 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
1990 if(c3!=start) {
1991 log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
1992 (long)start);
1993 }
1994 } else {
1995 if(c3!=c2) {
1996 log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
1997 (long)start, (long)c2);
1998 }
1999 }
73c04bcf
A
2000 } while(++start<=end);
2001 }
2002 }
2003
2004 uset_close(set);
b75a7d8f
A
2005}
2006
2007
2008struct RunTestData
2009{
2010 const char *runText;
2011 UScriptCode runCode;
2012};
2013
2014typedef struct RunTestData RunTestData;
2015
2016static void
2017CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2018 const char *prefix)
2019{
2020 int32_t run, runStart, runLimit;
2021 UScriptCode runCode;
2022
2023 /* iterate over all the runs */
2024 run = 0;
2025 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2026 if (runStart != runStarts[run]) {
2027 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2028 prefix, run, runStarts[run], runStart);
2029 }
2030
2031 if (runLimit != runStarts[run + 1]) {
2032 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2033 prefix, run, runStarts[run + 1], runLimit);
2034 }
2035
2036 if (runCode != testData[run].runCode) {
2037 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2038 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2039 }
2040
2041 run += 1;
2042
2043 /* stop when we've seen all the runs we expect to see */
2044 if (run >= nRuns) {
2045 break;
2046 }
2047 }
2048
2049 /* Complain if we didn't see then number of runs we expected */
2050 if (run != nRuns) {
2051 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2052 }
2053}
2054
2055static void
2056TestUScriptRunAPI()
2057{
374ca955 2058 static const RunTestData testData1[] = {
b75a7d8f
A
2059 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2060 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2061 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2062 {"English (", USCRIPT_LATIN},
2063 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2064 {") ", USCRIPT_LATIN},
2065 {"\\u6F22\\u5B75", USCRIPT_HAN},
2066 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2067 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2068 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2069 };
374ca955
A
2070
2071 static const RunTestData testData2[] = {
2072 {"((((((((((abc))))))))))", USCRIPT_LATIN}
2073 };
2074
2075 static const struct {
2076 const RunTestData *testData;
2077 int32_t nRuns;
2078 } testDataEntries[] = {
2079 {testData1, LENGTHOF(testData1)},
2080 {testData2, LENGTHOF(testData2)}
2081 };
2082
2083 static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2084 int32_t testEntry;
2085
2086 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2087 UChar testString[1024];
2088 int32_t runStarts[256];
2089 int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2090 const RunTestData *testData = testDataEntries[testEntry].testData;
2091
2092 int32_t run, stringLimit;
2093 UScriptRun *scriptRun = NULL;
2094 UErrorCode err;
2095
2096 /*
2097 * Fill in the test string and the runStarts array.
2098 */
2099 stringLimit = 0;
2100 for (run = 0; run < nTestRuns; run += 1) {
2101 runStarts[run] = stringLimit;
2102 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2103 /*stringLimit -= 1;*/
2104 }
2105
2106 /* The limit of the last run */
2107 runStarts[nTestRuns] = stringLimit;
2108
2109 /*
2110 * Make sure that calling uscript_OpenRun with a NULL text pointer
2111 * and a non-zero text length returns the correct error.
2112 */
2113 err = U_ZERO_ERROR;
2114 scriptRun = uscript_openRun(NULL, stringLimit, &err);
2115
2116 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2117 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2118 }
2119
2120 if (scriptRun != NULL) {
2121 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2122 uscript_closeRun(scriptRun);
2123 }
2124
2125 /*
2126 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2127 * and a zero text length returns the correct error.
2128 */
2129 err = U_ZERO_ERROR;
2130 scriptRun = uscript_openRun(testString, 0, &err);
2131
2132 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2133 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2134 }
2135
2136 if (scriptRun != NULL) {
2137 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2138 uscript_closeRun(scriptRun);
2139 }
2140
2141 /*
2142 * Make sure that calling uscript_openRun with a NULL text pointer
2143 * and a zero text length doesn't return an error.
2144 */
2145 err = U_ZERO_ERROR;
2146 scriptRun = uscript_openRun(NULL, 0, &err);
2147
2148 if (U_FAILURE(err)) {
2149 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2150 }
2151
2152 /* Make sure that the empty iterator doesn't find any runs */
2153 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2154 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2155 }
2156
2157 /*
2158 * Make sure that calling uscript_setRunText with a NULL text pointer
2159 * and a non-zero text length returns the correct error.
2160 */
2161 err = U_ZERO_ERROR;
2162 uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2163
2164 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2165 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2166 }
2167
2168 /*
2169 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2170 * and a zero text length returns the correct error.
2171 */
2172 err = U_ZERO_ERROR;
2173 uscript_setRunText(scriptRun, testString, 0, &err);
2174
2175 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2176 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2177 }
2178
2179 /*
2180 * Now call uscript_setRunText on the empty iterator
2181 * and make sure that it works.
2182 */
2183 err = U_ZERO_ERROR;
2184 uscript_setRunText(scriptRun, testString, stringLimit, &err);
2185
2186 if (U_FAILURE(err)) {
2187 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2188 } else {
2189 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2190 }
2191
b75a7d8f 2192 uscript_closeRun(scriptRun);
374ca955
A
2193
2194 /*
2195 * Now open an interator over the testString
2196 * using uscript_openRun and make sure that it works
2197 */
2198 scriptRun = uscript_openRun(testString, stringLimit, &err);
2199
2200 if (U_FAILURE(err)) {
2201 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2202 } else {
2203 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2204 }
2205
2206 /* Now reset the iterator, and make sure
2207 * that it still works.
2208 */
2209 uscript_resetRun(scriptRun);
2210
2211 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2212
2213 /* Close the iterator */
b75a7d8f
A
2214 uscript_closeRun(scriptRun);
2215 }
b75a7d8f
A
2216}
2217
2218/* test additional, non-core properties */
2219static void
2220TestAdditionalProperties() {
2221 /* test data for u_charAge() */
2222 static const struct {
2223 UChar32 c;
2224 UVersionInfo version;
2225 } charAges[]={
2226 {0x41, { 1, 1, 0, 0 }},
2227 {0xffff, { 1, 1, 0, 0 }},
2228 {0x20ab, { 2, 0, 0, 0 }},
2229 {0x2fffe, { 2, 0, 0, 0 }},
2230 {0x20ac, { 2, 1, 0, 0 }},
2231 {0xfb1d, { 3, 0, 0, 0 }},
2232 {0x3f4, { 3, 1, 0, 0 }},
2233 {0x10300, { 3, 1, 0, 0 }},
2234 {0x220, { 3, 2, 0, 0 }},
2235 {0xff60, { 3, 2, 0, 0 }}
2236 };
2237
2238 /* test data for u_hasBinaryProperty() */
46f4442e 2239 static const int32_t
b75a7d8f
A
2240 props[][3]={ /* code point, property, value */
2241 { 0x0627, UCHAR_ALPHABETIC, TRUE },
2242 { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2243 { 0x2028, UCHAR_ALPHABETIC, FALSE },
2244
2245 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2246 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2247
2248 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2249 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2250
2251 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2252 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2253
46f4442e
A
2254 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2255 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2256 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2257 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2258 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2259
b75a7d8f
A
2260 { 0x058a, UCHAR_DASH, TRUE },
2261 { 0x007e, UCHAR_DASH, FALSE },
2262
2263 { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2264 { 0x3000, UCHAR_DIACRITIC, FALSE },
2265
2266 { 0x0e46, UCHAR_EXTENDER, TRUE },
2267 { 0x0020, UCHAR_EXTENDER, FALSE },
2268
2269#if !UCONFIG_NO_NORMALIZATION
2270 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2271 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2272 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
374ca955
A
2273
2274 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */
2275 { 0x0308, UCHAR_NFD_INERT, FALSE },
2276
2277 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */
2278 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */
2279
2280 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */
2281 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */
2282 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */
2283 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */
2284 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */
2285 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */
2286
2287 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */
2288 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */
2289
2290 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2291 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2292 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2293 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2294 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2295 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
b75a7d8f
A
2296#endif
2297
2298 { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2299 { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2300 { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2301
2302 { 0x30fb, UCHAR_HYPHEN, TRUE },
2303 { 0xfe58, UCHAR_HYPHEN, FALSE },
2304
2305 { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2306 { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2307 { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2308
2309 { 0x2172, UCHAR_ID_START, TRUE },
2310 { 0x007a, UCHAR_ID_START, TRUE },
2311 { 0x0039, UCHAR_ID_START, FALSE },
2312
2313 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2314 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2315 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2316
2317 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2318 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2319
2320 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2321 { 0x0345, UCHAR_LOWERCASE, TRUE },
2322 { 0x0030, UCHAR_LOWERCASE, FALSE },
2323
2324 { 0x1d7a9, UCHAR_MATH, TRUE },
2325 { 0x2135, UCHAR_MATH, TRUE },
2326 { 0x0062, UCHAR_MATH, FALSE },
2327
2328 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2329 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2330 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2331
2332 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2333 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2334 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2335
2336 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2337 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2338
2339 { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2340 { 0x2162, UCHAR_UPPERCASE, TRUE },
2341 { 0x0345, UCHAR_UPPERCASE, FALSE },
2342
2343 { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2344 { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2345 { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2346
2347 { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2348 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2349 { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2350
2351 { 0x16ee, UCHAR_XID_START, TRUE },
2352 { 0x23456, UCHAR_XID_START, TRUE },
2353 { 0x1d1aa, UCHAR_XID_START, FALSE },
2354
2355 /*
2356 * Version break:
2357 * The following properties are only supported starting with the
2358 * Unicode version indicated in the second field.
2359 */
374ca955 2360 { -1, 0x320, 0 },
b75a7d8f
A
2361
2362 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2363 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2364 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2365
729e4ab9
A
2366 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */
2367 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */
46f4442e
A
2368 { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */
2369 { 0xe0100, UCHAR_DEPRECATED, FALSE },
b75a7d8f
A
2370
2371 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2372 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
46f4442e
A
2373 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2374 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
b75a7d8f
A
2375
2376 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
46f4442e
A
2377 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2378 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
b75a7d8f
A
2379 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2380
2381 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2382 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2383
2384 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2385 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2386
2387 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2388 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2389
2390 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2391 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2392
2393 { 0x2e9b, UCHAR_RADICAL, TRUE },
2394 { 0x4e00, UCHAR_RADICAL, FALSE },
2395
2396 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2397 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2398
2399 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2400 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2401
73c04bcf 2402 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
374ca955
A
2403
2404 { 0x002e, UCHAR_S_TERM, TRUE },
2405 { 0x0061, UCHAR_S_TERM, FALSE },
2406
2407 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2408 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2409 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2410 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2411
b75a7d8f
A
2412 /* enum/integer type properties */
2413
2414 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2415 /* test default Bidi classes for unassigned code points */
2416 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
73c04bcf 2417 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
b75a7d8f 2418 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
73c04bcf
A
2419 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2420 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
4388f060 2421 { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
b75a7d8f
A
2422 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2423 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2424 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2425 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2426 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2427
46f4442e 2428 { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
b75a7d8f
A
2429 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2430 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2431 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2432 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2433 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2434 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2435 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2436
2437 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2438 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2439 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2440 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
374ca955 2441 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
b75a7d8f
A
2442 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2443 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2444 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
374ca955 2445 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
b75a7d8f 2446 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
374ca955 2447 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
b75a7d8f
A
2448
2449 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2450 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2451
2452 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2453 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2454 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2455 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2456 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2457 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2458 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2459 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2460 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2461
2462 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2463 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2464 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2465 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2466 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2467 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2468 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2469 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2470 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2471 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2472 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2473 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2474 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2475 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2476 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2477 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2478 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2479
2480 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
729e4ab9
A
2481 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2482 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */
b75a7d8f
A
2483
2484 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2485 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2486 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2487 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2488 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
b75a7d8f
A
2489
2490 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2491 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2492 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2493 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2494 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2495 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2496 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2497 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2498
2499 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2500 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2501 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2502 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2503 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2504 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2505 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
b75a7d8f
A
2506 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2507 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2508 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2509 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2510 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2511 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2512 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2513 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2514 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2515
2516 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2517
2518 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2519
729e4ab9 2520 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2521 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2522 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2523 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
729e4ab9
A
2524 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2525 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
b75a7d8f
A
2526 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2527
729e4ab9
A
2528 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2529 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2530 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */
2531 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2532
b75a7d8f
A
2533 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2534 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2535 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2536 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
729e4ab9
A
2537 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2538 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2539
2540 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2541 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2542 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */
2543 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2544
2545 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2546 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2547 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2548 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
729e4ab9
A
2549 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2550 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2551 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f 2552
729e4ab9
A
2553 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2554 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2555 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */
2556 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
b75a7d8f
A
2557
2558 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2559 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2560 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2561 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2562
2563 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2564 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2565 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2566 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2567 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2568
2569 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2570
73c04bcf
A
2571 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2572
2573 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2574 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2575 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2576
2577 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2578 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2579 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2580 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2581 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2582
2583 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2584 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC },
2585 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2586
2587 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE },
2588 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC },
2589 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2590 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2591
2592 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2593 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2594 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2595 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2596 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2597 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2598
2599 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2600 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2601 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2602 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2603
2604 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2605 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2606 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2607 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2608
2609 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2610 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2611 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2612 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2613
729e4ab9
A
2614 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2615
4388f060
A
2616 /* unassigned code points in new default Bidi R blocks */
2617 { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2618 { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2619
729e4ab9
A
2620 /* test some script codes >127 */
2621 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM },
2622 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU },
2623 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2624
2625 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2626
2627 /* value changed in Unicode 6.0 */
2628 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2629
4388f060
A
2630 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2631
2632 /* unassigned code points in new/changed default Bidi AL blocks */
2633 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2634 { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2635
57a6839d
A
2636 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2637
2638 /* unassigned code points in the currency symbols block now default to ET */
2639 { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2640 { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2641
2642 /* new property in Unicode 6.3 */
2643 { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2644 { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2645 { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2646 { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2647 { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2648 { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2649
b75a7d8f
A
2650 /* undefined UProperty values */
2651 { 0x61, 0x4a7, 0 },
2652 { 0x234bc, 0x15ed, 0 }
2653 };
2654
2655 UVersionInfo version;
2656 UChar32 c;
2657 int32_t i, result, uVersion;
2658 UProperty which;
2659
2660 /* what is our Unicode version? */
2661 u_getUnicodeVersion(version);
374ca955 2662 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
b75a7d8f
A
2663
2664 u_charAge(0x20, version);
2665 if(version[0]==0) {
2666 /* no additional properties available */
2667 log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2668 return;
2669 }
2670
2671 /* test u_charAge() */
2672 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2673 u_charAge(charAges[i].c, version);
2674 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2675 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2676 charAges[i].c,
2677 version[0], version[1], version[2], version[3],
2678 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2679 }
2680 }
2681
2682 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2683 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2684 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */
2685 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2686 u_getIntPropertyMinValue(0x2345)!=0
2687 ) {
2688 log_err("error: u_getIntPropertyMinValue() wrong\n");
2689 }
73c04bcf
A
2690 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2691 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2692 }
2693 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2694 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2695 }
46f4442e 2696 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
73c04bcf
A
2697 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2698 }
2699 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2700 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2701 }
2702 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2703 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2704 }
2705 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2706 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2707 }
2708 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2709 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2710 }
2711 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2712 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2713 }
2714 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2715 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2716 }
2717 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2718 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2719 }
2720 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2721 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2722 }
2723 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2724 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2725 }
2726 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2727 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2728 }
57a6839d
A
2729 if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2730 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2731 }
73c04bcf
A
2732 /*JB#2410*/
2733 if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2734 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2735 }
2736 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2737 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2738 }
2739 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) {
2740 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2741 }
2742 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2743 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2744 }
2745 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2746 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
b75a7d8f
A
2747 }
2748
2749 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2750 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
729e4ab9
A
2751 const char *whichName;
2752
b75a7d8f
A
2753 if(props[i][0]<0) {
2754 /* Unicode version break */
2755 if(uVersion<props[i][1]) {
2756 break; /* do not test properties that are not yet supported */
2757 } else {
2758 continue; /* skip this row */
2759 }
2760 }
2761
2762 c=(UChar32)props[i][0];
2763 which=(UProperty)props[i][1];
729e4ab9 2764 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
b75a7d8f
A
2765
2766 if(which<UCHAR_INT_START) {
2767 result=u_hasBinaryProperty(c, which);
2768 if(result!=props[i][2]) {
729e4ab9
A
2769 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2770 c, whichName, result, i);
b75a7d8f
A
2771 }
2772 }
2773
2774 result=u_getIntPropertyValue(c, which);
2775 if(result!=props[i][2]) {
729e4ab9
A
2776 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2777 c, whichName, result, props[i][2], i);
b75a7d8f
A
2778 }
2779
2780 /* test separate functions, too */
2781 switch((UProperty)props[i][1]) {
2782 case UCHAR_ALPHABETIC:
2783 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2784 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2785 props[i][0], result, i);
2786 }
2787 break;
2788 case UCHAR_LOWERCASE:
2789 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2790 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2791 props[i][0], result, i);
2792 }
2793 break;
2794 case UCHAR_UPPERCASE:
2795 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2796 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2797 props[i][0], result, i);
2798 }
2799 break;
2800 case UCHAR_WHITE_SPACE:
2801 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2802 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2803 props[i][0], result, i);
2804 }
2805 break;
2806 default:
2807 break;
2808 }
2809 }
2810}
2811
2812static void
2813TestNumericProperties(void) {
2814 /* see UnicodeData.txt, DerivedNumericValues.txt */
2815 static const struct {
2816 UChar32 c;
2817 int32_t type;
2818 double numValue;
2819 } values[]={
2820 { 0x0F33, U_NT_NUMERIC, -1./2. },
2821 { 0x0C66, U_NT_DECIMAL, 0 },
2822 { 0x96f6, U_NT_NUMERIC, 0 },
729e4ab9
A
2823 { 0xa833, U_NT_NUMERIC, 1./16. },
2824 { 0x2152, U_NT_NUMERIC, 1./10. },
2825 { 0x2151, U_NT_NUMERIC, 1./9. },
2826 { 0x1245f, U_NT_NUMERIC, 1./8. },
2827 { 0x2150, U_NT_NUMERIC, 1./7. },
b75a7d8f 2828 { 0x2159, U_NT_NUMERIC, 1./6. },
729e4ab9
A
2829 { 0x09f6, U_NT_NUMERIC, 3./16. },
2830 { 0x2155, U_NT_NUMERIC, 1./5. },
b75a7d8f
A
2831 { 0x00BD, U_NT_NUMERIC, 1./2. },
2832 { 0x0031, U_NT_DECIMAL, 1. },
2833 { 0x4e00, U_NT_NUMERIC, 1. },
2834 { 0x58f1, U_NT_NUMERIC, 1. },
2835 { 0x10320, U_NT_NUMERIC, 1. },
2836 { 0x0F2B, U_NT_NUMERIC, 3./2. },
2837 { 0x00B2, U_NT_DIGIT, 2. },
2838 { 0x5f10, U_NT_NUMERIC, 2. },
2839 { 0x1813, U_NT_DECIMAL, 3. },
2840 { 0x5f0e, U_NT_NUMERIC, 3. },
2841 { 0x2173, U_NT_NUMERIC, 4. },
2842 { 0x8086, U_NT_NUMERIC, 4. },
2843 { 0x278E, U_NT_DIGIT, 5. },
2844 { 0x1D7F2, U_NT_DECIMAL, 6. },
2845 { 0x247A, U_NT_DIGIT, 7. },
2846 { 0x7396, U_NT_NUMERIC, 9. },
2847 { 0x1372, U_NT_NUMERIC, 10. },
2848 { 0x216B, U_NT_NUMERIC, 12. },
2849 { 0x16EE, U_NT_NUMERIC, 17. },
2850 { 0x249A, U_NT_NUMERIC, 19. },
2851 { 0x303A, U_NT_NUMERIC, 30. },
2852 { 0x5345, U_NT_NUMERIC, 30. },
2853 { 0x32B2, U_NT_NUMERIC, 37. },
2854 { 0x1375, U_NT_NUMERIC, 40. },
2855 { 0x10323, U_NT_NUMERIC, 50. },
2856 { 0x0BF1, U_NT_NUMERIC, 100. },
2857 { 0x964c, U_NT_NUMERIC, 100. },
2858 { 0x217E, U_NT_NUMERIC, 500. },
2859 { 0x2180, U_NT_NUMERIC, 1000. },
2860 { 0x4edf, U_NT_NUMERIC, 1000. },
2861 { 0x2181, U_NT_NUMERIC, 5000. },
2862 { 0x137C, U_NT_NUMERIC, 10000. },
2863 { 0x4e07, U_NT_NUMERIC, 10000. },
51004dcb
A
2864 { 0x12432, U_NT_NUMERIC, 216000. },
2865 { 0x12433, U_NT_NUMERIC, 432000. },
b75a7d8f
A
2866 { 0x4ebf, U_NT_NUMERIC, 100000000. },
2867 { 0x5146, U_NT_NUMERIC, 1000000000000. },
729e4ab9 2868 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
b75a7d8f
A
2869 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2870 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2871 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2872 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2873 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
729e4ab9
A
2874 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2875 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
b75a7d8f
A
2876 };
2877
2878 double nv;
2879 UChar32 c;
2880 int32_t i, type;
2881
2882 for(i=0; i<LENGTHOF(values); ++i) {
2883 c=values[i].c;
2884 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2885 nv=u_getNumericValue(c);
2886
2887 if(type!=values[i].type) {
2888 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2889 }
2890 if(0.000001 <= fabs(nv - values[i].numValue)) {
2891 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2892 }
2893 }
2894}
2895
2896/**
2897 * Test the property names and property value names API.
2898 */
2899static void
2900TestPropertyNames(void) {
2901 int32_t p, v, choice=0, rev;
2902 UBool atLeastSomething = FALSE;
2903
2904 for (p=0; ; ++p) {
46f4442e 2905 UProperty propEnum = (UProperty)p;
b75a7d8f
A
2906 UBool sawProp = FALSE;
2907 if(p > 10 && !atLeastSomething) {
2908 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2909 return;
2910 }
2911
2912 for (choice=0; ; ++choice) {
46f4442e 2913 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
b75a7d8f 2914 if (name) {
46f4442e
A
2915 if (!sawProp)
2916 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
b75a7d8f
A
2917 log_verbose("%d=\"%s\"", choice, name);
2918 sawProp = TRUE;
2919 atLeastSomething = TRUE;
2920
2921 /* test reverse mapping */
2922 rev = u_getPropertyEnum(name);
2923 if (rev != p) {
2924 log_err("Property round-trip failure: %d -> %s -> %d\n",
2925 p, name, rev);
2926 }
2927 }
2928 if (!name && choice>0) break;
2929 }
2930 if (sawProp) {
2931 /* looks like a valid property; check the values */
46f4442e 2932 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
b75a7d8f
A
2933 int32_t max = 0;
2934 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2935 max = 255;
2936 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2937 /* it's far too slow to iterate all the way up to
2938 the real max, U_GC_P_MASK */
2939 max = U_GC_NL_MASK;
2940 } else if (p == UCHAR_BLOCK) {
2941 /* UBlockCodes, unlike other values, start at 1 */
2942 max = 1;
2943 }
2944 log_verbose("\n");
2945 for (v=-1; ; ++v) {
2946 UBool sawValue = FALSE;
2947 for (choice=0; ; ++choice) {
46f4442e 2948 const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
b75a7d8f
A
2949 if (vname) {
2950 if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2951 log_verbose("%d=\"%s\"", choice, vname);
2952 sawValue = TRUE;
2953
2954 /* test reverse mapping */
46f4442e 2955 rev = u_getPropertyValueEnum(propEnum, vname);
b75a7d8f
A
2956 if (rev != v) {
2957 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2958 pname, v, vname, rev);
2959 }
2960 }
2961 if (!vname && choice>0) break;
2962 }
2963 if (sawValue) {
2964 log_verbose("\n");
2965 }
2966 if (!sawValue && v>=max) break;
2967 }
2968 }
2969 if (!sawProp) {
2970 if (p>=UCHAR_STRING_LIMIT) {
2971 break;
2972 } else if (p>=UCHAR_DOUBLE_LIMIT) {
2973 p = UCHAR_STRING_START - 1;
2974 } else if (p>=UCHAR_MASK_LIMIT) {
2975 p = UCHAR_DOUBLE_START - 1;
2976 } else if (p>=UCHAR_INT_LIMIT) {
2977 p = UCHAR_MASK_START - 1;
2978 } else if (p>=UCHAR_BINARY_LIMIT) {
2979 p = UCHAR_INT_START - 1;
2980 }
2981 }
2982 }
2983}
2984
2985/**
2986 * Test the property values API. See JB#2410.
2987 */
2988static void
2989TestPropertyValues(void) {
2990 int32_t i, p, min, max;
2991 UErrorCode ec;
2992
2993 /* Min should be 0 for everything. */
2994 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2995 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
46f4442e
A
2996 UProperty propEnum = (UProperty)p;
2997 min = u_getIntPropertyMinValue(propEnum);
b75a7d8f
A
2998 if (min != 0) {
2999 if (p == UCHAR_BLOCK) {
3000 /* This is okay...for now. See JB#2487.
3001 TODO Update this for JB#2487. */
3002 } else {
3003 const char* name;
46f4442e
A
3004 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3005 if (name == NULL)
3006 name = "<ERROR>";
b75a7d8f
A
3007 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3008 name, min);
3009 }
3010 }
3011 }
3012
3013 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3014 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3015 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3016 }
3017
3018 /* Max should be -1 for invalid properties. */
46f4442e 3019 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
b75a7d8f
A
3020 if (max != -1) {
3021 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3022 max);
3023 }
3024
73c04bcf 3025 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
b75a7d8f
A
3026 for (i=0; i<2; ++i) {
3027 int32_t script;
3028 const char* desc;
3029 ec = U_ZERO_ERROR;
3030 switch (i) {
3031 case 0:
3032 script = uscript_getScript(-1, &ec);
3033 desc = "uscript_getScript(-1)";
3034 break;
3035 case 1:
3036 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3037 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3038 break;
3039 default:
3040 log_err("Internal test error. Too many scripts\n");
3041 return;
3042 }
3043 /* We don't explicitly test ec. It should be U_FAILURE but it
3044 isn't documented as such. */
73c04bcf 3045 if (script != (int32_t)USCRIPT_INVALID_CODE) {
b75a7d8f
A
3046 log_err("FAIL: %s = %d, exp. 0\n",
3047 desc, script);
3048 }
3049 }
3050}
3051
b75a7d8f
A
3052/* various tests for consistency of UCD data and API behavior */
3053static void
3054TestConsistency() {
b75a7d8f
A
3055 char buffer[300];
3056 USet *set1, *set2, *set3, *set4;
3057 UErrorCode errorCode;
3058
b75a7d8f
A
3059 UChar32 start, end;
3060 int32_t i, length;
3061
3062 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3063 U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3064 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3065 U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3066 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3067
73c04bcf
A
3068 U_STRING_DECL(mathBlocksPattern,
3069 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
57a6839d 3070 214);
73c04bcf
A
3071 U_STRING_DECL(mathPattern, "[:Math:]", 8);
3072 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3073 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3074 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3075
b75a7d8f
A
3076 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3077 U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3078 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3079 U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3080 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3081
73c04bcf
A
3082 U_STRING_INIT(mathBlocksPattern,
3083 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
57a6839d 3084 214);
73c04bcf
A
3085 U_STRING_INIT(mathPattern, "[:Math:]", 8);
3086 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3087 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3088 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3089
b75a7d8f
A
3090 /*
3091 * It used to be that UCD.html and its precursors said
3092 * "Those dashes used to mark connections between pieces of words,
3093 * plus the Katakana middle dot."
3094 *
3095 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3096 * but not from Hyphen.
729e4ab9 3097 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
b75a7d8f
A
3098 * Therefore, do not show errors when testing the Hyphen property.
3099 */
3100 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3101 "known to the UTC and not considered errors.\n");
3102
3103 errorCode=U_ZERO_ERROR;
3104 set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3105 set2=uset_openPattern(dashPattern, 8, &errorCode);
3106 if(U_SUCCESS(errorCode)) {
3107 /* remove the Katakana middle dot(s) from set1 */
3108 uset_remove(set1, 0x30fb);
3109 uset_remove(set1, 0xff65); /* halfwidth variant */
3110 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3111 } else {
729e4ab9 3112 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
3113 }
3114
3115 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3116 set3=uset_openPattern(formatPattern, 6, &errorCode);
3117 set4=uset_openPattern(alphaPattern, 14, &errorCode);
3118 if(U_SUCCESS(errorCode)) {
3119 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3120 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3121 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3122 } else {
729e4ab9 3123 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
3124 }
3125
3126 uset_close(set1);
3127 uset_close(set2);
3128 uset_close(set3);
3129 uset_close(set4);
3130
3131 /*
3132 * Check that each lowercase character has "small" in its name
3133 * and not "capital".
3134 * There are some such characters, some of which seem odd.
3135 * Use the verbose flag to see these notices.
3136 */
3137 errorCode=U_ZERO_ERROR;
3138 set1=uset_openPattern(lowerPattern, 13, &errorCode);
3139 if(U_SUCCESS(errorCode)) {
3140 for(i=0;; ++i) {
3141 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3142 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3143 break; /* done */
3144 }
3145 if(U_FAILURE(errorCode)) {
3146 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3147 i, u_errorName(errorCode));
3148 break;
3149 }
3150 if(length!=0) {
3151 break; /* done with code points, got a string or -1 */
3152 }
3153
3154 while(start<=end) {
3155 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3156 if(U_FAILURE(errorCode)) {
4388f060 3157 log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
b75a7d8f 3158 errorCode=U_ZERO_ERROR;
b75a7d8f
A
3159 }
3160 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3161 strstr(buffer, "SMALL CAPITAL")==NULL
3162 ) {
3163 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3164 }
3165 ++start;
3166 }
3167 }
3168 } else {
729e4ab9 3169 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f 3170 }
b75a7d8f 3171 uset_close(set1);
73c04bcf
A
3172
3173 /* verify that all assigned characters in Math blocks are exactly Math characters */
3174 errorCode=U_ZERO_ERROR;
3175 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3176 set2=uset_openPattern(mathPattern, 8, &errorCode);
3177 set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3178 if(U_SUCCESS(errorCode)) {
3179 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3180 uset_complement(set3); /* assigned characters */
3181 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3182 compareUSets(set1, set2,
3183 "[assigned Math block chars]", "[math blocks]&[:Math:]",
3184 TRUE);
3185 } else {
729e4ab9 3186 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
73c04bcf
A
3187 }
3188 uset_close(set1);
3189 uset_close(set2);
3190 uset_close(set3);
3191
3192 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3193 errorCode=U_ZERO_ERROR;
3194 set1=uset_openPattern(unknownPattern, 14, &errorCode);
3195 set2=uset_openPattern(reservedPattern, 20, &errorCode);
3196 if(U_SUCCESS(errorCode)) {
3197 compareUSets(set1, set2,
3198 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3199 TRUE);
3200 } else {
729e4ab9 3201 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
73c04bcf
A
3202 }
3203 uset_close(set1);
3204 uset_close(set2);
b75a7d8f 3205}
374ca955 3206
73c04bcf
A
3207/*
3208 * Starting with ICU4C 3.4, the core Unicode properties files
3209 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
3210 * are hardcoded in the common DLL and therefore not included
3211 * in the data package any more.
3212 * Tests requiring these files are disabled so that
3213 * we need not jump through hoops (like adding snapshots of these files
3214 * to testdata).
3215 * See Jitterbug 4497.
3216 */
3217#define HARDCODED_DATA_4497 1
3218
374ca955
A
3219/* API coverage for ucase.c */
3220static void TestUCase() {
73c04bcf 3221#if !HARDCODED_DATA_4497
374ca955
A
3222 UDataMemory *pData;
3223 UCaseProps *csp;
73c04bcf 3224 const UCaseProps *ccsp;
374ca955
A
3225 UErrorCode errorCode;
3226
3227 /* coverage for ucase_openBinary() */
3228 errorCode=U_ZERO_ERROR;
3229 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3230 if(U_FAILURE(errorCode)) {
3231 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3232 u_errorName(errorCode));
3233 return;
3234 }
3235
3236 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3237 if(U_FAILURE(errorCode)) {
3238 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3239 u_errorName(errorCode));
3240 udata_close(pData);
3241 return;
3242 }
3243
3244 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3245 log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3246 }
3247
3248 ucase_close(csp);
3249 udata_close(pData);
73c04bcf
A
3250
3251 /* coverage for ucase_getDummy() */
3252 errorCode=U_ZERO_ERROR;
3253 ccsp=ucase_getDummy(&errorCode);
3254 if(ucase_tolower(ccsp, 0x41)!=0x41) {
3255 log_err("ucase_tolower(dummy, A)!=A\n");
3256 }
46f4442e 3257#endif
73c04bcf
A
3258}
3259
3260/* API coverage for ubidi_props.c */
3261static void TestUBiDiProps() {
3262#if !HARDCODED_DATA_4497
3263 UDataMemory *pData;
3264 UBiDiProps *bdp;
73c04bcf
A
3265 const UBiDiProps *cbdp;
3266 UErrorCode errorCode;
3267
73c04bcf
A
3268 /* coverage for ubidi_openBinary() */
3269 errorCode=U_ZERO_ERROR;
3270 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3271 if(U_FAILURE(errorCode)) {
3272 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3273 u_errorName(errorCode));
3274 return;
3275 }
3276
3277 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3278 if(U_FAILURE(errorCode)) {
3279 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3280 u_errorName(errorCode));
3281 udata_close(pData);
3282 return;
3283 }
3284
3285 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3286 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3287 }
3288
3289 ubidi_closeProps(bdp);
3290 udata_close(pData);
73c04bcf
A
3291
3292 /* coverage for ubidi_getDummy() */
3293 errorCode=U_ZERO_ERROR;
3294 cbdp=ubidi_getDummy(&errorCode);
3295 if(ubidi_getClass(cbdp, 0x20)!=0) {
3296 log_err("ubidi_getClass(dummy, space)!=0\n");
3297 }
46f4442e 3298#endif
73c04bcf
A
3299}
3300
3301/* test case folding, compare return values with CaseFolding.txt ------------ */
3302
/*
 * Bit flags recording which kinds of case folding have already been
 * tested for a character; CF_ALL is the union of the three kinds.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3310
3311static void
3312testFold(UChar32 c, int which,
3313 UChar32 simple, UChar32 turkic,
3314 const UChar *full, int32_t fullLength,
3315 const UChar *turkicFull, int32_t turkicFullLength) {
3316 UChar s[2], t[32];
3317 UChar32 c2;
3318 int32_t length, length2;
3319
3320 UErrorCode errorCode=U_ZERO_ERROR;
3321
3322 length=0;
3323 U16_APPEND_UNSAFE(s, length, c);
3324
3325 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3326 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3327 }
3328 if((which&CF_FULL)!=0) {
3329 length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3330 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3331 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3332 }
3333 }
3334 if((which&CF_TURKIC)!=0) {
3335 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3336 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3337 }
3338
3339 length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3340 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3341 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3342 }
3343 }
3344}
3345
3346/* test that c case-folds to itself */
3347static void
3348testFoldToSelf(UChar32 c, int which) {
3349 UChar s[2];
3350 int32_t length;
3351
3352 length=0;
3353 U16_APPEND_UNSAFE(s, length, c);
3354 testFold(c, which, c, c, s, length, s, length);
3355}
3356
3357struct CaseFoldingData {
3358 USet *notSeen;
3359 UChar32 prev, prevSimple;
3360 UChar prevFull[32];
3361 int32_t prevFullLength;
3362 int which;
3363};
3364typedef struct CaseFoldingData CaseFoldingData;
3365
3366static void U_CALLCONV
3367caseFoldingLineFn(void *context,
3368 char *fields[][2], int32_t fieldCount,
3369 UErrorCode *pErrorCode) {
3370 CaseFoldingData *pData=(CaseFoldingData *)context;
3371 char *end;
3372 UChar full[32];
3373 UChar32 c, prev, simple;
3374 int32_t count;
3375 int which;
3376 char status;
3377
3378 /* get code point */
4388f060
A
3379 const char *s=u_skipWhitespace(fields[0][0]);
3380 if(0==strncmp(s, "0000..10FFFF", 12)) {
3381 /*
3382 * Ignore the line
3383 * # @missing: 0000..10FFFF; C; <code point>
3384 * because maps-to-self is already our default, and this line breaks this parser.
3385 */
3386 return;
3387 }
3388 c=(UChar32)strtoul(s, &end, 16);
73c04bcf
A
3389 end=(char *)u_skipWhitespace(end);
3390 if(end<=fields[0][0] || end!=fields[0][1]) {
3391 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3392 *pErrorCode=U_PARSE_ERROR;
3393 return;
3394 }
3395
3396 /* get the status of this mapping */
3397 status=*u_skipWhitespace(fields[1][0]);
3398 if(status!='C' && status!='S' && status!='F' && status!='T') {
3399 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3400 *pErrorCode=U_PARSE_ERROR;
3401 return;
3402 }
3403
3404 /* get the mapping */
3405 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3406 if(U_FAILURE(*pErrorCode)) {
3407 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3408 return;
3409 }
3410
3411 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3412 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3413 simple=c;
3414 }
3415
3416 if(c!=(prev=pData->prev)) {
3417 /*
3418 * Test remaining mappings for the previous code point.
3419 * If a turkic folding was not mentioned, then it should fold the same
3420 * as the regular simple case folding.
3421 */
4388f060 3422 UChar prevString[2];
73c04bcf
A
3423 int32_t length;
3424
3425 length=0;
4388f060 3426 U16_APPEND_UNSAFE(prevString, length, prev);
73c04bcf
A
3427 testFold(prev, (~pData->which)&CF_ALL,
3428 prev, pData->prevSimple,
4388f060 3429 prevString, length,
73c04bcf
A
3430 pData->prevFull, pData->prevFullLength);
3431 pData->prev=pData->prevSimple=c;
3432 length=0;
3433 U16_APPEND_UNSAFE(pData->prevFull, length, c);
3434 pData->prevFullLength=length;
3435 pData->which=0;
3436 }
3437
3438 /*
3439 * Turn the status into a bit set of case foldings to test.
3440 * Remember non-Turkic case foldings as defaults for Turkic mode.
3441 */
3442 switch(status) {
3443 case 'C':
3444 which=CF_SIMPLE|CF_FULL;
3445 pData->prevSimple=simple;
3446 u_memcpy(pData->prevFull, full, count);
3447 pData->prevFullLength=count;
3448 break;
3449 case 'S':
3450 which=CF_SIMPLE;
3451 pData->prevSimple=simple;
3452 break;
3453 case 'F':
3454 which=CF_FULL;
3455 u_memcpy(pData->prevFull, full, count);
3456 pData->prevFullLength=count;
3457 break;
3458 case 'T':
3459 which=CF_TURKIC;
3460 break;
3461 default:
3462 which=0;
3463 break; /* won't happen because of test above */
3464 }
3465
3466 testFold(c, which, simple, simple, full, count, full, count);
3467
3468 /* remember which case foldings of c have been tested */
3469 pData->which|=which;
3470
3471 /* remove c from the set of ones not mentioned in CaseFolding.txt */
3472 uset_remove(pData->notSeen, c);
3473}
3474
3475static void
3476TestCaseFolding() {
3477 CaseFoldingData data={ NULL };
3478 char *fields[3][2];
3479 UErrorCode errorCode;
3480
3481 static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3482
3483 errorCode=U_ZERO_ERROR;
3484 /* test BMP & plane 1 - nothing interesting above */
3485 data.notSeen=uset_open(0, 0x1ffff);
3486 data.prevFullLength=1; /* length of full case folding of U+0000 */
3487
3488 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3489 if(U_SUCCESS(errorCode)) {
3490 int32_t i, start, end;
3491
3492 /* add a pseudo-last line to finish testing of the actual last one */
3493 fields[0][0]=lastLine;
3494 fields[0][1]=lastLine+6;
3495 fields[1][0]=lastLine+7;
3496 fields[1][1]=lastLine+9;
3497 fields[2][0]=lastLine+10;
3498 fields[2][1]=lastLine+17;
3499 caseFoldingLineFn(&data, fields, 3, &errorCode);
3500
3501 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3502 for(i=0;
3503 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3504 U_SUCCESS(errorCode);
3505 ++i
3506 ) {
3507 do {
3508 testFoldToSelf(start, CF_ALL);
3509 } while(++start<=end);
3510 }
3511 }
3512
3513 uset_close(data.notSeen);
374ca955 3514}