]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cucdtst.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
374ca955 3 * Copyright (c) 1997-2004, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API, added tests for string functions
13*********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25
26#include "cintltst.h"
374ca955 27#include "putilimp.h"
b75a7d8f 28#include "uparse.h"
374ca955 29#include "ucase.h"
b75a7d8f 30#include "uprops.h"
374ca955 31#include "uset_imp.h"
b75a7d8f
A
32#include "usc_impl.h"
33#include "unormimp.h"
374ca955
A
34#include "udatamem.h" /* for testing ucase_openBinary() */
35#include "cucdapi.h"
b75a7d8f 36
374ca955 37#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
b75a7d8f
A
38
39/* prototypes --------------------------------------------------------------- */
40
41static void TestUpperLower(void);
42static void TestLetterNumber(void);
43static void TestMisc(void);
44static void TestPOSIX(void);
45static void TestControlPrint(void);
46static void TestIdentifier(void);
47static void TestUnicodeData(void);
48static void TestCodeUnit(void);
49static void TestCodePoint(void);
50static void TestCharLength(void);
51static void TestCharNames(void);
52static void TestMirroring(void);
374ca955 53/* void TestUScriptCodeAPI(void);*/ /* defined in cucdapi.h */
b75a7d8f
A
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
374ca955 60static void TestUCase(void);
b75a7d8f
A
61
62/* internal methods used */
63static int32_t MakeProp(char* str);
64static int32_t MakeDir(char* str);
65
b75a7d8f
A
66/* test data ---------------------------------------------------------------- */
67
/* Not referenced in the visible part of this file. */
68static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD;
/*
 * Concatenation of the 30 two-letter general category abbreviations.
 * The pair at character offset 2*n corresponds to tagValues[n] (see the
 * per-entry comments in tagValues).
 * NOTE(review): presumably searched by MakeProp(), which is defined outside
 * this view - confirm.
 */
69static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/*
 * General category constants, in the same order as the abbreviations in
 * tagStrings. unicodeDataLineFn() indexes this table with the result of
 * MakeProp() to get the expected u_charType() value for a UnicodeData.txt line.
 */
70static const int32_t tagValues[] =
71 {
72 /* Mn */ U_NON_SPACING_MARK,
73 /* Mc */ U_COMBINING_SPACING_MARK,
74 /* Me */ U_ENCLOSING_MARK,
75 /* Nd */ U_DECIMAL_DIGIT_NUMBER,
76 /* Nl */ U_LETTER_NUMBER,
77 /* No */ U_OTHER_NUMBER,
78 /* Zs */ U_SPACE_SEPARATOR,
79 /* Zl */ U_LINE_SEPARATOR,
80 /* Zp */ U_PARAGRAPH_SEPARATOR,
81 /* Cc */ U_CONTROL_CHAR,
82 /* Cf */ U_FORMAT_CHAR,
83 /* Cs */ U_SURROGATE,
84 /* Co */ U_PRIVATE_USE_CHAR,
85 /* Cn */ U_UNASSIGNED,
86 /* Lu */ U_UPPERCASE_LETTER,
87 /* Ll */ U_LOWERCASE_LETTER,
88 /* Lt */ U_TITLECASE_LETTER,
89 /* Lm */ U_MODIFIER_LETTER,
90 /* Lo */ U_OTHER_LETTER,
91 /* Pc */ U_CONNECTOR_PUNCTUATION,
92 /* Pd */ U_DASH_PUNCTUATION,
93 /* Ps */ U_START_PUNCTUATION,
94 /* Pe */ U_END_PUNCTUATION,
95 /* Po */ U_OTHER_PUNCTUATION,
96 /* Sm */ U_MATH_SYMBOL,
97 /* Sc */ U_CURRENCY_SYMBOL,
98 /* Sk */ U_MODIFIER_SYMBOL,
99 /* So */ U_OTHER_SYMBOL,
100 /* Pi */ U_INITIAL_PUNCTUATION,
101 /* Pf */ U_FINAL_PUNCTUATION
102 };
103
/*
 * Bidi class abbreviations as they appear in UnicodeData.txt field 4.
 * Each entry is at most 3 characters plus the terminating NUL (row size 5).
 * NOTE(review): presumably looked up by MakeDir() (defined outside this view),
 * whose result unicodeDataLineFn() compares with u_charDirection(); if so, the
 * order of the entries must match the UCharDirection enum - confirm.
 */
104static const char dirStrings[][5] = {
105 "L",
106 "R",
107 "EN",
108 "ES",
109 "ET",
110 "AN",
111 "CS",
112 "B",
113 "S",
114 "WS",
115 "ON",
116 "LRE",
117 "LRO",
118 "AL",
119 "RLE",
120 "RLO",
121 "PDF",
122 "NSM",
123 "BN"
124};
125
126void addUnicodeTest(TestNode** root);
127
128void addUnicodeTest(TestNode** root)
129{
130 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
131 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
132 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
133 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
134 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
135 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
136 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
137 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
138 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
139 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
140 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
141 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
142 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
143 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
144 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
145 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
146 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
147 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
148 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
374ca955 149 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
b75a7d8f
A
150}
151
152/*==================================================== */
153/* test u_toupper() and u_tolower() */
154/*==================================================== */
155static void TestUpperLower()
156{
157 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
158 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
159 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
160 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
161 int32_t i;
162
163 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
164 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
165
166/*
167Checks LetterLike Symbols which were previously a source of confusion
168[Bertrand A. D. 02/04/98]
169*/
170 for (i=0x2100;i<0x2138;i++)
171 {
172 if(i!=0x2126 && i!=0x212a && i!=0x212b)
173 {
174 if (i != (int)u_tolower(i)) /* itself */
175 log_err("Failed case conversion with itself: U+%04x\n", i);
176 if (i != (int)u_toupper(i))
177 log_err("Failed case conversion with itself: U+%04x\n", i);
178 }
179 }
180
181 for(i=0; i < u_strlen(upper); i++){
182 if(u_tolower(upper[i]) != lower[i]){
183 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
184 }
185 }
186
187 log_verbose("testing upper lower\n");
188 for (i = 0; i < 21; i++) {
189
190 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
191 {
192 log_err("Failed isLowerCase test at %c\n", upperTest[i]);
193 }
194 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
195 {
196 log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
197 }
198 else if (upperTest[i] != u_tolower(lowerTest[i]))
199 {
200 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]);
201 }
202 else if (lowerTest[i] != u_toupper(upperTest[i]))
203 {
204 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
205 }
206 else if (upperTest[i] != u_tolower(upperTest[i]))
207 {
208 log_err("Failed case conversion with itself: %c\n", upperTest[i]);
209 }
210 else if (lowerTest[i] != u_toupper(lowerTest[i]))
211 {
212 log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
213 }
214 }
215 log_verbose("done testing upper lower\n");
216
217 log_verbose("testing u_istitle\n");
218 {
219 static const UChar expected[] = {
220 0x1F88,
221 0x1F89,
222 0x1F8A,
223 0x1F8B,
224 0x1F8C,
225 0x1F8D,
226 0x1F8E,
227 0x1F8F,
228 0x1F88,
229 0x1F89,
230 0x1F8A,
231 0x1F8B,
232 0x1F8C,
233 0x1F8D,
234 0x1F8E,
235 0x1F8F,
236 0x1F98,
237 0x1F99,
238 0x1F9A,
239 0x1F9B,
240 0x1F9C,
241 0x1F9D,
242 0x1F9E,
243 0x1F9F,
244 0x1F98,
245 0x1F99,
246 0x1F9A,
247 0x1F9B,
248 0x1F9C,
249 0x1F9D,
250 0x1F9E,
251 0x1F9F,
252 0x1FA8,
253 0x1FA9,
254 0x1FAA,
255 0x1FAB,
256 0x1FAC,
257 0x1FAD,
258 0x1FAE,
259 0x1FAF,
260 0x1FA8,
261 0x1FA9,
262 0x1FAA,
263 0x1FAB,
264 0x1FAC,
265 0x1FAD,
266 0x1FAE,
267 0x1FAF,
268 0x1FBC,
269 0x1FBC,
270 0x1FCC,
271 0x1FCC,
272 0x1FFC,
273 0x1FFC,
274 };
275 int32_t num = sizeof(expected)/sizeof(expected[0]);
276 for(i=0; i<num; i++){
277 if(!u_istitle(expected[i])){
278 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
279 }
280 }
281
282 }
283}
284
285/* compare two sets, which is not easy with the current (ICU 2.4) C API... */
286
287static UBool
288showADiffB(const USet *a, const USet *b,
289 const char *a_name, const char *b_name,
290 UBool expect, UBool diffIsError) {
291 int32_t i, start, end, length;
292 UBool equal;
293 UErrorCode errorCode;
294
295 errorCode=U_ZERO_ERROR;
296 equal=TRUE;
297 i=0;
298 for(;;) {
299 length=uset_getItem(a, i, &start, &end, NULL, 0, &errorCode);
300 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
301 return equal; /* done */
302 }
303 if(U_FAILURE(errorCode)) {
304 log_err("error comparing %s with %s at item %d: %s\n",
305 a_name, b_name, i, u_errorName(errorCode));
306 return FALSE;
307 }
308 if(length!=0) {
309 return equal; /* done with code points, got a string or -1 */
310 }
311
312 if(expect!=uset_containsRange(b, start, end)) {
313 equal=FALSE;
314 while(start<=end) {
315 if(expect!=uset_contains(b, start)) {
316 if(diffIsError) {
317 if(expect) {
318 log_err("error: %s contains U+%04x but %s does not\n", a_name, start, b_name);
319 } else {
320 log_err("error: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start);
321 }
322 } else {
323 if(expect) {
324 log_verbose("info: %s contains U+%04x but %s does not\n", a_name, start, b_name);
325 } else {
326 log_verbose("info: %s and %s both contain U+%04x but should not intersect\n", a_name, b_name, start);
327 }
328 }
329 }
330 ++start;
331 }
332 }
333
334 ++i;
335 }
336}
337
338static UBool
339showAMinusB(const USet *a, const USet *b,
340 const char *a_name, const char *b_name,
341 UBool diffIsError) {
342 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
343}
344
345static UBool
346showAIntersectB(const USet *a, const USet *b,
347 const char *a_name, const char *b_name,
348 UBool diffIsError) {
349 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
350}
351
352static UBool
353compareUSets(const USet *a, const USet *b,
354 const char *a_name, const char *b_name,
355 UBool diffIsError) {
356 return
357 showAMinusB(a, b, a_name, b_name, diffIsError) &&
358 showAMinusB(b, a, b_name, a_name, diffIsError);
359}
360
361/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
362static void TestLetterNumber()
363{
364 UChar i = 0x0000;
365
366 log_verbose("Testing for isalpha\n");
367 for (i = 0x0041; i < 0x005B; i++) {
368 if (!u_isalpha(i))
369 {
370 log_err("Failed isLetter test at %.4X\n", i);
371 }
372 }
373 for (i = 0x0660; i < 0x066A; i++) {
374 if (u_isalpha(i))
375 {
376 log_err("Failed isLetter test with numbers at %.4X\n", i);
377 }
378 }
379
380 log_verbose("Testing for isdigit\n");
381 for (i = 0x0660; i < 0x066A; i++) {
382 if (!u_isdigit(i))
383 {
384 log_verbose("Failed isNumber test at %.4X\n", i);
385 }
386 }
387
388 log_verbose("Testing for isalnum\n");
389 for (i = 0x0041; i < 0x005B; i++) {
390 if (!u_isalnum(i))
391 {
392 log_err("Failed isAlNum test at %.4X\n", i);
393 }
394 }
395 for (i = 0x0660; i < 0x066A; i++) {
396 if (!u_isalnum(i))
397 {
398 log_err("Failed isAlNum test at %.4X\n", i);
399 }
400 }
401
402 {
403 /*
404 * The following checks work only starting from Unicode 4.0.
405 * Check the version number here.
406 */
374ca955 407 static UVersionInfo u401={ 4, 0, 1, 0 };
b75a7d8f
A
408 UVersionInfo version;
409 u_getUnicodeVersion(version);
374ca955 410 if(version[0]<4 || 0==memcmp(version, u401, 4)) {
b75a7d8f
A
411 return;
412 }
413 }
414
415 {
416 /*
417 * Sanity check:
418 * Verify that exactly the digit characters have decimal digit values.
419 * This assumption is used in the implementation of u_digit()
420 * (which checks nt=de)
421 * compared with the parallel java.lang.Character.digit()
422 * (which checks Nd).
423 *
424 * This was not true in Unicode 3.2 and earlier.
374ca955
A
425 * Unicode 4.0 fixed discrepancies.
426 * Unicode 4.0.1 re-introduced problems in this area due to an
427 * unintentionally incomplete last-minute change.
b75a7d8f
A
428 */
429 U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
430 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
431
432 USet *digits, *decimalValues;
433 UErrorCode errorCode;
434
435 U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
436 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
437 errorCode=U_ZERO_ERROR;
438 digits=uset_openPattern(digitsPattern, 6, &errorCode);
439 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
440
441 if(U_SUCCESS(errorCode)) {
442 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
443 }
444
445 uset_close(digits);
446 uset_close(decimalValues);
447 }
448}
449
450/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
451static void TestMisc()
452{
453 static const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
454 static const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
455 static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6b };
456 static const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
457 static const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
458 static const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
459/* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
460 static const UChar sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
461 static const UChar sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
462 static const UChar sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
463 static const UChar sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
464
465
466 static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
467
468 uint32_t mask;
469
470 int32_t i;
471 char icuVersion[U_MAX_VERSION_STRING_LENGTH];
472 UVersionInfo realVersion;
473
474 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
475
476 log_verbose("Testing for isspace and nonspaces\n");
477 for (i = 0; i < 5; i++) {
478 if (!(u_isspace(sampleSpaces[i])) ||
479 (u_isspace(sampleNonSpaces[i])))
480 {
481 log_err("Space char test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
482 }
483 if (!(u_isJavaSpaceChar(sampleSpaces[i])) ||
484 (u_isJavaSpaceChar(sampleNonSpaces[i])))
485 {
486 log_err("u_isJavaSpaceChar() test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
487 }
488 }
489
490 log_verbose("Testing for isspace and nonspaces\n");
491 for (i = 0; i < 5; i++) {
492 if (!(u_isWhitespace(sampleWhiteSpaces[i])) ||
493 (u_isWhitespace(sampleNonWhiteSpaces[i])))
494 {
495 log_err("White Space char test error : %lx or %lx \n", sampleWhiteSpaces[i], sampleNonWhiteSpaces[i]);
496 }
497 }
498
499 log_verbose("Testing for isdefined\n");
500 for (i = 0; i < 3; i++) {
501 if ((u_isdefined(sampleUndefined[i])) ||
502 !(u_isdefined(sampleDefined[i])))
503 {
504 log_err("Undefined char test error : U+%04x or U+%04x\n", (int32_t)sampleUndefined[i], (int32_t)sampleDefined[i]);
505 }
506 }
507
508 log_verbose("Testing for isbase\n");
509 for (i = 0; i < 3; i++) {
510 if ((u_isbase(sampleNonBase[i])) ||
511 !(u_isbase(sampleBase[i])))
512 {
513 log_err("Non-baseform char test error : U+%04x or U+%04x",(int32_t)sampleNonBase[i], (int32_t)sampleBase[i]);
514 }
515 }
516
517 log_verbose("Testing for isdigit \n");
518 for (i = 0; i < 4; i++) {
519 if ((u_isdigit(sampleDigits[i]) &&
520 (u_charDigitValue(sampleDigits[i])!= sampleDigitValues[i])) ||
521 (u_isdigit(sampleNonDigits[i]))) {
522 log_err("Digit char test error : %lx or %lx\n", sampleDigits[i], sampleNonDigits[i]);
523 }
524 }
525
526 /* Tests the ICU version #*/
527 u_getVersion(realVersion);
528 u_versionToString(realVersion, icuVersion);
374ca955 529 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
b75a7d8f
A
530 {
531 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
532 }
533#if defined(ICU_VERSION)
534 /* test only happens where we have configure.in with VERSION - sanity check. */
535 if(strcmp(U_ICU_VERSION, ICU_VERSION))
536 {
537 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION);
538 }
539#endif
540
541 /* test U_GC_... */
542 if(
543 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
544 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
545 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
546 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
547 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
548 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
549 ) {
550 log_err("error: U_GET_GC_MASK does not work properly\n");
551 }
552
553 mask=0;
554 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
555
556 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
557 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
558 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
559 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
560 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
561
562 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
563 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
564 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
565
566 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
567 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
568 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
569
570 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
571 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
572 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
573
574 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
575 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
576 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
577 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
578
579 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
580 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
581 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
582 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
583 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
584
585 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
586 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
587 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
588 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
589
590 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
591 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
592
593 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
594 log_err("error: problems with U_GC_XX_MASK constants\n");
595 }
596
597 mask=0;
598 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
599 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
600 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
601 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
602 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
603 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
604 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
605
606 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
607 log_err("error: problems with U_GC_Y_MASK constants\n");
608 }
609 {
610 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
611 for(i=0; i<10; i++){
612 if(digit[i]!=u_forDigit(i,10)){
613 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
614 }
615 }
616 }
617
618 /* test u_digit() */
619 {
620 static const struct {
621 UChar32 c;
622 int8_t radix, value;
623 } data[]={
624 /* base 16 */
625 { 0x0031, 16, 1 },
626 { 0x0038, 16, 8 },
627 { 0x0043, 16, 12 },
628 { 0x0066, 16, 15 },
629 { 0x00e4, 16, -1 },
630 { 0x0662, 16, 2 },
631 { 0x06f5, 16, 5 },
632 { 0xff13, 16, 3 },
633 { 0xff41, 16, 10 },
634
635 /* base 8 */
636 { 0x0031, 8, 1 },
637 { 0x0038, 8, -1 },
638 { 0x0043, 8, -1 },
639 { 0x0066, 8, -1 },
640 { 0x00e4, 8, -1 },
641 { 0x0662, 8, 2 },
642 { 0x06f5, 8, 5 },
643 { 0xff13, 8, 3 },
644 { 0xff41, 8, -1 },
645
646 /* base 36 */
647 { 0x5a, 36, 35 },
648 { 0x7a, 36, 35 },
649 { 0xff3a, 36, 35 },
650 { 0xff5a, 36, 35 },
651
652 /* wrong radix values */
653 { 0x0031, 1, -1 },
654 { 0xff3a, 37, -1 }
655 };
656
657 for(i=0; i<LENGTHOF(data); ++i) {
658 if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
659 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
660 data[i].c,
661 data[i].radix,
662 u_digit(data[i].c, data[i].radix),
663 data[i].value);
664 }
665 }
666 }
667}
668
669/* test C/POSIX-style functions --------------------------------------------- */
670
671/* bit flags
 *
 * One bit per C/POSIX-style character class. The posixResults field of each
 * posixData entry is an OR of these flags. TestPOSIX() walks posixClasses[]
 * with a mask that starts at 1 and is shifted left once per class, so bit n
 * here belongs to posixClasses[n].
 */
672#define ISAL 1
673#define ISLO 2
674#define ISUP 4
675
676#define ISDI 8
677#define ISXD 0x10
678
679#define ISAN 0x20
680
681#define ISPU 0x40
682#define ISGR 0x80
683#define ISPR 0x100
684
685#define ISSP 0x200
686#define ISBL 0x400
687#define ISCN 0x800
688
689/* C/POSIX-style functions, in the same order as the bit flags */
/* Function type shared by all class predicates listed in posixClasses[]. */
374ca955 690typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
b75a7d8f
A
691

/*
 * Predicate under test plus its name for error messages.
 * Entry n is checked by TestPOSIX() against flag bit n (ISAL, ISLO, ISUP, ...).
 */
692static const struct {
693 IsPOSIXClass *fn;
694 const char *name;
695} posixClasses[]={
696 { u_isalpha, "isalpha" },
697 { u_islower, "islower" },
698 { u_isupper, "isupper" },
699 { u_isdigit, "isdigit" },
700 { u_isxdigit, "isxdigit" },
701 { u_isalnum, "isalnum" },
702 { u_ispunct, "ispunct" },
703 { u_isgraph, "isgraph" },
704 { u_isprint, "isprint" },
705 { u_isspace, "isspace" },
706 { u_isblank, "isblank" },
707 { u_iscntrl, "iscntrl" }
708};
709
/*
 * Expected results for TestPOSIX(): for code point c, posixResults has the
 * IS.. flag set for every class predicate that must return TRUE; every
 * predicate whose flag is absent must return FALSE.
 */
710static const struct {
711 UChar32 c;
712 uint32_t posixResults;
713} posixData[]={
714 { 0x0008, ISCN }, /* backspace */
715 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */
716 { 0x000a, ISSP| ISCN }, /* LF */
717 { 0x000c, ISSP| ISCN }, /* FF */
718 { 0x000d, ISSP| ISCN }, /* CR */
719 { 0x0020, ISPR|ISSP|ISBL }, /* space */
720 { 0x0021, ISPU|ISGR|ISPR }, /* ! */
721 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */
722 { 0x0040, ISPU|ISGR|ISPR }, /* @ */
723 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */
724 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */
725 { 0x007b, ISPU|ISGR|ISPR }, /* { */
726 { 0x0085, ISSP| ISCN }, /* NEL */
727 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */
728 { 0x00a4, ISGR|ISPR }, /* currency sign */
729 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */
730 { 0x0300, ISGR|ISPR }, /* combining grave */
731 { 0x0600, ISCN }, /* arabic number sign */
732 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */
733 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */
734 { 0x2002, ISPR|ISSP|ISBL }, /* en space */
735 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */
736 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */
374ca955
A
737 { 0x200b, ISCN }, /* ZWSP */
738 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
b75a7d8f
A
739 { 0x200e, ISCN }, /* LRM */
740 { 0x2028, ISPR|ISSP| ISCN }, /* LS */
741 { 0x2029, ISPR|ISSP| ISCN }, /* PS */
742 { 0x20ac, ISGR|ISPR }, /* Euro */
743 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */
744 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */
745 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */
746 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */
747 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */
748};
749
750static void
751TestPOSIX() {
752 uint32_t mask;
753 int32_t cl, i;
754 UBool expect;
755
756 mask=1;
757 for(cl=0; cl<12; ++cl) {
758 for(i=0; i<LENGTHOF(posixData); ++i) {
759 expect=(UBool)((posixData[i].posixResults&mask)!=0);
760 if(posixClasses[cl].fn(posixData[i].c)!=expect) {
761 log_err("u_%s(U+%04x)=%s is wrong\n",
762 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
763 }
764 }
765 mask<<=1;
766 }
767}
768
769/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
770static void TestControlPrint()
771{
772 const UChar sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
773 const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
774 const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
775 const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
776 UChar32 c;
777 int i;
778
779 log_verbose("Testing for iscontrol\n");
780 for (i = 0; i < LENGTHOF(sampleControl); i++) {
781 if (!u_iscntrl(sampleControl[i]))
782 {
783 log_err("Control char test error : U+%04x should be control but is not\n", (int32_t)sampleControl[i]);
784 }
785 }
786
787 log_verbose("Testing for !iscontrol\n");
788 for (i = 0; i < LENGTHOF(sampleNonControl); i++) {
789 if (u_iscntrl(sampleNonControl[i]))
790 {
791 log_err("Control char test error : U+%04x should not be control but is\n", (int32_t)sampleNonControl[i]);
792 }
793 }
794
795 log_verbose("testing for isprintable\n");
796 for (i = 0; i < 3; i++) {
797 if (!u_isprint(samplePrintable[i]))
798 {
799 log_err("Printable char test error : U+%04x should be printable but is not\n", (int32_t)samplePrintable[i]);
800 }
801 if (u_isprint(sampleNonPrintable[i]))
802 {
803 log_err("Printable char test error : U+%04x should not be printable but is\n", (int32_t)sampleNonPrintable[i]);
804 }
805 }
806
807 /* test all ISO 8 controls */
808 for(c=0; c<=0x9f; ++c) {
809 if(c==0x20) {
810 /* skip ASCII graphic characters and continue with DEL */
811 c=0x7f;
812 }
813 if(!u_iscntrl(c)) {
814 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
815 }
816 if(!u_isISOControl(c)) {
817 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
818 }
819 if(u_isprint(c)) {
820 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
821 }
822 }
823
824 /* test all Latin-1 graphic characters */
825 for(c=0x20; c<=0xff; ++c) {
826 if(c==0x7f) {
827 c=0xa0;
828 } else if(c==0xad) {
829 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
830 ++c;
831 }
832 if(!u_isprint(c)) {
833 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
834 }
835 }
836}
837
838/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
839static void TestIdentifier()
840{
841 const UChar sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
842 const UChar sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
843 const UChar sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
844 const UChar sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
845 const UChar sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
846 const UChar sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
847 const UChar sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
848 const UChar sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
849 const UChar sampleIDIgnore[] = {0x0006, 0x0010, 0x206b};
850 const UChar sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
851
852 int i;
853
854 log_verbose("Testing sampleJavaID start \n");
855 for (i = 0; i < 3; i++) {
856 if (!(u_isJavaIDStart(sampleJavaIDStart[i])) ||
857 (u_isJavaIDStart(sampleNonJavaIDStart[i])))
858 log_err("Java ID Start char test error : %lx or %lx\n",
859 sampleJavaIDStart[i], sampleNonJavaIDStart[i]);
860 }
861
862 log_verbose("Testing sampleJavaID part \n");
863 for (i = 0; i < 3; i++) {
864 if (!(u_isJavaIDPart(sampleJavaIDPart[i])) ||
865 (u_isJavaIDPart(sampleNonJavaIDPart[i])))
866 log_err("Java ID Part char test error : %lx or %lx\n",
867 sampleJavaIDPart[i], sampleNonJavaIDPart[i]);
868 }
869
870 log_verbose("Testing sampleUnicodeID start \n");
871 for (i = 0; i < 3; i++) {
872 /* T_test_logln_ustr((int32_t)i); */
873 if (!(u_isIDStart(sampleUnicodeIDStart[i])) ||
874 (u_isIDStart(sampleNonUnicodeIDStart[i])))
875 {
876 log_err("Unicode ID Start char test error : %lx or %lx\n", sampleUnicodeIDStart[i],
877 sampleNonUnicodeIDStart[i]);
878 }
879 }
880
881 log_verbose("Testing sample unicode ID part \n");
882 for (i = 2; i < 3; i++) { /* nos *** starts with 2 instead of 0, until clarified */
883 /* T_test_logln_ustr((int32_t)i); */
884 if (!(u_isIDPart(sampleUnicodeIDPart[i])) ||
885 (u_isIDPart(sampleNonUnicodeIDPart[i])))
886 {
887 log_err("Unicode ID Part char test error : %lx or %lx", sampleUnicodeIDPart[i], sampleNonUnicodeIDPart[i]);
888 }
889 }
890
891 log_verbose("Testing sampleId ignore\n");
892 for (i = 0; i < 3; i++) {
893 /*T_test_logln_ustr((int32_t)i); */
894 if (!(u_isIDIgnorable(sampleIDIgnore[i])) ||
895 (u_isIDIgnorable(sampleNonIDIgnore[i])))
896 {
897 log_err("ID ignorable char test error : U+%04x or U+%04x\n", sampleIDIgnore[i], sampleNonIDIgnore[i]);
898 }
899 }
900}
901
902/* for each line of UnicodeData.txt, check some of the properties */
903/*
904 * ### TODO
905 * This test fails incorrectly if the First or Last code point of a repetitive area
906 * is overridden, which is allowed and is encouraged for the PUAs.
907 * Currently, this means that both area First/Last and override lines are
908 * tested against the properties from the API,
909 * and the area boundary will not match and cause an error.
910 *
911 * This function should detect area boundaries and skip them for the test of individual
912 * code points' properties.
913 * Then it should check that the areas contain all the same properties except where overridden.
914 * For this, it would have had to set a flag for which code points were listed explicitly.
915 */
916static void U_CALLCONV
917unicodeDataLineFn(void *context,
918 char *fields[][2], int32_t fieldCount,
919 UErrorCode *pErrorCode)
920{
921 char buffer[100];
922 char *end;
923 uint32_t value;
924 UChar32 c;
925 int32_t i;
926 int8_t type;
927
928 /* get the character code, field 0 */
929 c=strtoul(fields[0][0], &end, 16);
930 if(end<=fields[0][0] || end!=fields[0][1]) {
931 log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
932 return;
933 }
934 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
935 log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
936 return;
937 }
938
939 /* get general category, field 2 */
940 *fields[2][1]=0;
941 type = (int8_t)tagValues[MakeProp(fields[2][0])];
942 if(u_charType(c)!=type) {
943 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
944 }
945 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
946 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
947 }
948
949 /* get canonical combining class, field 3 */
950 value=strtoul(fields[3][0], &end, 10);
951 if(end<=fields[3][0] || end!=fields[3][1]) {
952 log_err("error: syntax error in field 3 at code 0x%lx\n", c);
953 return;
954 }
955 if(value>255) {
956 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
957 return;
958 }
959#if !UCONFIG_NO_NORMALIZATION
960 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
961 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
962 }
963#endif
964
965 /* get BiDi category, field 4 */
966 *fields[4][1]=0;
967 i=MakeDir(fields[4][0]);
968 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
969 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
970 }
971
972 /* get ISO Comment, field 11 */
973 *fields[11][1]=0;
974 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
975 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
976 log_err("error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
977 c, u_errorName(*pErrorCode),
978 U_FAILURE(*pErrorCode) ? buffer : "[error]",
979 fields[11][0]);
980 }
981
982 /* get uppercase mapping, field 12 */
983 if(fields[12][0]!=fields[12][1]) {
984 value=strtoul(fields[12][0], &end, 16);
985 if(end!=fields[12][1]) {
986 log_err("error: syntax error in field 12 at code 0x%lx\n", c);
987 return;
988 }
989 if((UChar32)value!=u_toupper(c)) {
990 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
991 }
992 } else {
993 /* no case mapping: the API must map the code point to itself */
994 if(c!=u_toupper(c)) {
995 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
996 }
997 }
998
999 /* get lowercase mapping, field 13 */
1000 if(fields[13][0]!=fields[13][1]) {
1001 value=strtoul(fields[13][0], &end, 16);
1002 if(end!=fields[13][1]) {
1003 log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1004 return;
1005 }
1006 if((UChar32)value!=u_tolower(c)) {
1007 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1008 }
1009 } else {
1010 /* no case mapping: the API must map the code point to itself */
1011 if(c!=u_tolower(c)) {
1012 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1013 }
1014 }
1015
1016 /* get titlecase mapping, field 14 */
1017 if(fields[14][0]!=fields[14][1]) {
1018 value=strtoul(fields[14][0], &end, 16);
1019 if(end!=fields[14][1]) {
1020 log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1021 return;
1022 }
1023 if((UChar32)value!=u_totitle(c)) {
1024 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1025 }
1026 } else {
1027 /* no case mapping: the API must map the code point to itself */
1028 if(c!=u_totitle(c)) {
1029 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1030 }
1031 }
1032}
1033
1034static UBool U_CALLCONV
1035enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1036 static const UChar32 test[][2]={
1037 {0x41, U_UPPERCASE_LETTER},
1038 {0x308, U_NON_SPACING_MARK},
1039 {0xfffe, U_GENERAL_OTHER_TYPES},
1040 {0xe0041, U_FORMAT_CHAR},
1041 {0xeffff, U_UNASSIGNED}
1042 };
1043
374ca955 1044 int32_t i, count;
b75a7d8f
A
1045
1046 if(0!=strcmp((const char *)context, "a1")) {
1047 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1048 return FALSE;
1049 }
1050
374ca955 1051 count=LENGTHOF(test);
b75a7d8f
A
1052 for(i=0; i<count; ++i) {
1053 if(start<=test[i][0] && test[i][0]<limit) {
1054 if(type!=(UCharCategory)test[i][1]) {
1055 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1056 start, limit, (long)type, test[i][0], test[i][1]);
1057 }
374ca955 1058 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
b75a7d8f
A
1059 return i==(count-1) ? FALSE : TRUE;
1060 }
1061 }
1062
1063 if(start>test[count-1][0]) {
1064 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1065 start, limit, (long)type);
1066 return FALSE;
1067 }
1068
374ca955
A
1069 return TRUE;
1070}
1071
1072static UBool U_CALLCONV
1073enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1074 /* default Bidi classes for unassigned code points */
1075 static const int32_t defaultBidi[][2]={ /* { limit, class } */
1076 { 0x0590, U_LEFT_TO_RIGHT },
1077 { 0x0600, U_RIGHT_TO_LEFT },
1078 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1079 { 0x0900, U_RIGHT_TO_LEFT },
1080 { 0xFB1D, U_LEFT_TO_RIGHT },
1081 { 0xFB50, U_RIGHT_TO_LEFT },
1082 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1083 { 0xFE70, U_LEFT_TO_RIGHT },
1084 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1085 { 0x10800, U_LEFT_TO_RIGHT },
1086 { 0x11000, U_RIGHT_TO_LEFT },
1087 { 0x110000, U_LEFT_TO_RIGHT }
1088 };
1089
1090 UChar32 c;
1091 int32_t i;
1092 UCharDirection shouldBeDir;
1093
b75a7d8f
A
1094 /*
1095 * LineBreak.txt specifies:
1096 * # - Assigned characters that are not listed explicitly are given the value
1097 * # "AL".
1098 * # - Unassigned characters are given the value "XX".
1099 *
1100 * PUA characters are listed explicitly with "XX".
1101 * Verify that no assigned character has "XX".
1102 */
1103 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1104 c=start;
1105 while(c<limit) {
1106 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1107 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1108 }
1109 ++c;
1110 }
1111 }
1112
1113 /*
1114 * Verify default Bidi classes.
374ca955
A
1115 * For recent Unicode versions, see UCD.html.
1116 *
1117 * For older Unicode versions:
b75a7d8f
A
1118 * See table 3-7 "Bidirectional Character Types" in UAX #9.
1119 * http://www.unicode.org/reports/tr9/
1120 *
1121 * See also DerivedBidiClass.txt for Cn code points!
374ca955
A
1122 *
1123 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1124 * changed some default values.
1125 * In particular, non-characters and unassigned Default Ignorable Code Points
1126 * change from L to BN.
1127 *
1128 * UCD.html version 4.0.1 does not yet reflect these changes.
b75a7d8f
A
1129 */
1130 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1131 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1132 c=start;
1133 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1134 if((int32_t)c<defaultBidi[i][0]) {
1135 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
374ca955
A
1136 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1137 shouldBeDir=U_BOUNDARY_NEUTRAL;
1138 } else {
1139 shouldBeDir=(UCharDirection)defaultBidi[i][1];
1140 }
1141
1142 if( u_charDirection(c)!=shouldBeDir ||
1143 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
b75a7d8f
A
1144 ) {
1145 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
374ca955 1146 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
b75a7d8f
A
1147 }
1148 ++c;
1149 }
1150 }
1151 }
1152 }
1153
1154 return TRUE;
1155}
1156
1157/* tests for several properties */
1158static void TestUnicodeData()
1159{
1160 char newPath[256];
1161 char backupPath[256];
1162 UVersionInfo expectVersionArray;
1163 UVersionInfo versionArray;
1164 char *fields[15][2];
1165 UErrorCode errorCode;
1166 UChar32 c;
1167 int8_t type;
1168
1169 /* Look inside ICU_DATA first */
1170 strcpy(newPath, u_getDataDirectory());
1171 strcat(newPath, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
1172
1173 /* As a fallback, try to guess where the source data was located
1174 * at the time ICU was built, and look there.
1175 */
1176 strcpy(backupPath, ctest_dataSrcDir());
1177 strcat(backupPath, U_FILE_SEP_STRING);
1178 strcat(backupPath, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
1179
1180 u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1181 u_getUnicodeVersion(versionArray);
1182 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1183 {
1184 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1185 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1186 }
1187
1188#if defined(ICU_UNICODE_VERSION)
1189 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1190 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1191 {
1192 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1193 }
1194#endif
1195
1196 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1197 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1198 }
1199
1200 errorCode=U_ZERO_ERROR;
1201 u_parseDelimitedFile(newPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode);
1202 if(errorCode==U_FILE_ACCESS_ERROR) {
1203 errorCode=U_ZERO_ERROR;
1204 u_parseDelimitedFile(backupPath, ';', fields, 15, unicodeDataLineFn, NULL, &errorCode);
1205 }
1206 if(U_FAILURE(errorCode)) {
1207 log_err("error parsing UnicodeData.txt: %s\n", u_errorName(errorCode));
1208 return; /* if we couldn't parse UnicodeData.txt, we should return */
1209 }
1210
1211 /* sanity check on repeated properties */
1212 for(c=0xfffe; c<=0x10ffff;) {
1213 type=u_charType(c);
1214 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1215 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1216 }
1217 if(type!=U_UNASSIGNED) {
1218 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1219 }
1220 if((c&0xffff)==0xfffe) {
1221 ++c;
1222 } else {
1223 c+=0xffff;
1224 }
1225 }
1226
1227 /* test that PUA is not "unassigned" */
1228 for(c=0xe000; c<=0x10fffd;) {
1229 type=u_charType(c);
1230 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1231 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1232 }
1233 if(type==U_UNASSIGNED) {
1234 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1235 } else if(type!=U_PRIVATE_USE_CHAR) {
1236 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1237 }
1238 if(c==0xf8ff) {
1239 c=0xf0000;
1240 } else if(c==0xffffd) {
1241 c=0x100000;
1242 } else {
1243 ++c;
1244 }
1245 }
1246
1247 /* test u_enumCharTypes() */
1248 u_enumCharTypes(enumTypeRange, "a1");
374ca955
A
1249
1250 /* check default properties */
1251 u_enumCharTypes(enumDefaultsRange, NULL);
b75a7d8f
A
1252}
1253
1254static void TestCodeUnit(){
1255 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1256
1257 int32_t i;
1258
1259 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1260 UChar c=codeunit[i];
1261 if(i<4){
1262 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1263 log_err("ERROR: U+%04x is a single", c);
1264 }
1265
1266 }
1267 if(i >= 4 && i< 8){
1268 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1269 log_err("ERROR: U+%04x is a first surrogate", c);
1270 }
1271 }
1272 if(i >= 8 && i< 12){
1273 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1274 log_err("ERROR: U+%04x is a second surrogate", c);
1275 }
1276 }
1277 }
1278
1279}
1280
1281static void TestCodePoint(){
1282 const UChar32 codePoint[]={
1283 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1284 0xd800,
1285 0xdbff,
1286 0xdc00,
1287 0xdfff,
1288 0xdc04,
1289 0xd821,
1290 /*not a surrogate, valid, isUnicodeChar , not Error*/
1291 0x20ac,
1292 0xd7ff,
1293 0xe000,
1294 0xe123,
1295 0x0061,
1296 0xe065,
1297 0x20402,
1298 0x24506,
1299 0x23456,
1300 0x20402,
1301 0x10402,
1302 0x23456,
1303 /*not a surrogate, not valid, isUnicodeChar, isError */
1304 0x0015,
1305 0x009f,
1306 /*not a surrogate, not valid, not isUnicodeChar, isError */
1307 0xffff,
1308 0xfffe,
1309 };
1310 int32_t i;
1311 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1312 UChar32 c=codePoint[i];
1313 if(i<6){
1314 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1315 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1316 }
1317 if(UTF_IS_VALID(c)){
1318 log_err("ERROR: isValid() failed for U+%04x\n", c);
1319 }
1320 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1321 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1322 }
1323 if(UTF_IS_ERROR(c)){
1324 log_err("ERROR: isError() failed for U+%04x\n", c);
1325 }
1326 }else if(i >=6 && i<18){
1327 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1328 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1329 }
1330 if(!UTF_IS_VALID(c)){
1331 log_err("ERROR: isValid() failed for U+%04x\n", c);
1332 }
1333 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1334 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1335 }
1336 if(UTF_IS_ERROR(c)){
1337 log_err("ERROR: isError() failed for U+%04x\n", c);
1338 }
1339 }else if(i >=18 && i<20){
1340 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1341 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1342 }
1343 if(UTF_IS_VALID(c)){
1344 log_err("ERROR: isValid() failed for U+%04x\n", c);
1345 }
1346 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1347 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1348 }
1349 if(!UTF_IS_ERROR(c)){
1350 log_err("ERROR: isError() failed for U+%04x\n", c);
1351 }
1352 }
1353 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1354 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1355 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1356 }
1357 if(UTF_IS_VALID(c)){
1358 log_err("ERROR: isValid() failed for U+%04x\n", c);
1359 }
1360 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1361 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1362 }
1363 if(!UTF_IS_ERROR(c)){
1364 log_err("ERROR: isError() failed for U+%04x\n", c);
1365 }
1366 }
1367 }
1368
374ca955
A
1369 if(
1370 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1371 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1372 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1373 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1374 ) {
1375 log_err("error with U_IS_BMP()\n");
1376 }
1377
1378 if(
1379 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1380 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1381 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1382 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1383 ) {
1384 log_err("error with U_IS_SUPPLEMENTARY()\n");
1385 }
b75a7d8f
A
1386}
1387
1388static void TestCharLength()
1389{
1390 const int32_t codepoint[]={
1391 1, 0x0061,
1392 1, 0xe065,
1393 1, 0x20ac,
1394 2, 0x20402,
1395 2, 0x23456,
1396 2, 0x24506,
1397 2, 0x20402,
1398 2, 0x10402,
1399 1, 0xd7ff,
1400 1, 0xe000
1401 };
1402
1403 int32_t i;
1404 UBool multiple;
1405 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1406 UChar32 c=codepoint[i+1];
1407 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1408 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
1409 }
1410 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1411 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1412 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1413 }
1414 }
1415}
1416
1417/*internal functions ----*/
1418static int32_t MakeProp(char* str)
1419{
1420 int32_t result = 0;
1421 char* matchPosition =0;
1422
1423 matchPosition = strstr(tagStrings, str);
1424 if (matchPosition == 0)
1425 {
1426 log_err("unrecognized type letter ");
1427 log_err(str);
1428 }
374ca955
A
1429 else
1430 result = (int32_t)((matchPosition - tagStrings) / 2);
b75a7d8f
A
1431 return result;
1432}
1433
1434static int32_t MakeDir(char* str)
1435{
1436 int32_t pos = 0;
1437 for (pos = 0; pos < 19; pos++) {
1438 if (strcmp(str, dirStrings[pos]) == 0) {
1439 return pos;
1440 }
1441 }
1442 return -1;
1443}
1444
1445/* test u_charName() -------------------------------------------------------- */
1446
/*
 * Sample code points with the names expected from the character name API:
 * the modern name, the Unicode 1.0 name and the extended name.
 * An empty string means that no name of that kind is expected.
 */
static const struct {
    uint32_t code;
    const char *name;
    const char *oldName;
    const char *extName;
} names[]={
    {0x0061,
        "LATIN SMALL LETTER A",
        "",
        "LATIN SMALL LETTER A"},
    {0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        "LATIN SMALL LETTER DOTLESS J BAR HOOK",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
    {0x3401,
        "CJK UNIFIED IDEOGRAPH-3401",
        "",
        "CJK UNIFIED IDEOGRAPH-3401" },
    {0x7fed,
        "CJK UNIFIED IDEOGRAPH-7FED",
        "",
        "CJK UNIFIED IDEOGRAPH-7FED" },
    {0xac00,
        "HANGUL SYLLABLE GA",
        "",
        "HANGUL SYLLABLE GA" },
    {0xd7a3,
        "HANGUL SYLLABLE HIH",
        "",
        "HANGUL SYLLABLE HIH" },
    {0xd800,
        "",
        "",
        "<lead surrogate-D800>" },
    {0xdc00,
        "",
        "",
        "<trail surrogate-DC00>" },
    {0xff08,
        "FULLWIDTH LEFT PARENTHESIS",
        "FULLWIDTH OPENING PARENTHESIS",
        "FULLWIDTH LEFT PARENTHESIS" },
    {0xffe5,
        "FULLWIDTH YEN SIGN",
        "",
        "FULLWIDTH YEN SIGN" },
    {0xffff,
        "",
        "",
        "<noncharacter-FFFF>" },
    {0x23456,
        "CJK UNIFIED IDEOGRAPH-23456",
        "",
        "CJK UNIFIED IDEOGRAPH-23456" }
};
1464
1465static UBool
1466enumCharNamesFn(void *context,
1467 UChar32 code, UCharNameChoice nameChoice,
1468 const char *name, int32_t length) {
1469 int32_t *pCount=(int32_t *)context;
1470 int i;
1471
1472 if(length<=0 || length!=(int32_t)strlen(name)) {
1473 /* should not be called with an empty string or invalid length */
1474 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1475 return TRUE;
1476 }
1477
1478 ++*pCount;
1479 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1480 if(code==(UChar32)names[i].code) {
1481 switch (nameChoice) {
1482 case U_EXTENDED_CHAR_NAME:
1483 if(0!=strcmp(name, names[i].extName)) {
1484 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1485 }
1486 break;
1487 case U_UNICODE_CHAR_NAME:
1488 if(0!=strcmp(name, names[i].name)) {
1489 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1490 }
1491 break;
1492 case U_UNICODE_10_CHAR_NAME:
1493 if(names[i].oldName[0]==0 || 0!=strcmp(name, names[i].oldName)) {
1494 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, names[i].oldName);
1495 }
1496 break;
1497 case U_CHAR_NAME_CHOICE_COUNT:
1498 break;
1499 }
1500 break;
1501 }
1502 }
1503 return TRUE;
1504}
1505
1506struct enumExtCharNamesContext {
1507 uint32_t length;
1508 int32_t last;
1509};
1510
1511static UBool
1512enumExtCharNamesFn(void *context,
1513 UChar32 code, UCharNameChoice nameChoice,
1514 const char *name, int32_t length) {
1515 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1516
1517 if (ecncp->last != (int32_t) code - 1) {
1518 if (ecncp->last < 0) {
1519 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1520 } else {
1521 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1522 }
1523 }
1524 ecncp->last = (int32_t) code;
1525
1526 if (!*name) {
1527 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1528 }
1529
1530 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1531}
1532
1533/**
1534 * This can be made more efficient by moving it into putil.c and having
1535 * it directly access the ebcdic translation tables.
1536 * TODO: If we get this method in putil.c, then delete it from here.
1537 */
1538static UChar
1539u_charToUChar(char c) {
1540 UChar uc;
1541 u_charsToUChars(&c, &uc, 1);
1542 return uc;
1543}
1544
1545static void
1546TestCharNames() {
1547 static char name[80];
1548 UErrorCode errorCode=U_ZERO_ERROR;
1549 struct enumExtCharNamesContext extContext;
1550 int32_t length;
1551 UChar32 c;
1552 int32_t i;
1553
1554 log_verbose("Testing uprv_getMaxCharNameLength()\n");
1555 length=uprv_getMaxCharNameLength();
1556 if(length==0) {
1557 /* no names data available */
1558 return;
1559 }
1560 if(length<83) { /* Unicode 3.2 max char name length */
1561 log_err("uprv_getMaxCharNameLength()=%d is too short");
1562 }
1563 /* ### TODO same tests for max ISO comment length as for max name length */
1564
1565 log_verbose("Testing u_charName()\n");
1566 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1567 /* modern Unicode character name */
1568 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1569 if(U_FAILURE(errorCode)) {
1570 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1571 return;
1572 }
1573 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1574 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1575 }
1576
1577 /* find the modern name */
1578 if (*names[i].name) {
1579 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1580 if(U_FAILURE(errorCode)) {
1581 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1582 return;
1583 }
1584 if(c!=(UChar32)names[i].code) {
1585 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1586 }
1587 }
1588
1589 /* Unicode 1.0 character name */
1590 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1591 if(U_FAILURE(errorCode)) {
1592 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1593 return;
1594 }
1595 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1596 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1597 }
1598
1599 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1600 if(names[i].oldName[0]!=0 /* && length>0 */) {
1601 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1602 if(U_FAILURE(errorCode)) {
1603 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1604 return;
1605 }
1606 if(c!=(UChar32)names[i].code) {
1607 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1608 }
1609 }
1610 }
1611
1612 /* test u_enumCharNames() */
1613 length=0;
1614 errorCode=U_ZERO_ERROR;
1615 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1616 if(U_FAILURE(errorCode) || length<94140) {
1617 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1618 }
1619
1620 extContext.length = 0;
1621 extContext.last = -1;
1622 errorCode=U_ZERO_ERROR;
1623 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1624 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1625 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1626 }
1627
1628 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1629 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1630 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1631 }
1632
1633 /* Test getCharNameCharacters */
1634 if(!QUICK) {
1635 enum { BUFSIZE = 256 };
1636 UErrorCode ec = U_ZERO_ERROR;
1637 char buf[BUFSIZE];
1638 int32_t maxLength;
1639 UChar32 cp;
1640 UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1641 int32_t l1, l2;
1642 UBool map[256];
1643 UBool ok;
1644
1645 USet* set = uset_open(1, 0); /* empty set */
1646 USet* dumb = uset_open(1, 0); /* empty set */
1647
1648 /*
1649 * uprv_getCharNameCharacters() will likely return more lowercase
1650 * letters than actual character names contain because
1651 * it includes all the characters in lowercased names of
1652 * general categories, for the full possible set of extended names.
1653 */
374ca955
A
1654 {
1655 USetAdder sa={
1656 NULL,
1657 uset_add,
1658 uset_addRange,
1659 uset_addString
1660 };
1661 sa.set=set;
1662 uprv_getCharNameCharacters(&sa);
1663 }
b75a7d8f
A
1664
1665 /* build set the dumb (but sure-fire) way */
374ca955 1666 for (i=0; i<256; ++i) {
b75a7d8f 1667 map[i] = FALSE;
374ca955 1668 }
b75a7d8f
A
1669
1670 maxLength=0;
1671 for (cp=0; cp<0x110000; ++cp) {
1672 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1673 buf, BUFSIZE, &ec);
1674 if (U_FAILURE(ec)) {
1675 log_err("FAIL: u_charName failed when it shouldn't\n");
1676 uset_close(set);
1677 uset_close(dumb);
1678 return;
1679 }
1680 if(len>maxLength) {
1681 maxLength=len;
1682 }
1683
1684 for (i=0; i<len; ++i) {
1685 if (!map[(uint8_t) buf[i]]) {
1686 uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1687 map[(uint8_t) buf[i]] = TRUE;
1688 }
1689 }
374ca955
A
1690
1691 /* test for leading/trailing whitespace */
1692 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1693 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1694 }
1695 }
1696
1697 if(map[(uint8_t)'\t']) {
1698 log_err("u_charName() returned a name with a TAB for some code point\n", cp);
b75a7d8f
A
1699 }
1700
1701 length=uprv_getMaxCharNameLength();
1702 if(length!=maxLength) {
1703 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1704 length, maxLength);
1705 }
1706
1707 /* compare the sets. Where is my uset_equals?!! */
1708 ok=TRUE;
1709 for(i=0; i<256; ++i) {
1710 if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1711 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1712 /* ignore lowercase a-z that are in set but not in dumb */
1713 ok=TRUE;
1714 } else {
1715 ok=FALSE;
1716 break;
1717 }
1718 }
1719 }
1720
1721 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1722 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1723 if (U_FAILURE(ec)) {
1724 log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1725 uset_close(set);
1726 uset_close(dumb);
1727 return;
1728 }
1729
1730 if (l1 >= BUFSIZE) {
1731 l1 = BUFSIZE-1;
1732 pat[l1] = 0;
1733 }
1734 if (l2 >= BUFSIZE) {
1735 l2 = BUFSIZE-1;
1736 dumbPat[l2] = 0;
1737 }
1738
1739 if (!ok) {
b75a7d8f 1740 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
374ca955
A
1741 aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1742 } else if(VERBOSITY) {
1743 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
b75a7d8f
A
1744 }
1745
1746 uset_close(set);
1747 uset_close(dumb);
1748 }
1749
1750 /* ### TODO: test error cases and other interesting things */
1751}
1752
1753/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1754
1755static void
1756TestMirroring() {
1757 log_verbose("Testing u_isMirrored()\n");
1758 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1759 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1760 )
1761 ) {
1762 log_err("u_isMirrored() does not work correctly\n");
1763 }
1764
1765 log_verbose("Testing u_charMirror()\n");
1766 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1767 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab
1768 )
1769 ) {
1770 log_err("u_charMirror() does not work correctly\n");
1771 }
1772}
1773
1774
1775struct RunTestData
1776{
1777 const char *runText;
1778 UScriptCode runCode;
1779};
1780
1781typedef struct RunTestData RunTestData;
1782
1783static void
1784CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1785 const char *prefix)
1786{
1787 int32_t run, runStart, runLimit;
1788 UScriptCode runCode;
1789
1790 /* iterate over all the runs */
1791 run = 0;
1792 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
1793 if (runStart != runStarts[run]) {
1794 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
1795 prefix, run, runStarts[run], runStart);
1796 }
1797
1798 if (runLimit != runStarts[run + 1]) {
1799 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
1800 prefix, run, runStarts[run + 1], runLimit);
1801 }
1802
1803 if (runCode != testData[run].runCode) {
1804 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
1805 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
1806 }
1807
1808 run += 1;
1809
1810 /* stop when we've seen all the runs we expect to see */
1811 if (run >= nRuns) {
1812 break;
1813 }
1814 }
1815
1816 /* Complain if we didn't see then number of runs we expected */
1817 if (run != nRuns) {
1818 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
1819 }
1820}
1821
1822static void
1823TestUScriptRunAPI()
1824{
374ca955 1825 static const RunTestData testData1[] = {
b75a7d8f
A
1826 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
1827 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
1828 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
1829 {"English (", USCRIPT_LATIN},
1830 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
1831 {") ", USCRIPT_LATIN},
1832 {"\\u6F22\\u5B75", USCRIPT_HAN},
1833 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
1834 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
1835 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
1836 };
374ca955
A
1837
1838 static const RunTestData testData2[] = {
1839 {"((((((((((abc))))))))))", USCRIPT_LATIN}
1840 };
1841
1842 static const struct {
1843 const RunTestData *testData;
1844 int32_t nRuns;
1845 } testDataEntries[] = {
1846 {testData1, LENGTHOF(testData1)},
1847 {testData2, LENGTHOF(testData2)}
1848 };
1849
1850 static const int32_t nTestEntries = LENGTHOF(testDataEntries);
1851 int32_t testEntry;
1852
1853 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
1854 UChar testString[1024];
1855 int32_t runStarts[256];
1856 int32_t nTestRuns = testDataEntries[testEntry].nRuns;
1857 const RunTestData *testData = testDataEntries[testEntry].testData;
1858
1859 int32_t run, stringLimit;
1860 UScriptRun *scriptRun = NULL;
1861 UErrorCode err;
1862
1863 /*
1864 * Fill in the test string and the runStarts array.
1865 */
1866 stringLimit = 0;
1867 for (run = 0; run < nTestRuns; run += 1) {
1868 runStarts[run] = stringLimit;
1869 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
1870 /*stringLimit -= 1;*/
1871 }
1872
1873 /* The limit of the last run */
1874 runStarts[nTestRuns] = stringLimit;
1875
1876 /*
1877 * Make sure that calling uscript_OpenRun with a NULL text pointer
1878 * and a non-zero text length returns the correct error.
1879 */
1880 err = U_ZERO_ERROR;
1881 scriptRun = uscript_openRun(NULL, stringLimit, &err);
1882
1883 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1884 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1885 }
1886
1887 if (scriptRun != NULL) {
1888 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
1889 uscript_closeRun(scriptRun);
1890 }
1891
1892 /*
1893 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1894 * and a zero text length returns the correct error.
1895 */
1896 err = U_ZERO_ERROR;
1897 scriptRun = uscript_openRun(testString, 0, &err);
1898
1899 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1900 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1901 }
1902
1903 if (scriptRun != NULL) {
1904 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
1905 uscript_closeRun(scriptRun);
1906 }
1907
1908 /*
1909 * Make sure that calling uscript_openRun with a NULL text pointer
1910 * and a zero text length doesn't return an error.
1911 */
1912 err = U_ZERO_ERROR;
1913 scriptRun = uscript_openRun(NULL, 0, &err);
1914
1915 if (U_FAILURE(err)) {
1916 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
1917 }
1918
1919 /* Make sure that the empty iterator doesn't find any runs */
1920 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
1921 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
1922 }
1923
1924 /*
1925 * Make sure that calling uscript_setRunText with a NULL text pointer
1926 * and a non-zero text length returns the correct error.
1927 */
1928 err = U_ZERO_ERROR;
1929 uscript_setRunText(scriptRun, NULL, stringLimit, &err);
1930
1931 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1932 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1933 }
1934
1935 /*
1936 * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1937 * and a zero text length returns the correct error.
1938 */
1939 err = U_ZERO_ERROR;
1940 uscript_setRunText(scriptRun, testString, 0, &err);
1941
1942 if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1943 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1944 }
1945
1946 /*
1947 * Now call uscript_setRunText on the empty iterator
1948 * and make sure that it works.
1949 */
1950 err = U_ZERO_ERROR;
1951 uscript_setRunText(scriptRun, testString, stringLimit, &err);
1952
1953 if (U_FAILURE(err)) {
1954 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
1955 } else {
1956 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
1957 }
1958
b75a7d8f 1959 uscript_closeRun(scriptRun);
374ca955
A
1960
1961 /*
1962 * Now open an interator over the testString
1963 * using uscript_openRun and make sure that it works
1964 */
1965 scriptRun = uscript_openRun(testString, stringLimit, &err);
1966
1967 if (U_FAILURE(err)) {
1968 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
1969 } else {
1970 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
1971 }
1972
1973 /* Now reset the iterator, and make sure
1974 * that it still works.
1975 */
1976 uscript_resetRun(scriptRun);
1977
1978 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
1979
1980 /* Close the iterator */
b75a7d8f
A
1981 uscript_closeRun(scriptRun);
1982 }
b75a7d8f
A
1983}
1984
1985/* test additional, non-core properties */
1986static void
1987TestAdditionalProperties() {
1988 /* test data for u_charAge() */
1989 static const struct {
1990 UChar32 c;
1991 UVersionInfo version;
1992 } charAges[]={
1993 {0x41, { 1, 1, 0, 0 }},
1994 {0xffff, { 1, 1, 0, 0 }},
1995 {0x20ab, { 2, 0, 0, 0 }},
1996 {0x2fffe, { 2, 0, 0, 0 }},
1997 {0x20ac, { 2, 1, 0, 0 }},
1998 {0xfb1d, { 3, 0, 0, 0 }},
1999 {0x3f4, { 3, 1, 0, 0 }},
2000 {0x10300, { 3, 1, 0, 0 }},
2001 {0x220, { 3, 2, 0, 0 }},
2002 {0xff60, { 3, 2, 0, 0 }}
2003 };
2004
2005 /* test data for u_hasBinaryProperty() */
2006 static int32_t
2007 props[][3]={ /* code point, property, value */
2008 { 0x0627, UCHAR_ALPHABETIC, TRUE },
2009 { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2010 { 0x2028, UCHAR_ALPHABETIC, FALSE },
2011
2012 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2013 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2014
2015 { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2016 { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2017
2018 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2019 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2020
2021 { 0x058a, UCHAR_DASH, TRUE },
2022 { 0x007e, UCHAR_DASH, FALSE },
2023
2024 { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2025 { 0x3000, UCHAR_DIACRITIC, FALSE },
2026
2027 { 0x0e46, UCHAR_EXTENDER, TRUE },
2028 { 0x0020, UCHAR_EXTENDER, FALSE },
2029
2030#if !UCONFIG_NO_NORMALIZATION
2031 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2032 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2033 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
374ca955
A
2034
2035 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */
2036 { 0x0308, UCHAR_NFD_INERT, FALSE },
2037
2038 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */
2039 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */
2040
2041 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */
2042 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */
2043 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */
2044 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */
2045 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */
2046 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */
2047
2048 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */
2049 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */
2050
2051 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2052 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2053 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2054 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2055 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2056 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
b75a7d8f
A
2057#endif
2058
2059 { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2060 { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2061 { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2062
2063 { 0x30fb, UCHAR_HYPHEN, TRUE },
2064 { 0xfe58, UCHAR_HYPHEN, FALSE },
2065
2066 { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2067 { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2068 { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2069
2070 { 0x2172, UCHAR_ID_START, TRUE },
2071 { 0x007a, UCHAR_ID_START, TRUE },
2072 { 0x0039, UCHAR_ID_START, FALSE },
2073
2074 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2075 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2076 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2077
2078 { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2079 { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2080
2081 { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2082 { 0x0345, UCHAR_LOWERCASE, TRUE },
2083 { 0x0030, UCHAR_LOWERCASE, FALSE },
2084
2085 { 0x1d7a9, UCHAR_MATH, TRUE },
2086 { 0x2135, UCHAR_MATH, TRUE },
2087 { 0x0062, UCHAR_MATH, FALSE },
2088
2089 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2090 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2091 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2092
2093 { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2094 { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2095 { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2096
2097 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2098 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2099
2100 { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2101 { 0x2162, UCHAR_UPPERCASE, TRUE },
2102 { 0x0345, UCHAR_UPPERCASE, FALSE },
2103
2104 { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2105 { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2106 { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2107
2108 { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2109 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2110 { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2111
2112 { 0x16ee, UCHAR_XID_START, TRUE },
2113 { 0x23456, UCHAR_XID_START, TRUE },
2114 { 0x1d1aa, UCHAR_XID_START, FALSE },
2115
2116 /*
2117 * Version break:
2118 * The following properties are only supported starting with the
2119 * Unicode version indicated in the second field.
2120 */
374ca955 2121 { -1, 0x320, 0 },
b75a7d8f
A
2122
2123 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2124 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2125 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2126
2127 { 0x0341, UCHAR_DEPRECATED, TRUE },
2128 { 0xe0041, UCHAR_DEPRECATED, FALSE },
2129
2130 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2131 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2132 { 0xff9f, UCHAR_GRAPHEME_BASE, TRUE }, /* changed from Unicode 3.2 to 4 */
2133
2134 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2135 { 0xff9f, UCHAR_GRAPHEME_EXTEND, FALSE }, /* changed from Unicode 3.2 to 4 */
2136 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2137
2138 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2139 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2140
2141 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2142 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2143
2144 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2145 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2146
2147 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2148 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2149
2150 { 0x2e9b, UCHAR_RADICAL, TRUE },
2151 { 0x4e00, UCHAR_RADICAL, FALSE },
2152
2153 { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2154 { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2155
2156 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2157 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2158
374ca955
A
2159 { -1, 0x401, 0 },
2160
2161 { 0x002e, UCHAR_S_TERM, TRUE },
2162 { 0x0061, UCHAR_S_TERM, FALSE },
2163
2164 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2165 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2166 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2167 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2168
b75a7d8f
A
2169 /* enum/integer type properties */
2170
2171 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2172 /* test default Bidi classes for unassigned code points */
2173 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2174 { 0x05a2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2175 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2176 { 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2177 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2178 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2179 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2180 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2181 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2182 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2183
2184 { 0x0606, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2185 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2186 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2187 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2188 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2189 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2190 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2191 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2192
2193 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2194 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2195 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2196 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
374ca955 2197 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
b75a7d8f
A
2198 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2199 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2200 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
374ca955 2201 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
b75a7d8f 2202 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
374ca955 2203 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
b75a7d8f
A
2204
2205 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2206 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2207
2208 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2209 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2210 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2211 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2212 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2213 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2214 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2215 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2216 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2217
2218 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2219 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2220 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2221 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2222 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2223 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2224 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2225 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2226 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2227 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2228 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2229 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2230 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2231 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2232 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2233 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2234 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2235
2236 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2237 { 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 },
2238
2239 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2240 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2241 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2242 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2243 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2244 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
2245
2246 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2247 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2248 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2249 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2250 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2251 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2252 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2253 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2254
2255 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2256 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2257 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2258 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2259 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2260 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2261 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2262 { 0xac03, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2263 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2264 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2265 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2266 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2267 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2268 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2269 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2270 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2271 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2272
2273 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2274
2275 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2276
2277 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2278 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2279 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2280 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2281
2282 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2283 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2284 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2285 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2286
2287 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2288 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2289 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2290 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2291
2292 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2293 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2294 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2295 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2296 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2297 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2298
2299 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2300 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2301 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2302 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2303
2304 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2305 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2306 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2307 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2308 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2309
2310 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2311
2312 /* undefined UProperty values */
2313 { 0x61, 0x4a7, 0 },
2314 { 0x234bc, 0x15ed, 0 }
2315 };
2316
2317 UVersionInfo version;
2318 UChar32 c;
2319 int32_t i, result, uVersion;
2320 UProperty which;
2321
2322 /* what is our Unicode version? */
2323 u_getUnicodeVersion(version);
374ca955 2324 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
b75a7d8f
A
2325
2326 u_charAge(0x20, version);
2327 if(version[0]==0) {
2328 /* no additional properties available */
2329 log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2330 return;
2331 }
2332
2333 /* test u_charAge() */
2334 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2335 u_charAge(charAges[i].c, version);
2336 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2337 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2338 charAges[i].c,
2339 version[0], version[1], version[2], version[3],
2340 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2341 }
2342 }
2343
2344 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2345 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2346 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */
2347 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2348 u_getIntPropertyMinValue(0x2345)!=0
2349 ) {
2350 log_err("error: u_getIntPropertyMinValue() wrong\n");
2351 }
2352
2353 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1 ||
2354 u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1 ||
2355 u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1)!=1 ||
2356 u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ||
2357 u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ||
2358 u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1 ||
2359 u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1 ||
2360 u_getIntPropertyMaxValue(0x2345)!=-1 /*JB#2410*/ ||
2361 u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1) ||
2362 u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1) ||
2363 u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1) ||
2364 u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)
2365 ) {
2366 log_err("error: u_getIntPropertyMaxValue() wrong\n");
2367 }
2368
2369 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2370 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2371 if(props[i][0]<0) {
2372 /* Unicode version break */
2373 if(uVersion<props[i][1]) {
2374 break; /* do not test properties that are not yet supported */
2375 } else {
2376 continue; /* skip this row */
2377 }
2378 }
2379
2380 c=(UChar32)props[i][0];
2381 which=(UProperty)props[i][1];
2382
2383 if(which<UCHAR_INT_START) {
2384 result=u_hasBinaryProperty(c, which);
2385 if(result!=props[i][2]) {
2386 log_err("error: u_hasBinaryProperty(U+%04lx, %d)=%d is wrong (props[%d])\n",
2387 c, which, result, i);
2388 }
2389 }
2390
2391 result=u_getIntPropertyValue(c, which);
2392 if(result!=props[i][2]) {
2393 log_err("error: u_getIntPropertyValue(U+%04lx, 0x1000+%d)=%d is wrong, should be %d (props[%d])\n",
2394 c, (int32_t)which-0x1000, result, props[i][2], i);
2395 }
2396
2397 /* test separate functions, too */
2398 switch((UProperty)props[i][1]) {
2399 case UCHAR_ALPHABETIC:
2400 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2401 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2402 props[i][0], result, i);
2403 }
2404 break;
2405 case UCHAR_LOWERCASE:
2406 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2407 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2408 props[i][0], result, i);
2409 }
2410 break;
2411 case UCHAR_UPPERCASE:
2412 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2413 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2414 props[i][0], result, i);
2415 }
2416 break;
2417 case UCHAR_WHITE_SPACE:
2418 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2419 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2420 props[i][0], result, i);
2421 }
2422 break;
2423 default:
2424 break;
2425 }
2426 }
2427}
2428
2429static void
2430TestNumericProperties(void) {
2431 /* see UnicodeData.txt, DerivedNumericValues.txt */
2432 static const struct {
2433 UChar32 c;
2434 int32_t type;
2435 double numValue;
2436 } values[]={
2437 { 0x0F33, U_NT_NUMERIC, -1./2. },
2438 { 0x0C66, U_NT_DECIMAL, 0 },
2439 { 0x96f6, U_NT_NUMERIC, 0 },
2440 { 0x2159, U_NT_NUMERIC, 1./6. },
2441 { 0x00BD, U_NT_NUMERIC, 1./2. },
2442 { 0x0031, U_NT_DECIMAL, 1. },
2443 { 0x4e00, U_NT_NUMERIC, 1. },
2444 { 0x58f1, U_NT_NUMERIC, 1. },
2445 { 0x10320, U_NT_NUMERIC, 1. },
2446 { 0x0F2B, U_NT_NUMERIC, 3./2. },
2447 { 0x00B2, U_NT_DIGIT, 2. },
2448 { 0x5f10, U_NT_NUMERIC, 2. },
2449 { 0x1813, U_NT_DECIMAL, 3. },
2450 { 0x5f0e, U_NT_NUMERIC, 3. },
2451 { 0x2173, U_NT_NUMERIC, 4. },
2452 { 0x8086, U_NT_NUMERIC, 4. },
2453 { 0x278E, U_NT_DIGIT, 5. },
2454 { 0x1D7F2, U_NT_DECIMAL, 6. },
2455 { 0x247A, U_NT_DIGIT, 7. },
2456 { 0x7396, U_NT_NUMERIC, 9. },
2457 { 0x1372, U_NT_NUMERIC, 10. },
2458 { 0x216B, U_NT_NUMERIC, 12. },
2459 { 0x16EE, U_NT_NUMERIC, 17. },
2460 { 0x249A, U_NT_NUMERIC, 19. },
2461 { 0x303A, U_NT_NUMERIC, 30. },
2462 { 0x5345, U_NT_NUMERIC, 30. },
2463 { 0x32B2, U_NT_NUMERIC, 37. },
2464 { 0x1375, U_NT_NUMERIC, 40. },
2465 { 0x10323, U_NT_NUMERIC, 50. },
2466 { 0x0BF1, U_NT_NUMERIC, 100. },
2467 { 0x964c, U_NT_NUMERIC, 100. },
2468 { 0x217E, U_NT_NUMERIC, 500. },
2469 { 0x2180, U_NT_NUMERIC, 1000. },
2470 { 0x4edf, U_NT_NUMERIC, 1000. },
2471 { 0x2181, U_NT_NUMERIC, 5000. },
2472 { 0x137C, U_NT_NUMERIC, 10000. },
2473 { 0x4e07, U_NT_NUMERIC, 10000. },
2474 { 0x4ebf, U_NT_NUMERIC, 100000000. },
2475 { 0x5146, U_NT_NUMERIC, 1000000000000. },
2476 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2477 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2478 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2479 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2480 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2481 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }
2482 };
2483
2484 double nv;
2485 UChar32 c;
2486 int32_t i, type;
2487
2488 for(i=0; i<LENGTHOF(values); ++i) {
2489 c=values[i].c;
2490 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2491 nv=u_getNumericValue(c);
2492
2493 if(type!=values[i].type) {
2494 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2495 }
2496 if(0.000001 <= fabs(nv - values[i].numValue)) {
2497 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2498 }
2499 }
2500}
2501
2502/**
2503 * Test the property names and property value names API.
2504 */
2505static void
2506TestPropertyNames(void) {
2507 int32_t p, v, choice=0, rev;
2508 UBool atLeastSomething = FALSE;
2509
2510 for (p=0; ; ++p) {
2511 UBool sawProp = FALSE;
2512 if(p > 10 && !atLeastSomething) {
2513 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2514 return;
2515 }
2516
2517 for (choice=0; ; ++choice) {
2518 const char* name = u_getPropertyName(p, choice);
2519 if (name) {
2520 if (!sawProp) log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2521 log_verbose("%d=\"%s\"", choice, name);
2522 sawProp = TRUE;
2523 atLeastSomething = TRUE;
2524
2525 /* test reverse mapping */
2526 rev = u_getPropertyEnum(name);
2527 if (rev != p) {
2528 log_err("Property round-trip failure: %d -> %s -> %d\n",
2529 p, name, rev);
2530 }
2531 }
2532 if (!name && choice>0) break;
2533 }
2534 if (sawProp) {
2535 /* looks like a valid property; check the values */
2536 const char* pname = u_getPropertyName(p, U_LONG_PROPERTY_NAME);
2537 int32_t max = 0;
2538 if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2539 max = 255;
2540 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2541 /* it's far too slow to iterate all the way up to
2542 the real max, U_GC_P_MASK */
2543 max = U_GC_NL_MASK;
2544 } else if (p == UCHAR_BLOCK) {
2545 /* UBlockCodes, unlike other values, start at 1 */
2546 max = 1;
2547 }
2548 log_verbose("\n");
2549 for (v=-1; ; ++v) {
2550 UBool sawValue = FALSE;
2551 for (choice=0; ; ++choice) {
2552 const char* vname = u_getPropertyValueName(p, v, choice);
2553 if (vname) {
2554 if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2555 log_verbose("%d=\"%s\"", choice, vname);
2556 sawValue = TRUE;
2557
2558 /* test reverse mapping */
2559 rev = u_getPropertyValueEnum(p, vname);
2560 if (rev != v) {
2561 log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2562 pname, v, vname, rev);
2563 }
2564 }
2565 if (!vname && choice>0) break;
2566 }
2567 if (sawValue) {
2568 log_verbose("\n");
2569 }
2570 if (!sawValue && v>=max) break;
2571 }
2572 }
2573 if (!sawProp) {
2574 if (p>=UCHAR_STRING_LIMIT) {
2575 break;
2576 } else if (p>=UCHAR_DOUBLE_LIMIT) {
2577 p = UCHAR_STRING_START - 1;
2578 } else if (p>=UCHAR_MASK_LIMIT) {
2579 p = UCHAR_DOUBLE_START - 1;
2580 } else if (p>=UCHAR_INT_LIMIT) {
2581 p = UCHAR_MASK_START - 1;
2582 } else if (p>=UCHAR_BINARY_LIMIT) {
2583 p = UCHAR_INT_START - 1;
2584 }
2585 }
2586 }
2587}
2588
2589/**
2590 * Test the property values API. See JB#2410.
2591 */
2592static void
2593TestPropertyValues(void) {
2594 int32_t i, p, min, max;
2595 UErrorCode ec;
2596
2597 /* Min should be 0 for everything. */
2598 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2599 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2600 min = u_getIntPropertyMinValue(p);
2601 if (min != 0) {
2602 if (p == UCHAR_BLOCK) {
2603 /* This is okay...for now. See JB#2487.
2604 TODO Update this for JB#2487. */
2605 } else {
2606 const char* name;
2607 name = u_getPropertyName(p, U_LONG_PROPERTY_NAME);
2608 if (name == NULL) name = "<ERROR>";
2609 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2610 name, min);
2611 }
2612 }
2613 }
2614
2615 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2616 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2617 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2618 }
2619
2620 /* Max should be -1 for invalid properties. */
2621 max = u_getIntPropertyMaxValue(-1);
2622 if (max != -1) {
2623 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2624 max);
2625 }
2626
2627 /* Script should return 0 for an invalid code point. */
2628 for (i=0; i<2; ++i) {
2629 int32_t script;
2630 const char* desc;
2631 ec = U_ZERO_ERROR;
2632 switch (i) {
2633 case 0:
2634 script = uscript_getScript(-1, &ec);
2635 desc = "uscript_getScript(-1)";
2636 break;
2637 case 1:
2638 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2639 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
2640 break;
2641 default:
2642 log_err("Internal test error. Too many scripts\n");
2643 return;
2644 }
2645 /* We don't explicitly test ec. It should be U_FAILURE but it
2646 isn't documented as such. */
2647 if (script != 0) {
2648 log_err("FAIL: %s = %d, exp. 0\n",
2649 desc, script);
2650 }
2651 }
2652}
2653
2654/* add characters from a serialized set to a normal one */
2655static void
2656_setAddSerialized(USet *set, const USerializedSet *sset) {
2657 UChar32 start, end;
2658 int32_t i, count;
2659
2660 count=uset_getSerializedRangeCount(sset);
2661 for(i=0; i<count; ++i) {
2662 uset_getSerializedRange(sset, i, &start, &end);
2663 uset_addRange(set, start, end);
2664 }
2665}
2666
2667/* various tests for consistency of UCD data and API behavior */
2668static void
2669TestConsistency() {
2670#if !UCONFIG_NO_NORMALIZATION
2671 UChar buffer16[300];
2672#endif
2673 char buffer[300];
2674 USet *set1, *set2, *set3, *set4;
2675 UErrorCode errorCode;
2676
2677#if !UCONFIG_NO_NORMALIZATION
2678 USerializedSet sset;
2679#endif
2680 UChar32 start, end;
2681 int32_t i, length;
2682
2683 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
2684 U_STRING_DECL(dashPattern, "[:Dash:]", 8);
2685 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
2686 U_STRING_DECL(formatPattern, "[:Cf:]", 6);
2687 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
2688
2689 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
2690 U_STRING_INIT(dashPattern, "[:Dash:]", 8);
2691 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
2692 U_STRING_INIT(formatPattern, "[:Cf:]", 6);
2693 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
2694
2695 /*
2696 * It used to be that UCD.html and its precursors said
2697 * "Those dashes used to mark connections between pieces of words,
2698 * plus the Katakana middle dot."
2699 *
2700 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2701 * but not from Hyphen.
2702 * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
2703 * Therefore, do not show errors when testing the Hyphen property.
2704 */
2705 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2706 "known to the UTC and not considered errors.\n");
2707
2708 errorCode=U_ZERO_ERROR;
2709 set1=uset_openPattern(hyphenPattern, 10, &errorCode);
2710 set2=uset_openPattern(dashPattern, 8, &errorCode);
2711 if(U_SUCCESS(errorCode)) {
2712 /* remove the Katakana middle dot(s) from set1 */
2713 uset_remove(set1, 0x30fb);
2714 uset_remove(set1, 0xff65); /* halfwidth variant */
2715 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
2716 } else {
2717 log_err("error opening [:Hyphen:] or [:Dash:] - %s\n", u_errorName(errorCode));
2718 }
2719
2720 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2721 set3=uset_openPattern(formatPattern, 6, &errorCode);
2722 set4=uset_openPattern(alphaPattern, 14, &errorCode);
2723 if(U_SUCCESS(errorCode)) {
2724 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
2725 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
2726 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
2727 } else {
2728 log_err("error opening [:Cf:] or [:Alpbabetic:] - %s\n", u_errorName(errorCode));
2729 }
2730
2731 uset_close(set1);
2732 uset_close(set2);
2733 uset_close(set3);
2734 uset_close(set4);
2735
2736 /*
2737 * Check that each lowercase character has "small" in its name
2738 * and not "capital".
2739 * There are some such characters, some of which seem odd.
2740 * Use the verbose flag to see these notices.
2741 */
2742 errorCode=U_ZERO_ERROR;
2743 set1=uset_openPattern(lowerPattern, 13, &errorCode);
2744 if(U_SUCCESS(errorCode)) {
2745 for(i=0;; ++i) {
2746 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2747 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2748 break; /* done */
2749 }
2750 if(U_FAILURE(errorCode)) {
2751 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
2752 i, u_errorName(errorCode));
2753 break;
2754 }
2755 if(length!=0) {
2756 break; /* done with code points, got a string or -1 */
2757 }
2758
2759 while(start<=end) {
2760 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
2761 if(U_FAILURE(errorCode)) {
2762 log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
2763 errorCode=U_ZERO_ERROR;
2764 continue;
2765 }
2766 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
2767 strstr(buffer, "SMALL CAPITAL")==NULL
2768 ) {
2769 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
2770 }
2771 ++start;
2772 }
2773 }
2774 } else {
2775 log_err("error opening [:Lowercase:] - %s\n", u_errorName(errorCode));
2776 }
2777 uset_close(set1);
2778
2779#if !UCONFIG_NO_NORMALIZATION
2780
2781 /*
2782 * Test for an example that unorm_getCanonStartSet() delivers
2783 * all characters that compose from the input one,
2784 * even in multiple steps.
2785 * For example, the set for "I" (0049) should contain both
2786 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
2787 * In general, the set for the middle such character should be a subset
2788 * of the set for the first.
2789 */
2790 set1=uset_open(1, 0);
2791 set2=uset_open(1, 0);
2792
374ca955
A
2793 if (unorm_getCanonStartSet(0x49, &sset)) {
2794 _setAddSerialized(set1, &sset);
b75a7d8f 2795
374ca955
A
2796 /* enumerate all characters that are plausible to be latin letters */
2797 for(start=0xa0; start<0x2000; ++start) {
2798 if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) {
2799 uset_add(set2, start);
2800 }
b75a7d8f 2801 }
374ca955
A
2802
2803 compareUSets(set1, set2,
2804 "[canon start set of 0049]", "[all c with canon decomp with 0049]",
2805 TRUE);
2806 } else {
2807 log_err("error calling unorm_getCanonStartSet()\n");
b75a7d8f
A
2808 }
2809
b75a7d8f
A
2810 uset_close(set1);
2811 uset_close(set2);
2812
2813#endif
2814}
374ca955
A
2815
2816/* API coverage for ucase.c */
2817static void TestUCase() {
2818 UDataMemory *pData;
2819 UCaseProps *csp;
2820 UErrorCode errorCode;
2821
2822 /* coverage for ucase_openBinary() */
2823 errorCode=U_ZERO_ERROR;
2824 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
2825 if(U_FAILURE(errorCode)) {
2826 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
2827 u_errorName(errorCode));
2828 return;
2829 }
2830
2831 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
2832 if(U_FAILURE(errorCode)) {
2833 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
2834 u_errorName(errorCode));
2835 udata_close(pData);
2836 return;
2837 }
2838
2839 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
2840 log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
2841 }
2842
2843 ucase_close(csp);
2844 udata_close(pData);
2845}