1 /********************************************************************
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
21 #if UCONFIG_NO_NORMALIZATION
23 void addNormTest(TestNode
** root
) {
24 /* no normalization - nothing to do */
31 #include "unicode/uchar.h"
32 #include "unicode/ustring.h"
33 #include "unicode/unorm.h"
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole replacement list is parenthesized so that the cast cannot be
 * separated from its operand by a neighbouring operator, e.g.
 * `sizeof LENGTHOF(a)`.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
42 TestNormCoverage(void);
45 TestConcatenate(void);
48 TestNextPrevious(void);
50 static void TestIsNormalized(void);
53 TestFCNFKCClosure(void);
56 TestQuickCheckPerCP(void);
59 TestComposition(void);
65 TestGetDecomposition(void);
/*
 * Canonical normalization test vectors.
 * Each row is { input, expected decomposed form, expected composed form }.
 * The strings are handed to CharsToUChars() before use; TestNormCases() is
 * run over this table in UNORM_NFD and UNORM_NFC modes and compares against
 * column 1 or column 2 respectively.
 */
67 static const char* const canonTests
[][3] = {
68 /* Input*/ /*Decomposed*/ /*Composed*/
69 { "cat", "cat", "cat" },
70 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
72 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
73 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
75 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
76 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
77 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */
79 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/
80 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
82 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
83 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
84 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
86 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
87 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
89 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
90 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
92 { "Henry IV", "Henry IV", "Henry IV" },
93 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
95 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
96 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
97 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
98 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
99 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
100 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + U+0300 + U+0316: the two marks swap when decomposed; composed is U+00C0 + U+0316 */
/*
 * Compatibility normalization test vectors.
 * Each row is { input, expected decomposed form, expected composed form }.
 * TestCompatDecomp() and TestCompatDecompCompose() run this table through
 * TestNormCases() in UNORM_NFKD and UNORM_NFKC modes.
 */
104 static const char* const compatTests
[][3] = {
105 /* Input*/ /*Decomposed */ /*Composed*/
106 { "cat", "cat", "cat" },
108 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
110 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
111 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
113 { "Henry IV", "Henry IV", "Henry IV" },
114 { "Henry \\u2163", "Henry IV", "Henry IV" },
116 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
117 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
119 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
121 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
122 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
123 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/
/*
 * FCD test vectors: { input, expected UNORM_FCD output, NULL }.
 * The third column is never read for this table: TestNormCases() compares
 * against column 2 only for UNORM_NFC/UNORM_NFKC and against column 1 for
 * every other mode.
 */
127 static const char* const fcdTests
[][3] = {
128 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
129 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL
}, /* D-caron + cedilla */
130 { "\\u010e", "\\u010e", NULL
} /* D-caron */
133 void addNormTest(TestNode
** root
);
/*
 * Registers the normalization tests of this file on the test tree.
 * Each test function is added through addTest() under the path
 * "tsnorm/cnormtst/<function name>".
 *
 * root: test tree node pointer that is passed unchanged to every addTest() call.
 */
135 void addNormTest(TestNode
** root
)
137 addTest(root
, &TestAPI
, "tsnorm/cnormtst/TestAPI");
138 addTest(root
, &TestDecomp
, "tsnorm/cnormtst/TestDecomp");
139 addTest(root
, &TestCompatDecomp
, "tsnorm/cnormtst/TestCompatDecomp");
140 addTest(root
, &TestCanonDecompCompose
, "tsnorm/cnormtst/TestCanonDecompCompose");
141 addTest(root
, &TestCompatDecompCompose
, "tsnorm/cnormtst/TestCompatDecompCompose");
142 addTest(root
, &TestFCD
, "tsnorm/cnormtst/TestFCD");
143 addTest(root
, &TestNull
, "tsnorm/cnormtst/TestNull");
144 addTest(root
, &TestQuickCheck
, "tsnorm/cnormtst/TestQuickCheck");
145 addTest(root
, &TestQuickCheckPerCP
, "tsnorm/cnormtst/TestQuickCheckPerCP");
146 addTest(root
, &TestIsNormalized
, "tsnorm/cnormtst/TestIsNormalized");
147 addTest(root
, &TestCheckFCD
, "tsnorm/cnormtst/TestCheckFCD");
148 addTest(root
, &TestNormCoverage
, "tsnorm/cnormtst/TestNormCoverage");
149 addTest(root
, &TestConcatenate
, "tsnorm/cnormtst/TestConcatenate");
150 addTest(root
, &TestNextPrevious
, "tsnorm/cnormtst/TestNextPrevious");
151 addTest(root
, &TestFCNFKCClosure
, "tsnorm/cnormtst/TestFCNFKCClosure");
152 addTest(root
, &TestComposition
, "tsnorm/cnormtst/TestComposition");
153 addTest(root
, &TestGetDecomposition
, "tsnorm/cnormtst/TestGetDecomposition");
156 static const char* const modeStrings
[]={
166 static void TestNormCases(UNormalizationMode mode
,
167 const char* const cases
[][3], int32_t lengthOfCases
) {
168 int32_t x
, neededLen
, length2
;
169 int32_t expIndex
= (mode
==UNORM_NFC
|| mode
==UNORM_NFKC
) ? 2 : 1;
172 log_verbose("Testing unorm_normalize(%s)\n", modeStrings
[mode
]);
173 for(x
=0; x
< lengthOfCases
; x
++)
175 UErrorCode status
= U_ZERO_ERROR
, status2
= U_ZERO_ERROR
;
176 source
=CharsToUChars(cases
[x
][0]);
177 neededLen
= unorm_normalize(source
, u_strlen(source
), mode
, 0, NULL
, 0, &status
);
178 length2
= unorm_normalize(source
, -1, mode
, 0, NULL
, 0, &status2
);
179 if(neededLen
!=length2
) {
180 log_err("ERROR in unorm_normalize(%s)[%d]: "
181 "preflight length/NUL %d!=%d preflight length/srcLength\n",
182 modeStrings
[mode
], (int)x
, (int)neededLen
, (int)length2
);
184 if(status
==U_BUFFER_OVERFLOW_ERROR
)
188 length2
=unorm_normalize(source
, u_strlen(source
), mode
, 0, result
, LENGTHOF(result
), &status
);
189 if(U_FAILURE(status
) || neededLen
!=length2
) {
190 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
191 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
193 assertEqual(result
, cases
[x
][expIndex
], x
);
195 length2
=unorm_normalize(source
, -1, mode
, 0, result
, LENGTHOF(result
), &status
);
196 if(U_FAILURE(status
) || neededLen
!=length2
) {
197 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
198 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
200 assertEqual(result
, cases
[x
][expIndex
], x
);
207 TestNormCases(UNORM_NFD
, canonTests
, LENGTHOF(canonTests
));
/* Runs every compatTests row through TestNormCases() in UNORM_NFKD mode. */
210 void TestCompatDecomp() {
211 TestNormCases(UNORM_NFKD
, compatTests
, LENGTHOF(compatTests
));
/* Runs every canonTests row through TestNormCases() in UNORM_NFC mode. */
214 void TestCanonDecompCompose() {
215 TestNormCases(UNORM_NFC
, canonTests
, LENGTHOF(canonTests
));
/* Runs every compatTests row through TestNormCases() in UNORM_NFKC mode. */
218 void TestCompatDecompCompose() {
219 TestNormCases(UNORM_NFKC
, compatTests
, LENGTHOF(compatTests
));
223 TestNormCases(UNORM_FCD
, fcdTests
, LENGTHOF(fcdTests
));
226 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
228 UChar
*expectedUni
= CharsToUChars(expected
);
229 if(u_strcmp(result
, expectedUni
)!=0){
230 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
236 static void TestNull_check(UChar
*src
, int32_t srcLen
,
237 UChar
*exp
, int32_t expLen
,
238 UNormalizationMode mode
,
241 UErrorCode status
= U_ZERO_ERROR
;
247 status
= U_ZERO_ERROR
;
254 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
256 if(U_FAILURE(status
)) {
257 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name
, u_errorName(status
));
258 } else if (len
!= expLen
) {
259 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
264 if(exp
[i
] != result
[i
]) {
265 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
272 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
276 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
282 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
283 int32_t source_comp_len
= 4;
284 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
285 int32_t expect_comp_len
= 3;
287 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
288 int32_t source_dcmp_len
= 3;
289 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
290 int32_t expect_dcmp_len
= 5;
292 TestNull_check(source_comp
,
299 TestNull_check(source_dcmp
,
306 TestNull_check(source_comp
,
316 static void TestQuickCheckResultNO()
318 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
319 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
320 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
321 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
322 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
323 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
324 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
325 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
331 UErrorCode error
= U_ZERO_ERROR
;
333 for (; count
< SIZE
; count
++)
335 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
338 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
341 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
344 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
347 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
350 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
353 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
356 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
363 static void TestQuickCheckResultYES()
365 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
366 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
367 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
368 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
369 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
370 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
371 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
372 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
376 UErrorCode error
= U_ZERO_ERROR
;
381 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
383 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp
);
386 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
389 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
392 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
394 log_err("ERROR in NFKD quick check at U+%04x\n", cp
);
397 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
400 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
406 for (; count
< SIZE
; count
++)
408 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
411 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
414 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
417 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
420 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
423 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
426 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
429 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
435 static void TestQuickCheckResultMAYBE()
437 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
438 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
439 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
440 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
446 UErrorCode error
= U_ZERO_ERROR
;
448 /* NFD and NFKD do not have any MAYBE code points */
449 for (; count
< SIZE
; count
++)
451 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
454 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC
[count
]);
457 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
460 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
466 static void TestQuickCheckStringResult()
471 UErrorCode error
= U_ZERO_ERROR
;
473 for (count
= 0; count
< LENGTHOF(canonTests
); count
++)
475 d
= CharsToUChars(canonTests
[count
][1]);
476 c
= CharsToUChars(canonTests
[count
][2]);
477 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
480 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count
);
484 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
487 log_err("ERROR in NFC quick check for string at count %d\n", count
);
495 for (count
= 0; count
< LENGTHOF(compatTests
); count
++)
497 d
= CharsToUChars(compatTests
[count
][1]);
498 c
= CharsToUChars(compatTests
[count
][2]);
499 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
502 log_err("ERROR in NFKD quick check for string at count %d\n", count
);
506 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
509 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
/*
 * Entry point for the quick-check tests: calls the NO, YES, MAYBE and
 * string-result helpers, in that order.
 */
518 void TestQuickCheck()
520 TestQuickCheckResultNO();
521 TestQuickCheckResultYES();
522 TestQuickCheckResultMAYBE();
523 TestQuickCheckStringResult();
527 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
528 * normalized, and some that are not.
529 * Here we pick some specific cases and test the C API.
531 static void TestIsNormalized(void) {
532 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
533 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
534 { 0xfb1d, 0 }, /* excluded from composition */
535 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
536 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
538 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
539 { 0x1100, 0x1161, 0 }, /* Jamo compose */
540 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
541 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
545 UErrorCode errorCode
;
549 /* normal case with length>=0 (length -1 used for special cases below) */
550 errorCode
=U_ZERO_ERROR
;
551 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
552 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
555 /* incoming U_FAILURE */
556 errorCode
=U_TRUNCATED_CHAR_FOUND
;
557 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
558 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
559 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
563 errorCode
=U_ZERO_ERROR
;
564 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
565 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
566 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
570 errorCode
=U_ZERO_ERROR
;
571 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
572 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
573 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
577 for(i
=0; i
<LENGTHOF(notNFC
); ++i
) {
578 errorCode
=U_ZERO_ERROR
;
579 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
580 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
582 errorCode
=U_ZERO_ERROR
;
583 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
584 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
587 for(i
=0; i
<LENGTHOF(notNFKC
); ++i
) {
588 errorCode
=U_ZERO_ERROR
;
589 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
590 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
597 UErrorCode status
= U_ZERO_ERROR
;
598 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
600 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
601 0x02B9, 0x0314, 0x0315, 0x0316};
602 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
603 0x0050, 0x0730, 0x09EE, 0x1E10};
605 static const UChar datastr
[][5] =
606 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
607 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
608 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
609 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
610 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
612 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
614 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
616 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
617 0x0307, 0x0308, 0x0309, 0x030a,
618 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
619 0x0327, 0x0328, 0x0329, 0x032a,
620 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
621 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
625 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
626 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
627 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
628 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
629 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
630 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
632 if (U_FAILURE(status
))
633 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status
));
637 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
638 if (U_FAILURE(status
)) {
639 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count
);
643 if (result
[count
] != fcdresult
) {
644 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
651 /* random checks of long strings */
652 status
= U_ZERO_ERROR
;
653 srand((unsigned)time( NULL
));
655 for (count
= 0; count
< 50; count
++)
658 UBool testresult
= UNORM_YES
;
666 data
[size
] = datachar
[(rand() * 50) / RAND_MAX
];
667 log_verbose("0x%x", data
[size
]);
668 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
669 norm
+ normsize
, 100 - normsize
, &status
);
670 if (U_FAILURE(status
)) {
671 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
678 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
680 if (U_FAILURE(status
)) {
681 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
684 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
685 testresult
= UNORM_NO
;
687 if (testresult
== UNORM_YES
) {
688 log_verbose("result UNORM_YES\n");
691 log_verbose("result UNORM_NO\n");
694 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
695 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult
);
702 static const UChar in
[]={ 0x68, 0xe4 };
703 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
704 UErrorCode errorCode
;
707 /* try preflighting */
708 errorCode
=U_ZERO_ERROR
;
709 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
710 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
711 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
715 errorCode
=U_ZERO_ERROR
;
716 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
717 if(U_FAILURE(errorCode
)) {
718 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
721 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
722 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
725 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, NULL
, 0, &errorCode
);
726 if(U_FAILURE(errorCode
)) {
727 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
730 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, out
, 20, &errorCode
);
731 if(U_FAILURE(errorCode
)) {
732 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
737 /* test cases to improve test code coverage */
739 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
740 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
741 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
743 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
744 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
745 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
747 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
748 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
750 MUSICAL_VOID_NOTEHEAD
=0x1d157,
751 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
752 MUSICAL_STEM
=0x1d165, /* cc=216 */
753 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
758 UChar input
[1000], expect
[1000], output
[1000];
759 UErrorCode errorCode
;
760 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
762 /* create a long and nasty string with NFKC-unsafe characters */
765 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
766 input
[inLength
++]=HANGUL_KIYEOK
;
767 input
[inLength
++]=HANGUL_WEO
;
768 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
770 input
[inLength
++]=HANGUL_KIYEOK
;
771 input
[inLength
++]=HANGUL_WEO
;
772 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
774 input
[inLength
++]=HANGUL_KIYEOK
;
775 input
[inLength
++]=HANGUL_K_WEO
;
776 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
778 input
[inLength
++]=HANGUL_KIYEOK
;
779 input
[inLength
++]=HANGUL_K_WEO
;
780 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
782 input
[inLength
++]=HANGUL_K_KIYEOK
;
783 input
[inLength
++]=HANGUL_WEO
;
784 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
786 input
[inLength
++]=HANGUL_K_KIYEOK
;
787 input
[inLength
++]=HANGUL_WEO
;
788 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
790 input
[inLength
++]=HANGUL_K_KIYEOK
;
791 input
[inLength
++]=HANGUL_K_WEO
;
792 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
794 input
[inLength
++]=HANGUL_K_KIYEOK
;
795 input
[inLength
++]=HANGUL_K_WEO
;
796 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
798 /* Hangul LV with normal/compatibility Jamo T */
799 input
[inLength
++]=HANGUL_AC00
;
800 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
802 input
[inLength
++]=HANGUL_AC00
;
803 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
805 /* compatibility Jamo L, V */
806 input
[inLength
++]=HANGUL_K_KIYEOK
;
807 input
[inLength
++]=HANGUL_K_WEO
;
809 hangulPrefixLength
=inLength
;
811 input
[inLength
++]=UTF16_LEAD(MUSICAL_HALF_NOTE
);
812 input
[inLength
++]=UTF16_TRAIL(MUSICAL_HALF_NOTE
);
813 for(i
=0; i
<200; ++i
) {
814 input
[inLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
815 input
[inLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
816 input
[inLength
++]=UTF16_LEAD(MUSICAL_STEM
);
817 input
[inLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
820 /* (compatibility) Jamo L, T do not compose */
821 input
[inLength
++]=HANGUL_K_KIYEOK
;
822 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
825 errorCode
=U_ZERO_ERROR
;
826 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
827 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
829 errorCode
=U_ZERO_ERROR
;
830 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
831 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
833 errorCode
=U_ZERO_ERROR
;
834 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
835 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
837 errorCode
=U_ZERO_ERROR
;
838 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
839 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
841 errorCode
=U_ZERO_ERROR
;
842 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
843 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
848 expect
[expectLength
++]=HANGUL_SYLLABLE
;
850 expect
[expectLength
++]=HANGUL_SYLLABLE
;
852 expect
[expectLength
++]=HANGUL_SYLLABLE
;
854 expect
[expectLength
++]=HANGUL_SYLLABLE
;
856 expect
[expectLength
++]=HANGUL_SYLLABLE
;
858 expect
[expectLength
++]=HANGUL_SYLLABLE
;
860 expect
[expectLength
++]=HANGUL_SYLLABLE
;
862 expect
[expectLength
++]=HANGUL_SYLLABLE
;
864 expect
[expectLength
++]=HANGUL_AC00
+3;
866 expect
[expectLength
++]=HANGUL_AC00
+3;
868 expect
[expectLength
++]=HANGUL_AC00
+14*28;
870 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD
);
871 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
872 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
873 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
874 for(i
=0; i
<200; ++i
) {
875 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
876 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
878 for(i
=0; i
<200; ++i
) {
879 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
880 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
883 expect
[expectLength
++]=HANGUL_KIYEOK
;
884 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
886 /* try destination overflow first */
887 errorCode
=U_ZERO_ERROR
;
888 preflightLength
=unorm_normalize(input
, inLength
,
890 output
, 100, /* too short */
892 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
893 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode
));
897 errorCode
=U_ZERO_ERROR
;
898 length
=unorm_normalize(input
, inLength
,
900 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
902 if(U_FAILURE(errorCode
)) {
903 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
904 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
905 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
906 for(i
=0; i
<length
; ++i
) {
907 if(output
[i
]!=expect
[i
]) {
908 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
913 if(length
!=preflightLength
) {
914 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
918 u_memcpy(expect
, input
, hangulPrefixLength
);
919 expectLength
=hangulPrefixLength
;
921 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD
);
922 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
923 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
924 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
925 for(i
=0; i
<200; ++i
) {
926 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
927 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
929 for(i
=0; i
<200; ++i
) {
930 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
931 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
934 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
935 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
937 errorCode
=U_ZERO_ERROR
;
938 length
=unorm_normalize(input
, inLength
,
940 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
942 if(U_FAILURE(errorCode
)) {
943 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
944 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
945 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
946 for(i
=0; i
<length
; ++i
) {
947 if(output
[i
]!=expect
[i
]) {
948 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
955 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
957 TestConcatenate(void) {
964 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
967 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
971 UErrorCode errorCode
;
974 /* left with length, right NUL-terminated */
975 errorCode
=U_ZERO_ERROR
;
976 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
977 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
978 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
982 errorCode
=U_ZERO_ERROR
;
983 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
984 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
985 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
989 errorCode
=U_ZERO_ERROR
;
990 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
991 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
992 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
995 /* enter with U_FAILURE */
997 errorCode
=U_UNEXPECTED_TOKEN
;
998 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
999 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1000 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1003 /* illegal arguments */
1005 errorCode
=U_ZERO_ERROR
;
1006 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1007 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1008 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1011 errorCode
=U_ZERO_ERROR
;
1012 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1013 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1014 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1022 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1023 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Helper used by TestNextPrevious(): steps through iter with unorm_next()
 * or unorm_previous() and compares every step against expected data.
 *
 * src, srcLength    - source text and its length
 * iter              - iterator that is stepped; queried through its
 *                     hasNext/hasPrevious/getIndex callbacks
 * mode              - normalization mode; also selects a name from _modeString
 *                     for the diagnostics
 * forward           - direction flag, printed as "next/previous %d" in the diagnostics
 * out, outLength    - expected output pieces separated by _PLUS; may be NULL,
 *                     and (UBool)(out!=NULL) is what gets passed on to
 *                     unorm_next()/unorm_previous()
 * srcIndexes, srcIndexesLength
 *                   - expected iterator indexes in src for each piece
 *
 * For each step the resulting iterator index, the output length, the output
 * text and the neededToNormalize flag are checked; mismatches of the first
 * three are reported with log_err, API failures with log_data_err.
 */
1027 _testIter(const UChar
*src
, int32_t srcLength
,
1028 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1029 const UChar
*out
, int32_t outLength
,
1030 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1032 const UChar
*expect
, *outLimit
, *in
;
1033 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1034 UErrorCode errorCode
;
1035 UBool neededToNormalize
, expectNeeded
;
1037 errorCode
=U_ZERO_ERROR
;
/* one past the last expected output unit */
1038 outLimit
=out
+outLength
;
1044 i
=srcIndexesLength
-2;
1051 if(!iter
->hasNext(iter
)) {
1054 length
=unorm_next(iter
,
1055 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1057 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* after a forward step the iterator is expected at the next entry of srcIndexes */
1059 expectIndex
=srcIndexes
[i
+1];
1061 inLength
=expectIndex
-prevIndex
;
1064 /* get output piece from between plus signs */
1066 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
/* normalization is expected to have been needed exactly when buffer differs from the input piece */
1069 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1072 expectLength
=inLength
;
1076 if(!iter
->hasPrevious(iter
)) {
1079 length
=unorm_previous(iter
,
1080 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1082 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* after a backward step the iterator is expected at the current entry of srcIndexes */
1084 expectIndex
=srcIndexes
[i
];
1086 inLength
=prevIndex
-expectIndex
;
1089 /* get output piece from between plus signs */
1091 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
/* same rule as for the forward direction: needed iff buffer differs from the input piece */
1095 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1098 expectLength
=inLength
;
/* where the iterator actually ended up after this step */
1102 index
=iter
->getIndex(iter
, UITER_CURRENT
);
1104 if(U_FAILURE(errorCode
)) {
1105 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1106 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
1109 if(expectIndex
!=index
) {
1110 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1111 forward
, _modeString
[mode
], i
, index
, expectIndex
);
1114 if(expectLength
!=length
) {
1115 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1116 forward
, _modeString
[mode
], i
, length
, expectLength
);
1119 if(0!=u_memcmp(expect
, buffer
, length
)) {
1120 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1121 forward
, _modeString
[mode
], i
);
1124 if(neededToNormalize
!=expectNeeded
) {
1128 expect
+=expectLength
+1; /* go after the + */
1131 --expect
; /* go before the + */
/*
 * Tests unorm_next() and unorm_previous().
 *
 * Part 1 runs _testIter() over one source string for NFD, NFKD, NFC, NFKC
 * and FCD, forward (TRUE) and backward (FALSE), first with the expected
 * output pieces and then with out==NULL ("without doNormalize").
 * Part 2 calls unorm_next() directly for special cases: no neededToNormalize
 * pointer, pure preflighting (NULL buffer), a too-small buffer, a NULL
 * iterator, an illegal mode, and an error code that is already set on entry.
 *
 * NOTE(review): the "/4" divisors in the sizeof(...Indexes)/4 arguments
 * presumably stand for sizeof(int32_t), the element type that _testIter()
 * takes for srcIndexes - confirm against the array declarations.
 */
1138 TestNextPrevious() {
1140 src
[]={ /* input string */
1141 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1143 nfd
[]={ /* + separates expected output pieces */
1144 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
1147 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
1150 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1153 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
1156 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1159 /* expected iterator indexes in the source string for each iteration piece */
1160 static const int32_t
1182 UBool neededToNormalize
;
1183 UErrorCode errorCode
;
1185 uiter_setString(&iter
, src
, sizeof(src
)/U_SIZEOF_UCHAR
);
1187 /* test iteration with doNormalize */
/* forward runs (forward==TRUE), one per mode */
1189 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1191 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1193 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1195 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1197 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
/* backward runs (forward==FALSE): the iterator index is set to its length before each call */
1199 iter
.index
=iter
.length
;
1200 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1201 iter
.index
=iter
.length
;
1202 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1203 iter
.index
=iter
.length
;
1204 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1205 iter
.index
=iter
.length
;
1206 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1207 iter
.index
=iter
.length
;
1208 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
1210 /* test iteration without doNormalize */
/* same sequence as above, but with out==NULL and outLength==0 */
1212 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1214 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1216 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1218 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1220 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1222 iter
.index
=iter
.length
;
1223 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1224 iter
.index
=iter
.length
;
1225 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1226 iter
.index
=iter
.length
;
1227 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1228 iter
.index
=iter
.length
;
1229 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1230 iter
.index
=iter
.length
;
1231 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1233 /* try without neededToNormalize */
1234 errorCode
=U_ZERO_ERROR
;
1237 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1238 UNORM_NFD
, 0, TRUE
, NULL
,
/* the result is expected to equal the two units nfd[2], nfd[3] */
1240 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1241 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* pure preflighting: NULL buffer with capacity 0; neededToNormalize is preset to 9 so that the check for FALSE below detects whether unorm_next() wrote it */
1246 neededToNormalize
=9;
1248 length
=unorm_next(&iter
, NULL
, 0,
1249 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1251 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1252 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
/* preflighting with capacity 1: buffer[1] is preset to 5 and must still be 5 afterwards */
1256 errorCode
=U_ZERO_ERROR
;
1257 buffer
[0]=buffer
[1]=5;
1258 neededToNormalize
=9;
1260 length
=unorm_next(&iter
, buffer
, 1,
1261 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1263 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1264 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
/* illegal argument: NULL iterator */
1269 errorCode
=U_ZERO_ERROR
;
1270 buffer
[0]=buffer
[1]=5;
1271 neededToNormalize
=9;
1273 length
=unorm_next(NULL
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1274 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1276 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1277 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
/* illegal argument: mode value 0 */
1282 buffer
[0]=buffer
[1]=5;
1283 neededToNormalize
=9;
1285 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1286 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1288 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1289 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1293 /* error coming in */
1294 errorCode
=U_MISPLACED_QUANTIFIER
;
1297 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1298 UNORM_NFD
, 0, TRUE
, NULL
,
/* the error code set before the call must come back unchanged */
1300 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1301 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
/*
 * Tests u_getFC_NFKC_Closure(): for each table entry the closure of the code
 * point c is fetched into buffer and compared (length and contents) with the
 * expected NUL-terminated string s. Afterwards two error cases are checked:
 * a NULL destination and a call made while errorCode already holds a failure.
 */
1307 TestFCNFKCClosure(void) {
1308 static const struct {
1314 { 0x037A, { 0x0020, 0x03B9, 0 } },
1315 { 0x03D2, { 0x03C5, 0 } },
1316 { 0x20A8, { 0x0072, 0x0073, 0 } },
1317 { 0x210B, { 0x0068, 0 } },
1318 { 0x210C, { 0x0068, 0 } },
1319 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1320 { 0x2122, { 0x0074, 0x006D, 0 } },
1321 { 0x2128, { 0x007A, 0 } },
1322 { 0x1D5DB, { 0x0068, 0 } },
1323 { 0x1D5ED, { 0x007A, 0 } },
1328 UErrorCode errorCode
;
1331 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
1332 errorCode
=U_ZERO_ERROR
;
1333 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, LENGTHOF(buffer
), &errorCode
);
/* the returned length must match the NUL-terminated length of buffer, and buffer must equal the expected string */
1334 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
/* NOTE(review): %04lx is a long conversion - confirm that it matches the declared type of tests[i].c */
1335 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests
[i
].c
, u_errorName(errorCode
));
1339 /* error handling */
1340 errorCode
=U_ZERO_ERROR
;
1341 length
=u_getFC_NFKC_Closure(0x5c, NULL
, LENGTHOF(buffer
), &errorCode
);
1342 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1343 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
/* no reset of errorCode is visible before this call: it appears to be made with the failure code left by the previous check, which must then remain unchanged */
1346 length
=u_getFC_NFKC_Closure(0x5c, buffer
, LENGTHOF(buffer
), &errorCode
);
1347 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1348 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Cross-checks per-code-point normalization properties against results
 * computed from single-code-point strings.
 *
 * First the maximum values reported by u_getIntPropertyMaxValue() are
 * checked: UNORM_YES for the NFD/NFKD quick check properties, UNORM_MAYBE
 * for NFC/NFKC, and the same maximum as UCHAR_CANONICAL_COMBINING_CLASS for
 * the lead and trail combining class properties.
 * Then, for a code point c encoded into s, each UCHAR_NF*_QUICK_CHECK
 * property value (qc1) is set against unorm_quickCheck() on s (qc2), and the
 * lead/trail combining class properties of c are set against
 * u_getCombiningClass() of the first and last code point of the NFD form of s.
 */
1353 TestQuickCheckPerCP() {
1354 UErrorCode errorCode
;
1355 UChar32 c
, lead
, trail
;
1356 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1357 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1361 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1362 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1363 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1364 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1365 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1366 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1368 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1372 * compare the quick check property values for some code points
1373 * to the quick check results for checking same-code point strings
1375 errorCode
=U_ZERO_ERROR
;
/* encode c as UTF-16 into s; length receives the number of code units appended */
1379 U16_APPEND_UNSAFE(s
, length
, c
);
1381 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1382 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1384 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1387 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1388 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1390 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1393 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1394 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1396 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1399 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1400 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1402 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
/* decompose s into nfd; length now holds the length of the NFD form */
1405 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, LENGTHOF(nfd
), &errorCode
);
1406 /* length-length == 0 is used to get around a compiler warning. */
/* lead = code point at offset 0 of nfd, trail = code point containing the last code unit */
1407 U16_GET(nfd
, 0, length
-length
, length
, lead
);
1408 U16_GET(nfd
, 0, length
-1, length
, trail
);
1410 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1411 lccc2
=u_getCombiningClass(lead
);
1412 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1413 tccc2
=u_getCombiningClass(trail
);
1416 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1420 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1424 /* skip some code points */
/*
 * Table-driven test of unorm_normalize(): every entry of cases names a
 * normalization mode, options, and an expected result. Each case is
 * normalized into output, and the call must succeed with a result whose
 * length equals u_strlen(expect) and whose contents equal expect.
 * Failures are reported with the case index through log_data_err.
 */
1430 TestComposition(void) {
1431 static const struct {
1432 UNormalizationMode mode
;
1438 * special cases for UAX #15 bug
1439 * see Unicode Corrigendum #5: Normalization Idempotency
1440 * at http://unicode.org/versions/corrigendum5.html
1441 * (was Public Review Issue #29)
1443 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1444 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1445 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1446 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1448 /* TODO: add test cases for UNORM_FCC here (j2151) */
1452 UErrorCode errorCode
;
1455 for(i
=0; i
<LENGTHOF(cases
); ++i
) {
/* start every case with a clean error code */
1456 errorCode
=U_ZERO_ERROR
;
1457 length
=unorm_normalize(
1459 cases
[i
].mode
, cases
[i
].options
,
1460 output
, LENGTHOF(output
),
1462 if( U_FAILURE(errorCode
) ||
1463 length
!=u_strlen(cases
[i
].expect
) ||
1464 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1466 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i
);
/*
 * Tests unorm2_getDecomposition() on the instance obtained from
 * unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, ...).
 *
 * Cases checked:
 *  - U+0020: the returned length must be negative
 *  - U+00E4: length 2, units 0x61 0x308, followed by a terminating 0
 *  - U+AC01: length 3, units 0x1100 0x1161 0x11a8, followed by a terminating 0
 *  - NULL buffer with capacity 0: U_BUFFER_OVERFLOW_ERROR with length 3
 *  - capacity -1, and NULL buffer with capacity 4: U_ILLEGAL_ARGUMENT_ERROR
 */
1472 TestGetDecomposition() {
1476 UErrorCode errorCode
=U_ZERO_ERROR
;
1477 const UNormalizer2
*n2
=unorm2_getInstance(NULL
, "nfc", UNORM2_COMPOSE_CONTIGUOUS
, &errorCode
);
1478 if(U_FAILURE(errorCode
)) {
1479 log_err_status(errorCode
, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode
));
1483 length
=unorm2_getDecomposition(n2
, 0x20, decomp
, LENGTHOF(decomp
), &errorCode
);
/* any length>=0 for the space character is treated as a failure */
1484 if(U_FAILURE(errorCode
) || length
>=0) {
1485 log_err("unorm2_getDecomposition(space) failed\n");
1487 errorCode
=U_ZERO_ERROR
;
1488 length
=unorm2_getDecomposition(n2
, 0xe4, decomp
, LENGTHOF(decomp
), &errorCode
);
1489 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1490 log_err("unorm2_getDecomposition(a-umlaut) failed\n");
1492 errorCode
=U_ZERO_ERROR
;
1493 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, LENGTHOF(decomp
), &errorCode
);
1494 if(U_FAILURE(errorCode
) || length
!=3 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0x11a8 || decomp
[3]!=0) {
1495 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) failed\n");
/* preflighting: no buffer, capacity 0 - the full length must still be reported */
1497 errorCode
=U_ZERO_ERROR
;
1498 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1499 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
1500 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) overflow failed\n");
/* illegal arguments: negative capacity, then NULL buffer with a positive capacity */
1502 errorCode
=U_ZERO_ERROR
;
1503 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1504 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1505 log_err("unorm2_getDecomposition(capacity<0) failed\n");
1507 errorCode
=U_ZERO_ERROR
;
1508 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1509 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1510 log_err("unorm2_getDecomposition(decomposition=NULL) failed\n");
1514 #endif /* #if !UCONFIG_NO_NORMALIZATION */