1 /********************************************************************
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
22 #if UCONFIG_NO_NORMALIZATION
24 void addNormTest(TestNode
** root
) {
25 /* no normalization - nothing to do */
32 #include "unicode/uchar.h"
33 #include "unicode/ustring.h"
34 #include "unicode/unorm.h"
/*
 * Number of elements in a true array (not a pointer or array parameter),
 * converted to int32_t. The whole expansion is parenthesized so that the
 * macro behaves as a single primary expression wherever it is used
 * (e.g. after sizeof or next to postfix operators).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
43 TestNormCoverage(void);
46 TestConcatenate(void);
49 TestNextPrevious(void);
51 static void TestIsNormalized(void);
54 TestFCNFKCClosure(void);
57 TestQuickCheckPerCP(void);
60 TestComposition(void);
/*
 * Canonical normalization test data. Each row holds three escaped strings:
 *   [0] input, [1] expected canonical decomposition, [2] expected canonical
 *   composition.
 * Within this file, column 0 is fed to unorm_normalize() and the result is
 * compared against column 1 (UNORM_NFD) or column 2 (UNORM_NFC); columns 1
 * and 2 are also passed to unorm_quickCheck().
 * The "\\uXXXX" sequences are literal backslash escapes in the C strings;
 * presumably CharsToUChars() unescapes them -- confirm in the test harness.
 */
62 const static char* canonTests
[][3] = {
63 /* Input*/ /*Decomposed*/ /*Composed*/
64 { "cat", "cat", "cat" },
65 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
67 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
68 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
70 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
71 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
72 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below (input order; decomposition reorders to dot_below dot_above) */
74 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
75 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
77 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
78 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
79 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
81 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
82 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
84 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
85 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
87 { "Henry IV", "Henry IV", "Henry IV" },
88 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
90 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
91 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
92 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
93 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
94 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
95 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" } /* A + grave above + grave below (U+0316 sorts first in the decomposition) */
/*
 * Compatibility normalization test data. Each row holds three escaped
 * strings: [0] input, [1] expected compatibility decomposition,
 * [2] expected compatibility composition.
 * Within this file, column 0 is fed to unorm_normalize() and the result is
 * compared against column 1 (UNORM_NFKD) or column 2 (UNORM_NFKC); columns 1
 * and 2 are also passed to unorm_quickCheck().
 */
98 const static char* compatTests
[][3] = {
99 /* Input*/ /*Decomposed */ /*Composed*/
100 { "cat", "cat", "cat" },
102 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
104 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
105 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
107 { "Henry IV", "Henry IV", "Henry IV" },
/* U+2163 (Roman numeral four) is expected to map to the letters "IV" */
108 { "Henry \\u2163", "Henry IV", "Henry IV" },
110 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
111 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
113 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
115 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
116 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
117 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" } /* ka + hw_ten*/
121 void addNormTest(TestNode
** root
);
/*
 * Calls addTest(root, fn, path) once for each normalization test function in
 * this file, giving each a path of the form "tscoll/cnormtst/<name>".
 * root is passed through to addTest unchanged.
 */
123 void addNormTest(TestNode
** root
)
125 addTest(root
, &TestAPI
, "tscoll/cnormtst/TestAPI");
126 addTest(root
, &TestDecomp
, "tscoll/cnormtst/TestDecomp");
127 addTest(root
, &TestCompatDecomp
, "tscoll/cnormtst/TestCompatDecomp");
128 addTest(root
, &TestCanonDecompCompose
, "tscoll/cnormtst/TestCanonDecompCompose");
/* NOTE(review): this path omits the "Test" prefix used by every sibling
 * registration -- confirm whether that is intentional before renaming. */
129 addTest(root
, &TestCompatDecompCompose
, "tscoll/cnormtst/CompatDecompCompose");
130 addTest(root
, &TestNull
, "tscoll/cnormtst/TestNull");
131 addTest(root
, &TestQuickCheck
, "tscoll/cnormtst/TestQuickCheck");
132 addTest(root
, &TestQuickCheckPerCP
, "tscoll/cnormtst/TestQuickCheckPerCP");
133 addTest(root
, &TestIsNormalized
, "tscoll/cnormtst/TestIsNormalized");
134 addTest(root
, &TestCheckFCD
, "tscoll/cnormtst/TestCheckFCD");
135 addTest(root
, &TestNormCoverage
, "tscoll/cnormtst/TestNormCoverage");
136 addTest(root
, &TestConcatenate
, "tscoll/cnormtst/TestConcatenate");
137 addTest(root
, &TestNextPrevious
, "tscoll/cnormtst/TestNextPrevious");
138 addTest(root
, &TestFCNFKCClosure
, "tscoll/cnormtst/TestFCNFKCClosure");
139 addTest(root
, &TestComposition
, "tscoll/cnormtst/TestComposition");
144 UErrorCode status
= U_ZERO_ERROR
;
145 int32_t x
, neededLen
, resLen
;
146 UChar
*source
=NULL
, *result
=NULL
;
147 status
= U_ZERO_ERROR
;
149 log_verbose("Testing unorm_normalize with Decomp canonical\n");
150 for(x
=0; x
< LENGTHOF(canonTests
); x
++)
152 source
=CharsToUChars(canonTests
[x
][0]);
153 neededLen
= unorm_normalize(source
, u_strlen(source
), UNORM_NFD
, 0, NULL
, 0, &status
);
154 if(status
==U_BUFFER_OVERFLOW_ERROR
)
158 result
=(UChar
*)malloc(sizeof(UChar
*) * resLen
);
159 unorm_normalize(source
, u_strlen(source
), UNORM_NFD
, 0, result
, resLen
, &status
);
161 if(U_FAILURE(status
)){
162 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source
), myErrorName(status
) );
164 assertEqual(result
, canonTests
[x
][1], x
);
171 void TestCompatDecomp()
173 UErrorCode status
= U_ZERO_ERROR
;
174 int32_t x
, neededLen
, resLen
;
175 UChar
*source
=NULL
, *result
=NULL
;
176 status
= U_ZERO_ERROR
;
178 log_verbose("Testing unorm_normalize with Decomp compat\n");
179 for(x
=0; x
< LENGTHOF(compatTests
); x
++)
181 source
=CharsToUChars(compatTests
[x
][0]);
182 neededLen
= unorm_normalize(source
, u_strlen(source
), UNORM_NFKD
, 0, NULL
, 0, &status
);
183 if(status
==U_BUFFER_OVERFLOW_ERROR
)
187 result
=(UChar
*)malloc(sizeof(UChar
*) * resLen
);
188 unorm_normalize(source
, u_strlen(source
), UNORM_NFKD
, 0, result
, resLen
, &status
);
190 if(U_FAILURE(status
)){
191 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source
), myErrorName(status
) );
193 assertEqual(result
, compatTests
[x
][1], x
);
200 void TestCanonDecompCompose()
202 UErrorCode status
= U_ZERO_ERROR
;
203 int32_t x
, neededLen
, resLen
;
204 UChar
*source
=NULL
, *result
=NULL
;
205 status
= U_ZERO_ERROR
;
207 log_verbose("Testing unorm_normalize with Decomp can compose compat\n");
208 for(x
=0; x
< LENGTHOF(canonTests
); x
++)
210 source
=CharsToUChars(canonTests
[x
][0]);
211 neededLen
= unorm_normalize(source
, u_strlen(source
), UNORM_NFC
, 0, NULL
, 0, &status
);
212 if(status
==U_BUFFER_OVERFLOW_ERROR
)
216 result
=(UChar
*)malloc(sizeof(UChar
*) * resLen
);
217 unorm_normalize(source
, u_strlen(source
), UNORM_NFC
, 0, result
, resLen
, &status
);
219 if(U_FAILURE(status
)){
220 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source
),myErrorName(status
) );
222 assertEqual(result
, canonTests
[x
][2], x
);
229 void TestCompatDecompCompose()
231 UErrorCode status
= U_ZERO_ERROR
;
232 int32_t x
, neededLen
, resLen
;
233 UChar
*source
=NULL
, *result
=NULL
;
234 status
= U_ZERO_ERROR
;
236 log_verbose("Testing unorm_normalize with compat decomp compose can\n");
237 for(x
=0; x
< LENGTHOF(compatTests
); x
++)
239 source
=CharsToUChars(compatTests
[x
][0]);
240 neededLen
= unorm_normalize(source
, u_strlen(source
), UNORM_NFKC
, 0, NULL
, 0, &status
);
241 if(status
==U_BUFFER_OVERFLOW_ERROR
)
245 result
=(UChar
*)malloc(sizeof(UChar
*) * resLen
);
246 unorm_normalize(source
, u_strlen(source
), UNORM_NFKC
, 0, result
, resLen
, &status
);
248 if(U_FAILURE(status
)){
249 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source
), myErrorName(status
) );
251 assertEqual(result
, compatTests
[x
][2], x
);
260 static void assertEqual(const UChar* result, const UChar* expected, int32_t index)
262 if(u_strcmp(result, expected)!=0){
263 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, austrdup(expected),
269 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
271 UChar
*expectedUni
= CharsToUChars(expected
);
272 if(u_strcmp(result
, expectedUni
)!=0){
273 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
279 static void TestNull_check(UChar
*src
, int32_t srcLen
,
280 UChar
*exp
, int32_t expLen
,
281 UNormalizationMode mode
,
284 UErrorCode status
= U_ZERO_ERROR
;
290 status
= U_ZERO_ERROR
;
297 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
299 if(U_FAILURE(status
)) {
300 log_err("unorm_normalize(%s) with 0x0000 failed: %s\n", name
, u_errorName(status
));
301 } else if (len
!= expLen
) {
302 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
307 if(exp
[i
] != result
[i
]) {
308 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
315 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
319 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
325 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
326 int32_t source_comp_len
= 4;
327 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
328 int32_t expect_comp_len
= 3;
330 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
331 int32_t source_dcmp_len
= 3;
332 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
333 int32_t expect_dcmp_len
= 5;
335 TestNull_check(source_comp
,
342 TestNull_check(source_dcmp
,
349 TestNull_check(source_comp
,
359 static void TestQuickCheckResultNO()
361 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
362 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
363 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
364 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
365 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
366 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
367 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
368 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
374 UErrorCode error
= U_ZERO_ERROR
;
376 for (; count
< SIZE
; count
++)
378 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
381 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
384 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
387 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
390 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
393 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
396 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
399 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
406 static void TestQuickCheckResultYES()
408 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
409 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
410 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
411 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
412 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
413 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
414 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
415 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
419 UErrorCode error
= U_ZERO_ERROR
;
424 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
426 log_err("ERROR in NFD quick check at U+%04x\n", cp
);
429 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
432 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
435 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
437 log_err("ERROR in NFKD quick check at U+%04x\n", cp
);
440 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
443 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
449 for (; count
< SIZE
; count
++)
451 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
454 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
457 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
460 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
463 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
466 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
469 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
472 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
478 static void TestQuickCheckResultMAYBE()
480 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
481 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
482 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
483 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
489 UErrorCode error
= U_ZERO_ERROR
;
491 /* NFD and NFKD do not have any MAYBE code points */
492 for (; count
< SIZE
; count
++)
494 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
497 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
500 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
503 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
509 static void TestQuickCheckStringResult()
514 UErrorCode error
= U_ZERO_ERROR
;
516 for (count
= 0; count
< LENGTHOF(canonTests
); count
++)
518 d
= CharsToUChars(canonTests
[count
][1]);
519 c
= CharsToUChars(canonTests
[count
][2]);
520 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
523 log_err("ERROR in NFD quick check for string at count %d\n", count
);
527 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
530 log_err("ERROR in NFC quick check for string at count %d\n", count
);
538 for (count
= 0; count
< LENGTHOF(compatTests
); count
++)
540 d
= CharsToUChars(compatTests
[count
][1]);
541 c
= CharsToUChars(compatTests
[count
][2]);
542 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
545 log_err("ERROR in NFKD quick check for string at count %d\n", count
);
549 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
552 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
561 void TestQuickCheck()
563 TestQuickCheckResultNO();
564 TestQuickCheckResultYES();
565 TestQuickCheckResultMAYBE();
566 TestQuickCheckStringResult();
570 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
571 * normalized, and some that are not.
572 * Here we pick some specific cases and test the C API.
574 static void TestIsNormalized(void) {
575 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
576 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
577 { 0xfb1d, 0 }, /* excluded from composition */
578 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
579 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
581 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
582 { 0x1100, 0x1161, 0 }, /* Jamo compose */
583 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
584 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
588 UErrorCode errorCode
;
592 /* normal case with length>=0 (length -1 used for special cases below) */
593 errorCode
=U_ZERO_ERROR
;
594 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
595 log_err("error: !isNormalized(<U+0300>, NFC) (%s)\n", u_errorName(errorCode
));
598 /* incoming U_FAILURE */
599 errorCode
=U_TRUNCATED_CHAR_FOUND
;
600 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
601 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
602 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
606 errorCode
=U_ZERO_ERROR
;
607 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
608 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
609 log_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode
));
613 errorCode
=U_ZERO_ERROR
;
614 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
615 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
616 log_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode
));
620 for(i
=0; i
<LENGTHOF(notNFC
); ++i
) {
621 errorCode
=U_ZERO_ERROR
;
622 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
623 log_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s)\n", i
, u_errorName(errorCode
));
625 errorCode
=U_ZERO_ERROR
;
626 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
627 log_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s)\n", i
, u_errorName(errorCode
));
630 for(i
=0; i
<LENGTHOF(notNFKC
); ++i
) {
631 errorCode
=U_ZERO_ERROR
;
632 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
633 log_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s)\n", i
, u_errorName(errorCode
));
640 UErrorCode status
= U_ZERO_ERROR
;
641 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
643 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
644 0x02B9, 0x0314, 0x0315, 0x0316};
645 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
646 0x0050, 0x0730, 0x09EE, 0x1E10};
648 static const UChar datastr
[][5] =
649 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
650 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
651 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
652 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
653 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
655 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
657 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
659 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
660 0x0307, 0x0308, 0x0309, 0x030a,
661 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
662 0x0327, 0x0328, 0x0329, 0x032a,
663 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
664 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
668 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
669 log_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES\n");
670 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
671 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
672 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
673 log_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES\n");
675 if (U_FAILURE(status
))
676 log_err("unorm_quickCheck(FCD) failed: %s\n", u_errorName(status
));
680 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
681 if (U_FAILURE(status
)) {
682 log_err("unorm_quickCheck(FCD) failed: exception occured at data set %d\n", count
);
686 if (result
[count
] != fcdresult
) {
687 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
694 /* random checks of long strings */
695 status
= U_ZERO_ERROR
;
696 srand((unsigned)time( NULL
));
698 for (count
= 0; count
< 50; count
++)
701 UBool testresult
= UNORM_YES
;
709 data
[size
] = datachar
[(rand() * 50) / RAND_MAX
];
710 log_verbose("0x%x", data
[size
]);
711 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
712 norm
+ normsize
, 100 - normsize
, &status
);
713 if (U_FAILURE(status
)) {
714 log_err("unorm_quickCheck(FCD) failed: exception occured at data generation\n");
721 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
723 if (U_FAILURE(status
)) {
724 log_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation\n");
727 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
728 testresult
= UNORM_NO
;
730 if (testresult
== UNORM_YES
) {
731 log_verbose("result UNORM_YES\n");
734 log_verbose("result UNORM_NO\n");
737 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
738 log_err("unorm_quickCheck(FCD) failed: expected %d for random data\n", testresult
);
745 static const UChar in
[]={ 0x68, 0xe4 };
746 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
747 UErrorCode errorCode
;
750 /* try preflighting */
751 errorCode
=U_ZERO_ERROR
;
752 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
753 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
754 log_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
758 errorCode
=U_ZERO_ERROR
;
759 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
760 if(U_FAILURE(errorCode
)) {
761 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
764 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
765 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
770 /* test cases to improve test code coverage */
772 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
773 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
774 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
776 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
777 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
778 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
780 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
781 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
783 MUSICAL_VOID_NOTEHEAD
=0x1d157,
784 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
785 MUSICAL_STEM
=0x1d165, /* cc=216 */
786 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
791 static UChar input
[2000], expect
[3000], output
[3000];
792 UErrorCode errorCode
;
793 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
795 /* create a long and nasty string with NFKC-unsafe characters */
798 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
799 input
[inLength
++]=HANGUL_KIYEOK
;
800 input
[inLength
++]=HANGUL_WEO
;
801 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
803 input
[inLength
++]=HANGUL_KIYEOK
;
804 input
[inLength
++]=HANGUL_WEO
;
805 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
807 input
[inLength
++]=HANGUL_KIYEOK
;
808 input
[inLength
++]=HANGUL_K_WEO
;
809 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
811 input
[inLength
++]=HANGUL_KIYEOK
;
812 input
[inLength
++]=HANGUL_K_WEO
;
813 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
815 input
[inLength
++]=HANGUL_K_KIYEOK
;
816 input
[inLength
++]=HANGUL_WEO
;
817 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
819 input
[inLength
++]=HANGUL_K_KIYEOK
;
820 input
[inLength
++]=HANGUL_WEO
;
821 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
823 input
[inLength
++]=HANGUL_K_KIYEOK
;
824 input
[inLength
++]=HANGUL_K_WEO
;
825 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
827 input
[inLength
++]=HANGUL_K_KIYEOK
;
828 input
[inLength
++]=HANGUL_K_WEO
;
829 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
831 /* Hangul LV with normal/compatibility Jamo T */
832 input
[inLength
++]=HANGUL_AC00
;
833 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
835 input
[inLength
++]=HANGUL_AC00
;
836 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
838 /* compatibility Jamo L, V */
839 input
[inLength
++]=HANGUL_K_KIYEOK
;
840 input
[inLength
++]=HANGUL_K_WEO
;
842 hangulPrefixLength
=inLength
;
844 input
[inLength
++]=UTF16_LEAD(MUSICAL_HALF_NOTE
);
845 input
[inLength
++]=UTF16_TRAIL(MUSICAL_HALF_NOTE
);
846 for(i
=0; i
<200; ++i
) {
847 input
[inLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
848 input
[inLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
849 input
[inLength
++]=UTF16_LEAD(MUSICAL_STEM
);
850 input
[inLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
853 /* (compatibility) Jamo L, T do not compose */
854 input
[inLength
++]=HANGUL_K_KIYEOK
;
855 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
858 errorCode
=U_ZERO_ERROR
;
859 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
860 log_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s)\n", u_errorName(errorCode
));
862 errorCode
=U_ZERO_ERROR
;
863 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
864 log_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s)\n", u_errorName(errorCode
));
866 errorCode
=U_ZERO_ERROR
;
867 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
868 log_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s)\n", u_errorName(errorCode
));
870 errorCode
=U_ZERO_ERROR
;
871 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
872 log_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s)\n", u_errorName(errorCode
));
874 errorCode
=U_ZERO_ERROR
;
875 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
876 log_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s)\n", u_errorName(errorCode
));
881 expect
[expectLength
++]=HANGUL_SYLLABLE
;
883 expect
[expectLength
++]=HANGUL_SYLLABLE
;
885 expect
[expectLength
++]=HANGUL_SYLLABLE
;
887 expect
[expectLength
++]=HANGUL_SYLLABLE
;
889 expect
[expectLength
++]=HANGUL_SYLLABLE
;
891 expect
[expectLength
++]=HANGUL_SYLLABLE
;
893 expect
[expectLength
++]=HANGUL_SYLLABLE
;
895 expect
[expectLength
++]=HANGUL_SYLLABLE
;
897 expect
[expectLength
++]=HANGUL_AC00
+3;
899 expect
[expectLength
++]=HANGUL_AC00
+3;
901 expect
[expectLength
++]=HANGUL_AC00
+14*28;
903 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD
);
904 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
905 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
906 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
907 for(i
=0; i
<200; ++i
) {
908 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
909 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
911 for(i
=0; i
<200; ++i
) {
912 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
913 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
916 expect
[expectLength
++]=HANGUL_KIYEOK
;
917 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
919 /* try destination overflow first */
920 errorCode
=U_ZERO_ERROR
;
921 preflightLength
=unorm_normalize(input
, inLength
,
923 output
, 100, /* too short */
925 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
926 log_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s\n", u_errorName(errorCode
));
930 errorCode
=U_ZERO_ERROR
;
931 length
=unorm_normalize(input
, inLength
,
933 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
935 if(U_FAILURE(errorCode
)) {
936 log_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s\n", u_errorName(errorCode
));
937 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
938 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
939 for(i
=0; i
<length
; ++i
) {
940 if(output
[i
]!=expect
[i
]) {
941 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
946 if(length
!=preflightLength
) {
947 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
951 u_memcpy(expect
, input
, hangulPrefixLength
);
952 expectLength
=hangulPrefixLength
;
954 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD
);
955 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
956 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
957 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
958 for(i
=0; i
<200; ++i
) {
959 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STEM
);
960 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STEM
);
962 for(i
=0; i
<200; ++i
) {
963 expect
[expectLength
++]=UTF16_LEAD(MUSICAL_STACCATO
);
964 expect
[expectLength
++]=UTF16_TRAIL(MUSICAL_STACCATO
);
967 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
968 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
970 errorCode
=U_ZERO_ERROR
;
971 length
=unorm_normalize(input
, inLength
,
973 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
975 if(U_FAILURE(errorCode
)) {
976 log_err("error unorm_normalize(long input, UNORM_FCD) failed with %s\n", u_errorName(errorCode
));
977 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
978 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
979 for(i
=0; i
<length
; ++i
) {
980 if(output
[i
]!=expect
[i
]) {
981 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
988 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
990 TestConcatenate(void) {
997 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
1000 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
1004 UErrorCode errorCode
;
1007 /* left with length, right NUL-terminated */
1008 errorCode
=U_ZERO_ERROR
;
1009 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1010 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
1011 log_err("error: unorm_concatenate()=%ld (expect 6) failed with %s\n", length
, u_errorName(errorCode
));
1015 errorCode
=U_ZERO_ERROR
;
1016 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
1017 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
1018 log_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s\n", length
, u_errorName(errorCode
));
1022 errorCode
=U_ZERO_ERROR
;
1023 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
1024 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
1025 log_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s\n", length
, u_errorName(errorCode
));
1028 /* enter with U_FAILURE */
1030 errorCode
=U_UNEXPECTED_TOKEN
;
1031 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1032 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1033 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1036 /* illegal arguments */
1038 errorCode
=U_ZERO_ERROR
;
1039 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1040 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1041 log_err("error: unorm_concatenate(left=NULL)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1044 errorCode
=U_ZERO_ERROR
;
1045 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1046 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1047 log_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1055 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1056 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Shared driver used by TestNextPrevious: steps iter through the text with
 * unorm_next() or unorm_previous() and, after each step, compares the
 * iterator index, the returned length, the output text and the
 * neededToNormalize flag with the expected values.
 *
 * src, srcLength               - the original text and its length
 * iter                         - character iterator, positioned by the caller
 * mode                         - normalization mode; also indexes _modeString in log output
 * forward                      - direction flag, printed in every error message
 * out, outLength               - expected output pieces separated by _PLUS; may be NULL
 * srcIndexes, srcIndexesLength - expected iterator indexes for each step
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (including the branch that selects the direction and the loop header), so
 * the comments below only describe the statements that are shown.
 */
1060 _testIter(const UChar
*src
, int32_t srcLength
,
1061 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1062 const UChar
*out
, int32_t outLength
,
1063 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1065 const UChar
*expect
, *outLimit
, *in
;
1066 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1067 UErrorCode errorCode
;
1068 UBool neededToNormalize
, expectNeeded
;
1070 errorCode
=U_ZERO_ERROR
;
/* one past the last expected output unit; used as the limit when scanning for _PLUS */
1071 outLimit
=out
+outLength
;
/* NOTE(review): surrounding lines are missing; presumably the start value of i for backward iteration - confirm */
1077 i
=srcIndexesLength
-2;
/* forward step: test for remaining input, then fetch the next piece with unorm_next() */
1084 if(!iter
->hasNext(iter
)) {
1087 length
=unorm_next(iter
,
1088 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
/* this flag is TRUE only when an expected output string was supplied */
1090 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* after a forward step the iterator is expected at the index following the current one */
1092 expectIndex
=srcIndexes
[i
+1];
/* length of the source span between the previous and the expected iterator index */
1094 inLength
=expectIndex
-prevIndex
;
1097 /* get output piece from between plus signs */
1099 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
/* normalization was needed exactly when the output differs from the input span */
1102 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
/* NOTE(review): enclosing branch not visible; presumably the case without expected output, where the piece length equals the input span length - confirm */
1105 expectLength
=inLength
;
/* backward step: test for preceding input, then fetch the previous piece with unorm_previous() */
1109 if(!iter
->hasPrevious(iter
)) {
1112 length
=unorm_previous(iter
,
1113 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
/* same flag as in the forward case: TRUE only when an expected output string was supplied */
1115 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* after a backward step the iterator is expected at the current index entry */
1117 expectIndex
=srcIndexes
[i
];
/* going backward the previous index is the larger one, so the operands are swapped */
1119 inLength
=prevIndex
-expectIndex
;
1122 /* get output piece from between plus signs */
1124 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
/* normalization was needed exactly when the output differs from the input span */
1128 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
/* NOTE(review): enclosing branch not visible; presumably the case without expected output - confirm */
1131 expectLength
=inLength
;
/* where the iterator actually stands after the step */
1135 index
=iter
->getIndex(iter
, UITER_CURRENT
);
/* check 1: the call itself must not have failed */
1137 if(U_FAILURE(errorCode
)) {
1138 log_err("error unorm iteration (next/previous %d %s)[%d]: %s\n",
1139 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
/* check 2: the iterator must be at the expected source index */
1142 if(expectIndex
!=index
) {
1143 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1144 forward
, _modeString
[mode
], i
, index
, expectIndex
);
/* check 3: the returned length must match the expected piece length */
1147 if(expectLength
!=length
) {
1148 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1149 forward
, _modeString
[mode
], i
, length
, expectLength
);
/* check 4: the output text must match the expected piece */
1152 if(0!=u_memcmp(expect
, buffer
, length
)) {
1153 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1154 forward
, _modeString
[mode
], i
);
/* check 5: the reported neededToNormalize flag must match the one computed above */
1157 if(neededToNormalize
!=expectNeeded
) {
1161 expect
+=expectLength
+1; /* go after the + */
1164 --expect
; /* go before the + */
/*
 * Exercises unorm_next() and unorm_previous() through _testIter for the
 * modes NFD, NFKD, NFC, NFKC and FCD, in both directions, once with an
 * expected output string and once with NULL in its place. Afterwards it
 * calls unorm_next() directly to check optional arguments, preflighting and
 * error handling.
 *
 * NOTE(review): this definition uses an empty parameter list; the file's
 * forward declaration is written as TestNextPrevious(void).
 * NOTE(review): several declaration lines and the tails of the unorm_next()
 * argument lists are not visible in this view.
 */
1171 TestNextPrevious() {
1173 src
[]={ /* input string */
1174 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1176 nfd
[]={ /* + separates expected output pieces */
1177 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
/* NOTE(review): declaration line not visible; presumably the expected pieces passed below as nfkd - confirm */
1180 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
/* NOTE(review): declaration line not visible; presumably the expected pieces passed below as nfc - confirm */
1183 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
/* NOTE(review): declaration line not visible; presumably the expected pieces passed below as nfkc - confirm */
1186 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
/* NOTE(review): declaration line not visible; presumably the expected pieces passed below as fcd - confirm */
1189 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1192 /* expected iterator indexes in the source string for each iteration piece */
1193 static const int32_t
1215 UBool neededToNormalize
;
1216 UErrorCode errorCode
;
/* wrap the input string in a character iterator */
1218 uiter_setString(&iter
, src
, sizeof(src
)/U_SIZEOF_UCHAR
);
1220 /* test iteration with doNormalize */
/* forward passes; sizeof(xIndexes)/4 is the element count of the int32_t index arrays */
1222 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1224 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1226 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1228 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1230 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
/* backward passes: the iterator is moved to the end of the text before each call */
1232 iter
.index
=iter
.length
;
1233 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1234 iter
.index
=iter
.length
;
1235 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1236 iter
.index
=iter
.length
;
1237 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1238 iter
.index
=iter
.length
;
1239 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1240 iter
.index
=iter
.length
;
1241 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
1243 /* test iteration without doNormalize */
/* same sequence as above, but with NULL and 0 in place of the expected output string */
1245 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1247 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1249 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1251 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1253 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
/* backward passes without expected output, again starting from the end of the text */
1255 iter
.index
=iter
.length
;
1256 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1257 iter
.index
=iter
.length
;
1258 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1259 iter
.index
=iter
.length
;
1260 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1261 iter
.index
=iter
.length
;
1262 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1263 iter
.index
=iter
.length
;
1264 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1266 /* try without neededToNormalize */
1267 errorCode
=U_ZERO_ERROR
;
/* NULL is passed where the other calls pass &neededToNormalize */
1270 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1271 UNORM_NFD
, 0, TRUE
, NULL
,
/* the result must be the two units stored at nfd[2] and nfd[3] */
1273 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1274 log_err("error unorm_next(without needed) %s\n", u_errorName(errorCode
));
/* 9 is a sentinel: the check below requires the call to have overwritten it with FALSE */
1279 neededToNormalize
=9;
/* pure preflighting: no destination buffer and zero capacity */
1281 length
=unorm_next(&iter
, NULL
, 0,
1282 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
/* expected: overflow error, flag reset to FALSE, and the required length of 2 returned */
1284 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1285 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
1289 errorCode
=U_ZERO_ERROR
;
/* fill the buffer with the sentinel value 5 so that writes past the capacity can be detected */
1290 buffer
[0]=buffer
[1]=5;
1291 neededToNormalize
=9;
/* preflighting with a buffer whose capacity (1) is smaller than the result */
1293 length
=unorm_next(&iter
, buffer
, 1,
1294 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
/* buffer[1] lies beyond the stated capacity and must still hold the sentinel */
1296 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1297 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
1302 errorCode
=U_ZERO_ERROR
;
1303 buffer
[0]=buffer
[1]=5;
1304 neededToNormalize
=9;
/* illegal argument: NULL iterator */
1306 length
=unorm_next(NULL
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1307 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1309 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1310 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
1315 buffer
[0]=buffer
[1]=5;
1316 neededToNormalize
=9;
/* illegal argument: mode value 0 */
1318 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1319 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1321 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1322 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1326 /* error coming in */
1327 errorCode
=U_MISPLACED_QUANTIFIER
;
1330 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1331 UNORM_NFD
, 0, TRUE
, NULL
,
/* the failure code set before the call must come back unchanged */
1333 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1334 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
1338 /* missing pErrorCode */
1341 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1342 UNORM_NFD
, 0, TRUE
, NULL
,
/* afterwards iter.index is expected to be 1 and buffer[0] to still hold the value 5 */
1344 if(iter
.index
!=1 || buffer
[0]!=5) {
1345 log_err("error unorm_next(pErrorCode==NULL) %s\n", u_errorName(errorCode
));
/*
 * Checks u_getFC_NFKC_Closure() against a table of code points paired with
 * the expected zero-terminated closure strings, then verifies its error
 * handling for a NULL destination and for an incoming failure code.
 */
1351 TestFCNFKCClosure(void) {
1352 static const struct {
/* each entry: a code point followed by its expected zero-terminated closure string */
1356 { 0x037A, { 0x0020, 0x03B9, 0 } },
1357 { 0x03D2, { 0x03C5, 0 } },
1358 { 0x20A8, { 0x0072, 0x0073, 0 } },
1359 { 0x210B, { 0x0068, 0 } },
1360 { 0x210C, { 0x0068, 0 } },
1361 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1362 { 0x2122, { 0x0074, 0x006D, 0 } },
1363 { 0x2128, { 0x007A, 0 } },
1364 { 0x1D5DB, { 0x0068, 0 } },
1365 { 0x1D5ED, { 0x007A, 0 } },
1370 UErrorCode errorCode
;
1373 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
1374 errorCode
=U_ZERO_ERROR
;
1375 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, LENGTHOF(buffer
), &errorCode
);
/* the call must succeed, its return value must equal the string length, and the text must match */
1376 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
/* NOTE(review): tests[i].c is printed with %04lx; its declared type is not visible here - confirm the specifier matches or add a cast */
1377 log_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s)\n", tests
[i
].c
, u_errorName(errorCode
));
1381 /* error handling */
1382 errorCode
=U_ZERO_ERROR
;
/* a NULL destination combined with a non-zero capacity must be rejected */
1383 length
=u_getFC_NFKC_Closure(0x5c, NULL
, LENGTHOF(buffer
), &errorCode
);
1384 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1385 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
/* NOTE(review): errorCode does not appear to be reset before this call, so it enters with the failure from above and is expected to keep it - confirm */
1388 length
=u_getFC_NFKC_Closure(0x5c, buffer
, LENGTHOF(buffer
), &errorCode
);
1389 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1390 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Cross-checks per-code-point normalization properties against the string
 * APIs: the UCHAR_NF*_QUICK_CHECK property values are compared with
 * unorm_quickCheck() on a string holding just that code point, and the
 * lead/trail canonical combining class properties are compared with
 * u_getCombiningClass() on the first and last code points of the NFD form.
 *
 * NOTE(review): this definition uses an empty parameter list; the file's
 * forward declaration is written as TestQuickCheckPerCP(void).
 * NOTE(review): the loop header and the conditions guarding most log_err
 * calls are not visible in this view.
 */
1395 TestQuickCheckPerCP() {
1396 UErrorCode errorCode
;
1397 UChar32 c
, lead
, trail
;
1398 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1399 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1400 UNormalizationCheckResult qc1
, qc2
;
/* sanity check of the property maximum values: YES for the decomposed forms, MAYBE for the composed forms */
1403 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1404 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1405 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1406 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
/* the lead and trail combining class properties must share the maximum of the plain combining class */
1407 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1408 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1410 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1414 * compare the quick check property values for some code points
1415 * to the quick check results for checking same-code point strings
1417 errorCode
=U_ZERO_ERROR
;
/* encode the code point c as UTF-16 into s; length is advanced past the written units */
1421 U16_APPEND_UNSAFE(s
, length
, c
);
/* for each form: qc1 is the property value of c, qc2 the quick check result for the string s */
1423 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1424 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1426 log_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x\n", qc1
, qc2
, c
);
1429 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1430 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1432 log_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x\n", qc1
, qc2
, c
);
1435 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1436 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1438 log_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x\n", qc1
, qc2
, c
);
1441 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1442 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1444 log_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x\n", qc1
, qc2
, c
);
/* decompose s into nfd, then read the first and the last code point of the result */
1447 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, LENGTHOF(nfd
), &errorCode
);
1448 U16_GET(nfd
, 0, 0, length
, lead
);
1449 U16_GET(nfd
, 0, length
-1, length
, trail
);
/* lccc1/tccc1 come from the properties of c; lccc2/tccc2 from the combining classes of lead and trail */
1451 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1452 lccc2
=u_getCombiningClass(lead
);
1453 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1454 tccc2
=u_getCombiningClass(trail
);
1457 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1461 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1465 /* skip some code points */
/*
 * Table-driven check of unorm_normalize(): every case gives a mode, an
 * options value, an input string and the expected output. The same four NFC
 * inputs appear twice, once with options 0 and once with
 * UNORM_BEFORE_PRI_29, each with its own expected result.
 */
1471 TestComposition(void) {
1472 static const struct {
1473 UNormalizationMode mode
;
1479 * special cases for UAX #15 bug
1480 * see Unicode Public Review Issue #29
1481 * at http://www.unicode.org/review/resolved-pri.html#pri29
/* cases with options 0 */
1483 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1484 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1485 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1486 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
/* the same inputs with the UNORM_BEFORE_PRI_29 option */
1488 { UNORM_NFC
, UNORM_BEFORE_PRI_29
, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0xac00, 0x0300, 0x0327 } },
1489 { UNORM_NFC
, UNORM_BEFORE_PRI_29
, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0xac01, 0x0300, 0x0327 } },
1490 { UNORM_NFC
, UNORM_BEFORE_PRI_29
, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac01, 0x0327, 0x0300 } },
1491 { UNORM_NFC
, UNORM_BEFORE_PRI_29
, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b4b, 0x0300 } }
1493 /* TODO: add test cases for UNORM_FCC here (j2151) */
1497 UErrorCode errorCode
;
1500 for(i
=0; i
<LENGTHOF(cases
); ++i
) {
1501 errorCode
=U_ZERO_ERROR
;
1502 length
=unorm_normalize(
1504 cases
[i
].mode
, cases
[i
].options
,
1505 output
, LENGTHOF(output
),
/* a case fails on an error code, on a length mismatch, or on differing output text */
1507 if( U_FAILURE(errorCode
) ||
1508 length
!=u_strlen(cases
[i
].expect
) ||
1509 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1511 log_err("unexpected result for case %d\n", i
);
1516 #endif /* #if !UCONFIG_NO_NORMALIZATION */