1 /********************************************************************
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unicode/utf16.h"
22 #if !UCONFIG_NO_NORMALIZATION
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "unicode/unorm.h"
/*
 * Element count of a true array (not a pointer), converted to int32_t.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as an operand of sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
37 TestNormCoverage(void);
40 TestConcatenate(void);
43 TestNextPrevious(void);
45 static void TestIsNormalized(void);
48 TestFCNFKCClosure(void);
51 TestQuickCheckPerCP(void);
54 TestComposition(void);
60 TestGetDecomposition(void);
63 TestGetRawDecomposition(void);
65 static void TestAppendRestoreMiddle(void);
66 static void TestGetEasyToUseInstance(void);
68 static const char* const canonTests
[][3] = {
69 /* Input*/ /*Decomposed*/ /*Composed*/
70 { "cat", "cat", "cat" },
71 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
73 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
74 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
76 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
77 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
78 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */
80 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/
81 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
83 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
84 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
85 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
87 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
88 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
90 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
91 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
93 { "Henry IV", "Henry IV", "Henry IV" },
94 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
96 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
97 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
98 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
99 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
100 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
101 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* hw_ka + ten*/
/*
 * Compatibility normalization cases, laid out like canonTests:
 *   column 0 = input, column 1 = decomposed form, column 2 = composed form.
 */
static const char* const compatTests[][3] = {
    /* Input*/ /*Decomposed */ /*Composed*/
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed*/
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i*/
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana)*/
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /*ka + ten*/
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten*/
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
    /* hw_ka + hw_ten*/
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten*/
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
};
/*
 * FCD cases: column 0 = input, column 1 = expected result.
 * Column 2 is NULL; TestNormCases reads column 2 only for NFC and NFKC.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */

    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },

    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134 void addNormTest(TestNode
** root
);
136 void addNormTest(TestNode
** root
)
138 addTest(root
, &TestAPI
, "tsnorm/cnormtst/TestAPI");
139 addTest(root
, &TestDecomp
, "tsnorm/cnormtst/TestDecomp");
140 addTest(root
, &TestCompatDecomp
, "tsnorm/cnormtst/TestCompatDecomp");
141 addTest(root
, &TestCanonDecompCompose
, "tsnorm/cnormtst/TestCanonDecompCompose");
142 addTest(root
, &TestCompatDecompCompose
, "tsnorm/cnormtst/TestCompatDecompCompose");
143 addTest(root
, &TestFCD
, "tsnorm/cnormtst/TestFCD");
144 addTest(root
, &TestNull
, "tsnorm/cnormtst/TestNull");
145 addTest(root
, &TestQuickCheck
, "tsnorm/cnormtst/TestQuickCheck");
146 addTest(root
, &TestQuickCheckPerCP
, "tsnorm/cnormtst/TestQuickCheckPerCP");
147 addTest(root
, &TestIsNormalized
, "tsnorm/cnormtst/TestIsNormalized");
148 addTest(root
, &TestCheckFCD
, "tsnorm/cnormtst/TestCheckFCD");
149 addTest(root
, &TestNormCoverage
, "tsnorm/cnormtst/TestNormCoverage");
150 addTest(root
, &TestConcatenate
, "tsnorm/cnormtst/TestConcatenate");
151 addTest(root
, &TestNextPrevious
, "tsnorm/cnormtst/TestNextPrevious");
152 addTest(root
, &TestFCNFKCClosure
, "tsnorm/cnormtst/TestFCNFKCClosure");
153 addTest(root
, &TestComposition
, "tsnorm/cnormtst/TestComposition");
154 addTest(root
, &TestGetDecomposition
, "tsnorm/cnormtst/TestGetDecomposition");
155 addTest(root
, &TestGetRawDecomposition
, "tsnorm/cnormtst/TestGetRawDecomposition");
156 addTest(root
, &TestAppendRestoreMiddle
, "tsnorm/cnormtst/TestAppendRestoreMiddle");
157 addTest(root
, &TestGetEasyToUseInstance
, "tsnorm/cnormtst/TestGetEasyToUseInstance");
160 static const char* const modeStrings
[]={
170 static void TestNormCases(UNormalizationMode mode
,
171 const char* const cases
[][3], int32_t lengthOfCases
) {
172 int32_t x
, neededLen
, length2
;
173 int32_t expIndex
= (mode
==UNORM_NFC
|| mode
==UNORM_NFKC
) ? 2 : 1;
176 log_verbose("Testing unorm_normalize(%s)\n", modeStrings
[mode
]);
177 for(x
=0; x
< lengthOfCases
; x
++)
179 UErrorCode status
= U_ZERO_ERROR
, status2
= U_ZERO_ERROR
;
180 source
=CharsToUChars(cases
[x
][0]);
181 neededLen
= unorm_normalize(source
, u_strlen(source
), mode
, 0, NULL
, 0, &status
);
182 length2
= unorm_normalize(source
, -1, mode
, 0, NULL
, 0, &status2
);
183 if(neededLen
!=length2
) {
184 log_err("ERROR in unorm_normalize(%s)[%d]: "
185 "preflight length/NUL %d!=%d preflight length/srcLength\n",
186 modeStrings
[mode
], (int)x
, (int)neededLen
, (int)length2
);
188 if(status
==U_BUFFER_OVERFLOW_ERROR
)
192 length2
=unorm_normalize(source
, u_strlen(source
), mode
, 0, result
, LENGTHOF(result
), &status
);
193 if(U_FAILURE(status
) || neededLen
!=length2
) {
194 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
195 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
197 assertEqual(result
, cases
[x
][expIndex
], x
);
199 length2
=unorm_normalize(source
, -1, mode
, 0, result
, LENGTHOF(result
), &status
);
200 if(U_FAILURE(status
) || neededLen
!=length2
) {
201 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
202 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
204 assertEqual(result
, cases
[x
][expIndex
], x
);
211 TestNormCases(UNORM_NFD
, canonTests
, LENGTHOF(canonTests
));
214 void TestCompatDecomp() {
215 TestNormCases(UNORM_NFKD
, compatTests
, LENGTHOF(compatTests
));
218 void TestCanonDecompCompose() {
219 TestNormCases(UNORM_NFC
, canonTests
, LENGTHOF(canonTests
));
222 void TestCompatDecompCompose() {
223 TestNormCases(UNORM_NFKC
, compatTests
, LENGTHOF(compatTests
));
227 TestNormCases(UNORM_FCD
, fcdTests
, LENGTHOF(fcdTests
));
230 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
232 UChar
*expectedUni
= CharsToUChars(expected
);
233 if(u_strcmp(result
, expectedUni
)!=0){
234 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
240 static void TestNull_check(UChar
*src
, int32_t srcLen
,
241 UChar
*exp
, int32_t expLen
,
242 UNormalizationMode mode
,
245 UErrorCode status
= U_ZERO_ERROR
;
251 status
= U_ZERO_ERROR
;
258 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
260 if(U_FAILURE(status
)) {
261 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name
, u_errorName(status
));
262 } else if (len
!= expLen
) {
263 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
268 if(exp
[i
] != result
[i
]) {
269 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
276 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
280 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
286 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
287 int32_t source_comp_len
= 4;
288 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
289 int32_t expect_comp_len
= 3;
291 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
292 int32_t source_dcmp_len
= 3;
293 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
294 int32_t expect_dcmp_len
= 5;
296 TestNull_check(source_comp
,
303 TestNull_check(source_dcmp
,
310 TestNull_check(source_comp
,
320 static void TestQuickCheckResultNO()
322 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
323 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
324 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
325 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
326 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
327 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
328 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
329 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
335 UErrorCode error
= U_ZERO_ERROR
;
337 for (; count
< SIZE
; count
++)
339 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
342 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
345 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
348 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
351 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
354 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
357 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
360 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
367 static void TestQuickCheckResultYES()
369 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
370 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
371 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
372 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
373 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
374 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
375 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
376 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
380 UErrorCode error
= U_ZERO_ERROR
;
385 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
387 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp
);
390 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
393 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
396 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
398 log_err("ERROR in NFKD quick check at U+%04x\n", cp
);
401 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
404 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
410 for (; count
< SIZE
; count
++)
412 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
415 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
418 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
421 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
424 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
427 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
430 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
433 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
439 static void TestQuickCheckResultMAYBE()
441 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
442 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
443 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
444 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
450 UErrorCode error
= U_ZERO_ERROR
;
    /* NFD and NFKD do not have any MAYBE code points */
453 for (; count
< SIZE
; count
++)
455 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
458 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC
[count
]);
461 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
464 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
470 static void TestQuickCheckStringResult()
475 UErrorCode error
= U_ZERO_ERROR
;
477 for (count
= 0; count
< LENGTHOF(canonTests
); count
++)
479 d
= CharsToUChars(canonTests
[count
][1]);
480 c
= CharsToUChars(canonTests
[count
][2]);
481 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
484 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count
);
488 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
491 log_err("ERROR in NFC quick check for string at count %d\n", count
);
499 for (count
= 0; count
< LENGTHOF(compatTests
); count
++)
501 d
= CharsToUChars(compatTests
[count
][1]);
502 c
= CharsToUChars(compatTests
[count
][2]);
503 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
506 log_err("ERROR in NFKD quick check for string at count %d\n", count
);
510 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
513 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
/*
 * Entry point for the quick-check tests: runs the NO, YES and MAYBE
 * per-code-point checks and then the whole-string check, in that order.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
531 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
532 * normalized, and some that are not.
533 * Here we pick some specific cases and test the C API.
535 static void TestIsNormalized(void) {
536 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
537 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
538 { 0xfb1d, 0 }, /* excluded from composition */
539 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
540 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
542 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
543 { 0x1100, 0x1161, 0 }, /* Jamo compose */
544 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
545 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
549 UErrorCode errorCode
;
553 /* normal case with length>=0 (length -1 used for special cases below) */
554 errorCode
=U_ZERO_ERROR
;
555 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
556 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
559 /* incoming U_FAILURE */
560 errorCode
=U_TRUNCATED_CHAR_FOUND
;
561 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
562 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
563 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
567 errorCode
=U_ZERO_ERROR
;
568 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
569 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
570 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
574 errorCode
=U_ZERO_ERROR
;
575 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
576 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
577 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
581 for(i
=0; i
<LENGTHOF(notNFC
); ++i
) {
582 errorCode
=U_ZERO_ERROR
;
583 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
584 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
586 errorCode
=U_ZERO_ERROR
;
587 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
588 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
591 for(i
=0; i
<LENGTHOF(notNFKC
); ++i
) {
592 errorCode
=U_ZERO_ERROR
;
593 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
594 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
601 UErrorCode status
= U_ZERO_ERROR
;
602 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
604 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
605 0x02B9, 0x0314, 0x0315, 0x0316};
606 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
607 0x0050, 0x0730, 0x09EE, 0x1E10};
609 static const UChar datastr
[][5] =
610 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
611 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
612 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
613 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
614 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
616 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
618 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
620 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
621 0x0307, 0x0308, 0x0309, 0x030a,
622 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
623 0x0327, 0x0328, 0x0329, 0x032a,
624 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
625 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
629 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
630 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
631 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
632 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
633 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
634 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
636 if (U_FAILURE(status
))
637 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status
));
641 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
642 if (U_FAILURE(status
)) {
643 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count
);
647 if (result
[count
] != fcdresult
) {
648 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
655 /* random checks of long strings */
656 status
= U_ZERO_ERROR
;
657 srand((unsigned)time( NULL
));
659 for (count
= 0; count
< 50; count
++)
662 UBool testresult
= UNORM_YES
;
670 data
[size
] = datachar
[(rand() * 50) / RAND_MAX
];
671 log_verbose("0x%x", data
[size
]);
672 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
673 norm
+ normsize
, 100 - normsize
, &status
);
674 if (U_FAILURE(status
)) {
675 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
682 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
684 if (U_FAILURE(status
)) {
685 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
688 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
689 testresult
= UNORM_NO
;
691 if (testresult
== UNORM_YES
) {
692 log_verbose("result UNORM_YES\n");
695 log_verbose("result UNORM_NO\n");
698 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
699 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult
);
706 static const UChar in
[]={ 0x68, 0xe4 };
707 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
708 UErrorCode errorCode
;
711 /* try preflighting */
712 errorCode
=U_ZERO_ERROR
;
713 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
714 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
715 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
719 errorCode
=U_ZERO_ERROR
;
720 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
721 if(U_FAILURE(errorCode
)) {
722 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
725 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
726 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
729 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, NULL
, 0, &errorCode
);
730 if(U_FAILURE(errorCode
)) {
731 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
734 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, out
, 20, &errorCode
);
735 if(U_FAILURE(errorCode
)) {
736 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
741 /* test cases to improve test code coverage */
743 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
744 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
745 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
747 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
748 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
749 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
751 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
752 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
754 MUSICAL_VOID_NOTEHEAD
=0x1d157,
755 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
756 MUSICAL_STEM
=0x1d165, /* cc=216 */
757 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
762 UChar input
[1000], expect
[1000], output
[1000];
763 UErrorCode errorCode
;
764 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
766 /* create a long and nasty string with NFKC-unsafe characters */
769 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
770 input
[inLength
++]=HANGUL_KIYEOK
;
771 input
[inLength
++]=HANGUL_WEO
;
772 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
774 input
[inLength
++]=HANGUL_KIYEOK
;
775 input
[inLength
++]=HANGUL_WEO
;
776 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
778 input
[inLength
++]=HANGUL_KIYEOK
;
779 input
[inLength
++]=HANGUL_K_WEO
;
780 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
782 input
[inLength
++]=HANGUL_KIYEOK
;
783 input
[inLength
++]=HANGUL_K_WEO
;
784 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
786 input
[inLength
++]=HANGUL_K_KIYEOK
;
787 input
[inLength
++]=HANGUL_WEO
;
788 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
790 input
[inLength
++]=HANGUL_K_KIYEOK
;
791 input
[inLength
++]=HANGUL_WEO
;
792 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
794 input
[inLength
++]=HANGUL_K_KIYEOK
;
795 input
[inLength
++]=HANGUL_K_WEO
;
796 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
798 input
[inLength
++]=HANGUL_K_KIYEOK
;
799 input
[inLength
++]=HANGUL_K_WEO
;
800 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
802 /* Hangul LV with normal/compatibility Jamo T */
803 input
[inLength
++]=HANGUL_AC00
;
804 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
806 input
[inLength
++]=HANGUL_AC00
;
807 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
809 /* compatibility Jamo L, V */
810 input
[inLength
++]=HANGUL_K_KIYEOK
;
811 input
[inLength
++]=HANGUL_K_WEO
;
813 hangulPrefixLength
=inLength
;
815 input
[inLength
++]=U16_LEAD(MUSICAL_HALF_NOTE
);
816 input
[inLength
++]=U16_TRAIL(MUSICAL_HALF_NOTE
);
817 for(i
=0; i
<200; ++i
) {
818 input
[inLength
++]=U16_LEAD(MUSICAL_STACCATO
);
819 input
[inLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
820 input
[inLength
++]=U16_LEAD(MUSICAL_STEM
);
821 input
[inLength
++]=U16_TRAIL(MUSICAL_STEM
);
824 /* (compatibility) Jamo L, T do not compose */
825 input
[inLength
++]=HANGUL_K_KIYEOK
;
826 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
829 errorCode
=U_ZERO_ERROR
;
830 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
831 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
833 errorCode
=U_ZERO_ERROR
;
834 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
835 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
837 errorCode
=U_ZERO_ERROR
;
838 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
839 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
841 errorCode
=U_ZERO_ERROR
;
842 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
843 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
845 errorCode
=U_ZERO_ERROR
;
846 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
847 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
852 expect
[expectLength
++]=HANGUL_SYLLABLE
;
854 expect
[expectLength
++]=HANGUL_SYLLABLE
;
856 expect
[expectLength
++]=HANGUL_SYLLABLE
;
858 expect
[expectLength
++]=HANGUL_SYLLABLE
;
860 expect
[expectLength
++]=HANGUL_SYLLABLE
;
862 expect
[expectLength
++]=HANGUL_SYLLABLE
;
864 expect
[expectLength
++]=HANGUL_SYLLABLE
;
866 expect
[expectLength
++]=HANGUL_SYLLABLE
;
868 expect
[expectLength
++]=HANGUL_AC00
+3;
870 expect
[expectLength
++]=HANGUL_AC00
+3;
872 expect
[expectLength
++]=HANGUL_AC00
+14*28;
874 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
875 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
876 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
877 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
878 for(i
=0; i
<200; ++i
) {
879 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
880 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
882 for(i
=0; i
<200; ++i
) {
883 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
884 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
887 expect
[expectLength
++]=HANGUL_KIYEOK
;
888 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
890 /* try destination overflow first */
891 errorCode
=U_ZERO_ERROR
;
892 preflightLength
=unorm_normalize(input
, inLength
,
894 output
, 100, /* too short */
896 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
897 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode
));
901 errorCode
=U_ZERO_ERROR
;
902 length
=unorm_normalize(input
, inLength
,
904 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
906 if(U_FAILURE(errorCode
)) {
907 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
908 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
909 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
910 for(i
=0; i
<length
; ++i
) {
911 if(output
[i
]!=expect
[i
]) {
912 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
917 if(length
!=preflightLength
) {
918 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
922 u_memcpy(expect
, input
, hangulPrefixLength
);
923 expectLength
=hangulPrefixLength
;
925 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
926 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
927 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
928 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
929 for(i
=0; i
<200; ++i
) {
930 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
931 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
933 for(i
=0; i
<200; ++i
) {
934 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
935 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
938 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
939 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
941 errorCode
=U_ZERO_ERROR
;
942 length
=unorm_normalize(input
, inLength
,
944 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
946 if(U_FAILURE(errorCode
)) {
947 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
948 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
949 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
950 for(i
=0; i
<length
; ++i
) {
951 if(output
[i
]!=expect
[i
]) {
952 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
959 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
961 TestConcatenate(void) {
968 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
971 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
975 UErrorCode errorCode
;
978 /* left with length, right NUL-terminated */
979 errorCode
=U_ZERO_ERROR
;
980 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
981 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
982 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
986 errorCode
=U_ZERO_ERROR
;
987 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
988 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
989 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
993 errorCode
=U_ZERO_ERROR
;
994 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
995 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
996 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
999 /* enter with U_FAILURE */
1001 errorCode
=U_UNEXPECTED_TOKEN
;
1002 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1003 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1004 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1007 /* illegal arguments */
1009 errorCode
=U_ZERO_ERROR
;
1010 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1011 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1012 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1015 errorCode
=U_ZERO_ERROR
;
1016 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1017 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1018 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1026 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1027 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Helper that drives unorm_next() / unorm_previous() over a UCharIterator
 * and verifies each returned piece.
 *
 * src, srcLength    - the input text the iterator runs over
 * iter              - the iterator to advance or rewind
 * mode              - normalization mode; also used to index _modeString
 * forward           - printed in every log message as the "next/previous"
 *                     flag; presumably selects the unorm_next() versus the
 *                     unorm_previous() path (the branch itself is not
 *                     visible here - TODO confirm)
 * out, outLength    - expected output, pieces separated by _PLUS; the value
 *                     (UBool)(out!=NULL) is what gets passed to
 *                     unorm_next()/unorm_previous(), so a NULL out passes
 *                     FALSE there
 * srcIndexes,
 * srcIndexesLength  - expected iterator indexes at the piece boundaries
 *
 * For every piece the visible checks are: no failure in errorCode, the
 * iterator index equals the expected index, the returned length equals the
 * expected length, the output matches the expected piece, and the
 * neededToNormalize flag equals expectNeeded.
 */
1031 _testIter(const UChar
*src
, int32_t srcLength
,
1032 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1033 const UChar
*out
, int32_t outLength
,
1034 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1036 const UChar
*expect
, *outLimit
, *in
;
1037 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1038 UErrorCode errorCode
;
1039 UBool neededToNormalize
, expectNeeded
;
1041 errorCode
=U_ZERO_ERROR
;
1042 outLimit
=out
+outLength
;
1048 i
=srcIndexesLength
-2;
/* unorm_next() path: expected index comes from srcIndexes[i+1] */
1055 if(!iter
->hasNext(iter
)) {
1058 length
=unorm_next(iter
,
1059 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1061 (UBool
)(out
!=NULL
), &neededToNormalize
,
1063 expectIndex
=srcIndexes
[i
+1];
1065 inLength
=expectIndex
-prevIndex
;
1068 /* get output piece from between plus signs */
1070 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
/* the flag is expected to be set exactly when the output differs from the input slice */
1073 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1076 expectLength
=inLength
;
/* unorm_previous() path: expected index comes from srcIndexes[i] */
1080 if(!iter
->hasPrevious(iter
)) {
1083 length
=unorm_previous(iter
,
1084 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1086 (UBool
)(out
!=NULL
), &neededToNormalize
,
1088 expectIndex
=srcIndexes
[i
];
1090 inLength
=prevIndex
-expectIndex
;
1093 /* get output piece from between plus signs */
1095 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
1099 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1102 expectLength
=inLength
;
/* checks applied to the piece just produced */
1106 index
=iter
->getIndex(iter
, UITER_CURRENT
);
1108 if(U_FAILURE(errorCode
)) {
1109 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1110 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
1113 if(expectIndex
!=index
) {
1114 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1115 forward
, _modeString
[mode
], i
, index
, expectIndex
);
1118 if(expectLength
!=length
) {
1119 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1120 forward
, _modeString
[mode
], i
, length
, expectLength
);
1123 if(0!=u_memcmp(expect
, buffer
, length
)) {
1124 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1125 forward
, _modeString
[mode
], i
);
1128 if(neededToNormalize
!=expectNeeded
) {
1132 expect
+=expectLength
+1; /* go after the + */
1135 --expect
; /* go before the + */
/*
 * Tests unorm_next() and unorm_previous().
 *
 * A fixed input string (src) is iterated with _testIter() for each of
 * UNORM_NFD, UNORM_NFKD, UNORM_NFC, UNORM_NFKC and UNORM_FCD, passing TRUE
 * and then FALSE for the forward argument. This is done first with the
 * expected output tables (pieces separated by _PLUS) and then with a NULL
 * expected output. Before every FALSE (backward) run the iterator index is
 * set to its length.
 *
 * After that, unorm_next() is called directly to check: a NULL
 * neededToNormalize pointer, pure preflighting, preflighting into a
 * too-small buffer, a NULL iterator, an illegal mode value, and an error
 * code that is already a failure on entry.
 *
 * NOTE(review): the index-table lengths are computed as sizeof(...)/4,
 * which hard-codes the size of int32_t; the LENGTHOF() macro defined at the
 * top of this file would express the same thing without the magic number.
 */
1142 TestNextPrevious() {
1144 src
[]={ /* input string */
1145 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1147 nfd
[]={ /* + separates expected output pieces */
1148 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
1151 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
1154 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1157 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
1160 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1163 /* expected iterator indexes in the source string for each iteration piece */
1164 static const int32_t
1186 UBool neededToNormalize
;
1187 UErrorCode errorCode
;
1189 uiter_setString(&iter
, src
, sizeof(src
)/U_SIZEOF_UCHAR
);
1191 /* test iteration with doNormalize */
1193 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1195 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1197 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1199 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1201 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
/* backward runs: the iterator is moved to the end of the text before each call */
1203 iter
.index
=iter
.length
;
1204 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, sizeof(nfdIndexes
)/4);
1205 iter
.index
=iter
.length
;
1206 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1207 iter
.index
=iter
.length
;
1208 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, sizeof(nfcIndexes
)/4);
1209 iter
.index
=iter
.length
;
1210 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1211 iter
.index
=iter
.length
;
1212 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, sizeof(fcdIndexes
)/4);
1214 /* test iteration without doNormalize */
1216 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1218 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1220 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1222 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1224 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1226 iter
.index
=iter
.length
;
1227 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1228 iter
.index
=iter
.length
;
1229 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1230 iter
.index
=iter
.length
;
1231 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1232 iter
.index
=iter
.length
;
1233 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1234 iter
.index
=iter
.length
;
1235 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1237 /* try without neededToNormalize */
1238 errorCode
=U_ZERO_ERROR
;
1241 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1242 UNORM_NFD
, 0, TRUE
, NULL
,
1244 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1245 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* pure preflighting: NULL destination with capacity 0 must report overflow and length 2 */
1250 neededToNormalize
=9;
1252 length
=unorm_next(&iter
, NULL
, 0,
1253 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1255 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1256 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
/* preflighting with capacity 1: overflow is expected and buffer[1] must keep its sentinel value 5 */
1260 errorCode
=U_ZERO_ERROR
;
1261 buffer
[0]=buffer
[1]=5;
1262 neededToNormalize
=9;
1264 length
=unorm_next(&iter
, buffer
, 1,
1265 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1267 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1268 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
/* illegal argument: NULL iterator */
1273 errorCode
=U_ZERO_ERROR
;
1274 buffer
[0]=buffer
[1]=5;
1275 neededToNormalize
=9;
1277 length
=unorm_next(NULL
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1278 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1280 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1281 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
/* illegal argument: mode value 0 */
1286 buffer
[0]=buffer
[1]=5;
1287 neededToNormalize
=9;
1289 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1290 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1292 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1293 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1297 /* error coming in */
1298 errorCode
=U_MISPLACED_QUANTIFIER
;
1301 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1302 UNORM_NFD
, 0, TRUE
, NULL
,
1304 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1305 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
/*
 * Tests u_getFC_NFKC_Closure().
 *
 * A static table pairs a code point (field c) with the expected closure
 * string (field s). For each entry the call must succeed, the returned
 * length must equal u_strlen() of the output buffer, and the buffer must
 * compare equal to the expected string.
 *
 * The error-handling part then checks that a NULL destination yields
 * U_ILLEGAL_ARGUMENT_ERROR and that a following call still reports
 * U_ILLEGAL_ARGUMENT_ERROR.
 */
1311 TestFCNFKCClosure(void) {
1312 static const struct {
1318 { 0x037A, { 0x0020, 0x03B9, 0 } },
1319 { 0x03D2, { 0x03C5, 0 } },
1320 { 0x20A8, { 0x0072, 0x0073, 0 } },
1321 { 0x210B, { 0x0068, 0 } },
1322 { 0x210C, { 0x0068, 0 } },
1323 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1324 { 0x2122, { 0x0074, 0x006D, 0 } },
1325 { 0x2128, { 0x007A, 0 } },
1326 { 0x1D5DB, { 0x0068, 0 } },
1327 { 0x1D5ED, { 0x007A, 0 } },
1332 UErrorCode errorCode
;
1335 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
1336 errorCode
=U_ZERO_ERROR
;
1337 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, LENGTHOF(buffer
), &errorCode
);
1338 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
1339 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests
[i
].c
, u_errorName(errorCode
));
1343 /* error handling */
1344 errorCode
=U_ZERO_ERROR
;
1345 length
=u_getFC_NFKC_Closure(0x5c, NULL
, LENGTHOF(buffer
), &errorCode
);
1346 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1347 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * No reset of errorCode is visible before this call, so it appears to be
 * entered with the failure left by the previous call; the log message
 * labels this the U_FAILURE case.
 */
1350 length
=u_getFC_NFKC_Closure(0x5c, buffer
, LENGTHOF(buffer
), &errorCode
);
1351 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1352 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Cross-checks per-code-point normalization properties against the
 * string-based normalization API.
 *
 * 1. The maximum values of the NFD/NFKD quick-check properties must be
 *    UNORM_YES, those of NFC/NFKC must be UNORM_MAYBE, and the maximum
 *    values of the lead/trail canonical combining class properties must
 *    equal the maximum of UCHAR_CANONICAL_COMBINING_CLASS.
 * 2. For a code point c, written into s with U16_APPEND_UNSAFE, the
 *    quick-check property value (qc1) is obtained next to the result of
 *    unorm_quickCheck() on s (qc2) for NFC, NFD, NFKC and NFKD; the log
 *    messages report a mismatch between the two.
 * 3. s is normalized to NFD; the first and last code points of the result
 *    are read with U16_GET, and their combining classes (lccc2, tccc2) are
 *    obtained next to the lead/trail combining class properties of c
 *    (lccc1, tccc1).
 *
 * The loop header and the comparison conditions are not visible in this
 * view, so the range of code points covered is not stated here.
 */
1357 TestQuickCheckPerCP() {
1358 UErrorCode errorCode
;
1359 UChar32 c
, lead
, trail
;
1360 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1361 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1365 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1366 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1367 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1368 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1369 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1370 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1372 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1376 * compare the quick check property values for some code points
1377 * to the quick check results for checking same-code point strings
1379 errorCode
=U_ZERO_ERROR
;
1383 U16_APPEND_UNSAFE(s
, length
, c
);
1385 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1386 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1388 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1391 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1392 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1394 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1397 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1398 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1400 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1403 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1404 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1406 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
/* decompose s to NFD, then read the first and the last code point of the result */
1409 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, LENGTHOF(nfd
), &errorCode
);
1410 /* length-length == 0 is used to get around a compiler warning. */
1411 U16_GET(nfd
, 0, length
-length
, length
, lead
);
1412 U16_GET(nfd
, 0, length
-1, length
, trail
);
1414 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1415 lccc2
=u_getCombiningClass(lead
);
1416 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1417 tccc2
=u_getCombiningClass(trail
);
1420 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1424 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1428 /* skip some code points */
/*
 * Table-driven test of unorm_normalize() for composition edge cases.
 *
 * Each entry of the static cases table names a normalization mode, an
 * options value, and an input / expected-output pair. For every entry the
 * call must succeed, the returned length must equal u_strlen() of the
 * expected string, and the output must match the expected string over that
 * length; otherwise the case index is logged.
 */
1434 TestComposition(void) {
1435 static const struct {
1436 UNormalizationMode mode
;
1442 * special cases for UAX #15 bug
1443 * see Unicode Corrigendum #5: Normalization Idempotency
1444 * at http://unicode.org/versions/corrigendum5.html
1445 * (was Public Review Issue #29)
1447 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1448 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1449 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1450 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1452 /* TODO: add test cases for UNORM_FCC here (j2151) */
1456 UErrorCode errorCode
;
1459 for(i
=0; i
<LENGTHOF(cases
); ++i
) {
1460 errorCode
=U_ZERO_ERROR
;
1461 length
=unorm_normalize(
1463 cases
[i
].mode
, cases
[i
].options
,
1464 output
, LENGTHOF(output
),
1466 if( U_FAILURE(errorCode
) ||
1467 length
!=u_strlen(cases
[i
].expect
) ||
1468 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1470 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i
);
/*
 * Tests unorm2_getDecomposition() on the instance obtained with
 * unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, ...), which the
 * log messages call "fcc".
 *
 * Expectations checked below:
 *   U+0020  -> success with a negative length
 *   U+00E4  -> 0061 0308, length 2, NUL-terminated
 *   U+AC01  -> 1100 1161 11A8, length 3, NUL-terminated
 *   U+AC01 with a NULL destination and capacity 0
 *           -> U_BUFFER_OVERFLOW_ERROR with length 3
 *   capacity -1                          -> U_ILLEGAL_ARGUMENT_ERROR
 *   NULL destination with capacity 4     -> U_ILLEGAL_ARGUMENT_ERROR
 */
1476 TestGetDecomposition() {
1480 UErrorCode errorCode
=U_ZERO_ERROR
;
1481 const UNormalizer2
*n2
=unorm2_getInstance(NULL
, "nfc", UNORM2_COMPOSE_CONTIGUOUS
, &errorCode
);
1482 if(U_FAILURE(errorCode
)) {
1483 log_err_status(errorCode
, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode
));
1487 length
=unorm2_getDecomposition(n2
, 0x20, decomp
, LENGTHOF(decomp
), &errorCode
);
1488 if(U_FAILURE(errorCode
) || length
>=0) {
1489 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1491 errorCode
=U_ZERO_ERROR
;
1492 length
=unorm2_getDecomposition(n2
, 0xe4, decomp
, LENGTHOF(decomp
), &errorCode
);
1493 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1494 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1496 errorCode
=U_ZERO_ERROR
;
1497 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, LENGTHOF(decomp
), &errorCode
);
1498 if(U_FAILURE(errorCode
) || length
!=3 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0x11a8 || decomp
[3]!=0) {
1499 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
/* preflighting: NULL destination with capacity 0 */
1501 errorCode
=U_ZERO_ERROR
;
1502 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1503 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
1504 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
/* illegal arguments: negative capacity, then NULL destination with a non-zero capacity */
1506 errorCode
=U_ZERO_ERROR
;
1507 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1508 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1509 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1511 errorCode
=U_ZERO_ERROR
;
1512 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1513 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1514 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
/*
 * Tests unorm2_getRawDecomposition() on the instance returned by
 * unorm2_getNFKCInstance().
 *
 * Expectations checked below:
 *   U+0020  -> success with a negative length
 *   U+00E4  -> 0061 0308
 *   U+1E08  -> 00C7 0301
 *   U+212B  -> 00C5
 *   U+AC00  -> 1100 1161
 *   U+AC01  -> AC00 11A8
 *   U+AC01 with a NULL destination and capacity 0
 *           -> U_BUFFER_OVERFLOW_ERROR with length 2
 *   capacity -1                          -> U_ILLEGAL_ARGUMENT_ERROR
 *   NULL destination with capacity 4     -> U_ILLEGAL_ARGUMENT_ERROR
 *
 * NOTE(review): every failure message in this function says
 * "unorm2_getDecomposition(nfkc, ...)" although the function being called
 * is unorm2_getRawDecomposition(); this looks like a copy-and-paste
 * leftover that makes failures harder to attribute - consider renaming the
 * messages.
 */
1519 TestGetRawDecomposition() {
1523 UErrorCode errorCode
=U_ZERO_ERROR
;
1524 const UNormalizer2
*n2
=unorm2_getNFKCInstance(&errorCode
);
1525 if(U_FAILURE(errorCode
)) {
1526 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1530 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1531 * without recursive decomposition.
1534 length
=unorm2_getRawDecomposition(n2
, 0x20, decomp
, LENGTHOF(decomp
), &errorCode
);
1535 if(U_FAILURE(errorCode
) || length
>=0) {
1536 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1538 errorCode
=U_ZERO_ERROR
;
1539 length
=unorm2_getRawDecomposition(n2
, 0xe4, decomp
, LENGTHOF(decomp
), &errorCode
);
1540 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1541 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1543 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1544 errorCode
=U_ZERO_ERROR
;
1545 length
=unorm2_getRawDecomposition(n2
, 0x1e08, decomp
, LENGTHOF(decomp
), &errorCode
);
1546 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xc7 || decomp
[1]!=0x301 || decomp
[2]!=0) {
1547 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1549 /* U+212B ANGSTROM SIGN */
1550 errorCode
=U_ZERO_ERROR
;
1551 length
=unorm2_getRawDecomposition(n2
, 0x212b, decomp
, LENGTHOF(decomp
), &errorCode
);
1552 if(U_FAILURE(errorCode
) || length
!=1 || decomp
[0]!=0xc5 || decomp
[1]!=0) {
1553 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1555 errorCode
=U_ZERO_ERROR
;
1556 length
=unorm2_getRawDecomposition(n2
, 0xac00, decomp
, LENGTHOF(decomp
), &errorCode
);
1557 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0) {
1558 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1560 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1561 errorCode
=U_ZERO_ERROR
;
1562 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, LENGTHOF(decomp
), &errorCode
);
1563 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xac00 || decomp
[1]!=0x11a8 || decomp
[2]!=0) {
1564 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
/* preflighting: NULL destination with capacity 0 */
1566 errorCode
=U_ZERO_ERROR
;
1567 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1568 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=2) {
1569 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
/* illegal arguments: negative capacity, then NULL destination with a non-zero capacity */
1571 errorCode
=U_ZERO_ERROR
;
1572 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1573 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1574 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1576 errorCode
=U_ZERO_ERROR
;
1577 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1578 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1579 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
/*
 * Tests unorm2_append() with the NFC instance when the destination
 * capacity is too small.
 *
 * First string a ends in 'A' + cedilla, second string b starts with
 * 'ring above', so appending has to recompose across the boundary.
 * Step 1: append with capacity 6; the call must report
 *         U_BUFFER_OVERFLOW_ERROR together with the full result length,
 *         and a[0..5] must still hold the original contents.
 * Step 2: append again with the real capacity of a[]; the call must
 *         succeed and a must match the expected array.
 */
1584 TestAppendRestoreMiddle() {
1585 UChar a
[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1586 static const UChar b
[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1587 /* NFC: C5 is 'A with ring above' */
1588 static const UChar expected
[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1590 UErrorCode errorCode
=U_ZERO_ERROR
;
1591 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1592 if(U_FAILURE(errorCode
)) {
1593 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1597 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1598 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1599 * still fits into a[] but the full result still overflows this capacity.
1600 * (Let it modify the destination buffer before reallocating internally.)
1602 length
=unorm2_append(n2
, a
, -1, 6, b
, -1, &errorCode
);
1603 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=LENGTHOF(expected
)) {
1604 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length
);
1607 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1608 if(a
[0]!=0x61 || a
[1]!=0x62 || a
[2]!=0x63 || a
[3]!=0x41 || a
[4]!=0x327 || a
[5]!=0) {
1609 log_err("unorm2_append(overflow) modified the first string\n");
/* second attempt with the full capacity of a[]: must succeed and produce the expected text */
1612 errorCode
=U_ZERO_ERROR
;
1613 length
=unorm2_append(n2
, a
, -1, LENGTHOF(a
), b
, -1, &errorCode
);
1614 if(U_FAILURE(errorCode
) || length
!=LENGTHOF(expected
) || 0!=u_memcmp(a
, expected
, length
)) {
1615 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode
), (int)length
);
/*
 * Tests the unorm2_getNFCInstance(), unorm2_getNFDInstance(),
 * unorm2_getNFKCInstance(), unorm2_getNFKDInstance() and
 * unorm2_getNFKCCasefoldInstance() getters by normalizing one input with
 * each returned instance and checking the result.
 *
 * Input: 00A0 00C7 0301. Expected output per instance:
 *   NFC           -> 00A0 1E08
 *   NFD           -> 00A0 0043 0327 0301
 *   NFKC          -> 0020 1E08
 *   NFKD          -> 0020 0043 0327 0301
 *   NFKC_Casefold -> 0020 1E09
 */
1621 TestGetEasyToUseInstance() {
1622 static const UChar in
[]={
1623 0xA0, /* -> <noBreak> 0020 */
1624 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1629 UErrorCode errorCode
=U_ZERO_ERROR
;
1630 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1631 if(U_FAILURE(errorCode
)) {
1632 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1635 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1636 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0xa0 || out
[1]!=0x1e08) {
1637 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1638 (int)length
, u_errorName(errorCode
));
/* NFD instance */
1641 errorCode
=U_ZERO_ERROR
;
1642 n2
=unorm2_getNFDInstance(&errorCode
);
1643 if(U_FAILURE(errorCode
)) {
1644 log_err_status(errorCode
, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode
));
1647 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1648 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0xa0 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1649 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1650 (int)length
, u_errorName(errorCode
));
/* NFKC instance */
1653 errorCode
=U_ZERO_ERROR
;
1654 n2
=unorm2_getNFKCInstance(&errorCode
);
1655 if(U_FAILURE(errorCode
)) {
1656 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1659 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1660 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e08) {
1661 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1662 (int)length
, u_errorName(errorCode
));
/* NFKD instance */
1665 errorCode
=U_ZERO_ERROR
;
1666 n2
=unorm2_getNFKDInstance(&errorCode
);
1667 if(U_FAILURE(errorCode
)) {
1668 log_err_status(errorCode
, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode
));
1671 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1672 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0x20 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1673 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1674 (int)length
, u_errorName(errorCode
));
/* NFKC_Casefold instance */
1677 errorCode
=U_ZERO_ERROR
;
1678 n2
=unorm2_getNFKCCasefoldInstance(&errorCode
);
1679 if(U_FAILURE(errorCode
)) {
1680 log_err_status(errorCode
, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode
));
1683 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1684 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e09) {
1685 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1686 (int)length
, u_errorName(errorCode
));
1690 #endif /* #if !UCONFIG_NO_NORMALIZATION */