1 /********************************************************************
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unicode/utf16.h"
/*
 * Stub used when ICU is built with UCONFIG_NO_NORMALIZATION: addNormTest()
 * registers no tests.
 * NOTE(review): this listing drops lines; the closing brace of the stub and
 * the matching #else/#endif are not visible here.
 */
22 #if UCONFIG_NO_NORMALIZATION
24 void addNormTest(TestNode
** root
) {
25 /* no normalization - nothing to do */
32 #include "unicode/uchar.h"
33 #include "unicode/ustring.h"
34 #include "unicode/unorm.h"
/*
 * LENGTHOF(array): number of elements of a true array (not a pointer),
 * converted to int32_t.
 * The whole expansion is parenthesized so that the leading cast cannot be
 * split from its operand at the use site (for example `sizeof LENGTHOF(a)`
 * would otherwise parse as `sizeof(int32_t)` followed by a stray `(...)`).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
/*
 * Forward declarations of test functions that addNormTest() registers below.
 * NOTE(review): this listing drops lines; for most entries the line carrying
 * the storage class / return type is not visible here.
 */
43 TestNormCoverage(void);
46 TestConcatenate(void);
49 TestNextPrevious(void);
51 static void TestIsNormalized(void);
54 TestFCNFKCClosure(void);
57 TestQuickCheckPerCP(void);
60 TestComposition(void);
66 TestGetDecomposition(void);
69 TestGetRawDecomposition(void);
71 static void TestAppendRestoreMiddle(void);
72 static void TestGetEasyToUseInstance(void);
/*
 * Canonical normalization test vectors. Each row holds three escaped strings
 * (converted with CharsToUChars() by the tests): the input, its expected
 * canonical decomposition, and its expected canonical composition.
 * TestNormCases() compares against column 1 for decomposing modes and
 * column 2 for composing modes.
 * NOTE(review): the closing "};" of the table is not visible in this listing.
 */
74 static const char* const canonTests
[][3] = {
75 /* Input*/ /*Decomposed*/ /*Composed*/
76 { "cat", "cat", "cat" },
77 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
79 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
80 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
82 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
83 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
84 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */
86 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/
87 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
89 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
90 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
91 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
93 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
94 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
96 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
97 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
99 { "Henry IV", "Henry IV", "Henry IV" },
100 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
102 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
103 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
104 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
105 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
106 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
107 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + grave + grave_below: the two marks are reordered */
/*
 * Compatibility normalization test vectors, laid out like canonTests:
 * input, expected compatibility decomposition, expected compatibility
 * composition. Used by the NFKD/NFKC tests and by the quick-check string test.
 * NOTE(review): the closing "};" of the table is not visible in this listing.
 */
111 static const char* const compatTests
[][3] = {
112 /* Input*/ /*Decomposed */ /*Composed*/
113 { "cat", "cat", "cat" },
115 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
117 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
118 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
120 { "Henry IV", "Henry IV", "Henry IV" },
121 { "Henry \\u2163", "Henry IV", "Henry IV" },
123 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
124 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
126 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
128 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
129 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
130 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/
/*
 * Test vectors for the UNORM_FCD mode. Only the input and the second column
 * are used: TestNormCases() reads column 1 for every mode other than NFC/NFKC,
 * so the third column is left NULL.
 * NOTE(review): the closing "};" of the table is not visible in this listing.
 */
134 static const char* const fcdTests
[][3] = {
135 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
136 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL
}, /* D-caron + cedilla */
137 { "\\u010e", "\\u010e", NULL
} /* D-caron */
/*
 * addNormTest(): registers every normalization test of this file on the
 * test tree rooted at `root`, one addTest() call per test, under the path
 * prefix "tsnorm/cnormtst/".
 * NOTE(review): this listing drops lines; the braces around the function body
 * are not visible here.
 */
140 void addNormTest(TestNode
** root
);
142 void addNormTest(TestNode
** root
)
144 addTest(root
, &TestAPI
, "tsnorm/cnormtst/TestAPI");
145 addTest(root
, &TestDecomp
, "tsnorm/cnormtst/TestDecomp");
146 addTest(root
, &TestCompatDecomp
, "tsnorm/cnormtst/TestCompatDecomp");
147 addTest(root
, &TestCanonDecompCompose
, "tsnorm/cnormtst/TestCanonDecompCompose");
148 addTest(root
, &TestCompatDecompCompose
, "tsnorm/cnormtst/TestCompatDecompCompose");
149 addTest(root
, &TestFCD
, "tsnorm/cnormtst/TestFCD");
150 addTest(root
, &TestNull
, "tsnorm/cnormtst/TestNull");
151 addTest(root
, &TestQuickCheck
, "tsnorm/cnormtst/TestQuickCheck");
152 addTest(root
, &TestQuickCheckPerCP
, "tsnorm/cnormtst/TestQuickCheckPerCP");
153 addTest(root
, &TestIsNormalized
, "tsnorm/cnormtst/TestIsNormalized");
154 addTest(root
, &TestCheckFCD
, "tsnorm/cnormtst/TestCheckFCD");
155 addTest(root
, &TestNormCoverage
, "tsnorm/cnormtst/TestNormCoverage");
156 addTest(root
, &TestConcatenate
, "tsnorm/cnormtst/TestConcatenate");
157 addTest(root
, &TestNextPrevious
, "tsnorm/cnormtst/TestNextPrevious");
158 addTest(root
, &TestFCNFKCClosure
, "tsnorm/cnormtst/TestFCNFKCClosure");
159 addTest(root
, &TestComposition
, "tsnorm/cnormtst/TestComposition");
160 addTest(root
, &TestGetDecomposition
, "tsnorm/cnormtst/TestGetDecomposition");
161 addTest(root
, &TestGetRawDecomposition
, "tsnorm/cnormtst/TestGetRawDecomposition");
162 addTest(root
, &TestAppendRestoreMiddle
, "tsnorm/cnormtst/TestAppendRestoreMiddle");
163 addTest(root
, &TestGetEasyToUseInstance
, "tsnorm/cnormtst/TestGetEasyToUseInstance");
/*
 * Printable names for log messages, indexed by UNormalizationMode
 * (see the modeStrings[mode] uses in TestNormCases()).
 * NOTE(review): the initializer entries are not visible in this listing.
 */
166 static const char* const modeStrings
[]={
/*
 * TestNormCases(): shared driver for the table-based normalization tests.
 *
 * mode          - normalization mode handed to unorm_normalize().
 * cases         - table of {input, decomposed, composed} escaped strings.
 * lengthOfCases - number of rows in `cases`.
 *
 * For each row it preflights unorm_normalize() twice (explicit source length
 * and -1 for a NUL-terminated source) and reports a mismatch between the two
 * lengths, then normalizes into `result` both ways and checks each output
 * with assertEqual() against column expIndex.
 * NOTE(review): this listing drops lines; the declarations of `source` and
 * `result`, several braces and any cleanup are not visible here.
 */
176 static void TestNormCases(UNormalizationMode mode
,
177 const char* const cases
[][3], int32_t lengthOfCases
) {
178 int32_t x
, neededLen
, length2
;
/* composing modes are checked against column 2, all others against column 1 */
179 int32_t expIndex
= (mode
==UNORM_NFC
|| mode
==UNORM_NFKC
) ? 2 : 1;
182 log_verbose("Testing unorm_normalize(%s)\n", modeStrings
[mode
]);
183 for(x
=0; x
< lengthOfCases
; x
++)
185 UErrorCode status
= U_ZERO_ERROR
, status2
= U_ZERO_ERROR
;
186 source
=CharsToUChars(cases
[x
][0]);
/* preflight with an explicit length (neededLen) and with -1 (length2) */
187 neededLen
= unorm_normalize(source
, u_strlen(source
), mode
, 0, NULL
, 0, &status
);
188 length2
= unorm_normalize(source
, -1, mode
, 0, NULL
, 0, &status2
);
/* NOTE(review): the "/NUL" and "/srcLength" labels in the messages below look
   swapped relative to which call used -1 and which used u_strlen() - confirm */
189 if(neededLen
!=length2
) {
190 log_err("ERROR in unorm_normalize(%s)[%d]: "
191 "preflight length/NUL %d!=%d preflight length/srcLength\n",
192 modeStrings
[mode
], (int)x
, (int)neededLen
, (int)length2
);
194 if(status
==U_BUFFER_OVERFLOW_ERROR
)
198 length2
=unorm_normalize(source
, u_strlen(source
), mode
, 0, result
, LENGTHOF(result
), &status
);
199 if(U_FAILURE(status
) || neededLen
!=length2
) {
200 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
201 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
203 assertEqual(result
, cases
[x
][expIndex
], x
);
205 length2
=unorm_normalize(source
, -1, mode
, 0, result
, LENGTHOF(result
), &status
);
206 if(U_FAILURE(status
) || neededLen
!=length2
) {
207 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
208 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
210 assertEqual(result
, cases
[x
][expIndex
], x
);
/*
 * Runs the shared driver in UNORM_NFD mode over canonTests.
 * NOTE(review): the enclosing function header and braces are not visible in
 * this listing; presumably this is the body of TestDecomp() - confirm.
 */
217 TestNormCases(UNORM_NFD
, canonTests
, LENGTHOF(canonTests
));
220 void TestCompatDecomp() {
221 TestNormCases(UNORM_NFKD
, compatTests
, LENGTHOF(compatTests
));
224 void TestCanonDecompCompose() {
225 TestNormCases(UNORM_NFC
, canonTests
, LENGTHOF(canonTests
));
228 void TestCompatDecompCompose() {
229 TestNormCases(UNORM_NFKC
, compatTests
, LENGTHOF(compatTests
));
/*
 * Runs the shared driver in UNORM_FCD mode over fcdTests.
 * NOTE(review): the enclosing function header and braces are not visible in
 * this listing; presumably this is the body of TestFCD() - confirm.
 */
233 TestNormCases(UNORM_FCD
, fcdTests
, LENGTHOF(fcdTests
));
/*
 * assertEqual(): converts the escaped `expected` string with CharsToUChars()
 * and logs an error naming `index` when it differs from `result`
 * (compared with u_strcmp(), so both are treated as NUL-terminated).
 * NOTE(review): this listing is truncated inside the log_err() call; the
 * remaining arguments and any cleanup of expectedUni are not visible here.
 */
236 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
238 UChar
*expectedUni
= CharsToUChars(expected
);
239 if(u_strcmp(result
, expectedUni
)!=0){
240 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
/*
 * TestNull_check(): normalizes src[0..srcLen) in `mode` into a local buffer
 * (capacity 50 is passed to unorm_normalize()) and compares the returned
 * length with expLen and the output with `exp`, element by element.
 * `name` is only used to label log messages.
 * NOTE(review): this listing drops lines; the `name` parameter, the locals
 * (result, len, i), the loops and several braces are not visible here.
 */
246 static void TestNull_check(UChar
*src
, int32_t srcLen
,
247 UChar
*exp
, int32_t expLen
,
248 UNormalizationMode mode
,
251 UErrorCode status
= U_ZERO_ERROR
;
257 status
= U_ZERO_ERROR
;
264 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
266 if(U_FAILURE(status
)) {
267 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name
, u_errorName(status
));
268 } else if (len
!= expLen
) {
269 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
274 if(exp
[i
] != result
[i
]) {
275 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
282 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
286 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
/*
 * Source/expected arrays that contain an embedded U+0000, passed with explicit
 * lengths to TestNull_check() for composition and decomposition.
 * NOTE(review): the enclosing function header (presumably TestNull() - confirm)
 * and the remaining arguments of each TestNull_check() call are not visible in
 * this listing.
 */
292 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
293 int32_t source_comp_len
= 4;
294 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
295 int32_t expect_comp_len
= 3;
297 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
298 int32_t source_dcmp_len
= 3;
299 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
300 int32_t expect_dcmp_len
= 5;
302 TestNull_check(source_comp
,
309 TestNull_check(source_dcmp
,
316 TestNull_check(source_comp
,
/*
 * TestQuickCheckResultNO(): calls unorm_quickCheck() on single code units
 * taken from one table per mode (NFD, NFC, NFKD, NFKC) and logs an error when
 * the result differs from the expected value.
 * NOTE(review): this listing drops lines; the right-hand side of each
 * comparison (presumably UNORM_NO, going by the function name - confirm),
 * the SIZE/count declarations and the braces are not visible here.
 */
326 static void TestQuickCheckResultNO()
328 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
329 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
330 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
331 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
332 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
333 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
334 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
335 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
341 UErrorCode error
= U_ZERO_ERROR
;
343 for (; count
< SIZE
; count
++)
345 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
348 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
351 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
354 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
357 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
360 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
363 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
366 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * TestQuickCheckResultYES(): calls unorm_quickCheck() on a single code unit
 * `cp` in each of the four modes, then on single code units from one table
 * per mode, and logs an error when the result differs from the expected value
 * (UNORM_YES where the comparison is visible).
 * NOTE(review): this listing drops lines; the declaration and loop for `cp`,
 * SIZE/count, some comparison right-hand sides and the braces are not visible
 * here.
 */
373 static void TestQuickCheckResultYES()
375 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
376 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
377 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
378 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
379 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
380 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
381 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
382 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
386 UErrorCode error
= U_ZERO_ERROR
;
391 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
393 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp
);
396 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
399 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
402 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
404 log_err("ERROR in NFKD quick check at U+%04x\n", cp
);
407 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
410 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
416 for (; count
< SIZE
; count
++)
418 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
421 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
424 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
427 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
430 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
433 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
436 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
439 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * TestQuickCheckResultMAYBE(): calls unorm_quickCheck() on single code units
 * from the NFC and NFKC tables and logs an error when the result differs from
 * the expected value.
 * NOTE(review): this listing drops lines; the right-hand side of each
 * comparison (presumably UNORM_MAYBE, going by the function name - confirm),
 * the SIZE/count declarations and the braces are not visible here.
 */
445 static void TestQuickCheckResultMAYBE()
447 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
448 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
449 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
450 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
456 UErrorCode error
= U_ZERO_ERROR
;
458 /* NFD and NFKD do not have any MAYBE code points */
459 for (; count
< SIZE
; count
++)
461 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
464 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC
[count
]);
467 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
470 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * TestQuickCheckStringResult(): runs unorm_quickCheck() on whole strings.
 * For every canonTests row it checks the decomposed column in NFD and the
 * composed column in NFC; for every compatTests row it checks the decomposed
 * column in NFKD and the composed column in NFKC.
 * NOTE(review): this listing drops lines; the declarations of count/d/c, the
 * comparison right-hand sides, any cleanup of d and c, and the braces are not
 * visible here. Note that the NFC check uses "==" while the others use "!=".
 */
476 static void TestQuickCheckStringResult()
481 UErrorCode error
= U_ZERO_ERROR
;
483 for (count
= 0; count
< LENGTHOF(canonTests
); count
++)
485 d
= CharsToUChars(canonTests
[count
][1]);
486 c
= CharsToUChars(canonTests
[count
][2]);
487 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
490 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count
);
494 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
497 log_err("ERROR in NFC quick check for string at count %d\n", count
);
505 for (count
= 0; count
< LENGTHOF(compatTests
); count
++)
507 d
= CharsToUChars(compatTests
[count
][1]);
508 c
= CharsToUChars(compatTests
[count
][2]);
509 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
512 log_err("ERROR in NFKD quick check for string at count %d\n", count
);
516 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
519 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
/*
 * TestQuickCheck(): entry point registered as "tsnorm/cnormtst/TestQuickCheck".
 * Runs the four unorm_quickCheck() sub-tests in order: single code units
 * expected to yield NO, YES and MAYBE, then whole-string checks.
 * Declared with an explicit (void) parameter list, like the other test
 * declarations in this file, so the definition also serves as a prototype.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();
    TestQuickCheckStringResult();
}
537 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
538 * normalized, and some that are not.
539 * Here we pick some specific cases and test the C API.
/*
 * TestIsNormalized(): exercises unorm_isNormalized() through the C API.
 * It checks, in order:
 *   - a single U+0300 with explicit length 1 is reported as normalized (NFC);
 *   - an incoming failure code is left unchanged;
 *   - a NULL source and a length of -2 each set U_ILLEGAL_ARGUMENT_ERROR;
 *   - every NUL-terminated string in notNFC is rejected for NFC and NFKC,
 *     and every string in notNFKC is rejected for NFKC.
 * NOTE(review): this listing drops lines; the closing "};" of both tables,
 * the declaration of `i` and several closing braces are not visible here.
 */
541 static void TestIsNormalized(void) {
542 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
543 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
544 { 0xfb1d, 0 }, /* excluded from composition */
545 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
546 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
548 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
549 { 0x1100, 0x1161, 0 }, /* Jamo compose */
550 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
551 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
555 UErrorCode errorCode
;
559 /* normal case with length>=0 (length -1 used for special cases below) */
560 errorCode
=U_ZERO_ERROR
;
/* notNFC[0]+2 points at the lone U+0300 inside the first test string */
561 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
562 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
565 /* incoming U_FAILURE */
566 errorCode
=U_TRUNCATED_CHAR_FOUND
;
567 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
568 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
569 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
573 errorCode
=U_ZERO_ERROR
;
574 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
575 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
576 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
580 errorCode
=U_ZERO_ERROR
;
581 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
582 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
583 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
587 for(i
=0; i
<LENGTHOF(notNFC
); ++i
) {
588 errorCode
=U_ZERO_ERROR
;
589 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
590 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
592 errorCode
=U_ZERO_ERROR
;
593 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
594 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
597 for(i
=0; i
<LENGTHOF(notNFKC
); ++i
) {
598 errorCode
=U_ZERO_ERROR
;
599 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
600 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
/*
 * Exercises unorm_quickCheck() in UNORM_FCD mode:
 *   - three fixed 10-unit arrays (FAST_, FALSE_, TRUE_) expecting YES, NO, YES;
 *   - the four datastr rows, compared against the result[] table;
 *   - 50 rounds of random strings drawn from datachar, where the expected
 *     answer is YES exactly when normalizing each unit to NFD separately and
 *     concatenating gives the same text as NFD of the whole string.
 * NOTE(review): the enclosing function header (presumably TestCheckFCD() -
 * confirm), the declarations of data/norm/nfd/size/normsize/nfdsize/count,
 * some table rows, loop headers and braces are not visible in this listing.
 */
607 UErrorCode status
= U_ZERO_ERROR
;
608 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
610 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
611 0x02B9, 0x0314, 0x0315, 0x0316};
612 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
613 0x0050, 0x0730, 0x09EE, 0x1E10};
615 static const UChar datastr
[][5] =
616 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
617 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
618 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
619 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
/* NOTE(review): quick-check results are stored in UBool here and in fcdresult
   and testresult below; consider UNormalizationCheckResult - confirm */
620 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
622 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
624 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
626 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
627 0x0307, 0x0308, 0x0309, 0x030a,
628 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
629 0x0327, 0x0328, 0x0329, 0x032a,
630 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
631 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
635 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
636 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
638 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
639 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
640 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
642 if (U_FAILURE(status
))
643 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status
));
647 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
648 if (U_FAILURE(status
)) {
649 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count
);
653 if (result
[count
] != fcdresult
) {
654 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
661 /* random checks of long strings */
662 status
= U_ZERO_ERROR
;
663 srand((unsigned)time( NULL
));
665 for (count
= 0; count
< 50; count
++)
668 UBool testresult
= UNORM_YES
;
/* NOTE(review): rand() * 50 is computed in int and can overflow where RAND_MAX
   is close to INT_MAX, which may yield a negative index - needs a code fix */
676 data
[size
] = datachar
[(rand() * 50) / RAND_MAX
];
677 log_verbose("0x%x", data
[size
]);
/* append the NFD form of this single unit to norm[] */
678 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
679 norm
+ normsize
, 100 - normsize
, &status
);
680 if (U_FAILURE(status
)) {
681 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
688 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
690 if (U_FAILURE(status
)) {
691 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
/* a difference between whole-string NFD and per-unit NFD means NO is expected */
694 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
695 testresult
= UNORM_NO
;
697 if (testresult
== UNORM_YES
) {
698 log_verbose("result UNORM_YES\n");
701 log_verbose("result UNORM_NO\n");
704 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
705 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult
);
/*
 * Basic unorm_normalize() API checks on the two-unit input {0x68, 0xe4}:
 *   - pure preflighting (NULL destination, capacity 0) must report
 *     U_BUFFER_OVERFLOW_ERROR and length 3;
 *   - normalizing into out[] with capacity 3 must succeed, write U+0308 at
 *     out[2] and leave the 0xffff sentinel at out[3] untouched;
 *   - a NULL source of length 0 must succeed with a NULL and a real destination.
 * NOTE(review): the enclosing function header (presumably TestAPI() - confirm),
 * the declaration of `length` and several braces are not visible in this
 * listing. Some messages pass `length` to %ld without the (long) cast used by
 * the later ones - confirm the type of `length`.
 */
712 static const UChar in
[]={ 0x68, 0xe4 };
713 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
714 UErrorCode errorCode
;
717 /* try preflighting */
718 errorCode
=U_ZERO_ERROR
;
719 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
720 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
721 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
725 errorCode
=U_ZERO_ERROR
;
726 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
727 if(U_FAILURE(errorCode
)) {
728 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
731 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
732 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
735 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, NULL
, 0, &errorCode
);
736 if(U_FAILURE(errorCode
)) {
737 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
740 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, out
, 20, &errorCode
);
741 if(U_FAILURE(errorCode
)) {
742 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
/*
 * Code point constants used to build the long coverage-test string:
 * compatibility Jamo, conjoining Jamo, Hangul syllables and musical symbols.
 * NOTE(review): the line that opens the enclosing declaration (presumably
 * "enum {" - confirm) and its closing "};" are not visible in this listing.
 */
747 /* test cases to improve test code coverage */
749 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
750 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
751 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
753 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
754 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
755 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
757 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
758 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
760 MUSICAL_VOID_NOTEHEAD
=0x1d157,
761 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
762 MUSICAL_STEM
=0x1d165, /* cc=216 */
763 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
/*
 * Coverage test on one long, deliberately awkward string.
 * Steps visible here:
 *   1. build input[] from Jamo L/V/T triples in all normal/compatibility
 *      combinations, Hangul LV + T pairs, a compatibility L+V pair, a musical
 *      half note followed by 200 staccato+stem pairs, and a trailing
 *      compatibility L+T pair;
 *   2. require unorm_quickCheck() == UNORM_NO for NFD, NFKD, NFC, NFKC and FCD;
 *   3. build the expected output in expect[], normalize once into a 100-unit
 *      buffer that must overflow, then into the full buffer, and compare the
 *      output and the preflight length;
 *   4. rebuild expect[] starting from the unchanged Hangul prefix and repeat
 *      the normalize-and-compare step.
 * NOTE(review): the enclosing function header (presumably TestNormCoverage() -
 * confirm), the initialisation of inLength/expectLength, the mode arguments of
 * the unorm_normalize() calls (the messages name UNORM_NFKC and UNORM_FCD) and
 * several braces are not visible in this listing.
 */
768 UChar input
[1000], expect
[1000], output
[1000];
769 UErrorCode errorCode
;
770 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
772 /* create a long and nasty string with NFKC-unsafe characters */
775 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
776 input
[inLength
++]=HANGUL_KIYEOK
;
777 input
[inLength
++]=HANGUL_WEO
;
778 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
780 input
[inLength
++]=HANGUL_KIYEOK
;
781 input
[inLength
++]=HANGUL_WEO
;
782 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
784 input
[inLength
++]=HANGUL_KIYEOK
;
785 input
[inLength
++]=HANGUL_K_WEO
;
786 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
788 input
[inLength
++]=HANGUL_KIYEOK
;
789 input
[inLength
++]=HANGUL_K_WEO
;
790 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
792 input
[inLength
++]=HANGUL_K_KIYEOK
;
793 input
[inLength
++]=HANGUL_WEO
;
794 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
796 input
[inLength
++]=HANGUL_K_KIYEOK
;
797 input
[inLength
++]=HANGUL_WEO
;
798 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
800 input
[inLength
++]=HANGUL_K_KIYEOK
;
801 input
[inLength
++]=HANGUL_K_WEO
;
802 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
804 input
[inLength
++]=HANGUL_K_KIYEOK
;
805 input
[inLength
++]=HANGUL_K_WEO
;
806 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
808 /* Hangul LV with normal/compatibility Jamo T */
809 input
[inLength
++]=HANGUL_AC00
;
810 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
812 input
[inLength
++]=HANGUL_AC00
;
813 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
815 /* compatibility Jamo L, V */
816 input
[inLength
++]=HANGUL_K_KIYEOK
;
817 input
[inLength
++]=HANGUL_K_WEO
;
/* remember where the Hangul part ends; it is copied unchanged into expect[] later */
819 hangulPrefixLength
=inLength
;
821 input
[inLength
++]=U16_LEAD(MUSICAL_HALF_NOTE
);
822 input
[inLength
++]=U16_TRAIL(MUSICAL_HALF_NOTE
);
823 for(i
=0; i
<200; ++i
) {
824 input
[inLength
++]=U16_LEAD(MUSICAL_STACCATO
);
825 input
[inLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
826 input
[inLength
++]=U16_LEAD(MUSICAL_STEM
);
827 input
[inLength
++]=U16_TRAIL(MUSICAL_STEM
);
830 /* (compatibility) Jamo L, T do not compose */
831 input
[inLength
++]=HANGUL_K_KIYEOK
;
832 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
835 errorCode
=U_ZERO_ERROR
;
836 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
837 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
839 errorCode
=U_ZERO_ERROR
;
840 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
841 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
843 errorCode
=U_ZERO_ERROR
;
844 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
845 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
847 errorCode
=U_ZERO_ERROR
;
848 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
849 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
851 errorCode
=U_ZERO_ERROR
;
852 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
853 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
858 expect
[expectLength
++]=HANGUL_SYLLABLE
;
860 expect
[expectLength
++]=HANGUL_SYLLABLE
;
862 expect
[expectLength
++]=HANGUL_SYLLABLE
;
864 expect
[expectLength
++]=HANGUL_SYLLABLE
;
866 expect
[expectLength
++]=HANGUL_SYLLABLE
;
868 expect
[expectLength
++]=HANGUL_SYLLABLE
;
870 expect
[expectLength
++]=HANGUL_SYLLABLE
;
872 expect
[expectLength
++]=HANGUL_SYLLABLE
;
874 expect
[expectLength
++]=HANGUL_AC00
+3;
876 expect
[expectLength
++]=HANGUL_AC00
+3;
878 expect
[expectLength
++]=HANGUL_AC00
+14*28;
880 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
881 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
882 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
883 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
884 for(i
=0; i
<200; ++i
) {
885 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
886 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
888 for(i
=0; i
<200; ++i
) {
889 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
890 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
893 expect
[expectLength
++]=HANGUL_KIYEOK
;
894 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
896 /* try destination overflow first */
897 errorCode
=U_ZERO_ERROR
;
898 preflightLength
=unorm_normalize(input
, inLength
,
900 output
, 100, /* too short */
902 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
903 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode
));
907 errorCode
=U_ZERO_ERROR
;
908 length
=unorm_normalize(input
, inLength
,
910 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
912 if(U_FAILURE(errorCode
)) {
913 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
914 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
915 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
916 for(i
=0; i
<length
; ++i
) {
917 if(output
[i
]!=expect
[i
]) {
/* NOTE(review): %04lx is given UChar arguments here and in the FCD message
   further down; the arguments are not cast to long - confirm */
918 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
923 if(length
!=preflightLength
) {
924 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
/* second expectation: the Hangul prefix is expected unchanged */
928 u_memcpy(expect
, input
, hangulPrefixLength
);
929 expectLength
=hangulPrefixLength
;
931 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
932 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
933 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
934 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
935 for(i
=0; i
<200; ++i
) {
936 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
937 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
939 for(i
=0; i
<200; ++i
) {
940 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
941 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
944 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
945 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
947 errorCode
=U_ZERO_ERROR
;
948 length
=unorm_normalize(input
, inLength
,
950 output
, sizeof(output
)/U_SIZEOF_UCHAR
,
952 if(U_FAILURE(errorCode
)) {
953 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
954 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
955 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
956 for(i
=0; i
<length
; ++i
) {
957 if(output
[i
]!=expect
[i
]) {
958 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
/*
 * TestConcatenate(): API-level checks of unorm_concatenate() in NFC mode.
 * Cases visible here:
 *   - left with explicit length 2 plus NUL-terminated right, expecting
 *     6 units equal to expect[];
 *   - preflighting with a NULL buffer and with a 1-unit buffer, expecting
 *     U_BUFFER_OVERFLOW_ERROR and length 6;
 *   - entering with a failure code, which must be left unchanged and must not
 *     modify the buffer;
 *   - NULL left and NULL destination, which must set U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): this listing drops lines; the declarations of left, right,
 * expect, buffer and length, the statements that pre-fill the buffer with the
 * sentinel values tested below (0x5555, 0xaaaa) and several braces are not
 * visible here.
 */
965 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
967 TestConcatenate(void) {
974 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
977 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
981 UErrorCode errorCode
;
984 /* left with length, right NUL-terminated */
985 errorCode
=U_ZERO_ERROR
;
986 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
987 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
988 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
992 errorCode
=U_ZERO_ERROR
;
993 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
994 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
995 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
999 errorCode
=U_ZERO_ERROR
;
1000 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
1001 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
1002 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1005 /* enter with U_FAILURE */
1007 errorCode
=U_UNEXPECTED_TOKEN
;
1008 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1009 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1010 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1013 /* illegal arguments */
1015 errorCode
=U_ZERO_ERROR
;
1016 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1017 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1018 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1021 errorCode
=U_ZERO_ERROR
;
1022 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1023 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1024 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
/*
 * Printable mode names for the iteration tests, indexed by UNormalizationMode
 * (the array has UNORM_MODE_COUNT entries; see the _modeString[mode] uses in
 * _testIter()). Index 0 is the placeholder "0".
 * NOTE(review): the closing "};" is not visible in this listing.
 */
1032 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1033 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Helper for TestNextPrevious(): steps through the text with unorm_next()
 * and unorm_previous() and checks each step against expectations.
 *
 * src, srcLength   source text
 * iter             iterator over the source text, positioned by the caller
 * mode             normalization mode; also labels the log output
 * forward          direction flag; printed in every failure message
 * out, outLength   expected output pieces separated by _PLUS, or NULL;
 *                  (out!=NULL) is handed to unorm_next()/unorm_previous()
 * srcIndexes, srcIndexesLength
 *                  expected iterator indexes in the source string
 *
 * For each step the function compares the iterator index, the returned
 * length, the buffer contents and the neededToNormalize flag with the
 * expected values and logs any mismatch.
 */
1037 _testIter(const UChar
*src
, int32_t srcLength
,
1038 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1039 const UChar
*out
, int32_t outLength
,
1040 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1042 const UChar
*expect
, *outLimit
, *in
;
1043 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1044 UErrorCode errorCode
;
1045 UBool neededToNormalize
, expectNeeded
;
1047 errorCode
=U_ZERO_ERROR
;
/* one past the last unit of the expected output */
1048 outLimit
=out
+outLength
;
/* NOTE(review): presumably the start index for backward iteration - confirm */
1054 i
=srcIndexesLength
-2;
1061 if(!iter
->hasNext(iter
)) {
/* step forward: normalize the next piece into buffer */
1064 length
=unorm_next(iter
,
1065 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1067 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* expected iterator position after this forward step */
1069 expectIndex
=srcIndexes
[i
+1];
1071 inLength
=expectIndex
-prevIndex
;
1074 /* get output piece from between plus signs */
1076 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
/* the flag is expected to be set exactly when buffer differs from the input piece */
1079 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1082 expectLength
=inLength
;
1086 if(!iter
->hasPrevious(iter
)) {
/* step backward: normalize the previous piece into buffer */
1089 length
=unorm_previous(iter
,
1090 buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1092 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* expected iterator position after this backward step */
1094 expectIndex
=srcIndexes
[i
];
1096 inLength
=prevIndex
-expectIndex
;
1099 /* get output piece from between plus signs */
1101 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
1105 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1108 expectLength
=inLength
;
/* compare the actual iterator position, length, contents and flag with the expectations */
1112 index
=iter
->getIndex(iter
, UITER_CURRENT
);
1114 if(U_FAILURE(errorCode
)) {
1115 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1116 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
1119 if(expectIndex
!=index
) {
1120 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1121 forward
, _modeString
[mode
], i
, index
, expectIndex
);
1124 if(expectLength
!=length
) {
1125 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1126 forward
, _modeString
[mode
], i
, length
, expectLength
);
1129 if(0!=u_memcmp(expect
, buffer
, length
)) {
1130 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1131 forward
, _modeString
[mode
], i
);
1134 if(neededToNormalize
!=expectNeeded
) {
1138 expect
+=expectLength
+1; /* go after the + */
1141 --expect
; /* go before the + */
/*
 * Tests unorm_next() and unorm_previous().
 *
 * Part 1 runs _testIter() over src for NFD, NFKD, NFC, NFKC and FCD,
 * forward (TRUE) and backward (FALSE), first with the expected output
 * tables ("with doNormalize") and then with out=NULL ("without
 * doNormalize"). Before each backward run the iterator index is moved to
 * the end of the text.
 *
 * Part 2 calls unorm_next() directly for edge cases: neededToNormalize=NULL,
 * pure preflighting (NULL buffer, capacity 0), preflighting into a
 * too-small buffer, a NULL iterator, an illegal mode value, and an error
 * code that is already a failure on entry.
 *
 * The index-table element counts use LENGTHOF() instead of dividing the
 * table size by a hard-coded 4, so they no longer depend on the byte size
 * of int32_t being spelled out at every call.
 */
1148 TestNextPrevious() {
1150 src
[]={ /* input string */
1151 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1153 nfd
[]={ /* + separates expected output pieces */
1154 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
1157 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
1160 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1163 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
1166 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1169 /* expected iterator indexes in the source string for each iteration piece */
1170 static const int32_t
1192 UBool neededToNormalize
;
1193 UErrorCode errorCode
;
1195 uiter_setString(&iter
, src
, sizeof(src
)/U_SIZEOF_UCHAR
);
1197 /* test iteration with doNormalize */
1199 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, LENGTHOF(nfdIndexes
));
1201 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, LENGTHOF(nfkdIndexes
));
1203 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, LENGTHOF(nfcIndexes
));
1205 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, LENGTHOF(nfkcIndexes
));
1207 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, LENGTHOF(fcdIndexes
));
/* backward runs: move the iterator to the end of the text before each one */
1209 iter
.index
=iter
.length
;
1210 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, nfd
, sizeof(nfd
)/U_SIZEOF_UCHAR
, nfdIndexes
, LENGTHOF(nfdIndexes
));
1211 iter
.index
=iter
.length
;
1212 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, nfkd
, sizeof(nfkd
)/U_SIZEOF_UCHAR
, nfkdIndexes
, LENGTHOF(nfkdIndexes
));
1213 iter
.index
=iter
.length
;
1214 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, nfc
, sizeof(nfc
)/U_SIZEOF_UCHAR
, nfcIndexes
, LENGTHOF(nfcIndexes
));
1215 iter
.index
=iter
.length
;
1216 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, nfkc
, sizeof(nfkc
)/U_SIZEOF_UCHAR
, nfkcIndexes
, LENGTHOF(nfkcIndexes
));
1217 iter
.index
=iter
.length
;
1218 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, fcd
, sizeof(fcd
)/U_SIZEOF_UCHAR
, fcdIndexes
, LENGTHOF(fcdIndexes
));
1220 /* test iteration without doNormalize */
1222 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, LENGTHOF(nfdIndexes
));
1224 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, LENGTHOF(nfkdIndexes
));
1226 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, LENGTHOF(nfcIndexes
));
1228 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, LENGTHOF(nfkcIndexes
));
1230 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, LENGTHOF(fcdIndexes
));
1232 iter
.index
=iter
.length
;
1233 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, LENGTHOF(nfdIndexes
));
1234 iter
.index
=iter
.length
;
1235 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, LENGTHOF(nfkdIndexes
));
1236 iter
.index
=iter
.length
;
1237 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, LENGTHOF(nfcIndexes
));
1238 iter
.index
=iter
.length
;
1239 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, LENGTHOF(nfkcIndexes
));
1240 iter
.index
=iter
.length
;
1241 _testIter(src
, sizeof(src
)/U_SIZEOF_UCHAR
, &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, LENGTHOF(fcdIndexes
));
1243 /* try without neededToNormalize */
1244 errorCode
=U_ZERO_ERROR
;
1247 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1248 UNORM_NFD
, 0, TRUE
, NULL
,
/* the result is expected to equal the second NFD piece, nfd[2..3] */
1250 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1251 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* 9 is a sentinel: the call below must overwrite it with FALSE */
1256 neededToNormalize
=9;
1258 length
=unorm_next(&iter
, NULL
, 0,
1259 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1261 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1262 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
1266 errorCode
=U_ZERO_ERROR
;
/* 5 is a sentinel: buffer[1] must stay untouched when the capacity is 1 */
1267 buffer
[0]=buffer
[1]=5;
1268 neededToNormalize
=9;
1270 length
=unorm_next(&iter
, buffer
, 1,
1271 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1273 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1274 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
1279 errorCode
=U_ZERO_ERROR
;
1280 buffer
[0]=buffer
[1]=5;
1281 neededToNormalize
=9;
1283 length
=unorm_next(NULL
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1284 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1286 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1287 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
1292 buffer
[0]=buffer
[1]=5;
1293 neededToNormalize
=9;
1295 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1296 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1298 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1299 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1303 /* error coming in */
1304 errorCode
=U_MISPLACED_QUANTIFIER
;
1307 length
=unorm_next(&iter
, buffer
, sizeof(buffer
)/U_SIZEOF_UCHAR
,
1308 UNORM_NFD
, 0, TRUE
, NULL
,
/* a failure code on entry must come back unchanged */
1310 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1311 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
/*
 * Tests u_getFC_NFKC_Closure().
 *
 * Each table entry pairs a code point (.c) with the expected closure
 * string (.s). The result must succeed, the returned length must equal
 * u_strlen(buffer), and buffer must compare equal to .s.
 *
 * Error handling: a NULL destination with a non-zero capacity must give
 * U_ILLEGAL_ARGUMENT_ERROR, and that error code must still be set after
 * the final call.
 */
1317 TestFCNFKCClosure(void) {
1318 static const struct {
1324 { 0x037A, { 0x0020, 0x03B9, 0 } },
1325 { 0x03D2, { 0x03C5, 0 } },
1326 { 0x20A8, { 0x0072, 0x0073, 0 } },
1327 { 0x210B, { 0x0068, 0 } },
1328 { 0x210C, { 0x0068, 0 } },
1329 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1330 { 0x2122, { 0x0074, 0x006D, 0 } },
1331 { 0x2128, { 0x007A, 0 } },
1332 { 0x1D5DB, { 0x0068, 0 } },
1333 { 0x1D5ED, { 0x007A, 0 } },
1338 UErrorCode errorCode
;
1341 for(i
=0; i
<LENGTHOF(tests
); ++i
) {
1342 errorCode
=U_ZERO_ERROR
;
1343 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, LENGTHOF(buffer
), &errorCode
);
1344 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
/* NOTE(review): tests[i].c feeds a %lx conversion; its declared type is not visible here - confirm it is long-sized or add a cast */
1345 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests
[i
].c
, u_errorName(errorCode
));
1349 /* error handling */
1350 errorCode
=U_ZERO_ERROR
;
1351 length
=u_getFC_NFKC_Closure(0x5c, NULL
, LENGTHOF(buffer
), &errorCode
);
1352 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1353 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
/* NOTE(review): errorCode appears not to be reset before this call, so it enters with the failure set above - confirm */
1356 length
=u_getFC_NFKC_Closure(0x5c, buffer
, LENGTHOF(buffer
), &errorCode
);
1357 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1358 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Cross-checks per-code point normalization properties.
 *
 * 1. u_getIntPropertyMaxValue() must return UNORM_YES for the NFD/NFKD
 *    quick check properties, UNORM_MAYBE for NFC/NFKC, and the same
 *    maximum for the lead/trail combining class properties as for
 *    UCHAR_CANONICAL_COMBINING_CLASS.
 * 2. For each tested code point c, the quick check property value (qc1)
 *    is compared with unorm_quickCheck() on the string holding only c
 *    (qc2), for NFC, NFD, NFKC and NFKD.
 * 3. c is normalized to NFD; the lccc/tccc property values of c are
 *    compared with u_getCombiningClass() of the first and last code point
 *    of that NFD string.
 */
1363 TestQuickCheckPerCP() {
1364 UErrorCode errorCode
;
1365 UChar32 c
, lead
, trail
;
1366 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1367 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1371 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1372 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1373 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1374 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1375 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1376 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1378 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1382 * compare the quick check property values for some code points
1383 * to the quick check results for checking same-code point strings
1385 errorCode
=U_ZERO_ERROR
;
/* encode c as UTF-16 into s; length becomes the number of code units written */
1389 U16_APPEND_UNSAFE(s
, length
, c
);
1391 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1392 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1394 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1397 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1398 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1400 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1403 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1404 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1406 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1409 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1410 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1412 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
/* decompose c; lead and trail are read at offsets 0 and length-1 of the NFD string */
1415 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, LENGTHOF(nfd
), &errorCode
);
1416 /* length-length == 0 is used to get around a compiler warning. */
1417 U16_GET(nfd
, 0, length
-length
, length
, lead
);
1418 U16_GET(nfd
, 0, length
-1, length
, trail
);
1420 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1421 lccc2
=u_getCombiningClass(lead
);
1422 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1423 tccc2
=u_getCombiningClass(trail
);
1426 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1430 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1434 /* skip some code points */
/*
 * Table-driven test of unorm_normalize().
 *
 * Each case supplies a mode, options and an expected result. A case fails
 * if the call reports an error, if the returned length differs from
 * u_strlen(expect), or if the output differs from expect. The visible
 * cases are the NFC special cases from Unicode Corrigendum #5
 * (Normalization Idempotency).
 */
1440 TestComposition(void) {
1441 static const struct {
1442 UNormalizationMode mode
;
1448 * special cases for UAX #15 bug
1449 * see Unicode Corrigendum #5: Normalization Idempotency
1450 * at http://unicode.org/versions/corrigendum5.html
1451 * (was Public Review Issue #29)
1453 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1454 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1455 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1456 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1458 /* TODO: add test cases for UNORM_FCC here (j2151) */
1462 UErrorCode errorCode
;
1465 for(i
=0; i
<LENGTHOF(cases
); ++i
) {
1466 errorCode
=U_ZERO_ERROR
;
1467 length
=unorm_normalize(
1469 cases
[i
].mode
, cases
[i
].options
,
1470 output
, LENGTHOF(output
),
/* fail on an error, a length mismatch, or differing contents */
1472 if( U_FAILURE(errorCode
) ||
1473 length
!=u_strlen(cases
[i
].expect
) ||
1474 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1476 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i
);
/*
 * Tests unorm2_getDecomposition() on the instance obtained with
 * unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS), which the
 * log messages call "fcc".
 *
 * Expectations checked below:
 *  - U+0020: the call succeeds and returns a negative length
 *  - U+00E4: 0061 0308, NUL-terminated
 *  - U+AC01: 1100 1161 11A8, NUL-terminated
 *  - NULL buffer with capacity 0: U_BUFFER_OVERFLOW_ERROR and length 3
 *  - capacity -1: U_ILLEGAL_ARGUMENT_ERROR
 *  - NULL buffer with capacity 4: U_ILLEGAL_ARGUMENT_ERROR
 */
1482 TestGetDecomposition() {
1486 UErrorCode errorCode
=U_ZERO_ERROR
;
1487 const UNormalizer2
*n2
=unorm2_getInstance(NULL
, "nfc", UNORM2_COMPOSE_CONTIGUOUS
, &errorCode
);
1488 if(U_FAILURE(errorCode
)) {
1489 log_err_status(errorCode
, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode
));
1493 length
=unorm2_getDecomposition(n2
, 0x20, decomp
, LENGTHOF(decomp
), &errorCode
);
/* a non-negative length counts as a failure for the space character */
1494 if(U_FAILURE(errorCode
) || length
>=0) {
1495 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1497 errorCode
=U_ZERO_ERROR
;
1498 length
=unorm2_getDecomposition(n2
, 0xe4, decomp
, LENGTHOF(decomp
), &errorCode
);
1499 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1500 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1502 errorCode
=U_ZERO_ERROR
;
1503 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, LENGTHOF(decomp
), &errorCode
);
1504 if(U_FAILURE(errorCode
) || length
!=3 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0x11a8 || decomp
[3]!=0) {
1505 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1507 errorCode
=U_ZERO_ERROR
;
/* preflighting: no buffer, so the required length must be reported with an overflow error */
1508 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1509 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
1510 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1512 errorCode
=U_ZERO_ERROR
;
1513 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1514 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1515 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1517 errorCode
=U_ZERO_ERROR
;
1518 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1519 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1520 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
/*
 * Tests unorm2_getRawDecomposition() on the NFKC instance from
 * unorm2_getNFKCInstance().
 *
 * Expectations checked below:
 *  - U+0020: the call succeeds and returns a negative length
 *  - U+00E4: 0061 0308
 *  - U+1E08: 00C7 0301 (one decomposition step only, not recursive)
 *  - U+212B: 00C5
 *  - U+AC00: 1100 1161
 *  - U+AC01: AC00 11A8 (LV syllable + T)
 *  - NULL buffer with capacity 0: U_BUFFER_OVERFLOW_ERROR and length 2
 *  - capacity -1 and NULL buffer with capacity 4: U_ILLEGAL_ARGUMENT_ERROR
 *
 * The failure messages name unorm2_getRawDecomposition, the function that
 * is actually called, so a log line points at the right API.
 */
1525 TestGetRawDecomposition() {
1529 UErrorCode errorCode
=U_ZERO_ERROR
;
1530 const UNormalizer2
*n2
=unorm2_getNFKCInstance(&errorCode
);
1531 if(U_FAILURE(errorCode
)) {
1532 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1536 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1537 * without recursive decomposition.
1540 length
=unorm2_getRawDecomposition(n2
, 0x20, decomp
, LENGTHOF(decomp
), &errorCode
);
/* a non-negative length counts as a failure for the space character */
1541 if(U_FAILURE(errorCode
) || length
>=0) {
1542 log_err("unorm2_getRawDecomposition(nfkc, space) failed\n");
1544 errorCode
=U_ZERO_ERROR
;
1545 length
=unorm2_getRawDecomposition(n2
, 0xe4, decomp
, LENGTHOF(decomp
), &errorCode
);
1546 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1547 log_err("unorm2_getRawDecomposition(nfkc, a-umlaut) failed\n");
1549 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1550 errorCode
=U_ZERO_ERROR
;
1551 length
=unorm2_getRawDecomposition(n2
, 0x1e08, decomp
, LENGTHOF(decomp
), &errorCode
);
1552 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xc7 || decomp
[1]!=0x301 || decomp
[2]!=0) {
1553 log_err("unorm2_getRawDecomposition(nfkc, c-cedilla-acute) failed\n");
1555 /* U+212B ANGSTROM SIGN */
1556 errorCode
=U_ZERO_ERROR
;
1557 length
=unorm2_getRawDecomposition(n2
, 0x212b, decomp
, LENGTHOF(decomp
), &errorCode
);
1558 if(U_FAILURE(errorCode
) || length
!=1 || decomp
[0]!=0xc5 || decomp
[1]!=0) {
1559 log_err("unorm2_getRawDecomposition(nfkc, angstrom sign) failed\n");
1561 errorCode
=U_ZERO_ERROR
;
1562 length
=unorm2_getRawDecomposition(n2
, 0xac00, decomp
, LENGTHOF(decomp
), &errorCode
);
1563 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0) {
1564 log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1566 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1567 errorCode
=U_ZERO_ERROR
;
1568 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, LENGTHOF(decomp
), &errorCode
);
1569 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xac00 || decomp
[1]!=0x11a8 || decomp
[2]!=0) {
1570 log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1572 errorCode
=U_ZERO_ERROR
;
/* preflighting: no buffer, so the required length must be reported with an overflow error */
1573 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1574 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=2) {
1575 log_err("unorm2_getRawDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1577 errorCode
=U_ZERO_ERROR
;
1578 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1579 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1580 log_err("unorm2_getRawDecomposition(nfkc, capacity<0) failed\n");
1582 errorCode
=U_ZERO_ERROR
;
1583 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1584 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1585 log_err("unorm2_getRawDecomposition(nfkc, decomposition=NULL) failed\n");
/*
 * Regression test for unorm2_append() with the NFC instance
 * (ICU ticket #7848).
 *
 * a[] ends in <41 327> and b[] starts with <30A>, so appending has to
 * recompose across the boundary. The first call passes a capacity of 6,
 * which is too small for the 8-unit result: it must report
 * U_BUFFER_OVERFLOW_ERROR with the full length and leave a[] exactly as it
 * was. The second call passes the real capacity of a[] and must produce
 * expected[].
 */
1590 TestAppendRestoreMiddle() {
1591 UChar a
[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1592 static const UChar b
[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1593 /* NFC: C5 is 'A with ring above' */
1594 static const UChar expected
[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1596 UErrorCode errorCode
=U_ZERO_ERROR
;
1597 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1598 if(U_FAILURE(errorCode
)) {
1599 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1603 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1604 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1605 * still fits into a[] but the full result still overflows this capacity.
1606 * (Let it modify the destination buffer before reallocating internally.)
/* overflow case: only the length and the error code are checked here */
1608 length
=unorm2_append(n2
, a
, -1, 6, b
, -1, &errorCode
);
1609 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=LENGTHOF(expected
)) {
1610 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length
);
1613 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1614 if(a
[0]!=0x61 || a
[1]!=0x62 || a
[2]!=0x63 || a
[3]!=0x41 || a
[4]!=0x327 || a
[5]!=0) {
1615 log_err("unorm2_append(overflow) modified the first string\n");
1618 errorCode
=U_ZERO_ERROR
;
/* real append with enough room: a[] must now hold expected[] */
1619 length
=unorm2_append(n2
, a
, -1, LENGTHOF(a
), b
, -1, &errorCode
);
1620 if(U_FAILURE(errorCode
) || length
!=LENGTHOF(expected
) || 0!=u_memcmp(a
, expected
, length
)) {
1621 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode
), (int)length
);
/*
 * Checks that each convenience getter returns an instance of the right
 * kind by normalizing the same input, <A0 C7 301>, and comparing the
 * output:
 *  - unorm2_getNFCInstance():          A0 1E08
 *  - unorm2_getNFDInstance():          A0 43 327 301
 *  - unorm2_getNFKCInstance():         20 1E08
 *  - unorm2_getNFKDInstance():         20 43 327 301
 *  - unorm2_getNFKCCasefoldInstance(): 20 1E09
 * A getter that reports an error is logged with log_err_status().
 */
1627 TestGetEasyToUseInstance() {
1628 static const UChar in
[]={
1629 0xA0, /* -> <noBreak> 0020 */
1630 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1635 UErrorCode errorCode
=U_ZERO_ERROR
;
1636 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1637 if(U_FAILURE(errorCode
)) {
1638 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1641 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1642 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0xa0 || out
[1]!=0x1e08) {
1643 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1644 (int)length
, u_errorName(errorCode
));
1647 errorCode
=U_ZERO_ERROR
;
1648 n2
=unorm2_getNFDInstance(&errorCode
);
1649 if(U_FAILURE(errorCode
)) {
1650 log_err_status(errorCode
, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode
));
1653 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1654 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0xa0 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1655 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1656 (int)length
, u_errorName(errorCode
));
1659 errorCode
=U_ZERO_ERROR
;
1660 n2
=unorm2_getNFKCInstance(&errorCode
);
1661 if(U_FAILURE(errorCode
)) {
1662 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1665 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1666 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e08) {
1667 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1668 (int)length
, u_errorName(errorCode
));
1671 errorCode
=U_ZERO_ERROR
;
1672 n2
=unorm2_getNFKDInstance(&errorCode
);
1673 if(U_FAILURE(errorCode
)) {
1674 log_err_status(errorCode
, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode
));
1677 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
1678 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0x20 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1679 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1680 (int)length
, u_errorName(errorCode
));
1683 errorCode
=U_ZERO_ERROR
;
1684 n2
=unorm2_getNFKCCasefoldInstance(&errorCode
);
1685 if(U_FAILURE(errorCode
)) {
1686 log_err_status(errorCode
, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode
));
1689 length
=unorm2_normalize(n2
, in
, LENGTHOF(in
), out
, LENGTHOF(out
), &errorCode
);
/* the expected second unit is 1E09 here, where the NFKC check above expects 1E08 */
1690 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e09) {
1691 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1692 (int)length
, u_errorName(errorCode
));
1696 #endif /* #if !UCONFIG_NO_NORMALIZATION */