1 /********************************************************************
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unicode/utf16.h"
23 #if !UCONFIG_NO_NORMALIZATION
27 #include "unicode/uchar.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unorm.h"
36 TestNormCoverage(void);
39 TestConcatenate(void);
42 TestNextPrevious(void);
44 static void TestIsNormalized(void);
47 TestFCNFKCClosure(void);
50 TestQuickCheckPerCP(void);
53 TestComposition(void);
59 TestGetDecomposition(void);
62 TestGetRawDecomposition(void);
64 static void TestAppendRestoreMiddle(void);
65 static void TestGetEasyToUseInstance(void);
/*
 * Canonical normalization test table.
 * Each row holds three escaped strings: [0] the input, [1] the expected
 * decomposed form, [2] the expected composed form.
 * TestNormCases() normalizes column 0 and compares against column 2 for
 * NFC/NFKC and column 1 otherwise; this table is passed with UNORM_NFD and
 * UNORM_NFC. The quick-check string test reads columns 1 and 2 directly.
 * The "\\uXXXX" sequences are kept escaped in the C literal and are handed to
 * CharsToUChars() at run time (presumably where they are unescaped - confirm
 * against that helper).
 */
67 static const char* const canonTests
[][3] = {
68 /* Input*/ /*Decomposed*/ /*Composed*/
69 { "cat", "cat", "cat" },
70 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
72 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
73 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
75 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
76 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
77 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below (marks get reordered) */
79 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
80 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
82 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
83 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
84 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
86 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
87 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
89 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
90 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
92 { "Henry IV", "Henry IV", "Henry IV" },
93 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
95 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
96 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
97 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
98 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
99 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
100 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + grave + grave_below: marks reordered, A-grave composed */
/*
 * Compatibility normalization test table.
 * Same three-column layout as the canonical table: [0] input, [1] expected
 * decomposed form, [2] expected composed form.
 * Passed to TestNormCases() with UNORM_NFKD (checked against column 1) and
 * UNORM_NFKC (checked against column 2).
 */
104 static const char* const compatTests
[][3] = {
105 /* Input*/ /*Decomposed */ /*Composed*/
106 { "cat", "cat", "cat" },
108 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
110 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
111 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
113 { "Henry IV", "Henry IV", "Henry IV" },
114 { "Henry \\u2163", "Henry IV", "Henry IV" }, /* Roman numeral four -> I + V */
116 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
117 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
119 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
121 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
122 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
123 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/
/*
 * FCD test table: [0] input, [1] expected result of normalizing with
 * UNORM_FCD. Column 2 is NULL because TestNormCases() only reads column 2
 * for NFC/NFKC; for every other mode, including FCD, it compares against
 * column 1.
 */
127 static const char* const fcdTests
[][3] = {
128 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
129 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL
}, /* D-caron + cedilla */
130 { "\\u010e", "\\u010e", NULL
} /* D-caron */
/* Forward declaration of the registration hook defined just below. */
133 void addNormTest(TestNode
** root
);
/*
 * Registers the normalization tests with the test tree passed in as `root`.
 * Each addTest() call pairs one test function with its path; every path
 * shares the "tsnorm/cnormtst/" prefix followed by the function's own name.
 */
135 void addNormTest(TestNode
** root
)
137 addTest(root
, &TestAPI
, "tsnorm/cnormtst/TestAPI");
138 addTest(root
, &TestDecomp
, "tsnorm/cnormtst/TestDecomp");
139 addTest(root
, &TestCompatDecomp
, "tsnorm/cnormtst/TestCompatDecomp");
140 addTest(root
, &TestCanonDecompCompose
, "tsnorm/cnormtst/TestCanonDecompCompose");
141 addTest(root
, &TestCompatDecompCompose
, "tsnorm/cnormtst/TestCompatDecompCompose");
142 addTest(root
, &TestFCD
, "tsnorm/cnormtst/TestFCD");
143 addTest(root
, &TestNull
, "tsnorm/cnormtst/TestNull");
144 addTest(root
, &TestQuickCheck
, "tsnorm/cnormtst/TestQuickCheck");
145 addTest(root
, &TestQuickCheckPerCP
, "tsnorm/cnormtst/TestQuickCheckPerCP");
146 addTest(root
, &TestIsNormalized
, "tsnorm/cnormtst/TestIsNormalized");
147 addTest(root
, &TestCheckFCD
, "tsnorm/cnormtst/TestCheckFCD");
148 addTest(root
, &TestNormCoverage
, "tsnorm/cnormtst/TestNormCoverage");
149 addTest(root
, &TestConcatenate
, "tsnorm/cnormtst/TestConcatenate");
150 addTest(root
, &TestNextPrevious
, "tsnorm/cnormtst/TestNextPrevious");
151 addTest(root
, &TestFCNFKCClosure
, "tsnorm/cnormtst/TestFCNFKCClosure");
152 addTest(root
, &TestComposition
, "tsnorm/cnormtst/TestComposition");
153 addTest(root
, &TestGetDecomposition
, "tsnorm/cnormtst/TestGetDecomposition");
154 addTest(root
, &TestGetRawDecomposition
, "tsnorm/cnormtst/TestGetRawDecomposition");
155 addTest(root
, &TestAppendRestoreMiddle
, "tsnorm/cnormtst/TestAppendRestoreMiddle");
156 addTest(root
, &TestGetEasyToUseInstance
, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159 static const char* const modeStrings
[]={
169 static void TestNormCases(UNormalizationMode mode
,
170 const char* const cases
[][3], int32_t lengthOfCases
) {
171 int32_t x
, neededLen
, length2
;
172 int32_t expIndex
= (mode
==UNORM_NFC
|| mode
==UNORM_NFKC
) ? 2 : 1;
175 log_verbose("Testing unorm_normalize(%s)\n", modeStrings
[mode
]);
176 for(x
=0; x
< lengthOfCases
; x
++)
178 UErrorCode status
= U_ZERO_ERROR
, status2
= U_ZERO_ERROR
;
179 source
=CharsToUChars(cases
[x
][0]);
180 neededLen
= unorm_normalize(source
, u_strlen(source
), mode
, 0, NULL
, 0, &status
);
181 length2
= unorm_normalize(source
, -1, mode
, 0, NULL
, 0, &status2
);
182 if(neededLen
!=length2
) {
183 log_err("ERROR in unorm_normalize(%s)[%d]: "
184 "preflight length/NUL %d!=%d preflight length/srcLength\n",
185 modeStrings
[mode
], (int)x
, (int)neededLen
, (int)length2
);
187 if(status
==U_BUFFER_OVERFLOW_ERROR
)
191 length2
=unorm_normalize(source
, u_strlen(source
), mode
, 0, result
, UPRV_LENGTHOF(result
), &status
);
192 if(U_FAILURE(status
) || neededLen
!=length2
) {
193 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
194 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
196 assertEqual(result
, cases
[x
][expIndex
], x
);
198 length2
=unorm_normalize(source
, -1, mode
, 0, result
, UPRV_LENGTHOF(result
), &status
);
199 if(U_FAILURE(status
) || neededLen
!=length2
) {
200 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
201 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
203 assertEqual(result
, cases
[x
][expIndex
], x
);
210 TestNormCases(UNORM_NFD
, canonTests
, UPRV_LENGTHOF(canonTests
));
/* Runs the shared TestNormCases() driver in UNORM_NFKD mode over every row of compatTests. */
213 void TestCompatDecomp() {
214 TestNormCases(UNORM_NFKD
, compatTests
, UPRV_LENGTHOF(compatTests
));
/* Runs the shared TestNormCases() driver in UNORM_NFC mode over every row of canonTests. */
217 void TestCanonDecompCompose() {
218 TestNormCases(UNORM_NFC
, canonTests
, UPRV_LENGTHOF(canonTests
));
/* Runs the shared TestNormCases() driver in UNORM_NFKC mode over every row of compatTests. */
221 void TestCompatDecompCompose() {
222 TestNormCases(UNORM_NFKC
, compatTests
, UPRV_LENGTHOF(compatTests
));
226 TestNormCases(UNORM_FCD
, fcdTests
, UPRV_LENGTHOF(fcdTests
));
229 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
231 UChar
*expectedUni
= CharsToUChars(expected
);
232 if(u_strcmp(result
, expectedUni
)!=0){
233 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
239 static void TestNull_check(UChar
*src
, int32_t srcLen
,
240 UChar
*exp
, int32_t expLen
,
241 UNormalizationMode mode
,
244 UErrorCode status
= U_ZERO_ERROR
;
250 status
= U_ZERO_ERROR
;
257 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
259 if(U_FAILURE(status
)) {
260 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name
, u_errorName(status
));
261 } else if (len
!= expLen
) {
262 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
267 if(exp
[i
] != result
[i
]) {
268 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
275 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
279 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
285 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
286 int32_t source_comp_len
= 4;
287 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
288 int32_t expect_comp_len
= 3;
290 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
291 int32_t source_dcmp_len
= 3;
292 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
293 int32_t expect_dcmp_len
= 5;
295 TestNull_check(source_comp
,
302 TestNull_check(source_dcmp
,
309 TestNull_check(source_comp
,
319 static void TestQuickCheckResultNO()
321 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
322 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
323 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
324 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
325 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
326 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
327 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
328 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
334 UErrorCode error
= U_ZERO_ERROR
;
336 for (; count
< SIZE
; count
++)
338 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
341 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
344 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
347 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
350 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
353 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
356 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
359 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
366 static void TestQuickCheckResultYES()
368 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
369 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
370 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
371 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
372 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
373 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
374 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
375 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379 UErrorCode error
= U_ZERO_ERROR
;
384 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
386 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp
);
389 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
392 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
395 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
397 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp
);
400 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
403 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
409 for (; count
< SIZE
; count
++)
411 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
414 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
417 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
420 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
423 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
426 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
429 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
432 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
438 static void TestQuickCheckResultMAYBE()
440 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
441 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
442 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
443 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
449 UErrorCode error
= U_ZERO_ERROR
;
451 /* NFD and NFKD do not have any MAYBE code points */
452 for (; count
< SIZE
; count
++)
454 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
457 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC
[count
]);
460 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
463 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
469 static void TestQuickCheckStringResult()
474 UErrorCode error
= U_ZERO_ERROR
;
476 for (count
= 0; count
< UPRV_LENGTHOF(canonTests
); count
++)
478 d
= CharsToUChars(canonTests
[count
][1]);
479 c
= CharsToUChars(canonTests
[count
][2]);
480 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
483 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count
);
487 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
490 log_err("ERROR in NFC quick check for string at count %d\n", count
);
498 for (count
= 0; count
< UPRV_LENGTHOF(compatTests
); count
++)
500 d
= CharsToUChars(compatTests
[count
][1]);
501 c
= CharsToUChars(compatTests
[count
][2]);
502 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
505 log_data_err("ERROR in NFKD quick check for string at count %d\n", count
);
509 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
512 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
/*
 * Entry point for the unorm_quickCheck() tests: runs the NO, YES and MAYBE
 * result checks and then the string-level check, in that order.
 */
521 void TestQuickCheck()
523 TestQuickCheckResultNO();
524 TestQuickCheckResultYES();
525 TestQuickCheckResultMAYBE();
526 TestQuickCheckStringResult();
530 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
531 * normalized, and some that are not.
532 * Here we pick some specific cases and test the C API.
534 static void TestIsNormalized(void) {
535 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
536 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
537 { 0xfb1d, 0 }, /* excluded from composition */
538 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
539 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
541 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
542 { 0x1100, 0x1161, 0 }, /* Jamo compose */
543 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
544 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
548 UErrorCode errorCode
;
552 /* normal case with length>=0 (length -1 used for special cases below) */
553 errorCode
=U_ZERO_ERROR
;
554 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
555 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
558 /* incoming U_FAILURE */
559 errorCode
=U_TRUNCATED_CHAR_FOUND
;
560 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
561 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
562 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
566 errorCode
=U_ZERO_ERROR
;
567 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
568 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
569 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
573 errorCode
=U_ZERO_ERROR
;
574 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
575 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
576 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
580 for(i
=0; i
<UPRV_LENGTHOF(notNFC
); ++i
) {
581 errorCode
=U_ZERO_ERROR
;
582 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
583 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
585 errorCode
=U_ZERO_ERROR
;
586 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
587 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
590 for(i
=0; i
<UPRV_LENGTHOF(notNFKC
); ++i
) {
591 errorCode
=U_ZERO_ERROR
;
592 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
593 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
600 UErrorCode status
= U_ZERO_ERROR
;
601 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
603 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
604 0x02B9, 0x0314, 0x0315, 0x0316};
605 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
606 0x0050, 0x0730, 0x09EE, 0x1E10};
608 static const UChar datastr
[][5] =
609 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
610 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
611 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
612 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
613 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
615 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
617 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
619 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
620 0x0307, 0x0308, 0x0309, 0x030a,
621 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
622 0x0327, 0x0328, 0x0329, 0x032a,
623 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
624 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
628 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
629 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
630 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
631 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
632 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
633 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
635 if (U_FAILURE(status
))
636 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status
));
640 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
641 if (U_FAILURE(status
)) {
642 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count
);
646 if (result
[count
] != fcdresult
) {
647 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
654 /* random checks of long strings */
655 status
= U_ZERO_ERROR
;
656 srand((unsigned)time( NULL
));
658 for (count
= 0; count
< 50; count
++)
661 UBool testresult
= UNORM_YES
;
669 data
[size
] = datachar
[(rand() * 50) / RAND_MAX
];
670 log_verbose("0x%x", data
[size
]);
671 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
672 norm
+ normsize
, 100 - normsize
, &status
);
673 if (U_FAILURE(status
)) {
674 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
681 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
683 if (U_FAILURE(status
)) {
684 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
687 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
688 testresult
= UNORM_NO
;
690 if (testresult
== UNORM_YES
) {
691 log_verbose("result UNORM_YES\n");
694 log_verbose("result UNORM_NO\n");
697 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
698 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult
);
705 static const UChar in
[]={ 0x68, 0xe4 };
706 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
707 UErrorCode errorCode
;
710 /* try preflighting */
711 errorCode
=U_ZERO_ERROR
;
712 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
713 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
714 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
718 errorCode
=U_ZERO_ERROR
;
719 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
720 if(U_FAILURE(errorCode
)) {
721 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
724 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
725 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
728 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, NULL
, 0, &errorCode
);
729 if(U_FAILURE(errorCode
)) {
730 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
733 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, out
, 20, &errorCode
);
734 if(U_FAILURE(errorCode
)) {
735 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
740 /* test cases to improve test code coverage */
742 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
743 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
744 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
746 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
747 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
748 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
750 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
751 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
753 MUSICAL_VOID_NOTEHEAD
=0x1d157,
754 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
755 MUSICAL_STEM
=0x1d165, /* cc=216 */
756 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
761 UChar input
[1000], expect
[1000], output
[1000];
762 UErrorCode errorCode
;
763 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
765 /* create a long and nasty string with NFKC-unsafe characters */
768 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
769 input
[inLength
++]=HANGUL_KIYEOK
;
770 input
[inLength
++]=HANGUL_WEO
;
771 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
773 input
[inLength
++]=HANGUL_KIYEOK
;
774 input
[inLength
++]=HANGUL_WEO
;
775 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
777 input
[inLength
++]=HANGUL_KIYEOK
;
778 input
[inLength
++]=HANGUL_K_WEO
;
779 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
781 input
[inLength
++]=HANGUL_KIYEOK
;
782 input
[inLength
++]=HANGUL_K_WEO
;
783 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
785 input
[inLength
++]=HANGUL_K_KIYEOK
;
786 input
[inLength
++]=HANGUL_WEO
;
787 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
789 input
[inLength
++]=HANGUL_K_KIYEOK
;
790 input
[inLength
++]=HANGUL_WEO
;
791 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
793 input
[inLength
++]=HANGUL_K_KIYEOK
;
794 input
[inLength
++]=HANGUL_K_WEO
;
795 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
797 input
[inLength
++]=HANGUL_K_KIYEOK
;
798 input
[inLength
++]=HANGUL_K_WEO
;
799 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
801 /* Hangul LV with normal/compatibility Jamo T */
802 input
[inLength
++]=HANGUL_AC00
;
803 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
805 input
[inLength
++]=HANGUL_AC00
;
806 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
808 /* compatibility Jamo L, V */
809 input
[inLength
++]=HANGUL_K_KIYEOK
;
810 input
[inLength
++]=HANGUL_K_WEO
;
812 hangulPrefixLength
=inLength
;
814 input
[inLength
++]=U16_LEAD(MUSICAL_HALF_NOTE
);
815 input
[inLength
++]=U16_TRAIL(MUSICAL_HALF_NOTE
);
816 for(i
=0; i
<200; ++i
) {
817 input
[inLength
++]=U16_LEAD(MUSICAL_STACCATO
);
818 input
[inLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
819 input
[inLength
++]=U16_LEAD(MUSICAL_STEM
);
820 input
[inLength
++]=U16_TRAIL(MUSICAL_STEM
);
823 /* (compatibility) Jamo L, T do not compose */
824 input
[inLength
++]=HANGUL_K_KIYEOK
;
825 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
828 errorCode
=U_ZERO_ERROR
;
829 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
830 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
832 errorCode
=U_ZERO_ERROR
;
833 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
834 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
836 errorCode
=U_ZERO_ERROR
;
837 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
838 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
840 errorCode
=U_ZERO_ERROR
;
841 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
842 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
844 errorCode
=U_ZERO_ERROR
;
845 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
846 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
851 expect
[expectLength
++]=HANGUL_SYLLABLE
;
853 expect
[expectLength
++]=HANGUL_SYLLABLE
;
855 expect
[expectLength
++]=HANGUL_SYLLABLE
;
857 expect
[expectLength
++]=HANGUL_SYLLABLE
;
859 expect
[expectLength
++]=HANGUL_SYLLABLE
;
861 expect
[expectLength
++]=HANGUL_SYLLABLE
;
863 expect
[expectLength
++]=HANGUL_SYLLABLE
;
865 expect
[expectLength
++]=HANGUL_SYLLABLE
;
867 expect
[expectLength
++]=HANGUL_AC00
+3;
869 expect
[expectLength
++]=HANGUL_AC00
+3;
871 expect
[expectLength
++]=HANGUL_AC00
+14*28;
873 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
874 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
875 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
876 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
877 for(i
=0; i
<200; ++i
) {
878 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
879 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
881 for(i
=0; i
<200; ++i
) {
882 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
883 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
886 expect
[expectLength
++]=HANGUL_KIYEOK
;
887 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
889 /* try destination overflow first */
890 errorCode
=U_ZERO_ERROR
;
891 preflightLength
=unorm_normalize(input
, inLength
,
893 output
, 100, /* too short */
895 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
896 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode
));
900 errorCode
=U_ZERO_ERROR
;
901 length
=unorm_normalize(input
, inLength
,
903 output
, UPRV_LENGTHOF(output
),
905 if(U_FAILURE(errorCode
)) {
906 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
907 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
908 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
909 for(i
=0; i
<length
; ++i
) {
910 if(output
[i
]!=expect
[i
]) {
911 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
916 if(length
!=preflightLength
) {
917 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
921 u_memcpy(expect
, input
, hangulPrefixLength
);
922 expectLength
=hangulPrefixLength
;
924 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
925 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
926 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
927 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
928 for(i
=0; i
<200; ++i
) {
929 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
930 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
932 for(i
=0; i
<200; ++i
) {
933 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
934 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
937 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
938 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
940 errorCode
=U_ZERO_ERROR
;
941 length
=unorm_normalize(input
, inLength
,
943 output
, UPRV_LENGTHOF(output
),
945 if(U_FAILURE(errorCode
)) {
946 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
947 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
948 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
949 for(i
=0; i
<length
; ++i
) {
950 if(output
[i
]!=expect
[i
]) {
951 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
958 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
960 TestConcatenate(void) {
967 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
970 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
974 UErrorCode errorCode
;
977 /* left with length, right NUL-terminated */
978 errorCode
=U_ZERO_ERROR
;
979 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
980 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
981 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
985 errorCode
=U_ZERO_ERROR
;
986 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
987 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
988 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
992 errorCode
=U_ZERO_ERROR
;
993 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
994 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
995 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
998 /* enter with U_FAILURE */
1000 errorCode
=U_UNEXPECTED_TOKEN
;
1001 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1002 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1003 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1006 /* illegal arguments */
1008 errorCode
=U_ZERO_ERROR
;
1009 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1010 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1011 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1014 errorCode
=U_ZERO_ERROR
;
1015 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1016 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1017 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1025 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1026 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Steps through a UCharIterator with unorm_next() or unorm_previous() and
 * compares every step with the expected results.
 *
 * src, srcLength    - the text the iterator runs over
 * iter              - the iterator to advance; its current index is read
 *                     after each step
 * mode              - normalization mode; also used to label log output
 * forward           - used in the log output as the next/previous indicator
 * out, outLength    - expected output pieces, separated by _PLUS; the value
 *                     (out!=NULL) is what gets passed to unorm_next() and
 *                     unorm_previous() as the flag before &neededToNormalize
 * srcIndexes,
 * srcIndexesLength  - expected iterator indexes in src for each piece
 *
 * For every piece the resulting index, length and text are checked and
 * differences are logged with log_err(); a failing error code is logged
 * with log_data_err(). The neededToNormalize flag is compared with
 * expectNeeded as well.
 */
1030 _testIter(const UChar
*src
, int32_t srcLength
,
1031 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1032 const UChar
*out
, int32_t outLength
,
1033 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1035 const UChar
*expect
, *outLimit
, *in
;
1036 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1037 UErrorCode errorCode
;
1038 UBool neededToNormalize
, expectNeeded
;
1040 errorCode
=U_ZERO_ERROR
;
/* one past the end of the expected output; stops the forward scan for _PLUS */
1041 outLimit
=out
+outLength
;
/*
 * NOTE(review): looks like the starting position for the backward pass
 * (srcIndexes[i] is read on the unorm_previous() path) - the enclosing
 * branch is not visible here, confirm.
 */
1047 i
=srcIndexesLength
-2;
1054 if(!iter
->hasNext(iter
)) {
1057 length
=unorm_next(iter
,
1058 buffer
, UPRV_LENGTHOF(buffer
),
1060 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* forward: the iterator is expected to stop at the next listed index */
1062 expectIndex
=srcIndexes
[i
+1];
1064 inLength
=expectIndex
-prevIndex
;
1067 /* get output piece from between plus signs */
1069 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
/* normalization was needed exactly when the result differs from the input piece */
1072 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1075 expectLength
=inLength
;
1079 if(!iter
->hasPrevious(iter
)) {
1082 length
=unorm_previous(iter
,
1083 buffer
, UPRV_LENGTHOF(buffer
),
1085 (UBool
)(out
!=NULL
), &neededToNormalize
,
/* backward: the iterator is expected to stop at the listed index itself */
1087 expectIndex
=srcIndexes
[i
];
1089 inLength
=prevIndex
-expectIndex
;
1092 /* get output piece from between plus signs */
1094 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
1098 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1101 expectLength
=inLength
;
/* where the iterator actually ended up after this step */
1105 index
=iter
->getIndex(iter
, UITER_CURRENT
);
1107 if(U_FAILURE(errorCode
)) {
1108 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1109 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
1112 if(expectIndex
!=index
) {
1113 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1114 forward
, _modeString
[mode
], i
, index
, expectIndex
);
1117 if(expectLength
!=length
) {
1118 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1119 forward
, _modeString
[mode
], i
, length
, expectLength
);
1122 if(0!=u_memcmp(expect
, buffer
, length
)) {
1123 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1124 forward
, _modeString
[mode
], i
);
1127 if(neededToNormalize
!=expectNeeded
) {
1131 expect
+=expectLength
+1; /* go after the + */
1134 --expect
; /* go before the + */
/*
 * Exercises unorm_next() and unorm_previous() over one sample string.
 *
 * The first part calls _testIter() for NFD, NFKD, NFC, NFKC and FCD, forward
 * (TRUE) and backward (FALSE), once with the expected normalized pieces and
 * once with a NULL expectation ("without doNormalize").
 *
 * The second part calls unorm_next() directly: with a NULL neededToNormalize
 * pointer, preflighting with a NULL buffer and with a capacity of 1, with a
 * NULL iterator, with an illegal mode, and with an error code already set.
 */
1141 TestNextPrevious() {
1143 src
[]={ /* input string */
1144 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1146 nfd
[]={ /* + separates expected output pieces */
1147 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
1150 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
1153 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1156 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
1159 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1162 /* expected iterator indexes in the source string for each iteration piece */
1163 static const int32_t
1185 UBool neededToNormalize
;
1186 UErrorCode errorCode
;
1188 uiter_setString(&iter
, src
, UPRV_LENGTHOF(src
));
1190 /* test iteration with doNormalize */
/*
 * NOTE(review): sizeof(xIndexes)/4 hard-codes the element size of the index
 * arrays (presumably int32_t); UPRV_LENGTHOF(xIndexes), as used for the
 * other arrays in these calls, would avoid the magic number.
 */
1192 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, TRUE
, nfd
, UPRV_LENGTHOF(nfd
), nfdIndexes
, sizeof(nfdIndexes
)/4);
1194 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, TRUE
, nfkd
, UPRV_LENGTHOF(nfkd
), nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1196 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, TRUE
, nfc
, UPRV_LENGTHOF(nfc
), nfcIndexes
, sizeof(nfcIndexes
)/4);
1198 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, TRUE
, nfkc
, UPRV_LENGTHOF(nfkc
), nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1200 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, TRUE
, fcd
, UPRV_LENGTHOF(fcd
), fcdIndexes
, sizeof(fcdIndexes
)/4);
/* each backward pass starts with the iterator index set to the text length */
1202 iter
.index
=iter
.length
;
1203 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, FALSE
, nfd
, UPRV_LENGTHOF(nfd
), nfdIndexes
, sizeof(nfdIndexes
)/4);
1204 iter
.index
=iter
.length
;
1205 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, FALSE
, nfkd
, UPRV_LENGTHOF(nfkd
), nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1206 iter
.index
=iter
.length
;
1207 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, FALSE
, nfc
, UPRV_LENGTHOF(nfc
), nfcIndexes
, sizeof(nfcIndexes
)/4);
1208 iter
.index
=iter
.length
;
1209 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, FALSE
, nfkc
, UPRV_LENGTHOF(nfkc
), nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1210 iter
.index
=iter
.length
;
1211 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, FALSE
, fcd
, UPRV_LENGTHOF(fcd
), fcdIndexes
, sizeof(fcdIndexes
)/4);
1213 /* test iteration without doNormalize */
1215 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1217 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1219 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1221 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1223 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1225 iter
.index
=iter
.length
;
1226 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1227 iter
.index
=iter
.length
;
1228 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1229 iter
.index
=iter
.length
;
1230 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1231 iter
.index
=iter
.length
;
1232 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1233 iter
.index
=iter
.length
;
1234 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1236 /* try without neededToNormalize */
1237 errorCode
=U_ZERO_ERROR
;
1240 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1241 UNORM_NFD
, 0, TRUE
, NULL
,
/* the result is compared with nfd[2] and nfd[3], the piece after the first _PLUS */
1243 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1244 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* 9 is neither TRUE nor FALSE, so the check below shows that the flag was written */
1249 neededToNormalize
=9;
1251 length
=unorm_next(&iter
, NULL
, 0,
1252 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1254 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1255 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
1259 errorCode
=U_ZERO_ERROR
;
/* marker value: buffer[1] must still be 5 after a call that offers capacity 1 */
1260 buffer
[0]=buffer
[1]=5;
1261 neededToNormalize
=9;
1263 length
=unorm_next(&iter
, buffer
, 1,
1264 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1266 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1267 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
1272 errorCode
=U_ZERO_ERROR
;
1273 buffer
[0]=buffer
[1]=5;
1274 neededToNormalize
=9;
1276 length
=unorm_next(NULL
, buffer
, UPRV_LENGTHOF(buffer
),
1277 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1279 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1280 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
1285 buffer
[0]=buffer
[1]=5;
1286 neededToNormalize
=9;
1288 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1289 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1291 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1292 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1296 /* error coming in */
1297 errorCode
=U_MISPLACED_QUANTIFIER
;
1300 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1301 UNORM_NFD
, 0, TRUE
, NULL
,
/* an incoming failure code must be left exactly as it was */
1303 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1304 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
/*
 * Checks u_getFC_NFKC_Closure() against a table of code points and their
 * expected closure strings, then checks its error handling: a NULL
 * destination with a non-zero capacity, and a call that is entered with a
 * failure code already set.
 */
1310 TestFCNFKCClosure(void) {
1311 static const struct {
1317 { 0x037A, { 0x0020, 0x03B9, 0 } },
1318 { 0x03D2, { 0x03C5, 0 } },
1319 { 0x20A8, { 0x0072, 0x0073, 0 } },
1320 { 0x210B, { 0x0068, 0 } },
1321 { 0x210C, { 0x0068, 0 } },
1322 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1323 { 0x2122, { 0x0074, 0x006D, 0 } },
1324 { 0x2128, { 0x007A, 0 } },
1325 { 0x1D5DB, { 0x0068, 0 } },
1326 { 0x1D5ED, { 0x007A, 0 } },
1331 UErrorCode errorCode
;
1334 for(i
=0; i
<UPRV_LENGTHOF(tests
); ++i
) {
1335 errorCode
=U_ZERO_ERROR
;
1336 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, UPRV_LENGTHOF(buffer
), &errorCode
);
/* the returned length must match the terminated string and the text must equal tests[i].s */
1337 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
/*
 * NOTE(review): tests[i].c is printed with %04lx; the field's declaration is
 * not visible here - if it is a 32-bit code point type the l length
 * modifier does not match on platforms where long is wider. Confirm.
 */
1338 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests
[i
].c
, u_errorName(errorCode
));
1342 /* error handling */
1343 errorCode
=U_ZERO_ERROR
;
1344 length
=u_getFC_NFKC_Closure(0x5c, NULL
, UPRV_LENGTHOF(buffer
), &errorCode
);
1345 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1346 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
/* errorCode is not reset here, so this call is entered with the failure left by the previous one */
1349 length
=u_getFC_NFKC_Closure(0x5c, buffer
, UPRV_LENGTHOF(buffer
), &errorCode
);
1350 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1351 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Consistency checks between per-code-point properties and the string APIs.
 *
 * First the maximum values of the NFD, NFKD, NFC and NFKC quick-check
 * properties and of the lead/trail canonical combining class properties are
 * checked. Then, for individual code points c, each quick-check property
 * value is compared with unorm_quickCheck() on the string holding just c,
 * and the lead/trail combining class properties are compared with
 * u_getCombiningClass() of the first and last code point of c's NFD form.
 */
1356 TestQuickCheckPerCP() {
1357 UErrorCode errorCode
;
1358 UChar32 c
, lead
, trail
;
1359 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1360 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1364 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1365 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1366 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1367 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1368 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1369 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1371 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1375 * compare the quick check property values for some code points
1376 * to the quick check results for checking same-code point strings
1378 errorCode
=U_ZERO_ERROR
;
/* write c into s as UTF-16; length ends up as the number of code units used */
1382 U16_APPEND_UNSAFE(s
, length
, c
);
1384 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1385 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1387 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1390 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1391 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1393 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1396 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1397 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1399 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1402 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1403 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1405 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
/* from here on, length is the length of the NFD form stored in nfd[] */
1408 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, UPRV_LENGTHOF(nfd
), &errorCode
);
1409 /* length-length == 0 is used to get around a compiler warning. */
1410 U16_GET(nfd
, 0, length
-length
, length
, lead
);
1411 U16_GET(nfd
, 0, length
-1, length
, trail
);
1413 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1414 lccc2
=u_getCombiningClass(lead
);
1415 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1416 tccc2
=u_getCombiningClass(trail
);
1419 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1423 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1427 /* skip some code points */
/*
 * Table-driven check of unorm_normalize(). Each case supplies a mode, an
 * options value and an expected result (cases[i].mode, .options, .expect);
 * the call must succeed and its output must match the expected string in
 * length and content, otherwise the case index is logged.
 * NOTE(review): the input field of the table is not visible in this
 * excerpt; presumably it is the first of the two arrays in each row.
 */
1433 TestComposition(void) {
1434 static const struct {
1435 UNormalizationMode mode
;
1441 * special cases for UAX #15 bug
1442 * see Unicode Corrigendum #5: Normalization Idempotency
1443 * at http://unicode.org/versions/corrigendum5.html
1444 * (was Public Review Issue #29)
1446 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1447 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1448 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1449 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1451 /* TODO: add test cases for UNORM_FCC here (j2151) */
1455 UErrorCode errorCode
;
1458 for(i
=0; i
<UPRV_LENGTHOF(cases
); ++i
) {
1459 errorCode
=U_ZERO_ERROR
;
1460 length
=unorm_normalize(
1462 cases
[i
].mode
, cases
[i
].options
,
1463 output
, UPRV_LENGTHOF(output
),
/* a case passes only if the call succeeded and length and content both match */
1465 if( U_FAILURE(errorCode
) ||
1466 length
!=u_strlen(cases
[i
].expect
) ||
1467 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1469 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i
);
/*
 * Tests unorm2_getDecomposition() on the instance returned by
 * unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, ...).
 *
 * Covered: U+0020 (a negative length is expected), U+00E4 and the Hangul
 * syllable U+AC01 (length, code units and terminating 0 are checked),
 * preflighting with a NULL buffer and capacity 0, and two illegal-argument
 * cases (capacity -1, and a NULL buffer with capacity 4).
 */
1475 TestGetDecomposition() {
1479 UErrorCode errorCode
=U_ZERO_ERROR
;
1480 const UNormalizer2
*n2
=unorm2_getInstance(NULL
, "nfc", UNORM2_COMPOSE_CONTIGUOUS
, &errorCode
);
1481 if(U_FAILURE(errorCode
)) {
1482 log_err_status(errorCode
, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode
));
1486 length
=unorm2_getDecomposition(n2
, 0x20, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
/* for U+0020 any non-negative length counts as a failure */
1487 if(U_FAILURE(errorCode
) || length
>=0) {
1488 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1490 errorCode
=U_ZERO_ERROR
;
1491 length
=unorm2_getDecomposition(n2
, 0xe4, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1492 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1493 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1495 errorCode
=U_ZERO_ERROR
;
1496 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1497 if(U_FAILURE(errorCode
) || length
!=3 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0x11a8 || decomp
[3]!=0) {
1498 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1500 errorCode
=U_ZERO_ERROR
;
/* preflighting: no buffer, capacity 0; the full length must still be reported */
1501 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1502 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
1503 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1505 errorCode
=U_ZERO_ERROR
;
1506 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1507 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1508 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1510 errorCode
=U_ZERO_ERROR
;
1511 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1512 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1513 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
/*
 * Tests unorm2_getRawDecomposition() on the instance returned by
 * unorm2_getNFKCInstance().
 *
 * Covered: U+0020 (a negative length is expected), U+00E4, U+1E08, U+212B,
 * the Hangul syllables U+AC00 and U+AC01, preflighting with a NULL buffer
 * and capacity 0, and two illegal-argument cases (capacity -1, and a NULL
 * buffer with capacity 4).
 *
 * NOTE(review): every log message in this function names
 * unorm2_getDecomposition although the calls are to
 * unorm2_getRawDecomposition - the messages look copied from
 * TestGetDecomposition and are misleading when a check fails.
 */
1518 TestGetRawDecomposition() {
1522 UErrorCode errorCode
=U_ZERO_ERROR
;
1523 const UNormalizer2
*n2
=unorm2_getNFKCInstance(&errorCode
);
1524 if(U_FAILURE(errorCode
)) {
1525 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1529 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1530 * without recursive decomposition.
1533 length
=unorm2_getRawDecomposition(n2
, 0x20, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
/* for U+0020 any non-negative length counts as a failure */
1534 if(U_FAILURE(errorCode
) || length
>=0) {
1535 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1537 errorCode
=U_ZERO_ERROR
;
1538 length
=unorm2_getRawDecomposition(n2
, 0xe4, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1539 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1540 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1542 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1543 errorCode
=U_ZERO_ERROR
;
1544 length
=unorm2_getRawDecomposition(n2
, 0x1e08, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
/* expected: U+00C7 followed by U+0301, i.e. only one level of decomposition */
1545 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xc7 || decomp
[1]!=0x301 || decomp
[2]!=0) {
1546 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1548 /* U+212B ANGSTROM SIGN */
1549 errorCode
=U_ZERO_ERROR
;
1550 length
=unorm2_getRawDecomposition(n2
, 0x212b, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1551 if(U_FAILURE(errorCode
) || length
!=1 || decomp
[0]!=0xc5 || decomp
[1]!=0) {
1552 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1554 errorCode
=U_ZERO_ERROR
;
1555 length
=unorm2_getRawDecomposition(n2
, 0xac00, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1556 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0) {
1557 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1559 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1560 errorCode
=U_ZERO_ERROR
;
1561 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1562 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xac00 || decomp
[1]!=0x11a8 || decomp
[2]!=0) {
1563 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1565 errorCode
=U_ZERO_ERROR
;
/* preflighting: no buffer, capacity 0; the full length must still be reported */
1566 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1567 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=2) {
1568 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1570 errorCode
=U_ZERO_ERROR
;
1571 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1572 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1573 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1575 errorCode
=U_ZERO_ERROR
;
1576 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1577 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1578 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
/*
 * Tests unorm2_append() with the NFC instance when the end of the first
 * string and the start of the second one combine.
 *
 * Step 1 appends with a capacity of 6, which is too small: the call must
 * report U_BUFFER_OVERFLOW_ERROR together with the full result length, and
 * the first string must read exactly as before the call.
 * Step 2 appends with the full capacity of a[] and compares the result with
 * the expected text.
 */
1583 TestAppendRestoreMiddle() {
1584 UChar a
[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1585 static const UChar b
[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1586 /* NFC: C5 is 'A with ring above' */
1587 static const UChar expected
[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1589 UErrorCode errorCode
=U_ZERO_ERROR
;
1590 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1591 if(U_FAILURE(errorCode
)) {
1592 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1596 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1597 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1598 * still fits into a[] but the full result still overflows this capacity.
1599 * (Let it modify the destination buffer before reallocating internally.)
1601 length
=unorm2_append(n2
, a
, -1, 6, b
, -1, &errorCode
);
/* expected[] carries no terminator, so UPRV_LENGTHOF(expected) is the result length */
1602 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=UPRV_LENGTHOF(expected
)) {
1603 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length
);
1606 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1607 if(a
[0]!=0x61 || a
[1]!=0x62 || a
[2]!=0x63 || a
[3]!=0x41 || a
[4]!=0x327 || a
[5]!=0) {
1608 log_err("unorm2_append(overflow) modified the first string\n");
1611 errorCode
=U_ZERO_ERROR
;
/* second attempt with the whole of a[] available */
1612 length
=unorm2_append(n2
, a
, -1, UPRV_LENGTHOF(a
), b
, -1, &errorCode
);
1613 if(U_FAILURE(errorCode
) || length
!=UPRV_LENGTHOF(expected
) || 0!=u_memcmp(a
, expected
, length
)) {
1614 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode
), (int)length
);
/*
 * Verifies the parameterless instance getters unorm2_getNFCInstance(),
 * unorm2_getNFDInstance(), unorm2_getNFKCInstance(),
 * unorm2_getNFKDInstance() and unorm2_getNFKCCasefoldInstance().
 *
 * The same short input is normalized with each instance; the result length
 * and code units are compared with the values expected for that form, so a
 * getter returning the wrong kind of normalizer is detected.
 */
1620 TestGetEasyToUseInstance() {
1621 static const UChar in
[]={
1622 0xA0, /* -> <noBreak> 0020 */
1623 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1628 UErrorCode errorCode
=U_ZERO_ERROR
;
1629 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1630 if(U_FAILURE(errorCode
)) {
1631 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1634 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
/* NFC: U+00A0 is kept, <C7 301> becomes U+1E08 */
1635 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0xa0 || out
[1]!=0x1e08) {
1636 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1637 (int)length
, u_errorName(errorCode
));
1640 errorCode
=U_ZERO_ERROR
;
1641 n2
=unorm2_getNFDInstance(&errorCode
);
1642 if(U_FAILURE(errorCode
)) {
1643 log_err_status(errorCode
, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode
));
1646 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
/* NFD: U+00A0 is kept, the rest becomes <43 327 301> */
1647 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0xa0 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1648 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1649 (int)length
, u_errorName(errorCode
));
1652 errorCode
=U_ZERO_ERROR
;
1653 n2
=unorm2_getNFKCInstance(&errorCode
);
1654 if(U_FAILURE(errorCode
)) {
1655 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1658 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
/* NFKC: U+00A0 becomes U+0020, <C7 301> becomes U+1E08 */
1659 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e08) {
1660 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1661 (int)length
, u_errorName(errorCode
));
1664 errorCode
=U_ZERO_ERROR
;
1665 n2
=unorm2_getNFKDInstance(&errorCode
);
1666 if(U_FAILURE(errorCode
)) {
1667 log_err_status(errorCode
, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode
));
1670 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
/* NFKD: U+00A0 becomes U+0020, the rest becomes <43 327 301> */
1671 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0x20 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1672 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1673 (int)length
, u_errorName(errorCode
));
1676 errorCode
=U_ZERO_ERROR
;
1677 n2
=unorm2_getNFKCCasefoldInstance(&errorCode
);
1678 if(U_FAILURE(errorCode
)) {
1679 log_err_status(errorCode
, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode
));
1682 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
/* NFKC_Casefold: as NFKC, but the second unit is U+1E09 instead of U+1E08 */
1683 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e09) {
1684 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1685 (int)length
, u_errorName(errorCode
));
1689 #endif /* #if !UCONFIG_NO_NORMALIZATION */