1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 * Modification History:
14 * Madhu Katragadda Ported for C API
15 * synwee added test for quick check
16 * synwee added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
25 #if !UCONFIG_NO_NORMALIZATION
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
38 TestNormCoverage(void);
41 TestConcatenate(void);
44 TestNextPrevious(void);
46 static void TestIsNormalized(void);
49 TestFCNFKCClosure(void);
52 TestQuickCheckPerCP(void);
55 TestComposition(void);
61 TestGetDecomposition(void);
64 TestGetRawDecomposition(void);
66 static void TestAppendRestoreMiddle(void);
67 static void TestGetEasyToUseInstance(void);
/*
 * Test vectors for canonical normalization.
 * Each row is { input, expected decomposed form, expected composed form }.
 * The strings use backslash-u escapes and are converted with CharsToUChars()
 * before use. TestNormCases() reads column 1 for UNORM_NFD and column 2 for
 * UNORM_NFC; TestQuickCheckStringResult() also reads columns 1 and 2.
 */
69 static const char* const canonTests
[][3] = {
70 /* Input*/ /*Decomposed*/ /*Composed*/
71 { "cat", "cat", "cat" },
72 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
74 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
75 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
77 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
78 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
79 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below (marks are reordered in the expected output) */
81 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
82 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
84 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
85 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
86 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
88 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
89 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
91 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
92 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
94 { "Henry IV", "Henry IV", "Henry IV" },
95 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
97 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
98 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
99 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
100 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
101 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
102 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + U+0300 + U+0316: the two marks swap places in the decomposed form */
/*
 * Test vectors for compatibility normalization.
 * Same row layout as the canonical table: { input, expected decomposed form,
 * expected composed form }, written with backslash-u escapes.
 * TestNormCases() reads column 1 for UNORM_NFKD and column 2 for UNORM_NFKC.
 */
106 static const char* const compatTests
[][3] = {
107 /* Input*/ /*Decomposed */ /*Composed*/
108 { "cat", "cat", "cat" },
110 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
112 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
113 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
115 { "Henry IV", "Henry IV", "Henry IV" },
116 { "Henry \\u2163", "Henry IV", "Henry IV" },
118 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
119 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
121 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
123 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
124 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
125 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/
/*
 * Test vectors for UNORM_FCD: { input, expected FCD output, unused }.
 * The third column is NULL because TestNormCases() only reads column 2 for
 * NFC/NFKC; for UNORM_FCD it compares against column 1.
 */
129 static const char* const fcdTests
[][3] = {
130 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
131 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL
}, /* D-caron + cedilla */
132 { "\\u010e", "\\u010e", NULL
} /* D-caron */
/*
 * Registers every test function of this file with addTest(), each under a
 * name of the form "tsnorm/cnormtst/<FunctionName>".
 * root is passed through unchanged to each addTest() call.
 */
135 void addNormTest(TestNode
** root
);
137 void addNormTest(TestNode
** root
)
139 addTest(root
, &TestAPI
, "tsnorm/cnormtst/TestAPI");
140 addTest(root
, &TestDecomp
, "tsnorm/cnormtst/TestDecomp");
141 addTest(root
, &TestCompatDecomp
, "tsnorm/cnormtst/TestCompatDecomp");
142 addTest(root
, &TestCanonDecompCompose
, "tsnorm/cnormtst/TestCanonDecompCompose");
143 addTest(root
, &TestCompatDecompCompose
, "tsnorm/cnormtst/TestCompatDecompCompose");
144 addTest(root
, &TestFCD
, "tsnorm/cnormtst/TestFCD");
145 addTest(root
, &TestNull
, "tsnorm/cnormtst/TestNull");
146 addTest(root
, &TestQuickCheck
, "tsnorm/cnormtst/TestQuickCheck");
147 addTest(root
, &TestQuickCheckPerCP
, "tsnorm/cnormtst/TestQuickCheckPerCP");
148 addTest(root
, &TestIsNormalized
, "tsnorm/cnormtst/TestIsNormalized");
149 addTest(root
, &TestCheckFCD
, "tsnorm/cnormtst/TestCheckFCD");
150 addTest(root
, &TestNormCoverage
, "tsnorm/cnormtst/TestNormCoverage");
151 addTest(root
, &TestConcatenate
, "tsnorm/cnormtst/TestConcatenate");
152 addTest(root
, &TestNextPrevious
, "tsnorm/cnormtst/TestNextPrevious");
153 addTest(root
, &TestFCNFKCClosure
, "tsnorm/cnormtst/TestFCNFKCClosure");
154 addTest(root
, &TestComposition
, "tsnorm/cnormtst/TestComposition");
155 addTest(root
, &TestGetDecomposition
, "tsnorm/cnormtst/TestGetDecomposition");
156 addTest(root
, &TestGetRawDecomposition
, "tsnorm/cnormtst/TestGetRawDecomposition");
157 addTest(root
, &TestAppendRestoreMiddle
, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158 addTest(root
, &TestGetEasyToUseInstance
, "tsnorm/cnormtst/TestGetEasyToUseInstance");
161 static const char* const modeStrings
[]={
/*
 * Runs every row of a test-vector table through unorm_normalize() in one mode.
 *
 * mode           normalization mode passed to unorm_normalize(); also used to
 *                index modeStrings[] for log messages
 * cases          table of { input, decomposed, composed } escaped strings
 * lengthOfCases  number of rows in cases
 *
 * For each row the input is converted with CharsToUChars(). The function then:
 *  - preflights (NULL destination, capacity 0) once with an explicit source
 *    length and once with -1, and reports an error if the two lengths differ;
 *  - normalizes into the result buffer with an explicit source length and
 *    again with -1, checking each time that the call succeeded and returned
 *    the preflighted length, and comparing the output with assertEqual().
 */
172 static void TestNormCases(UNormalizationMode mode
,
173 const char* const cases
[][3], int32_t lengthOfCases
) {
174 int32_t x
, neededLen
, length2
;
/* Column 2 (composed) is the expectation for NFC/NFKC; column 1 for every other mode. */
175 int32_t expIndex
= (mode
==UNORM_NFC
|| mode
==UNORM_NFKC
) ? 2 : 1;
178 log_verbose("Testing unorm_normalize(%s)\n", modeStrings
[mode
]);
179 for(x
=0; x
< lengthOfCases
; x
++)
181 UErrorCode status
= U_ZERO_ERROR
, status2
= U_ZERO_ERROR
;
182 source
=CharsToUChars(cases
[x
][0]);
/* Preflight twice: with an explicit source length, then with -1 for a NUL-terminated source. */
183 neededLen
= unorm_normalize(source
, u_strlen(source
), mode
, 0, NULL
, 0, &status
);
184 length2
= unorm_normalize(source
, -1, mode
, 0, NULL
, 0, &status2
);
185 if(neededLen
!=length2
) {
186 log_err("ERROR in unorm_normalize(%s)[%d]: "
187 "preflight length/srcLength %d!=%d preflight length/NUL\n",
188 modeStrings
[mode
], (int)x
, (int)neededLen
, (int)length2
);
190 if(status
==U_BUFFER_OVERFLOW_ERROR
)
/* Real normalization with an explicit source length. */
194 length2
=unorm_normalize(source
, u_strlen(source
), mode
, 0, result
, UPRV_LENGTHOF(result
), &status
);
195 if(U_FAILURE(status
) || neededLen
!=length2
) {
196 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
197 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
199 assertEqual(result
, cases
[x
][expIndex
], x
);
/* Same normalization again, this time relying on NUL termination of the source. */
201 length2
=unorm_normalize(source
, -1, mode
, 0, result
, UPRV_LENGTHOF(result
), &status
);
202 if(U_FAILURE(status
) || neededLen
!=length2
) {
203 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
204 modeStrings
[mode
], austrdup(source
), myErrorName(status
));
206 assertEqual(result
, cases
[x
][expIndex
], x
);
213 TestNormCases(UNORM_NFD
, canonTests
, UPRV_LENGTHOF(canonTests
));
/* Checks UNORM_NFKD against the decomposed column of compatTests. */
216 void TestCompatDecomp() {
217 TestNormCases(UNORM_NFKD
, compatTests
, UPRV_LENGTHOF(compatTests
));
/* Checks UNORM_NFC against the composed column of canonTests. */
220 void TestCanonDecompCompose() {
221 TestNormCases(UNORM_NFC
, canonTests
, UPRV_LENGTHOF(canonTests
));
/* Checks UNORM_NFKC against the composed column of compatTests. */
224 void TestCompatDecompCompose() {
225 TestNormCases(UNORM_NFKC
, compatTests
, UPRV_LENGTHOF(compatTests
));
229 TestNormCases(UNORM_FCD
, fcdTests
, UPRV_LENGTHOF(fcdTests
));
/*
 * Compares a normalization result with its expected value.
 *
 * result    NUL-terminated UChar string produced by the code under test
 * expected  expected string in escaped char form; converted with CharsToUChars()
 * index     index of the test case, reported in the error message
 *
 * Logs an error through log_err() when u_strcmp() reports a difference.
 */
232 static void assertEqual(const UChar
* result
, const char* expected
, int32_t index
)
234 UChar
*expectedUni
= CharsToUChars(expected
);
235 if(u_strcmp(result
, expectedUni
)!=0){
236 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index
, expected
,
/*
 * Helper for the embedded-NUL tests: normalizes src (srcLen UChars) in the
 * given mode with an output capacity of 50 and compares the outcome with
 * exp/expLen.
 *
 * A failing status is reported as a data error, a wrong output length as an
 * error, and each differing UChar is reported individually. The name value
 * used in the messages comes from a parameter whose declaration is not
 * visible in this part of the file.
 */
242 static void TestNull_check(UChar
*src
, int32_t srcLen
,
243 UChar
*exp
, int32_t expLen
,
244 UNormalizationMode mode
,
247 UErrorCode status
= U_ZERO_ERROR
;
253 status
= U_ZERO_ERROR
;
260 len
= unorm_normalize(src
, srcLen
, mode
, 0, result
, 50, &status
);
262 if(U_FAILURE(status
)) {
263 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name
, u_errorName(status
));
264 } else if (len
!= expLen
) {
265 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name
, expLen
, len
);
/* Compare the output with the expectation one UChar at a time. */
270 if(exp
[i
] != result
[i
]) {
271 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
278 log_verbose(" %d: \\u%04X\n", i
, result
[i
]);
282 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name
);
288 UChar source_comp
[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289 int32_t source_comp_len
= 4;
290 UChar expect_comp
[] = { 0x0061, 0x0000, 0x1e0a };
291 int32_t expect_comp_len
= 3;
293 UChar source_dcmp
[] = { 0x1e0A, 0x0000, 0x0929 };
294 int32_t source_dcmp_len
= 3;
295 UChar expect_dcmp
[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296 int32_t expect_dcmp_len
= 5;
298 TestNull_check(source_comp
,
305 TestNull_check(source_dcmp
,
312 TestNull_check(source_comp
,
/*
 * Calls unorm_quickCheck() on single code points, one ten-entry table per
 * normalization form (NFD, NFC, NFKD, NFKC), and logs an error for every
 * code point whose result differs from the expected value.
 * NOTE(review): the value compared against is on lines not visible here;
 * going by the function name it is presumably UNORM_NO - confirm.
 */
322 static void TestQuickCheckResultNO()
324 const UChar CPNFD
[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326 const UChar CPNFC
[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328 const UChar CPNFKD
[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330 const UChar CPNFKC
[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
337 UErrorCode error
= U_ZERO_ERROR
;
/* One pass over the tables; each entry is checked as a string of length 1. */
339 for (; count
< SIZE
; count
++)
341 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
344 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
347 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
350 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
353 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
356 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
359 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
362 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * Calls unorm_quickCheck() on single code points in all four forms.
 * The first group of checks uses a single UChar cp; for NFD and NFKD the
 * visible comparison is against UNORM_YES. The second group walks the
 * ten-entry per-form tables below.
 * NOTE(review): how cp is set, and the values compared against in the NFC,
 * NFKC and table checks, are on lines not visible here; going by the
 * function name they are presumably UNORM_YES - confirm.
 */
369 static void TestQuickCheckResultYES()
371 const UChar CPNFD
[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373 const UChar CPNFC
[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375 const UChar CPNFKD
[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377 const UChar CPNFKC
[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
382 UErrorCode error
= U_ZERO_ERROR
;
/* Checks on the single code point held in cp. */
387 if (unorm_quickCheck(&cp
, 1, UNORM_NFD
, &error
) != UNORM_YES
)
389 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp
);
392 if (unorm_quickCheck(&cp
, 1, UNORM_NFC
, &error
) !=
395 log_err("ERROR in NFC quick check at U+%04x\n", cp
);
398 if (unorm_quickCheck(&cp
, 1, UNORM_NFKD
, &error
) != UNORM_YES
)
400 log_data_err("ERROR in NFKD quick check at U+%04x\n", cp
);
403 if (unorm_quickCheck(&cp
, 1, UNORM_NFKC
, &error
) !=
406 log_err("ERROR in NFKC quick check at U+%04x\n", cp
);
/* Checks on the per-form tables, one entry at a time. */
412 for (; count
< SIZE
; count
++)
414 if (unorm_quickCheck(&(CPNFD
[count
]), 1, UNORM_NFD
, &error
) !=
417 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD
[count
]);
420 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
)
423 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC
[count
]);
426 if (unorm_quickCheck(&(CPNFKD
[count
]), 1, UNORM_NFKD
, &error
) !=
429 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD
[count
]);
432 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
435 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * Calls unorm_quickCheck() on single code points for the composing forms
 * only (NFC and NFKC), using one ten-entry table per form, and logs a data
 * error for every code point whose result differs from the expected value.
 * NOTE(review): the value compared against is on lines not visible here;
 * going by the function name it is presumably UNORM_MAYBE - confirm.
 */
441 static void TestQuickCheckResultMAYBE()
443 const UChar CPNFC
[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445 const UChar CPNFKC
[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
452 UErrorCode error
= U_ZERO_ERROR
;
454 /* NFD and NFKD do not have any MAYBE code points, so only NFC and NFKC are tested. */
455 for (; count
< SIZE
; count
++)
457 if (unorm_quickCheck(&(CPNFC
[count
]), 1, UNORM_NFC
, &error
) !=
460 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC
[count
]);
463 if (unorm_quickCheck(&(CPNFKC
[count
]), 1, UNORM_NFKC
, &error
) !=
466 log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC
[count
]);
/*
 * Runs unorm_quickCheck() over whole strings taken from the test-vector
 * tables: the decomposed column (index 1) of canonTests/compatTests is
 * checked in NFD/NFKD and the composed column (index 2) in NFC/NFKC.
 * Strings are converted with CharsToUChars() and passed with an explicit
 * length from u_strlen().
 * NOTE(review): the values on the right-hand side of the comparisons are on
 * lines not visible here.
 */
472 static void TestQuickCheckStringResult()
477 UErrorCode error
= U_ZERO_ERROR
;
/* Canonical vectors: NFD on column 1, NFC on column 2. */
479 for (count
= 0; count
< UPRV_LENGTHOF(canonTests
); count
++)
481 d
= CharsToUChars(canonTests
[count
][1]);
482 c
= CharsToUChars(canonTests
[count
][2]);
483 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFD
, &error
) !=
486 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count
);
490 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFC
, &error
) ==
493 log_err("ERROR in NFC quick check for string at count %d\n", count
);
/* Compatibility vectors: NFKD on column 1, NFKC on column 2. */
501 for (count
= 0; count
< UPRV_LENGTHOF(compatTests
); count
++)
503 d
= CharsToUChars(compatTests
[count
][1]);
504 c
= CharsToUChars(compatTests
[count
][2]);
505 if (unorm_quickCheck(d
, u_strlen(d
), UNORM_NFKD
, &error
) !=
508 log_data_err("ERROR in NFKD quick check for string at count %d\n", count
);
512 if (unorm_quickCheck(c
, u_strlen(c
), UNORM_NFKC
, &error
) !=
515 log_err("ERROR in NFKC quick check for string at count %d\n", count
);
/*
 * Entry point registered as "tsnorm/cnormtst/TestQuickCheck": runs the four
 * quick-check sub-tests (NO, YES, MAYBE results and whole-string checks) in
 * that order.
 */
524 void TestQuickCheck()
526 TestQuickCheckResultNO();
527 TestQuickCheckResultYES();
528 TestQuickCheckResultMAYBE();
529 TestQuickCheckStringResult();
533 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
534 * normalized, and some that are not.
535 * Here we pick some specific cases and test the C API.
/*
 * Exercises unorm_isNormalized() through the C API:
 *  - a lone U+0300 with explicit length 1 must be reported as NFC-normalized;
 *  - an error code that is already a failure on entry must be left unchanged;
 *  - a NULL source and a length of -2 must each set U_ILLEGAL_ARGUMENT_ERROR;
 *  - every NUL-terminated string in notNFC must be reported as not normalized
 *    for both NFC and NFKC, and every string in notNFKC for NFKC.
 */
537 static void TestIsNormalized(void) {
538 static const UChar notNFC
[][8]={ /* strings that are not in NFC */
539 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
540 { 0xfb1d, 0 }, /* excluded from composition */
541 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
542 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
544 static const UChar notNFKC
[][8]={ /* strings that are not in NFKC */
545 { 0x1100, 0x1161, 0 }, /* Jamo compose */
546 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
547 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
551 UErrorCode errorCode
;
555 /* normal case with length>=0 (length -1 used for special cases below) */
556 errorCode
=U_ZERO_ERROR
;
/* notNFC[0]+2 points at the single UChar 0x300 inside the first string. */
557 if(!unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
558 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
561 /* incoming U_FAILURE */
562 errorCode
=U_TRUNCATED_CHAR_FOUND
;
563 (void)unorm_isNormalized(notNFC
[0]+2, 1, UNORM_NFC
, &errorCode
);
564 if(errorCode
!=U_TRUNCATED_CHAR_FOUND
) {
565 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode
));
/* NULL source pointer with a non-zero length is an illegal argument. */
569 errorCode
=U_ZERO_ERROR
;
570 (void)unorm_isNormalized(NULL
, 1, UNORM_NFC
, &errorCode
);
571 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
572 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* A length below -1 is an illegal argument. */
576 errorCode
=U_ZERO_ERROR
;
577 (void)unorm_isNormalized(notNFC
[0]+2, -2, UNORM_NFC
, &errorCode
);
578 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
579 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode
));
/* Each notNFC string is checked against both NFC and NFKC. */
583 for(i
=0; i
<UPRV_LENGTHOF(notNFC
); ++i
) {
584 errorCode
=U_ZERO_ERROR
;
585 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
586 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
588 errorCode
=U_ZERO_ERROR
;
589 if(unorm_isNormalized(notNFC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
590 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
/* The notNFKC strings are checked against NFKC only. */
593 for(i
=0; i
<UPRV_LENGTHOF(notNFKC
); ++i
) {
594 errorCode
=U_ZERO_ERROR
;
595 if(unorm_isNormalized(notNFKC
[i
], -1, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
596 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i
, u_errorName(errorCode
));
603 UErrorCode status
= U_ZERO_ERROR
;
604 static const UChar FAST_
[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
606 static const UChar FALSE_
[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
607 0x02B9, 0x0314, 0x0315, 0x0316};
608 static const UChar TRUE_
[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
609 0x0050, 0x0730, 0x09EE, 0x1E10};
611 static const UChar datastr
[][5] =
612 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
613 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
614 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
615 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
616 static const UBool result
[] = {UNORM_YES
, UNORM_NO
, UNORM_NO
, UNORM_YES
};
618 static const UChar datachar
[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
620 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
622 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
623 0x0307, 0x0308, 0x0309, 0x030a,
624 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
625 0x0327, 0x0328, 0x0329, 0x032a,
626 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
627 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
631 if (unorm_quickCheck(FAST_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
632 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
633 if (unorm_quickCheck(FALSE_
, 10, UNORM_FCD
, &status
) != UNORM_NO
)
634 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
635 if (unorm_quickCheck(TRUE_
, 10, UNORM_FCD
, &status
) != UNORM_YES
)
636 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
638 if (U_FAILURE(status
))
639 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status
));
643 UBool fcdresult
= unorm_quickCheck(datastr
[count
], 4, UNORM_FCD
, &status
);
644 if (U_FAILURE(status
)) {
645 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count
);
649 if (result
[count
] != fcdresult
) {
650 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count
,
657 /* random checks of long strings */
658 status
= U_ZERO_ERROR
;
659 srand((unsigned)time( NULL
));
661 for (count
= 0; count
< 50; count
++)
664 UBool testresult
= UNORM_YES
;
672 data
[size
] = datachar
[rand() % UPRV_LENGTHOF(datachar
)];
673 log_verbose("0x%x", data
[size
]);
674 normsize
+= unorm_normalize(data
+ size
, 1, UNORM_NFD
, 0,
675 norm
+ normsize
, 100 - normsize
, &status
);
676 if (U_FAILURE(status
)) {
677 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
684 nfdsize
= unorm_normalize(data
, size
, UNORM_NFD
, 0,
686 if (U_FAILURE(status
)) {
687 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
690 if (nfdsize
!= normsize
|| u_memcmp(nfd
, norm
, nfdsize
) != 0) {
691 testresult
= UNORM_NO
;
693 if (testresult
== UNORM_YES
) {
694 log_verbose("result UNORM_YES\n");
697 log_verbose("result UNORM_NO\n");
700 if (unorm_quickCheck(data
, size
, UNORM_FCD
, &status
) != testresult
|| U_FAILURE(status
)) {
701 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult
);
708 static const UChar in
[]={ 0x68, 0xe4 };
709 UChar out
[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
710 UErrorCode errorCode
;
713 /* try preflighting */
714 errorCode
=U_ZERO_ERROR
;
715 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, NULL
, 0, &errorCode
);
716 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
717 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
721 errorCode
=U_ZERO_ERROR
;
722 length
=unorm_normalize(in
, 2, UNORM_NFD
, 0, out
, 3, &errorCode
);
723 if(U_FAILURE(errorCode
)) {
724 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length
, u_errorName(errorCode
));
727 if(length
!=3 || out
[2]!=0x308 || out
[3]!=0xffff) {
728 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length
, out
[0], out
[1], out
[2], out
[3]);
731 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, NULL
, 0, &errorCode
);
732 if(U_FAILURE(errorCode
)) {
733 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
736 length
=unorm_normalize(NULL
, 0, UNORM_NFC
, 0, out
, 20, &errorCode
);
737 if(U_FAILURE(errorCode
)) {
738 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length
, u_errorName(errorCode
));
743 /* test cases to improve test code coverage */
745 HANGUL_K_KIYEOK
=0x3131, /* NFKD->Jamo L U+1100 */
746 HANGUL_K_WEO
=0x315d, /* NFKD->Jamo V U+116f */
747 HANGUL_K_KIYEOK_SIOS
=0x3133, /* NFKD->Jamo T U+11aa */
749 HANGUL_KIYEOK
=0x1100, /* Jamo L U+1100 */
750 HANGUL_WEO
=0x116f, /* Jamo V U+116f */
751 HANGUL_KIYEOK_SIOS
=0x11aa, /* Jamo T U+11aa */
753 HANGUL_AC00
=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
754 HANGUL_SYLLABLE
=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
756 MUSICAL_VOID_NOTEHEAD
=0x1d157,
757 MUSICAL_HALF_NOTE
=0x1d15e, /* NFC/NFD->Notehead+Stem */
758 MUSICAL_STEM
=0x1d165, /* cc=216 */
759 MUSICAL_STACCATO
=0x1d17c /* cc=220 */
764 UChar input
[1000], expect
[1000], output
[1000];
765 UErrorCode errorCode
;
766 int32_t i
, length
, inLength
, expectLength
, hangulPrefixLength
, preflightLength
;
768 /* create a long and nasty string with NFKC-unsafe characters */
771 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
772 input
[inLength
++]=HANGUL_KIYEOK
;
773 input
[inLength
++]=HANGUL_WEO
;
774 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
776 input
[inLength
++]=HANGUL_KIYEOK
;
777 input
[inLength
++]=HANGUL_WEO
;
778 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
780 input
[inLength
++]=HANGUL_KIYEOK
;
781 input
[inLength
++]=HANGUL_K_WEO
;
782 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
784 input
[inLength
++]=HANGUL_KIYEOK
;
785 input
[inLength
++]=HANGUL_K_WEO
;
786 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
788 input
[inLength
++]=HANGUL_K_KIYEOK
;
789 input
[inLength
++]=HANGUL_WEO
;
790 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
792 input
[inLength
++]=HANGUL_K_KIYEOK
;
793 input
[inLength
++]=HANGUL_WEO
;
794 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
796 input
[inLength
++]=HANGUL_K_KIYEOK
;
797 input
[inLength
++]=HANGUL_K_WEO
;
798 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
800 input
[inLength
++]=HANGUL_K_KIYEOK
;
801 input
[inLength
++]=HANGUL_K_WEO
;
802 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
804 /* Hangul LV with normal/compatibility Jamo T */
805 input
[inLength
++]=HANGUL_AC00
;
806 input
[inLength
++]=HANGUL_KIYEOK_SIOS
;
808 input
[inLength
++]=HANGUL_AC00
;
809 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
811 /* compatibility Jamo L, V */
812 input
[inLength
++]=HANGUL_K_KIYEOK
;
813 input
[inLength
++]=HANGUL_K_WEO
;
815 hangulPrefixLength
=inLength
;
817 input
[inLength
++]=U16_LEAD(MUSICAL_HALF_NOTE
);
818 input
[inLength
++]=U16_TRAIL(MUSICAL_HALF_NOTE
);
819 for(i
=0; i
<200; ++i
) {
820 input
[inLength
++]=U16_LEAD(MUSICAL_STACCATO
);
821 input
[inLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
822 input
[inLength
++]=U16_LEAD(MUSICAL_STEM
);
823 input
[inLength
++]=U16_TRAIL(MUSICAL_STEM
);
826 /* (compatibility) Jamo L, T do not compose */
827 input
[inLength
++]=HANGUL_K_KIYEOK
;
828 input
[inLength
++]=HANGUL_K_KIYEOK_SIOS
;
831 errorCode
=U_ZERO_ERROR
;
832 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFD
, &errorCode
) || U_FAILURE(errorCode
)) {
833 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
835 errorCode
=U_ZERO_ERROR
;
836 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKD
, &errorCode
) || U_FAILURE(errorCode
)) {
837 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
839 errorCode
=U_ZERO_ERROR
;
840 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFC
, &errorCode
) || U_FAILURE(errorCode
)) {
841 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
843 errorCode
=U_ZERO_ERROR
;
844 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_NFKC
, &errorCode
) || U_FAILURE(errorCode
)) {
845 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
847 errorCode
=U_ZERO_ERROR
;
848 if(UNORM_NO
!=unorm_quickCheck(input
, inLength
, UNORM_FCD
, &errorCode
) || U_FAILURE(errorCode
)) {
849 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode
));
854 expect
[expectLength
++]=HANGUL_SYLLABLE
;
856 expect
[expectLength
++]=HANGUL_SYLLABLE
;
858 expect
[expectLength
++]=HANGUL_SYLLABLE
;
860 expect
[expectLength
++]=HANGUL_SYLLABLE
;
862 expect
[expectLength
++]=HANGUL_SYLLABLE
;
864 expect
[expectLength
++]=HANGUL_SYLLABLE
;
866 expect
[expectLength
++]=HANGUL_SYLLABLE
;
868 expect
[expectLength
++]=HANGUL_SYLLABLE
;
870 expect
[expectLength
++]=HANGUL_AC00
+3;
872 expect
[expectLength
++]=HANGUL_AC00
+3;
874 expect
[expectLength
++]=HANGUL_AC00
+14*28;
876 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
877 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
878 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
879 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
880 for(i
=0; i
<200; ++i
) {
881 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
882 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
884 for(i
=0; i
<200; ++i
) {
885 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
886 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
889 expect
[expectLength
++]=HANGUL_KIYEOK
;
890 expect
[expectLength
++]=HANGUL_KIYEOK_SIOS
;
892 /* try destination overflow first */
893 errorCode
=U_ZERO_ERROR
;
894 preflightLength
=unorm_normalize(input
, inLength
,
896 output
, 100, /* too short */
898 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
) {
899 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode
));
903 errorCode
=U_ZERO_ERROR
;
904 length
=unorm_normalize(input
, inLength
,
906 output
, UPRV_LENGTHOF(output
),
908 if(U_FAILURE(errorCode
)) {
909 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
910 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
911 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
912 for(i
=0; i
<length
; ++i
) {
913 if(output
[i
]!=expect
[i
]) {
914 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
919 if(length
!=preflightLength
) {
920 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length
, preflightLength
);
924 u_memcpy(expect
, input
, hangulPrefixLength
);
925 expectLength
=hangulPrefixLength
;
927 expect
[expectLength
++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD
);
928 expect
[expectLength
++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD
);
929 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
930 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
931 for(i
=0; i
<200; ++i
) {
932 expect
[expectLength
++]=U16_LEAD(MUSICAL_STEM
);
933 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STEM
);
935 for(i
=0; i
<200; ++i
) {
936 expect
[expectLength
++]=U16_LEAD(MUSICAL_STACCATO
);
937 expect
[expectLength
++]=U16_TRAIL(MUSICAL_STACCATO
);
940 expect
[expectLength
++]=HANGUL_K_KIYEOK
;
941 expect
[expectLength
++]=HANGUL_K_KIYEOK_SIOS
;
943 errorCode
=U_ZERO_ERROR
;
944 length
=unorm_normalize(input
, inLength
,
946 output
, UPRV_LENGTHOF(output
),
948 if(U_FAILURE(errorCode
)) {
949 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode
));
950 } else if(length
!=expectLength
|| u_memcmp(output
, expect
, length
)!=0) {
951 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
952 for(i
=0; i
<length
; ++i
) {
953 if(output
[i
]!=expect
[i
]) {
954 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i
, output
[i
], expect
[i
]);
961 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
/*
 * API-level checks for unorm_concatenate() in UNORM_NFC. Every call passes
 * the left string with an explicit length of 2 and the right string
 * NUL-terminated (-1):
 *  - normal call into buffer[100]: expects success, length 6 and output
 *    equal to expect;
 *  - preflighting with a NULL destination of capacity 0, and with a
 *    destination of capacity 1: expects U_BUFFER_OVERFLOW_ERROR and length 6;
 *    in the capacity-1 case buffer[2] must still hold 0x5555;
 *  - entry with an error code that is already a failure: the code must be
 *    unchanged and buffer[2] must still hold 0xaaaa;
 *  - NULL left string, or NULL destination with capacity 100: expects
 *    U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): the lines that fill buffer with the 0x5555 / 0xaaaa marker
 * values are not visible here.
 */
963 TestConcatenate(void) {
970 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
973 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
977 UErrorCode errorCode
;
980 /* left with length, right NUL-terminated */
981 errorCode
=U_ZERO_ERROR
;
982 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
983 if(U_FAILURE(errorCode
) || length
!=6 || 0!=u_memcmp(buffer
, expect
, length
)) {
984 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
/* Pure preflighting: no destination buffer at all. */
988 errorCode
=U_ZERO_ERROR
;
989 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 0, UNORM_NFC
, 0, &errorCode
);
990 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6) {
991 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
/* Destination too small (capacity 1): buffer[2] must keep its marker value. */
995 errorCode
=U_ZERO_ERROR
;
996 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 1, UNORM_NFC
, 0, &errorCode
);
997 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=6 || buffer
[2]!=0x5555) {
998 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1001 /* enter with U_FAILURE */
1003 errorCode
=U_UNEXPECTED_TOKEN
;
1004 length
=unorm_concatenate(left
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1005 if(errorCode
!=U_UNEXPECTED_TOKEN
|| buffer
[2]!=0xaaaa) {
1006 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length
, u_errorName(errorCode
));
1009 /* illegal arguments */
1011 errorCode
=U_ZERO_ERROR
;
1012 length
=unorm_concatenate(NULL
, 2, right
, -1, buffer
, 100, UNORM_NFC
, 0, &errorCode
);
1013 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
|| buffer
[2]!=0xaaaa) {
1014 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
1017 errorCode
=U_ZERO_ERROR
;
1018 length
=unorm_concatenate(left
, 2, right
, -1, NULL
, 100, UNORM_NFC
, 0, &errorCode
);
1019 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1020 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length
, u_errorName(errorCode
));
/*
 * Printable names for normalization modes, indexed by the UNormalizationMode
 * value (the array has UNORM_MODE_COUNT slots). _testIter() uses it as
 * _modeString[mode] in its log messages.
 */
1028 static const char *const _modeString
[UNORM_MODE_COUNT
]={
1029 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
/*
 * Iteration helper for TestNextPrevious(): steps through `iter` with
 * unorm_next() / unorm_previous() and, for each step, compares
 *   - the iterator's current index with the expected entry of srcIndexes[],
 *   - the returned length and text with the expected piece, and
 *   - the neededToNormalize flag with whether the result differs from the
 *     corresponding input span (u_memcmp against `in`).
 *
 * src, srcLength    source text (callers pass the array the iterator was set to)
 * iter              iterator positioned by the caller
 * mode              normalization mode; also indexes _modeString[] in messages
 * forward           logged as "next/previous %d"; NOTE(review): the branch that
 *                   picks unorm_next vs. unorm_previous is not visible here
 * out, outLength    expected output pieces separated by _PLUS, or NULL;
 *                   (out!=NULL) is passed down as the UBool argument
 * srcIndexes, srcIndexesLength
 *                   expected iterator indexes for each iteration piece
 */
1033 _testIter(const UChar
*src
, int32_t srcLength
,
1034 UCharIterator
*iter
, UNormalizationMode mode
, UBool forward
,
1035 const UChar
*out
, int32_t outLength
,
1036 const int32_t *srcIndexes
, int32_t srcIndexesLength
) {
1038 const UChar
*expect
, *outLimit
, *in
;
1039 int32_t length
, i
, expectLength
, expectIndex
, prevIndex
, index
, inLength
;
1040 UErrorCode errorCode
;
1041 UBool neededToNormalize
, expectNeeded
;
1043 errorCode
=U_ZERO_ERROR
;
1044 outLimit
=out
+outLength
;
1050 i
=srcIndexesLength
-2;
1057 if(!iter
->hasNext(iter
)) {
1060 length
=unorm_next(iter
,
1061 buffer
, UPRV_LENGTHOF(buffer
),
1063 (UBool
)(out
!=NULL
), &neededToNormalize
,
1065 expectIndex
=srcIndexes
[i
+1];
1067 inLength
=expectIndex
-prevIndex
;
1070 /* get output piece from between plus signs */
1072 while((expect
+expectLength
)!=outLimit
&& expect
[expectLength
]!=_PLUS
) {
1075 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1078 expectLength
=inLength
;
1082 if(!iter
->hasPrevious(iter
)) {
1085 length
=unorm_previous(iter
,
1086 buffer
, UPRV_LENGTHOF(buffer
),
1088 (UBool
)(out
!=NULL
), &neededToNormalize
,
1090 expectIndex
=srcIndexes
[i
];
1092 inLength
=prevIndex
-expectIndex
;
1095 /* get output piece from between plus signs */
1097 while(expect
!=out
&& expect
[-1]!=_PLUS
) {
1101 expectNeeded
=(UBool
)(0!=u_memcmp(buffer
, in
, inLength
));
1104 expectLength
=inLength
;
1108 index
=iter
->getIndex(iter
, UITER_CURRENT
);
1110 if(U_FAILURE(errorCode
)) {
1111 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1112 forward
, _modeString
[mode
], i
, u_errorName(errorCode
));
1115 if(expectIndex
!=index
) {
1116 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1117 forward
, _modeString
[mode
], i
, index
, expectIndex
);
1120 if(expectLength
!=length
) {
1121 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1122 forward
, _modeString
[mode
], i
, length
, expectLength
);
1125 if(0!=u_memcmp(expect
, buffer
, length
)) {
1126 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1127 forward
, _modeString
[mode
], i
);
1130 if(neededToNormalize
!=expectNeeded
) {
1134 expect
+=expectLength
+1; /* go after the + */
1137 --expect
; /* go before the + */
/*
 * Tests unorm_next() / unorm_previous().
 *
 * Runs _testIter() over one source string for NFD, NFKD, NFC, NFKC and FCD,
 * forward (from the start) and backward (after setting iter.index to
 * iter.length), first with the expected normalized pieces and then with a
 * NULL expectation ("without doNormalize"). Afterwards calls unorm_next()
 * directly to cover a NULL neededToNormalize pointer, preflighting with a
 * NULL or too-small buffer, a NULL iterator, an illegal mode, and an error
 * code that is already a failure on entry.
 *
 * NOTE(review): the index-array lengths are passed as sizeof(x)/4, which
 * hard-codes sizeof(int32_t); UPRV_LENGTHOF(x), already used for the other
 * arrays in these calls, would express the same count without the magic
 * number.
 */
1144 TestNextPrevious() {
1146 src
[]={ /* input string */
1147 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1149 nfd
[]={ /* + separates expected output pieces */
1150 0xa0, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x3133
1153 0x20, _PLUS
, 0x61, 0x308, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0x1100, 0x1161, _PLUS
, 0x11aa
1156 0xa0, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1159 0x20, _PLUS
, 0xe4, _PLUS
, 0xe7, 0x302, _PLUS
, 0xac03
1162 0xa0, _PLUS
, 0xe4, _PLUS
, 0x63, 0x327, 0x302, _PLUS
, 0xac00, _PLUS
, 0x3133
1165 /* expected iterator indexes in the source string for each iteration piece */
1166 static const int32_t
1188 UBool neededToNormalize
;
1189 UErrorCode errorCode
;
1191 uiter_setString(&iter
, src
, UPRV_LENGTHOF(src
));
1193 /* test iteration with doNormalize */
1195 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, TRUE
, nfd
, UPRV_LENGTHOF(nfd
), nfdIndexes
, sizeof(nfdIndexes
)/4);
1197 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, TRUE
, nfkd
, UPRV_LENGTHOF(nfkd
), nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1199 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, TRUE
, nfc
, UPRV_LENGTHOF(nfc
), nfcIndexes
, sizeof(nfcIndexes
)/4);
1201 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, TRUE
, nfkc
, UPRV_LENGTHOF(nfkc
), nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1203 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, TRUE
, fcd
, UPRV_LENGTHOF(fcd
), fcdIndexes
, sizeof(fcdIndexes
)/4);
1205 iter
.index
=iter
.length
;
1206 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, FALSE
, nfd
, UPRV_LENGTHOF(nfd
), nfdIndexes
, sizeof(nfdIndexes
)/4);
1207 iter
.index
=iter
.length
;
1208 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, FALSE
, nfkd
, UPRV_LENGTHOF(nfkd
), nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1209 iter
.index
=iter
.length
;
1210 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, FALSE
, nfc
, UPRV_LENGTHOF(nfc
), nfcIndexes
, sizeof(nfcIndexes
)/4);
1211 iter
.index
=iter
.length
;
1212 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, FALSE
, nfkc
, UPRV_LENGTHOF(nfkc
), nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1213 iter
.index
=iter
.length
;
1214 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, FALSE
, fcd
, UPRV_LENGTHOF(fcd
), fcdIndexes
, sizeof(fcdIndexes
)/4);
1216 /* test iteration without doNormalize */
1218 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, TRUE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1220 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, TRUE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1222 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, TRUE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1224 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, TRUE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1226 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, TRUE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1228 iter
.index
=iter
.length
;
1229 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFD
, FALSE
, NULL
, 0, nfdIndexes
, sizeof(nfdIndexes
)/4);
1230 iter
.index
=iter
.length
;
1231 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKD
, FALSE
, NULL
, 0, nfkdIndexes
, sizeof(nfkdIndexes
)/4);
1232 iter
.index
=iter
.length
;
1233 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFC
, FALSE
, NULL
, 0, nfcIndexes
, sizeof(nfcIndexes
)/4);
1234 iter
.index
=iter
.length
;
1235 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_NFKC
, FALSE
, NULL
, 0, nfkcIndexes
, sizeof(nfkcIndexes
)/4);
1236 iter
.index
=iter
.length
;
1237 _testIter(src
, UPRV_LENGTHOF(src
), &iter
, UNORM_FCD
, FALSE
, NULL
, 0, fcdIndexes
, sizeof(fcdIndexes
)/4);
1239 /* try without neededToNormalize */
1240 errorCode
=U_ZERO_ERROR
;
1243 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1244 UNORM_NFD
, 0, TRUE
, NULL
,
1246 if(U_FAILURE(errorCode
) || length
!=2 || buffer
[0]!=nfd
[2] || buffer
[1]!=nfd
[3]) {
1247 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode
));
1252 neededToNormalize
=9;
1254 length
=unorm_next(&iter
, NULL
, 0,
1255 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1257 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2) {
1258 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode
));
1262 errorCode
=U_ZERO_ERROR
;
1263 buffer
[0]=buffer
[1]=5;
1264 neededToNormalize
=9;
1266 length
=unorm_next(&iter
, buffer
, 1,
1267 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1269 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| neededToNormalize
!=FALSE
|| length
!=2 || buffer
[1]!=5) {
1270 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode
));
1275 errorCode
=U_ZERO_ERROR
;
1276 buffer
[0]=buffer
[1]=5;
1277 neededToNormalize
=9;
1279 length
=unorm_next(NULL
, buffer
, UPRV_LENGTHOF(buffer
),
1280 UNORM_NFD
, 0, TRUE
, &neededToNormalize
,
1282 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1283 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode
));
1288 buffer
[0]=buffer
[1]=5;
1289 neededToNormalize
=9;
1291 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1292 (UNormalizationMode
)0, 0, TRUE
, &neededToNormalize
,
1294 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1295 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode
));
1299 /* error coming in */
1300 errorCode
=U_MISPLACED_QUANTIFIER
;
1303 length
=unorm_next(&iter
, buffer
, UPRV_LENGTHOF(buffer
),
1304 UNORM_NFD
, 0, TRUE
, NULL
,
1306 if(errorCode
!=U_MISPLACED_QUANTIFIER
) {
1307 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode
));
/*
 * Tests u_getFC_NFKC_Closure().
 *
 * For each entry of the tests[] table the call must succeed, the returned
 * length must equal u_strlen() of the output, and the output must compare
 * equal to the entry's expected string. The error-handling part then expects
 * U_ILLEGAL_ARGUMENT_ERROR for a NULL destination, and expects the same code
 * after a further call made without a visible reset of errorCode (the
 * "U_FAILURE" case).
 *
 * NOTE(review): the failure message formats tests[i].c with %04lx; the field's
 * declaration is not visible here, so confirm its type matches a long.
 */
1313 TestFCNFKCClosure(void) {
1314 static const struct {
1320 { 0x037A, { 0x0020, 0x03B9, 0 } },
1321 { 0x03D2, { 0x03C5, 0 } },
1322 { 0x20A8, { 0x0072, 0x0073, 0 } },
1323 { 0x210B, { 0x0068, 0 } },
1324 { 0x210C, { 0x0068, 0 } },
1325 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1326 { 0x2122, { 0x0074, 0x006D, 0 } },
1327 { 0x2128, { 0x007A, 0 } },
1328 { 0x1D5DB, { 0x0068, 0 } },
1329 { 0x1D5ED, { 0x007A, 0 } },
1334 UErrorCode errorCode
;
1337 for(i
=0; i
<UPRV_LENGTHOF(tests
); ++i
) {
1338 errorCode
=U_ZERO_ERROR
;
1339 length
=u_getFC_NFKC_Closure(tests
[i
].c
, buffer
, UPRV_LENGTHOF(buffer
), &errorCode
);
1340 if(U_FAILURE(errorCode
) || length
!=u_strlen(buffer
) || 0!=u_strcmp(tests
[i
].s
, buffer
)) {
1341 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests
[i
].c
, u_errorName(errorCode
));
1345 /* error handling */
1346 errorCode
=U_ZERO_ERROR
;
1347 length
=u_getFC_NFKC_Closure(0x5c, NULL
, UPRV_LENGTHOF(buffer
), &errorCode
);
1348 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1349 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode
));
1352 length
=u_getFC_NFKC_Closure(0x5c, buffer
, UPRV_LENGTHOF(buffer
), &errorCode
);
1353 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1354 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode
));
/*
 * Tests the per-code-point normalization properties.
 *
 * First checks u_getIntPropertyMaxValue() for the four NF*_QUICK_CHECK
 * properties (UNORM_YES for NFD/NFKD, UNORM_MAYBE for NFC/NFKC) and that the
 * lead/trail canonical combining class maxima equal the maximum of
 * UCHAR_CANONICAL_COMBINING_CLASS.
 *
 * Then, per code point c: writes c into s[] and compares
 * u_getIntPropertyValue(c, UCHAR_NF*_QUICK_CHECK) with unorm_quickCheck() on
 * that string for each form; normalizes the string to NFD and compares the
 * lccc/tccc property values of c with u_getCombiningClass() of the first and
 * last code point of the NFD result.
 */
1359 TestQuickCheckPerCP() {
1360 UErrorCode errorCode
;
1361 UChar32 c
, lead
, trail
;
1362 UChar s
[U16_MAX_LENGTH
], nfd
[16];
1363 int32_t length
, lccc1
, lccc2
, tccc1
, tccc2
;
1367 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1368 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK
)!=(int32_t)UNORM_YES
||
1369 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1370 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK
)!=(int32_t)UNORM_MAYBE
||
1371 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
) ||
1372 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS
)
1374 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1378 * compare the quick check property values for some code points
1379 * to the quick check results for checking same-code point strings
1381 errorCode
=U_ZERO_ERROR
;
1385 U16_APPEND_UNSAFE(s
, length
, c
);
1387 qc1
=u_getIntPropertyValue(c
, UCHAR_NFC_QUICK_CHECK
);
1388 qc2
=unorm_quickCheck(s
, length
, UNORM_NFC
, &errorCode
);
1390 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1393 qc1
=u_getIntPropertyValue(c
, UCHAR_NFD_QUICK_CHECK
);
1394 qc2
=unorm_quickCheck(s
, length
, UNORM_NFD
, &errorCode
);
1396 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1399 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKC_QUICK_CHECK
);
1400 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKC
, &errorCode
);
1402 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1405 qc1
=u_getIntPropertyValue(c
, UCHAR_NFKD_QUICK_CHECK
);
1406 qc2
=unorm_quickCheck(s
, length
, UNORM_NFKD
, &errorCode
);
1408 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1
, qc2
, c
);
1411 length
=unorm_normalize(s
, length
, UNORM_NFD
, 0, nfd
, UPRV_LENGTHOF(nfd
), &errorCode
);
1412 if (U_FAILURE(errorCode
)) {
1413 log_data_err("%s:%d errorCode=%s\n", __FILE__
, __LINE__
, u_errorName(errorCode
));
1416 /* length-length == 0 is used to get around a compiler warning. */
1417 U16_GET(nfd
, 0, length
-length
, length
, lead
);
1418 U16_GET(nfd
, 0, length
-1, length
, trail
);
1420 lccc1
=u_getIntPropertyValue(c
, UCHAR_LEAD_CANONICAL_COMBINING_CLASS
);
1421 lccc2
=u_getCombiningClass(lead
);
1422 tccc1
=u_getIntPropertyValue(c
, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
);
1423 tccc2
=u_getCombiningClass(trail
);
1426 log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1430 log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1434 /* skip some code points */
/*
 * Table-driven check of unorm_normalize() for composition corner cases
 * (Unicode Corrigendum #5, normalization idempotency).
 *
 * Each cases[] row carries a mode, options and two UChar arrays; the call is
 * made with the row's mode and options, and a failure status, a result length
 * different from u_strlen(cases[i].expect), or differing contents is reported
 * with the case index. NOTE(review): the line passing the input array is not
 * visible here; presumably the first array of each row is the input.
 */
1440 TestComposition(void) {
1441 static const struct {
1442 UNormalizationMode mode
;
1448 * special cases for UAX #15 bug
1449 * see Unicode Corrigendum #5: Normalization Idempotency
1450 * at http://unicode.org/versions/corrigendum5.html
1451 * (was Public Review Issue #29)
1453 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1454 { UNORM_NFC
, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1455 { UNORM_NFC
, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1456 { UNORM_NFC
, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1458 /* TODO: add test cases for UNORM_FCC here (j2151) */
1462 UErrorCode errorCode
;
1465 for(i
=0; i
<UPRV_LENGTHOF(cases
); ++i
) {
1466 errorCode
=U_ZERO_ERROR
;
1467 length
=unorm_normalize(
1469 cases
[i
].mode
, cases
[i
].options
,
1470 output
, UPRV_LENGTHOF(output
),
1472 if( U_FAILURE(errorCode
) ||
1473 length
!=u_strlen(cases
[i
].expect
) ||
1474 0!=u_memcmp(output
, cases
[i
].expect
, length
)
1476 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i
);
1482 TestGetDecomposition() {
1486 UErrorCode errorCode
=U_ZERO_ERROR
;
1487 const UNormalizer2
*n2
=unorm2_getInstance(NULL
, "nfc", UNORM2_COMPOSE_CONTIGUOUS
, &errorCode
);
1488 if(U_FAILURE(errorCode
)) {
1489 log_err_status(errorCode
, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode
));
1493 length
=unorm2_getDecomposition(n2
, 0x20, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1494 if(U_FAILURE(errorCode
) || length
>=0) {
1495 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1497 errorCode
=U_ZERO_ERROR
;
1498 length
=unorm2_getDecomposition(n2
, 0xe4, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1499 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1500 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1502 errorCode
=U_ZERO_ERROR
;
1503 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1504 if(U_FAILURE(errorCode
) || length
!=3 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0x11a8 || decomp
[3]!=0) {
1505 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1507 errorCode
=U_ZERO_ERROR
;
1508 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1509 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=3) {
1510 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1512 errorCode
=U_ZERO_ERROR
;
1513 length
=unorm2_getDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1514 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1515 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1517 errorCode
=U_ZERO_ERROR
;
1518 length
=unorm2_getDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1519 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1520 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1525 TestGetRawDecomposition() {
1529 UErrorCode errorCode
=U_ZERO_ERROR
;
1530 const UNormalizer2
*n2
=unorm2_getNFKCInstance(&errorCode
);
1531 if(U_FAILURE(errorCode
)) {
1532 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1536 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1537 * without recursive decomposition.
1540 length
=unorm2_getRawDecomposition(n2
, 0x20, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1541 if(U_FAILURE(errorCode
) || length
>=0) {
1542 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1544 errorCode
=U_ZERO_ERROR
;
1545 length
=unorm2_getRawDecomposition(n2
, 0xe4, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1546 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x61 || decomp
[1]!=0x308 || decomp
[2]!=0) {
1547 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1549 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1550 errorCode
=U_ZERO_ERROR
;
1551 length
=unorm2_getRawDecomposition(n2
, 0x1e08, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1552 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xc7 || decomp
[1]!=0x301 || decomp
[2]!=0) {
1553 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1555 /* U+212B ANGSTROM SIGN */
1556 errorCode
=U_ZERO_ERROR
;
1557 length
=unorm2_getRawDecomposition(n2
, 0x212b, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1558 if(U_FAILURE(errorCode
) || length
!=1 || decomp
[0]!=0xc5 || decomp
[1]!=0) {
1559 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1561 errorCode
=U_ZERO_ERROR
;
1562 length
=unorm2_getRawDecomposition(n2
, 0xac00, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1563 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0x1100 || decomp
[1]!=0x1161 || decomp
[2]!=0) {
1564 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1566 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1567 errorCode
=U_ZERO_ERROR
;
1568 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, UPRV_LENGTHOF(decomp
), &errorCode
);
1569 if(U_FAILURE(errorCode
) || length
!=2 || decomp
[0]!=0xac00 || decomp
[1]!=0x11a8 || decomp
[2]!=0) {
1570 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1572 errorCode
=U_ZERO_ERROR
;
1573 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 0, &errorCode
);
1574 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=2) {
1575 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1577 errorCode
=U_ZERO_ERROR
;
1578 length
=unorm2_getRawDecomposition(n2
, 0xac01, decomp
, -1, &errorCode
);
1579 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1580 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1582 errorCode
=U_ZERO_ERROR
;
1583 length
=unorm2_getRawDecomposition(n2
, 0xac01, NULL
, 4, &errorCode
);
1584 if(errorCode
!=U_ILLEGAL_ARGUMENT_ERROR
) {
1585 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1590 TestAppendRestoreMiddle() {
1591 UChar a
[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1592 static const UChar b
[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1593 /* NFC: C5 is 'A with ring above' */
1594 static const UChar expected
[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1596 UErrorCode errorCode
=U_ZERO_ERROR
;
1597 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1598 if(U_FAILURE(errorCode
)) {
1599 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1603 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1604 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1605 * still fits into a[] but the full result still overflows this capacity.
1606 * (Let it modify the destination buffer before reallocating internally.)
1608 length
=unorm2_append(n2
, a
, -1, 6, b
, -1, &errorCode
);
1609 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=UPRV_LENGTHOF(expected
)) {
1610 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length
);
1613 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1614 if(a
[0]!=0x61 || a
[1]!=0x62 || a
[2]!=0x63 || a
[3]!=0x41 || a
[4]!=0x327 || a
[5]!=0) {
1615 log_err("unorm2_append(overflow) modified the first string\n");
1618 errorCode
=U_ZERO_ERROR
;
1619 length
=unorm2_append(n2
, a
, -1, UPRV_LENGTHOF(a
), b
, -1, &errorCode
);
1620 if(U_FAILURE(errorCode
) || length
!=UPRV_LENGTHOF(expected
) || 0!=u_memcmp(a
, expected
, length
)) {
1621 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode
), (int)length
);
1627 TestGetEasyToUseInstance() {
1628 static const UChar in
[]={
1629 0xA0, /* -> <noBreak> 0020 */
1630 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1635 UErrorCode errorCode
=U_ZERO_ERROR
;
1636 const UNormalizer2
*n2
=unorm2_getNFCInstance(&errorCode
);
1637 if(U_FAILURE(errorCode
)) {
1638 log_err_status(errorCode
, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode
));
1641 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
1642 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0xa0 || out
[1]!=0x1e08) {
1643 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1644 (int)length
, u_errorName(errorCode
));
1647 errorCode
=U_ZERO_ERROR
;
1648 n2
=unorm2_getNFDInstance(&errorCode
);
1649 if(U_FAILURE(errorCode
)) {
1650 log_err_status(errorCode
, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode
));
1653 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
1654 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0xa0 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1655 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1656 (int)length
, u_errorName(errorCode
));
1659 errorCode
=U_ZERO_ERROR
;
1660 n2
=unorm2_getNFKCInstance(&errorCode
);
1661 if(U_FAILURE(errorCode
)) {
1662 log_err_status(errorCode
, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode
));
1665 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
1666 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e08) {
1667 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1668 (int)length
, u_errorName(errorCode
));
1671 errorCode
=U_ZERO_ERROR
;
1672 n2
=unorm2_getNFKDInstance(&errorCode
);
1673 if(U_FAILURE(errorCode
)) {
1674 log_err_status(errorCode
, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode
));
1677 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
1678 if(U_FAILURE(errorCode
) || length
!=4 || out
[0]!=0x20 || out
[1]!=0x43 || out
[2]!=0x327 || out
[3]!=0x301) {
1679 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1680 (int)length
, u_errorName(errorCode
));
1683 errorCode
=U_ZERO_ERROR
;
1684 n2
=unorm2_getNFKCCasefoldInstance(&errorCode
);
1685 if(U_FAILURE(errorCode
)) {
1686 log_err_status(errorCode
, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode
));
1689 length
=unorm2_normalize(n2
, in
, UPRV_LENGTHOF(in
), out
, UPRV_LENGTHOF(out
), &errorCode
);
1690 if(U_FAILURE(errorCode
) || length
!=2 || out
[0]!=0x20 || out
[1]!=0x1e09) {
1691 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1692 (int)length
, u_errorName(errorCode
));
1696 #endif /* #if !UCONFIG_NO_NORMALIZATION */