]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cnormtst.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cnormtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CNORMTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unicode/utf16.h"
20 #include "cintltst.h"
21
22 #if !UCONFIG_NO_NORMALIZATION
23
24 #include <stdlib.h>
25 #include <time.h>
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "unicode/unorm.h"
29 #include "cnormtst.h"
30
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro stays a single operand
 * when it is used inside a larger expression (for example after sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
32
33 static void
34 TestAPI(void);
35
36 static void
37 TestNormCoverage(void);
38
39 static void
40 TestConcatenate(void);
41
42 static void
43 TestNextPrevious(void);
44
45 static void TestIsNormalized(void);
46
47 static void
48 TestFCNFKCClosure(void);
49
50 static void
51 TestQuickCheckPerCP(void);
52
53 static void
54 TestComposition(void);
55
56 static void
57 TestFCD(void);
58
59 static void
60 TestGetDecomposition(void);
61
62 static void
63 TestGetRawDecomposition(void);
64
65 static void TestAppendRestoreMiddle(void);
66 static void TestGetEasyToUseInstance(void);
67
/*
 * Canonical normalization cases.
 * Every row holds three escaped strings: the input, the expected canonical
 * decomposition, and the expected canonical composition. The \\uXXXX escapes
 * are kept as literal text here and are expanded when the tests run.
 */
static const char* const canonTests[][3] = {
    /* input                   decomposed                   composed */
    { "cat",                   "cat",                       "cat" },
    { "\\u00e0ardvark",        "a\\u0300ardvark",           "\\u00e0ardvark", },

    /* D with dot above: precomposed and as a sequence */
    { "\\u1e0a",               "D\\u0307",                  "\\u1e0a" },
    { "D\\u0307",              "D\\u0307",                  "\\u1e0a" },

    /* D with dot below and dot above, in various input orders */
    { "\\u1e0c\\u0307",        "D\\u0323\\u0307",           "\\u1e0c\\u0307" },
    { "\\u1e0a\\u0323",        "D\\u0323\\u0307",           "\\u1e0c\\u0307" },
    { "D\\u0307\\u0323",       "D\\u0323\\u0307",           "\\u1e0c\\u0307" },

    /* D-cedilla + dot_above + dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307",    "\\u1e10\\u0323\\u0307" },
    /* D + dot_above + ogonek + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307",   "\\u1e0c\\u0328\\u0307" },

    /* E with macron and grave */
    { "\\u1E14",               "E\\u0304\\u0300",           "\\u1E14" },         /* E-macron-grave */
    { "\\u0112\\u0300",        "E\\u0304\\u0300",           "\\u1E14" },         /* E-macron + grave */
    { "\\u00c8\\u0304",        "E\\u0300\\u0304",           "\\u00c8\\u0304" },  /* E-grave + macron */

    { "\\u212b",               "A\\u030a",                  "\\u00c5" },         /* angstrom sign */
    { "\\u00c5",               "A\\u030a",                  "\\u00c5" },         /* A-ring */

    { "\\u00C4ffin",           "A\\u0308ffin",              "\\u00C4ffin" },
    { "\\u00C4\\uFB03n",       "A\\u0308\\uFB03n",          "\\u00C4\\uFB03n" },

    { "Henry IV",              "Henry IV",                  "Henry IV" },
    { "Henry \\u2163",         "Henry \\u2163",             "Henry \\u2163" },

    { "\\u30AC",               "\\u30AB\\u3099",            "\\u30AC" },         /* ga (Katakana) */
    { "\\u30AB\\u3099",        "\\u30AB\\u3099",            "\\u30AC" },         /* ka + ten */
    { "\\uFF76\\uFF9E",        "\\uFF76\\uFF9E",            "\\uFF76\\uFF9E" },  /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",        "\\u30AB\\uFF9E",            "\\u30AB\\uFF9E" },  /* ka + hw_ten */
    { "\\uFF76\\u3099",        "\\uFF76\\u3099",            "\\uFF76\\u3099" },  /* hw_ka + ten */
    /* A followed by two combining marks that change order when normalized */
    { "A\\u0300\\u0316",       "A\\u0316\\u0300",           "\\u00C0\\u0316" },
    { "",                      "",                          "" }
};
104
/*
 * Compatibility normalization cases.
 * Every row holds three escaped strings: the input, the expected
 * compatibility decomposition, and the expected compatibility composition.
 */
static const char* const compatTests[][3] = {
    /* input              decomposed           composed */
    { "cat",              "cat",               "cat" },

    /* Alef-Lamed ligature versus the Alef, Lamed sequence */
    { "\\uFB4f",          "\\u05D0\\u05DC",    "\\u05D0\\u05DC" },

    { "\\u00C4ffin",      "A\\u0308ffin",      "\\u00C4ffin" },
    { "\\u00C4\\uFB03n",  "A\\u0308ffin",      "\\u00C4ffin" },  /* ffi ligature -> f + f + i */

    { "Henry IV",         "Henry IV",          "Henry IV" },
    { "Henry \\u2163",    "Henry IV",          "Henry IV" },

    { "\\u30AC",          "\\u30AB\\u3099",    "\\u30AC" },      /* ga (Katakana) */
    { "\\u30AB\\u3099",   "\\u30AB\\u3099",    "\\u30AC" },      /* ka + ten */

    { "\\uFF76\\u3099",   "\\u30AB\\u3099",    "\\u30AC" },      /* hw_ka + ten */

    /* The next two rows are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E",   "\\u30AB\\u3099",    "\\u30AC" },      /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",   "\\u30AB\\u3099",    "\\u30AC" },      /* ka + hw_ten */
    { "",                 "",                  "" }
};
127
/*
 * FCD cases: input string and expected result; the third column is unused
 * and left NULL.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e",        "\\u010e",         NULL }
};
133
134 void addNormTest(TestNode** root);
135
136 void addNormTest(TestNode** root)
137 {
138 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
139 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
140 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
141 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
142 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
143 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
144 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
145 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
146 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
147 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
148 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
149 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
150 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
151 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
152 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
153 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
154 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
155 addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
156 addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
157 addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
158 }
159
/*
 * Printable names for normalization modes, used in log messages.
 * The table is indexed directly with a UNormalizationMode value.
 */
static const char* const modeStrings[]={
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
169
170 static void TestNormCases(UNormalizationMode mode,
171 const char* const cases[][3], int32_t lengthOfCases) {
172 int32_t x, neededLen, length2;
173 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
174 UChar *source=NULL;
175 UChar result[16];
176 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
177 for(x=0; x < lengthOfCases; x++)
178 {
179 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
180 source=CharsToUChars(cases[x][0]);
181 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
182 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
183 if(neededLen!=length2) {
184 log_err("ERROR in unorm_normalize(%s)[%d]: "
185 "preflight length/NUL %d!=%d preflight length/srcLength\n",
186 modeStrings[mode], (int)x, (int)neededLen, (int)length2);
187 }
188 if(status==U_BUFFER_OVERFLOW_ERROR)
189 {
190 status=U_ZERO_ERROR;
191 }
192 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status);
193 if(U_FAILURE(status) || neededLen!=length2) {
194 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
195 modeStrings[mode], austrdup(source), myErrorName(status));
196 } else {
197 assertEqual(result, cases[x][expIndex], x);
198 }
199 length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status);
200 if(U_FAILURE(status) || neededLen!=length2) {
201 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
202 modeStrings[mode], austrdup(source), myErrorName(status));
203 } else {
204 assertEqual(result, cases[x][expIndex], x);
205 }
206 free(source);
207 }
208 }
209
210 void TestDecomp() {
211 TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests));
212 }
213
214 void TestCompatDecomp() {
215 TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests));
216 }
217
218 void TestCanonDecompCompose() {
219 TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests));
220 }
221
222 void TestCompatDecompCompose() {
223 TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests));
224 }
225
226 void TestFCD() {
227 TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests));
228 }
229
230 static void assertEqual(const UChar* result, const char* expected, int32_t index)
231 {
232 UChar *expectedUni = CharsToUChars(expected);
233 if(u_strcmp(result, expectedUni)!=0){
234 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
235 austrdup(result) );
236 }
237 free(expectedUni);
238 }
239
240 static void TestNull_check(UChar *src, int32_t srcLen,
241 UChar *exp, int32_t expLen,
242 UNormalizationMode mode,
243 const char *name)
244 {
245 UErrorCode status = U_ZERO_ERROR;
246 int32_t len, i;
247
248 UChar result[50];
249
250
251 status = U_ZERO_ERROR;
252
253 for(i=0;i<50;i++)
254 {
255 result[i] = 0xFFFD;
256 }
257
258 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
259
260 if(U_FAILURE(status)) {
261 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
262 } else if (len != expLen) {
263 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
264 }
265
266 {
267 for(i=0;i<len;i++){
268 if(exp[i] != result[i]) {
269 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
270 name,
271 i,
272 exp[i],
273 result[i]);
274 return;
275 }
276 log_verbose(" %d: \\u%04X\n", i, result[i]);
277 }
278 }
279
280 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
281 }
282
283 void TestNull()
284 {
285
286 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
287 int32_t source_comp_len = 4;
288 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
289 int32_t expect_comp_len = 3;
290
291 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
292 int32_t source_dcmp_len = 3;
293 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
294 int32_t expect_dcmp_len = 5;
295
296 TestNull_check(source_comp,
297 source_comp_len,
298 expect_comp,
299 expect_comp_len,
300 UNORM_NFC,
301 "UNORM_NFC");
302
303 TestNull_check(source_dcmp,
304 source_dcmp_len,
305 expect_dcmp,
306 expect_dcmp_len,
307 UNORM_NFD,
308 "UNORM_NFD");
309
310 TestNull_check(source_comp,
311 source_comp_len,
312 expect_comp,
313 expect_comp_len,
314 UNORM_NFKC,
315 "UNORM_NFKC");
316
317
318 }
319
320 static void TestQuickCheckResultNO()
321 {
322 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
323 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
324 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
325 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
326 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
327 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
328 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
329 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330
331
332 const int SIZE = 10;
333
334 int count = 0;
335 UErrorCode error = U_ZERO_ERROR;
336
337 for (; count < SIZE; count ++)
338 {
339 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
340 UNORM_NO)
341 {
342 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
343 return;
344 }
345 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
346 UNORM_NO)
347 {
348 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
349 return;
350 }
351 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
352 UNORM_NO)
353 {
354 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
355 return;
356 }
357 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
358 UNORM_NO)
359 {
360 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
361 return;
362 }
363 }
364 }
365
366
367 static void TestQuickCheckResultYES()
368 {
369 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
370 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
371 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
372 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
373 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
374 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
375 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
376 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
377
378 const int SIZE = 10;
379 int count = 0;
380 UErrorCode error = U_ZERO_ERROR;
381
382 UChar cp = 0;
383 while (cp < 0xA0)
384 {
385 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
386 {
387 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
388 return;
389 }
390 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
391 UNORM_YES)
392 {
393 log_err("ERROR in NFC quick check at U+%04x\n", cp);
394 return;
395 }
396 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
397 {
398 log_err("ERROR in NFKD quick check at U+%04x\n", cp);
399 return;
400 }
401 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
402 UNORM_YES)
403 {
404 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
405 return;
406 }
407 cp ++;
408 }
409
410 for (; count < SIZE; count ++)
411 {
412 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
413 UNORM_YES)
414 {
415 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
416 return;
417 }
418 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
419 != UNORM_YES)
420 {
421 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
422 return;
423 }
424 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
425 UNORM_YES)
426 {
427 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
428 return;
429 }
430 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
431 UNORM_YES)
432 {
433 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
434 return;
435 }
436 }
437 }
438
439 static void TestQuickCheckResultMAYBE()
440 {
441 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
442 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
443 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
444 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
445
446
447 const int SIZE = 10;
448
449 int count = 0;
450 UErrorCode error = U_ZERO_ERROR;
451
452 /* NFD and NFKD does not have any MAYBE codepoints */
453 for (; count < SIZE; count ++)
454 {
455 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
456 UNORM_MAYBE)
457 {
458 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
459 return;
460 }
461 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
462 UNORM_MAYBE)
463 {
464 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
465 return;
466 }
467 }
468 }
469
470 static void TestQuickCheckStringResult()
471 {
472 int count;
473 UChar *d = NULL;
474 UChar *c = NULL;
475 UErrorCode error = U_ZERO_ERROR;
476
477 for (count = 0; count < LENGTHOF(canonTests); count ++)
478 {
479 d = CharsToUChars(canonTests[count][1]);
480 c = CharsToUChars(canonTests[count][2]);
481 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
482 UNORM_YES)
483 {
484 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
485 return;
486 }
487
488 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
489 UNORM_NO)
490 {
491 log_err("ERROR in NFC quick check for string at count %d\n", count);
492 return;
493 }
494
495 free(d);
496 free(c);
497 }
498
499 for (count = 0; count < LENGTHOF(compatTests); count ++)
500 {
501 d = CharsToUChars(compatTests[count][1]);
502 c = CharsToUChars(compatTests[count][2]);
503 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
504 UNORM_YES)
505 {
506 log_err("ERROR in NFKD quick check for string at count %d\n", count);
507 return;
508 }
509
510 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
511 UNORM_YES)
512 {
513 log_err("ERROR in NFKC quick check for string at count %d\n", count);
514 return;
515 }
516
517 free(d);
518 free(c);
519 }
520 }
521
/*
 * Entry point for the quick-check tests: per-code-unit NO, YES and MAYBE
 * answers first, then whole strings from the test tables.
 */
void TestQuickCheck()
{
    /* single code units, one expected answer per helper */
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();

    /* complete strings */
    TestQuickCheckStringResult();
}
529
530 /*
531 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
532 * normalized, and some that are not.
533 * Here we pick some specific cases and test the C API.
534 */
535 static void TestIsNormalized(void) {
536 static const UChar notNFC[][8]={ /* strings that are not in NFC */
537 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
538 { 0xfb1d, 0 }, /* excluded from composition */
539 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
540 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
541 };
542 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
543 { 0x1100, 0x1161, 0 }, /* Jamo compose */
544 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
545 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
546 };
547
548 int32_t i;
549 UErrorCode errorCode;
550
551 /* API test */
552
553 /* normal case with length>=0 (length -1 used for special cases below) */
554 errorCode=U_ZERO_ERROR;
555 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
556 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
557 }
558
559 /* incoming U_FAILURE */
560 errorCode=U_TRUNCATED_CHAR_FOUND;
561 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
562 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
563 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
564 }
565
566 /* NULL source */
567 errorCode=U_ZERO_ERROR;
568 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
569 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
570 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
571 }
572
573 /* bad length */
574 errorCode=U_ZERO_ERROR;
575 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
576 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
577 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
578 }
579
580 /* specific cases */
581 for(i=0; i<LENGTHOF(notNFC); ++i) {
582 errorCode=U_ZERO_ERROR;
583 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
584 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
585 }
586 errorCode=U_ZERO_ERROR;
587 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
588 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
589 }
590 }
591 for(i=0; i<LENGTHOF(notNFKC); ++i) {
592 errorCode=U_ZERO_ERROR;
593 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
594 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
595 }
596 }
597 }
598
599 void TestCheckFCD()
600 {
601 UErrorCode status = U_ZERO_ERROR;
602 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
603 0x0A};
604 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
605 0x02B9, 0x0314, 0x0315, 0x0316};
606 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
607 0x0050, 0x0730, 0x09EE, 0x1E10};
608
609 static const UChar datastr[][5] =
610 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
611 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
612 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
613 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
614 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
615
616 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
617 0x6a,
618 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
619 0xea,
620 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
621 0x0307, 0x0308, 0x0309, 0x030a,
622 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
623 0x0327, 0x0328, 0x0329, 0x032a,
624 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
625 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
626
627 int count = 0;
628
629 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
630 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
631 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
632 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
633 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
634 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
635
636 if (U_FAILURE(status))
637 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
638
639 while (count < 4)
640 {
641 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
642 if (U_FAILURE(status)) {
643 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
644 break;
645 }
646 else {
647 if (result[count] != fcdresult) {
648 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
649 result[count]);
650 }
651 }
652 count ++;
653 }
654
655 /* random checks of long strings */
656 status = U_ZERO_ERROR;
657 srand((unsigned)time( NULL ));
658
659 for (count = 0; count < 50; count ++)
660 {
661 int size = 0;
662 UBool testresult = UNORM_YES;
663 UChar data[20];
664 UChar norm[100];
665 UChar nfd[100];
666 int normsize = 0;
667 int nfdsize = 0;
668
669 while (size != 19) {
670 data[size] = datachar[(rand() * 50) / RAND_MAX];
671 log_verbose("0x%x", data[size]);
672 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
673 norm + normsize, 100 - normsize, &status);
674 if (U_FAILURE(status)) {
675 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
676 break;
677 }
678 size ++;
679 }
680 log_verbose("\n");
681
682 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
683 nfd, 100, &status);
684 if (U_FAILURE(status)) {
685 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
686 }
687
688 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
689 testresult = UNORM_NO;
690 }
691 if (testresult == UNORM_YES) {
692 log_verbose("result UNORM_YES\n");
693 }
694 else {
695 log_verbose("result UNORM_NO\n");
696 }
697
698 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
699 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
700 }
701 }
702 }
703
704 static void
705 TestAPI() {
706 static const UChar in[]={ 0x68, 0xe4 };
707 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
708 UErrorCode errorCode;
709 int32_t length;
710
711 /* try preflighting */
712 errorCode=U_ZERO_ERROR;
713 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
714 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
715 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
716 return;
717 }
718
719 errorCode=U_ZERO_ERROR;
720 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
721 if(U_FAILURE(errorCode)) {
722 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
723 return;
724 }
725 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
726 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
727 return;
728 }
729 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
730 if(U_FAILURE(errorCode)) {
731 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
732 return;
733 }
734 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
735 if(U_FAILURE(errorCode)) {
736 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
737 return;
738 }
739 }
740
/* Code points used by the test cases that improve test code coverage. */
enum {
    /* compatibility Jamo */
    HANGUL_K_KIYEOK=0x3131,             /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,                /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,        /* NFKD->Jamo T U+11aa */

    /* regular Jamo */
    HANGUL_KIYEOK=0x1100,               /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,                  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,          /* Jamo T U+11aa */

    /* Hangul syllables */
    HANGUL_AC00=0xac00,                 /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=0xac00+14*28+3,     /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,          /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,               /* cc=216 */
    MUSICAL_STACCATO=0x1d17c            /* cc=220 */
};
759
760 static void
761 TestNormCoverage() {
762 UChar input[1000], expect[1000], output[1000];
763 UErrorCode errorCode;
764 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
765
766 /* create a long and nasty string with NFKC-unsafe characters */
767 inLength=0;
768
769 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
770 input[inLength++]=HANGUL_KIYEOK;
771 input[inLength++]=HANGUL_WEO;
772 input[inLength++]=HANGUL_KIYEOK_SIOS;
773
774 input[inLength++]=HANGUL_KIYEOK;
775 input[inLength++]=HANGUL_WEO;
776 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
777
778 input[inLength++]=HANGUL_KIYEOK;
779 input[inLength++]=HANGUL_K_WEO;
780 input[inLength++]=HANGUL_KIYEOK_SIOS;
781
782 input[inLength++]=HANGUL_KIYEOK;
783 input[inLength++]=HANGUL_K_WEO;
784 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
785
786 input[inLength++]=HANGUL_K_KIYEOK;
787 input[inLength++]=HANGUL_WEO;
788 input[inLength++]=HANGUL_KIYEOK_SIOS;
789
790 input[inLength++]=HANGUL_K_KIYEOK;
791 input[inLength++]=HANGUL_WEO;
792 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
793
794 input[inLength++]=HANGUL_K_KIYEOK;
795 input[inLength++]=HANGUL_K_WEO;
796 input[inLength++]=HANGUL_KIYEOK_SIOS;
797
798 input[inLength++]=HANGUL_K_KIYEOK;
799 input[inLength++]=HANGUL_K_WEO;
800 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
801
802 /* Hangul LV with normal/compatibility Jamo T */
803 input[inLength++]=HANGUL_AC00;
804 input[inLength++]=HANGUL_KIYEOK_SIOS;
805
806 input[inLength++]=HANGUL_AC00;
807 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
808
809 /* compatibility Jamo L, V */
810 input[inLength++]=HANGUL_K_KIYEOK;
811 input[inLength++]=HANGUL_K_WEO;
812
813 hangulPrefixLength=inLength;
814
815 input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
816 input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
817 for(i=0; i<200; ++i) {
818 input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
819 input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
820 input[inLength++]=U16_LEAD(MUSICAL_STEM);
821 input[inLength++]=U16_TRAIL(MUSICAL_STEM);
822 }
823
824 /* (compatibility) Jamo L, T do not compose */
825 input[inLength++]=HANGUL_K_KIYEOK;
826 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
827
828 /* quick checks */
829 errorCode=U_ZERO_ERROR;
830 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
831 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
832 }
833 errorCode=U_ZERO_ERROR;
834 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
835 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
836 }
837 errorCode=U_ZERO_ERROR;
838 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
839 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840 }
841 errorCode=U_ZERO_ERROR;
842 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
843 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844 }
845 errorCode=U_ZERO_ERROR;
846 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
847 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
848 }
849
850 /* NFKC */
851 expectLength=0;
852 expect[expectLength++]=HANGUL_SYLLABLE;
853
854 expect[expectLength++]=HANGUL_SYLLABLE;
855
856 expect[expectLength++]=HANGUL_SYLLABLE;
857
858 expect[expectLength++]=HANGUL_SYLLABLE;
859
860 expect[expectLength++]=HANGUL_SYLLABLE;
861
862 expect[expectLength++]=HANGUL_SYLLABLE;
863
864 expect[expectLength++]=HANGUL_SYLLABLE;
865
866 expect[expectLength++]=HANGUL_SYLLABLE;
867
868 expect[expectLength++]=HANGUL_AC00+3;
869
870 expect[expectLength++]=HANGUL_AC00+3;
871
872 expect[expectLength++]=HANGUL_AC00+14*28;
873
874 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
875 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
876 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
877 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
878 for(i=0; i<200; ++i) {
879 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
880 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
881 }
882 for(i=0; i<200; ++i) {
883 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
884 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
885 }
886
887 expect[expectLength++]=HANGUL_KIYEOK;
888 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
889
890 /* try destination overflow first */
891 errorCode=U_ZERO_ERROR;
892 preflightLength=unorm_normalize(input, inLength,
893 UNORM_NFKC, 0,
894 output, 100, /* too short */
895 &errorCode);
896 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
897 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
898 }
899
900 /* real NFKC */
901 errorCode=U_ZERO_ERROR;
902 length=unorm_normalize(input, inLength,
903 UNORM_NFKC, 0,
904 output, sizeof(output)/U_SIZEOF_UCHAR,
905 &errorCode);
906 if(U_FAILURE(errorCode)) {
907 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
908 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
909 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
910 for(i=0; i<length; ++i) {
911 if(output[i]!=expect[i]) {
912 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
913 break;
914 }
915 }
916 }
917 if(length!=preflightLength) {
918 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
919 }
920
921 /* FCD */
922 u_memcpy(expect, input, hangulPrefixLength);
923 expectLength=hangulPrefixLength;
924
925 expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
926 expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
927 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
928 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
929 for(i=0; i<200; ++i) {
930 expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
931 expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
932 }
933 for(i=0; i<200; ++i) {
934 expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
935 expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
936 }
937
938 expect[expectLength++]=HANGUL_K_KIYEOK;
939 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
940
941 errorCode=U_ZERO_ERROR;
942 length=unorm_normalize(input, inLength,
943 UNORM_FCD, 0,
944 output, sizeof(output)/U_SIZEOF_UCHAR,
945 &errorCode);
946 if(U_FAILURE(errorCode)) {
947 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
948 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
949 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
950 for(i=0; i<length; ++i) {
951 if(output[i]!=expect[i]) {
952 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
953 break;
954 }
955 }
956 }
957 }
958
959 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
960 static void
961 TestConcatenate(void) {
962 /* "re + 'sume'" */
963 static const UChar
964 left[]={
965 0x72, 0x65, 0
966 },
967 right[]={
968 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
969 },
970 expect[]={
971 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
972 };
973
974 UChar buffer[100];
975 UErrorCode errorCode;
976 int32_t length;
977
978 /* left with length, right NUL-terminated */
979 errorCode=U_ZERO_ERROR;
980 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
981 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
982 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
983 }
984
985 /* preflighting */
986 errorCode=U_ZERO_ERROR;
987 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
988 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
989 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
990 }
991
992 buffer[2]=0x5555;
993 errorCode=U_ZERO_ERROR;
994 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
995 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
996 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
997 }
998
999 /* enter with U_FAILURE */
1000 buffer[2]=0xaaaa;
1001 errorCode=U_UNEXPECTED_TOKEN;
1002 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1003 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1004 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1005 }
1006
1007 /* illegal arguments */
1008 buffer[2]=0xaaaa;
1009 errorCode=U_ZERO_ERROR;
1010 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1011 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1012 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1013 }
1014
1015 errorCode=U_ZERO_ERROR;
1016 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1017 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1018 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1019 }
1020 }
1021
/* Code unit 0x2b ('+'): separates the expected output pieces in the iteration test data. */
enum { _PLUS=0x2b };
1025
1026 static const char *const _modeString[UNORM_MODE_COUNT]={
1027 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1028 };
1029
1030 static void
1031 _testIter(const UChar *src, int32_t srcLength,
1032 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1033 const UChar *out, int32_t outLength,
1034 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1035 UChar buffer[4];
1036 const UChar *expect, *outLimit, *in;
1037 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1038 UErrorCode errorCode;
1039 UBool neededToNormalize, expectNeeded;
1040
1041 errorCode=U_ZERO_ERROR;
1042 outLimit=out+outLength;
1043 if(forward) {
1044 expect=out;
1045 i=index=0;
1046 } else {
1047 expect=outLimit;
1048 i=srcIndexesLength-2;
1049 index=srcLength;
1050 }
1051
1052 for(;;) {
1053 prevIndex=index;
1054 if(forward) {
1055 if(!iter->hasNext(iter)) {
1056 return;
1057 }
1058 length=unorm_next(iter,
1059 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1060 mode, 0,
1061 (UBool)(out!=NULL), &neededToNormalize,
1062 &errorCode);
1063 expectIndex=srcIndexes[i+1];
1064 in=src+prevIndex;
1065 inLength=expectIndex-prevIndex;
1066
1067 if(out!=NULL) {
1068 /* get output piece from between plus signs */
1069 expectLength=0;
1070 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1071 ++expectLength;
1072 }
1073 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1074 } else {
1075 expect=in;
1076 expectLength=inLength;
1077 expectNeeded=FALSE;
1078 }
1079 } else {
1080 if(!iter->hasPrevious(iter)) {
1081 return;
1082 }
1083 length=unorm_previous(iter,
1084 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1085 mode, 0,
1086 (UBool)(out!=NULL), &neededToNormalize,
1087 &errorCode);
1088 expectIndex=srcIndexes[i];
1089 in=src+expectIndex;
1090 inLength=prevIndex-expectIndex;
1091
1092 if(out!=NULL) {
1093 /* get output piece from between plus signs */
1094 expectLength=0;
1095 while(expect!=out && expect[-1]!=_PLUS) {
1096 ++expectLength;
1097 --expect;
1098 }
1099 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1100 } else {
1101 expect=in;
1102 expectLength=inLength;
1103 expectNeeded=FALSE;
1104 }
1105 }
1106 index=iter->getIndex(iter, UITER_CURRENT);
1107
1108 if(U_FAILURE(errorCode)) {
1109 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1110 forward, _modeString[mode], i, u_errorName(errorCode));
1111 return;
1112 }
1113 if(expectIndex!=index) {
1114 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1115 forward, _modeString[mode], i, index, expectIndex);
1116 return;
1117 }
1118 if(expectLength!=length) {
1119 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1120 forward, _modeString[mode], i, length, expectLength);
1121 return;
1122 }
1123 if(0!=u_memcmp(expect, buffer, length)) {
1124 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1125 forward, _modeString[mode], i);
1126 return;
1127 }
1128 if(neededToNormalize!=expectNeeded) {
1129 }
1130
1131 if(forward) {
1132 expect+=expectLength+1; /* go after the + */
1133 ++i;
1134 } else {
1135 --expect; /* go before the + */
1136 --i;
1137 }
1138 }
1139 }
1140
1141 static void
1142 TestNextPrevious() {
1143 static const UChar
1144 src[]={ /* input string */
1145 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1146 },
1147 nfd[]={ /* + separates expected output pieces */
1148 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1149 },
1150 nfkd[]={
1151 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1152 },
1153 nfc[]={
1154 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1155 },
1156 nfkc[]={
1157 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1158 },
1159 fcd[]={
1160 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1161 };
1162
1163 /* expected iterator indexes in the source string for each iteration piece */
1164 static const int32_t
1165 nfdIndexes[]={
1166 0, 1, 2, 5, 6, 7
1167 },
1168 nfkdIndexes[]={
1169 0, 1, 2, 5, 6, 7
1170 },
1171 nfcIndexes[]={
1172 0, 1, 2, 5, 6, 7
1173 },
1174 nfkcIndexes[]={
1175 0, 1, 2, 5, 7
1176 },
1177 fcdIndexes[]={
1178 0, 1, 2, 5, 6, 7
1179 };
1180
1181 UCharIterator iter;
1182
1183 UChar buffer[4];
1184 int32_t length;
1185
1186 UBool neededToNormalize;
1187 UErrorCode errorCode;
1188
1189 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1190
1191 /* test iteration with doNormalize */
1192 iter.index=0;
1193 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1194 iter.index=0;
1195 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1196 iter.index=0;
1197 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1198 iter.index=0;
1199 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1200 iter.index=0;
1201 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1202
1203 iter.index=iter.length;
1204 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1205 iter.index=iter.length;
1206 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1207 iter.index=iter.length;
1208 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1209 iter.index=iter.length;
1210 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1211 iter.index=iter.length;
1212 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1213
1214 /* test iteration without doNormalize */
1215 iter.index=0;
1216 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1217 iter.index=0;
1218 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1219 iter.index=0;
1220 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1221 iter.index=0;
1222 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1223 iter.index=0;
1224 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1225
1226 iter.index=iter.length;
1227 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1228 iter.index=iter.length;
1229 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1230 iter.index=iter.length;
1231 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1232 iter.index=iter.length;
1233 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1234 iter.index=iter.length;
1235 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1236
1237 /* try without neededToNormalize */
1238 errorCode=U_ZERO_ERROR;
1239 buffer[0]=5;
1240 iter.index=1;
1241 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1242 UNORM_NFD, 0, TRUE, NULL,
1243 &errorCode);
1244 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1245 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1246 return;
1247 }
1248
1249 /* preflight */
1250 neededToNormalize=9;
1251 iter.index=1;
1252 length=unorm_next(&iter, NULL, 0,
1253 UNORM_NFD, 0, TRUE, &neededToNormalize,
1254 &errorCode);
1255 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1256 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1257 return;
1258 }
1259
1260 errorCode=U_ZERO_ERROR;
1261 buffer[0]=buffer[1]=5;
1262 neededToNormalize=9;
1263 iter.index=1;
1264 length=unorm_next(&iter, buffer, 1,
1265 UNORM_NFD, 0, TRUE, &neededToNormalize,
1266 &errorCode);
1267 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1268 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1269 return;
1270 }
1271
1272 /* no iterator */
1273 errorCode=U_ZERO_ERROR;
1274 buffer[0]=buffer[1]=5;
1275 neededToNormalize=9;
1276 iter.index=1;
1277 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1278 UNORM_NFD, 0, TRUE, &neededToNormalize,
1279 &errorCode);
1280 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1281 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1282 return;
1283 }
1284
1285 /* illegal mode */
1286 buffer[0]=buffer[1]=5;
1287 neededToNormalize=9;
1288 iter.index=1;
1289 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1290 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1291 &errorCode);
1292 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1293 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1294 return;
1295 }
1296
1297 /* error coming in */
1298 errorCode=U_MISPLACED_QUANTIFIER;
1299 buffer[0]=5;
1300 iter.index=1;
1301 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1302 UNORM_NFD, 0, TRUE, NULL,
1303 &errorCode);
1304 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1305 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1306 return;
1307 }
1308 }
1309
1310 static void
1311 TestFCNFKCClosure(void) {
1312 static const struct {
1313 UChar32 c;
1314 const UChar s[6];
1315 } tests[]={
1316 { 0x00C4, { 0 } },
1317 { 0x00E4, { 0 } },
1318 { 0x037A, { 0x0020, 0x03B9, 0 } },
1319 { 0x03D2, { 0x03C5, 0 } },
1320 { 0x20A8, { 0x0072, 0x0073, 0 } },
1321 { 0x210B, { 0x0068, 0 } },
1322 { 0x210C, { 0x0068, 0 } },
1323 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1324 { 0x2122, { 0x0074, 0x006D, 0 } },
1325 { 0x2128, { 0x007A, 0 } },
1326 { 0x1D5DB, { 0x0068, 0 } },
1327 { 0x1D5ED, { 0x007A, 0 } },
1328 { 0x0061, { 0 } }
1329 };
1330
1331 UChar buffer[8];
1332 UErrorCode errorCode;
1333 int32_t i, length;
1334
1335 for(i=0; i<LENGTHOF(tests); ++i) {
1336 errorCode=U_ZERO_ERROR;
1337 length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
1338 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1339 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1340 }
1341 }
1342
1343 /* error handling */
1344 errorCode=U_ZERO_ERROR;
1345 length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
1346 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1347 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1348 }
1349
1350 length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
1351 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1352 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1353 }
1354 }
1355
1356 static void
1357 TestQuickCheckPerCP() {
1358 UErrorCode errorCode;
1359 UChar32 c, lead, trail;
1360 UChar s[U16_MAX_LENGTH], nfd[16];
1361 int32_t length, lccc1, lccc2, tccc1, tccc2;
1362 int32_t qc1, qc2;
1363
1364 if(
1365 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1366 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1368 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1370 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1371 ) {
1372 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1373 }
1374
1375 /*
1376 * compare the quick check property values for some code points
1377 * to the quick check results for checking same-code point strings
1378 */
1379 errorCode=U_ZERO_ERROR;
1380 c=0;
1381 while(c<0x110000) {
1382 length=0;
1383 U16_APPEND_UNSAFE(s, length, c);
1384
1385 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1386 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1387 if(qc1!=qc2) {
1388 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1389 }
1390
1391 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1392 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1393 if(qc1!=qc2) {
1394 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1395 }
1396
1397 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1398 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1399 if(qc1!=qc2) {
1400 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1401 }
1402
1403 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1404 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1405 if(qc1!=qc2) {
1406 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1407 }
1408
1409 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
1410 /* length-length == 0 is used to get around a compiler warning. */
1411 U16_GET(nfd, 0, length-length, length, lead);
1412 U16_GET(nfd, 0, length-1, length, trail);
1413
1414 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1415 lccc2=u_getCombiningClass(lead);
1416 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1417 tccc2=u_getCombiningClass(trail);
1418
1419 if(lccc1!=lccc2) {
1420 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1421 lccc1, lccc2, c);
1422 }
1423 if(tccc1!=tccc2) {
1424 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1425 tccc1, tccc2, c);
1426 }
1427
1428 /* skip some code points */
1429 c=(20*c)/19+1;
1430 }
1431 }
1432
1433 static void
1434 TestComposition(void) {
1435 static const struct {
1436 UNormalizationMode mode;
1437 uint32_t options;
1438 UChar input[12];
1439 UChar expect[12];
1440 } cases[]={
1441 /*
1442 * special cases for UAX #15 bug
1443 * see Unicode Corrigendum #5: Normalization Idempotency
1444 * at http://unicode.org/versions/corrigendum5.html
1445 * (was Public Review Issue #29)
1446 */
1447 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1448 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1449 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1450 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1451
1452 /* TODO: add test cases for UNORM_FCC here (j2151) */
1453 };
1454
1455 UChar output[16];
1456 UErrorCode errorCode;
1457 int32_t i, length;
1458
1459 for(i=0; i<LENGTHOF(cases); ++i) {
1460 errorCode=U_ZERO_ERROR;
1461 length=unorm_normalize(
1462 cases[i].input, -1,
1463 cases[i].mode, cases[i].options,
1464 output, LENGTHOF(output),
1465 &errorCode);
1466 if( U_FAILURE(errorCode) ||
1467 length!=u_strlen(cases[i].expect) ||
1468 0!=u_memcmp(output, cases[i].expect, length)
1469 ) {
1470 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1471 }
1472 }
1473 }
1474
1475 static void
1476 TestGetDecomposition() {
1477 UChar decomp[32];
1478 int32_t length;
1479
1480 UErrorCode errorCode=U_ZERO_ERROR;
1481 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1482 if(U_FAILURE(errorCode)) {
1483 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1484 return;
1485 }
1486
1487 length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1488 if(U_FAILURE(errorCode) || length>=0) {
1489 log_err("unorm2_getDecomposition(fcc, space) failed\n");
1490 }
1491 errorCode=U_ZERO_ERROR;
1492 length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1493 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1494 log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1495 }
1496 errorCode=U_ZERO_ERROR;
1497 length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1498 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1499 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1500 }
1501 errorCode=U_ZERO_ERROR;
1502 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1503 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1504 log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1505 }
1506 errorCode=U_ZERO_ERROR;
1507 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1508 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1509 log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1510 }
1511 errorCode=U_ZERO_ERROR;
1512 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1513 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1514 log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1515 }
1516 }
1517
1518 static void
1519 TestGetRawDecomposition() {
1520 UChar decomp[32];
1521 int32_t length;
1522
1523 UErrorCode errorCode=U_ZERO_ERROR;
1524 const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1525 if(U_FAILURE(errorCode)) {
1526 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1527 return;
1528 }
1529 /*
1530 * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1531 * without recursive decomposition.
1532 */
1533
1534 length=unorm2_getRawDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1535 if(U_FAILURE(errorCode) || length>=0) {
1536 log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1537 }
1538 errorCode=U_ZERO_ERROR;
1539 length=unorm2_getRawDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1540 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1541 log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1542 }
1543 /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1544 errorCode=U_ZERO_ERROR;
1545 length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, LENGTHOF(decomp), &errorCode);
1546 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1547 log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1548 }
1549 /* U+212B ANGSTROM SIGN */
1550 errorCode=U_ZERO_ERROR;
1551 length=unorm2_getRawDecomposition(n2, 0x212b, decomp, LENGTHOF(decomp), &errorCode);
1552 if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1553 log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1554 }
1555 errorCode=U_ZERO_ERROR;
1556 length=unorm2_getRawDecomposition(n2, 0xac00, decomp, LENGTHOF(decomp), &errorCode);
1557 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1558 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1559 }
1560 /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1561 errorCode=U_ZERO_ERROR;
1562 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1563 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1564 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1565 }
1566 errorCode=U_ZERO_ERROR;
1567 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1568 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1569 log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1570 }
1571 errorCode=U_ZERO_ERROR;
1572 length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1573 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1574 log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1575 }
1576 errorCode=U_ZERO_ERROR;
1577 length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1578 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1579 log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1580 }
1581 }
1582
1583 static void
1584 TestAppendRestoreMiddle() {
1585 UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 }; /* last chars are 'A' and 'cedilla' NFC */
1586 static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 }; /* first char is 'ring above' NFC */
1587 /* NFC: C5 is 'A with ring above' */
1588 static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1589 int32_t length;
1590 UErrorCode errorCode=U_ZERO_ERROR;
1591 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1592 if(U_FAILURE(errorCode)) {
1593 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1594 return;
1595 }
1596 /*
1597 * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1598 * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1599 * still fits into a[] but the full result still overflows this capacity.
1600 * (Let it modify the destination buffer before reallocating internally.)
1601 */
1602 length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1603 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=LENGTHOF(expected)) {
1604 log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1605 return;
1606 }
1607 /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1608 if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1609 log_err("unorm2_append(overflow) modified the first string\n");
1610 return;
1611 }
1612 errorCode=U_ZERO_ERROR;
1613 length=unorm2_append(n2, a, -1, LENGTHOF(a), b, -1, &errorCode);
1614 if(U_FAILURE(errorCode) || length!=LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1615 log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1616 return;
1617 }
1618 }
1619
1620 static void
1621 TestGetEasyToUseInstance() {
1622 static const UChar in[]={
1623 0xA0, /* -> <noBreak> 0020 */
1624 0xC7, 0x301 /* = 1E08 = 0043 0327 0301 */
1625 };
1626 UChar out[32];
1627 int32_t length;
1628
1629 UErrorCode errorCode=U_ZERO_ERROR;
1630 const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1631 if(U_FAILURE(errorCode)) {
1632 log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1633 return;
1634 }
1635 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1636 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1637 log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1638 (int)length, u_errorName(errorCode));
1639 }
1640
1641 errorCode=U_ZERO_ERROR;
1642 n2=unorm2_getNFDInstance(&errorCode);
1643 if(U_FAILURE(errorCode)) {
1644 log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1645 return;
1646 }
1647 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1648 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1649 log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1650 (int)length, u_errorName(errorCode));
1651 }
1652
1653 errorCode=U_ZERO_ERROR;
1654 n2=unorm2_getNFKCInstance(&errorCode);
1655 if(U_FAILURE(errorCode)) {
1656 log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1657 return;
1658 }
1659 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1660 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1661 log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1662 (int)length, u_errorName(errorCode));
1663 }
1664
1665 errorCode=U_ZERO_ERROR;
1666 n2=unorm2_getNFKDInstance(&errorCode);
1667 if(U_FAILURE(errorCode)) {
1668 log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1669 return;
1670 }
1671 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1672 if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1673 log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1674 (int)length, u_errorName(errorCode));
1675 }
1676
1677 errorCode=U_ZERO_ERROR;
1678 n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1679 if(U_FAILURE(errorCode)) {
1680 log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1681 return;
1682 }
1683 length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1684 if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1685 log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1686 (int)length, u_errorName(errorCode));
1687 }
1688 }
1689
1690 #endif /* #if !UCONFIG_NO_NORMALIZATION */