]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/cnormtst.c
ICU-461.16.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cnormtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
729e4ab9 3 * Copyright (c) 1997-2010, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CNORMTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API
13* synwee added test for quick check
14* synwee added test for checkFCD
15*********************************************************************************/
16/*tests for u_normalization*/
17#include "unicode/utypes.h"
374ca955 18#include "unicode/unorm.h"
b75a7d8f
A
19#include "cintltst.h"
20
21#if UCONFIG_NO_NORMALIZATION
22
23void addNormTest(TestNode** root) {
24 /* no normalization - nothing to do */
25}
26
27#else
28
29#include <stdlib.h>
30#include <time.h>
31#include "unicode/uchar.h"
32#include "unicode/ustring.h"
33#include "unicode/unorm.h"
34#include "cnormtst.h"
35
374ca955 36#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof ((array)[0]))
b75a7d8f
A
37
38static void
39TestAPI(void);
40
41static void
42TestNormCoverage(void);
43
44static void
45TestConcatenate(void);
46
47static void
48TestNextPrevious(void);
49
50static void TestIsNormalized(void);
51
52static void
53TestFCNFKCClosure(void);
54
374ca955
A
55static void
56TestQuickCheckPerCP(void);
57
58static void
59TestComposition(void);
60
729e4ab9
A
61static void
62TestFCD(void);
63
64static void
65TestGetDecomposition(void);
66
/*
 * Canonical normalization test vectors.
 * Each row is { input, expected NFD result, expected NFC result }, written as
 * escaped strings that the tests convert with CharsToUChars().
 * TestNormCases() reads column 1 for UNORM_NFD and column 2 for UNORM_NFC.
 */
67static const char* const canonTests[][3] = {
b75a7d8f
A
68 /* Input*/ /*Decomposed*/ /*Composed*/
69 { "cat", "cat", "cat" },
70 { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark", },
71
72 { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above*/
73 { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above*/
74
75 { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above*/
76 { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
77 { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_below dot_above */
78
79 { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /*D dot_below cedilla dot_above*/
80 { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below*/
81
82 { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave*/
83 { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave*/
84 { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron*/
85
86 { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign*/
87 { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring*/
88
89 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
90 { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },
91
92 { "Henry IV", "Henry IV", "Henry IV" },
93 { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },
94
95 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
96 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
97 { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten*/
98 { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten*/
99 { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten*/
729e4ab9
A
100 { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + grave + grave_below: the two marks swap in the decomposed form */
101 { "", "", "" }
b75a7d8f
A
102};
103
/*
 * Compatibility normalization test vectors.
 * Each row is { input, expected NFKD result, expected NFKC result }, written as
 * escaped strings that the tests convert with CharsToUChars().
 * TestNormCases() reads column 1 for UNORM_NFKD and column 2 for UNORM_NFKC.
 */
729e4ab9 104static const char* const compatTests[][3] = {
b75a7d8f
A
105 /* Input*/ /*Decomposed */ /*Composed*/
106 { "cat", "cat", "cat" },
107
108 { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed*/
109
110 { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
111 { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i*/
112
113 { "Henry IV", "Henry IV", "Henry IV" },
114 { "Henry \\u2163", "Henry IV", "Henry IV" },
115
116 { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana)*/
117 { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /*ka + ten*/
118
119 { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten*/
120
121 /*These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
122 { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten*/
729e4ab9
A
123 { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten*/
124 { "", "", "" }
125};
126
/*
 * FCD test vectors: { input, expected UNORM_FCD result, unused }.
 * TestNormCases() only reads column 1 for UNORM_FCD, so the third column is
 * NULL and must not be dereferenced.
 */
127static const char* const fcdTests[][3] = {
128 /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
129 { "\\u010e\\u0327", "D\\u0327\\u030c", NULL }, /* D-caron + cedilla */
130 { "\\u010e", "\\u010e", NULL } /* D-caron */
b75a7d8f
A
131};
132
133void addNormTest(TestNode** root);
134
135void addNormTest(TestNode** root)
136{
729e4ab9
A
137 addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
138 addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
139 addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
140 addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
141 addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
142 addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
143 addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
144 addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
145 addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
146 addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
147 addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
148 addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
149 addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
150 addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
151 addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
152 addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
153 addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
b75a7d8f
A
154}
155
729e4ab9
A
/*
 * Printable names of the normalization modes, indexed directly by the
 * UNormalizationMode value (see modeStrings[mode] in TestNormCases()).
 * The enum starts at UNORM_NONE==1, so index 0 is a placeholder; without it
 * every mode would be reported under the name of its successor.
 */
static const char* const modeStrings[]={
    "?",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
165
166static void TestNormCases(UNormalizationMode mode,
167 const char* const cases[][3], int32_t lengthOfCases) {
168 int32_t x, neededLen, length2;
169 int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
170 UChar *source=NULL;
171 UChar result[16];
172 log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
173 for(x=0; x < lengthOfCases; x++)
b75a7d8f 174 {
729e4ab9
A
175 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
176 source=CharsToUChars(cases[x][0]);
177 neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
178 length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
179 if(neededLen!=length2) {
180 log_err("ERROR in unorm_normalize(%s)[%d]: "
181 "preflight length/NUL %d!=%d preflight length/srcLength\n",
182 modeStrings[mode], (int)x, (int)neededLen, (int)length2);
183 }
b75a7d8f
A
184 if(status==U_BUFFER_OVERFLOW_ERROR)
185 {
186 status=U_ZERO_ERROR;
b75a7d8f 187 }
729e4ab9
A
188 length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status);
189 if(U_FAILURE(status) || neededLen!=length2) {
190 log_data_err("ERROR in unorm_normalize(%s/NUL) at %s: %s - (Are you missing data?)\n",
191 modeStrings[mode], austrdup(source), myErrorName(status));
b75a7d8f 192 } else {
729e4ab9 193 assertEqual(result, cases[x][expIndex], x);
b75a7d8f 194 }
729e4ab9
A
195 length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status);
196 if(U_FAILURE(status) || neededLen!=length2) {
197 log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s: %s - (Are you missing data?)\n",
198 modeStrings[mode], austrdup(source), myErrorName(status));
b75a7d8f 199 } else {
729e4ab9 200 assertEqual(result, cases[x][expIndex], x);
b75a7d8f 201 }
b75a7d8f
A
202 free(source);
203 }
204}
205
729e4ab9
A
206void TestDecomp() {
207 TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests));
b75a7d8f
A
208}
209
729e4ab9
A
210void TestCompatDecomp() {
211 TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests));
b75a7d8f
A
212}
213
729e4ab9
A
214void TestCanonDecompCompose() {
215 TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests));
216}
b75a7d8f 217
729e4ab9
A
218void TestCompatDecompCompose() {
219 TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests));
220}
221
222void TestFCD() {
223 TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests));
b75a7d8f 224}
b75a7d8f
A
225
226static void assertEqual(const UChar* result, const char* expected, int32_t index)
227{
228 UChar *expectedUni = CharsToUChars(expected);
229 if(u_strcmp(result, expectedUni)!=0){
230 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
231 austrdup(result) );
232 }
233 free(expectedUni);
234}
235
236static void TestNull_check(UChar *src, int32_t srcLen,
237 UChar *exp, int32_t expLen,
238 UNormalizationMode mode,
239 const char *name)
240{
241 UErrorCode status = U_ZERO_ERROR;
242 int32_t len, i;
243
244 UChar result[50];
245
246
247 status = U_ZERO_ERROR;
248
249 for(i=0;i<50;i++)
250 {
251 result[i] = 0xFFFD;
252 }
253
254 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
255
256 if(U_FAILURE(status)) {
729e4ab9 257 log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
b75a7d8f
A
258 } else if (len != expLen) {
259 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
260 }
261
262 {
263 for(i=0;i<len;i++){
264 if(exp[i] != result[i]) {
265 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
266 name,
267 i,
268 exp[i],
269 result[i]);
270 return;
271 }
272 log_verbose(" %d: \\u%04X\n", i, result[i]);
273 }
274 }
275
276 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
277}
278
279void TestNull()
280{
281
282 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
283 int32_t source_comp_len = 4;
284 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
285 int32_t expect_comp_len = 3;
286
287 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
288 int32_t source_dcmp_len = 3;
289 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
290 int32_t expect_dcmp_len = 5;
291
292 TestNull_check(source_comp,
293 source_comp_len,
294 expect_comp,
295 expect_comp_len,
296 UNORM_NFC,
297 "UNORM_NFC");
298
299 TestNull_check(source_dcmp,
300 source_dcmp_len,
301 expect_dcmp,
302 expect_dcmp_len,
303 UNORM_NFD,
304 "UNORM_NFD");
305
306 TestNull_check(source_comp,
307 source_comp_len,
308 expect_comp,
309 expect_comp_len,
310 UNORM_NFKC,
311 "UNORM_NFKC");
312
313
314}
315
316static void TestQuickCheckResultNO()
317{
318 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
319 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
320 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
321 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
322 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
323 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
324 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
325 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
326
327
328 const int SIZE = 10;
329
330 int count = 0;
331 UErrorCode error = U_ZERO_ERROR;
332
333 for (; count < SIZE; count ++)
334 {
335 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
336 UNORM_NO)
337 {
338 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
339 return;
340 }
341 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
342 UNORM_NO)
343 {
344 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
345 return;
346 }
347 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
348 UNORM_NO)
349 {
350 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
351 return;
352 }
353 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
354 UNORM_NO)
355 {
356 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
357 return;
358 }
359 }
360}
361
362
363static void TestQuickCheckResultYES()
364{
365 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
366 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
367 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
368 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
369 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
370 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
371 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
372 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
373
374 const int SIZE = 10;
375 int count = 0;
376 UErrorCode error = U_ZERO_ERROR;
377
378 UChar cp = 0;
379 while (cp < 0xA0)
380 {
381 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
382 {
729e4ab9 383 log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
b75a7d8f
A
384 return;
385 }
386 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
387 UNORM_YES)
388 {
389 log_err("ERROR in NFC quick check at U+%04x\n", cp);
390 return;
391 }
392 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
393 {
394 log_err("ERROR in NFKD quick check at U+%04x\n", cp);
395 return;
396 }
397 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
398 UNORM_YES)
399 {
400 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
401 return;
402 }
403 cp ++;
404 }
405
406 for (; count < SIZE; count ++)
407 {
408 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
409 UNORM_YES)
410 {
411 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
412 return;
413 }
414 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
415 != UNORM_YES)
416 {
417 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
418 return;
419 }
420 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
421 UNORM_YES)
422 {
423 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
424 return;
425 }
426 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
427 UNORM_YES)
428 {
429 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
430 return;
431 }
432 }
433}
434
435static void TestQuickCheckResultMAYBE()
436{
437 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
438 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
439 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
440 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
441
442
443 const int SIZE = 10;
444
445 int count = 0;
446 UErrorCode error = U_ZERO_ERROR;
447
448 /* NFD and NFKD does not have any MAYBE codepoints */
449 for (; count < SIZE; count ++)
450 {
451 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
452 UNORM_MAYBE)
453 {
729e4ab9 454 log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
b75a7d8f
A
455 return;
456 }
457 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
458 UNORM_MAYBE)
459 {
460 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
461 return;
462 }
463 }
464}
465
466static void TestQuickCheckStringResult()
467{
468 int count;
469 UChar *d = NULL;
470 UChar *c = NULL;
471 UErrorCode error = U_ZERO_ERROR;
472
374ca955 473 for (count = 0; count < LENGTHOF(canonTests); count ++)
b75a7d8f
A
474 {
475 d = CharsToUChars(canonTests[count][1]);
476 c = CharsToUChars(canonTests[count][2]);
477 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
478 UNORM_YES)
479 {
729e4ab9 480 log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
b75a7d8f
A
481 return;
482 }
483
484 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
485 UNORM_NO)
486 {
487 log_err("ERROR in NFC quick check for string at count %d\n", count);
488 return;
489 }
490
491 free(d);
492 free(c);
493 }
494
374ca955 495 for (count = 0; count < LENGTHOF(compatTests); count ++)
b75a7d8f
A
496 {
497 d = CharsToUChars(compatTests[count][1]);
498 c = CharsToUChars(compatTests[count][2]);
499 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
500 UNORM_YES)
501 {
502 log_err("ERROR in NFKD quick check for string at count %d\n", count);
503 return;
504 }
505
506 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
507 UNORM_YES)
508 {
509 log_err("ERROR in NFKC quick check for string at count %d\n", count);
510 return;
511 }
512
513 free(d);
514 free(c);
515 }
516}
517
/*
 * Entry point for the unorm_quickCheck() tests: single code units expected
 * to yield NO, YES and MAYBE, followed by whole strings.
 */
void TestQuickCheck()
{
    TestQuickCheckResultNO();     /* code units not in the form */
    TestQuickCheckResultYES();    /* code units in the form */
    TestQuickCheckResultMAYBE();  /* code units needing a full check */
    TestQuickCheckStringResult(); /* strings from the normalization tables */
}
525
526/*
527 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
528 * normalized, and some that are not.
529 * Here we pick some specific cases and test the C API.
530 */
531static void TestIsNormalized(void) {
532 static const UChar notNFC[][8]={ /* strings that are not in NFC */
533 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
534 { 0xfb1d, 0 }, /* excluded from composition */
535 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
536 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
537 };
538 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
539 { 0x1100, 0x1161, 0 }, /* Jamo compose */
540 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
541 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
542 };
543
544 int32_t i;
545 UErrorCode errorCode;
546
547 /* API test */
548
549 /* normal case with length>=0 (length -1 used for special cases below) */
550 errorCode=U_ZERO_ERROR;
551 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 552 log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
553 }
554
555 /* incoming U_FAILURE */
556 errorCode=U_TRUNCATED_CHAR_FOUND;
557 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
558 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
559 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
560 }
561
562 /* NULL source */
563 errorCode=U_ZERO_ERROR;
564 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
565 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
729e4ab9 566 log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
567 }
568
569 /* bad length */
570 errorCode=U_ZERO_ERROR;
571 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
572 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
729e4ab9 573 log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
574 }
575
576 /* specific cases */
374ca955 577 for(i=0; i<LENGTHOF(notNFC); ++i) {
b75a7d8f
A
578 errorCode=U_ZERO_ERROR;
579 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 580 log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
b75a7d8f
A
581 }
582 errorCode=U_ZERO_ERROR;
583 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 584 log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
b75a7d8f
A
585 }
586 }
374ca955 587 for(i=0; i<LENGTHOF(notNFKC); ++i) {
b75a7d8f
A
588 errorCode=U_ZERO_ERROR;
589 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 590 log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
b75a7d8f
A
591 }
592 }
593}
594
595void TestCheckFCD()
596{
597 UErrorCode status = U_ZERO_ERROR;
598 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
599 0x0A};
600 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
601 0x02B9, 0x0314, 0x0315, 0x0316};
602 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
603 0x0050, 0x0730, 0x09EE, 0x1E10};
604
605 static const UChar datastr[][5] =
606 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
607 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
608 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
609 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
610 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
611
612 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
613 0x6a,
614 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
615 0xea,
616 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
617 0x0307, 0x0308, 0x0309, 0x030a,
618 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
619 0x0327, 0x0328, 0x0329, 0x032a,
620 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
621 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
622
623 int count = 0;
624
625 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
729e4ab9 626 log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
b75a7d8f
A
627 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
628 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
629 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
729e4ab9 630 log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
b75a7d8f
A
631
632 if (U_FAILURE(status))
729e4ab9 633 log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
b75a7d8f
A
634
635 while (count < 4)
636 {
637 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
638 if (U_FAILURE(status)) {
729e4ab9 639 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
b75a7d8f
A
640 break;
641 }
642 else {
643 if (result[count] != fcdresult) {
644 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
645 result[count]);
646 }
647 }
648 count ++;
649 }
650
651 /* random checks of long strings */
652 status = U_ZERO_ERROR;
653 srand((unsigned)time( NULL ));
654
655 for (count = 0; count < 50; count ++)
656 {
657 int size = 0;
658 UBool testresult = UNORM_YES;
659 UChar data[20];
660 UChar norm[100];
661 UChar nfd[100];
662 int normsize = 0;
663 int nfdsize = 0;
664
665 while (size != 19) {
666 data[size] = datachar[(rand() * 50) / RAND_MAX];
667 log_verbose("0x%x", data[size]);
668 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
669 norm + normsize, 100 - normsize, &status);
670 if (U_FAILURE(status)) {
729e4ab9 671 log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
b75a7d8f
A
672 break;
673 }
674 size ++;
675 }
676 log_verbose("\n");
677
678 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
679 nfd, 100, &status);
680 if (U_FAILURE(status)) {
729e4ab9 681 log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
b75a7d8f
A
682 }
683
684 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
685 testresult = UNORM_NO;
686 }
687 if (testresult == UNORM_YES) {
688 log_verbose("result UNORM_YES\n");
689 }
690 else {
691 log_verbose("result UNORM_NO\n");
692 }
693
694 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
729e4ab9 695 log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
b75a7d8f
A
696 }
697 }
698}
699
700static void
701TestAPI() {
702 static const UChar in[]={ 0x68, 0xe4 };
703 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
704 UErrorCode errorCode;
705 int32_t length;
706
707 /* try preflighting */
708 errorCode=U_ZERO_ERROR;
709 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
710 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
729e4ab9 711 log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
712 return;
713 }
714
715 errorCode=U_ZERO_ERROR;
716 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
717 if(U_FAILURE(errorCode)) {
718 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
719 return;
720 }
721 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
722 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
723 return;
724 }
729e4ab9
A
725 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
726 if(U_FAILURE(errorCode)) {
727 log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
728 return;
729 }
730 length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
731 if(U_FAILURE(errorCode)) {
732 log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
733 return;
734 }
b75a7d8f
A
735}
736
737/* test cases to improve test code coverage */
/*
 * Code points used by TestNormCoverage().
 * The HANGUL_* values fit in one UTF-16 code unit. The MUSICAL_* values are
 * above U+FFFF and are stored through UTF16_LEAD()/UTF16_TRAIL() there.
 */
738enum {
739 HANGUL_K_KIYEOK=0x3131, /* NFKD->Jamo L U+1100 */
740 HANGUL_K_WEO=0x315d, /* NFKD->Jamo V U+116f */
741 HANGUL_K_KIYEOK_SIOS=0x3133, /* NFKD->Jamo T U+11aa */
742
743 HANGUL_KIYEOK=0x1100, /* Jamo L U+1100 */
744 HANGUL_WEO=0x116f, /* Jamo V U+116f */
745 HANGUL_KIYEOK_SIOS=0x11aa, /* Jamo T U+11aa */
746
747 HANGUL_AC00=0xac00, /* Hangul syllable = Jamo LV U+ac00 */
748 HANGUL_SYLLABLE=0xac00+14*28+3, /* Hangul syllable = U+1100 * U+116f * U+11aa */
749
750 MUSICAL_VOID_NOTEHEAD=0x1d157,
751 MUSICAL_HALF_NOTE=0x1d15e, /* NFC/NFD->Notehead+Stem */
752 MUSICAL_STEM=0x1d165, /* cc=216 */
753 MUSICAL_STACCATO=0x1d17c /* cc=220 */
754};
755
756static void
757TestNormCoverage() {
46f4442e 758 UChar input[1000], expect[1000], output[1000];
b75a7d8f
A
759 UErrorCode errorCode;
760 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
761
762 /* create a long and nasty string with NFKC-unsafe characters */
763 inLength=0;
764
765 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
766 input[inLength++]=HANGUL_KIYEOK;
767 input[inLength++]=HANGUL_WEO;
768 input[inLength++]=HANGUL_KIYEOK_SIOS;
769
770 input[inLength++]=HANGUL_KIYEOK;
771 input[inLength++]=HANGUL_WEO;
772 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
773
774 input[inLength++]=HANGUL_KIYEOK;
775 input[inLength++]=HANGUL_K_WEO;
776 input[inLength++]=HANGUL_KIYEOK_SIOS;
777
778 input[inLength++]=HANGUL_KIYEOK;
779 input[inLength++]=HANGUL_K_WEO;
780 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
781
782 input[inLength++]=HANGUL_K_KIYEOK;
783 input[inLength++]=HANGUL_WEO;
784 input[inLength++]=HANGUL_KIYEOK_SIOS;
785
786 input[inLength++]=HANGUL_K_KIYEOK;
787 input[inLength++]=HANGUL_WEO;
788 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
789
790 input[inLength++]=HANGUL_K_KIYEOK;
791 input[inLength++]=HANGUL_K_WEO;
792 input[inLength++]=HANGUL_KIYEOK_SIOS;
793
794 input[inLength++]=HANGUL_K_KIYEOK;
795 input[inLength++]=HANGUL_K_WEO;
796 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
797
798 /* Hangul LV with normal/compatibility Jamo T */
799 input[inLength++]=HANGUL_AC00;
800 input[inLength++]=HANGUL_KIYEOK_SIOS;
801
802 input[inLength++]=HANGUL_AC00;
803 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
804
805 /* compatibility Jamo L, V */
806 input[inLength++]=HANGUL_K_KIYEOK;
807 input[inLength++]=HANGUL_K_WEO;
808
809 hangulPrefixLength=inLength;
810
811 input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
812 input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
813 for(i=0; i<200; ++i) {
814 input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
815 input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
816 input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
817 input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
818 }
819
820 /* (compatibility) Jamo L, T do not compose */
821 input[inLength++]=HANGUL_K_KIYEOK;
822 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
823
824 /* quick checks */
825 errorCode=U_ZERO_ERROR;
826 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 827 log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
828 }
829 errorCode=U_ZERO_ERROR;
830 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 831 log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
832 }
833 errorCode=U_ZERO_ERROR;
834 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 835 log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
836 }
837 errorCode=U_ZERO_ERROR;
838 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 839 log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
840 }
841 errorCode=U_ZERO_ERROR;
842 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
729e4ab9 843 log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
844 }
845
846 /* NFKC */
847 expectLength=0;
848 expect[expectLength++]=HANGUL_SYLLABLE;
849
850 expect[expectLength++]=HANGUL_SYLLABLE;
851
852 expect[expectLength++]=HANGUL_SYLLABLE;
853
854 expect[expectLength++]=HANGUL_SYLLABLE;
855
856 expect[expectLength++]=HANGUL_SYLLABLE;
857
858 expect[expectLength++]=HANGUL_SYLLABLE;
859
860 expect[expectLength++]=HANGUL_SYLLABLE;
861
862 expect[expectLength++]=HANGUL_SYLLABLE;
863
864 expect[expectLength++]=HANGUL_AC00+3;
865
866 expect[expectLength++]=HANGUL_AC00+3;
867
868 expect[expectLength++]=HANGUL_AC00+14*28;
869
870 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
871 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
872 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
873 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
874 for(i=0; i<200; ++i) {
875 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
876 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
877 }
878 for(i=0; i<200; ++i) {
879 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
880 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
881 }
882
883 expect[expectLength++]=HANGUL_KIYEOK;
884 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
885
886 /* try destination overflow first */
887 errorCode=U_ZERO_ERROR;
888 preflightLength=unorm_normalize(input, inLength,
889 UNORM_NFKC, 0,
890 output, 100, /* too short */
891 &errorCode);
892 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
729e4ab9 893 log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
894 }
895
896 /* real NFKC */
897 errorCode=U_ZERO_ERROR;
898 length=unorm_normalize(input, inLength,
899 UNORM_NFKC, 0,
900 output, sizeof(output)/U_SIZEOF_UCHAR,
901 &errorCode);
902 if(U_FAILURE(errorCode)) {
729e4ab9 903 log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
904 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
905 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
906 for(i=0; i<length; ++i) {
907 if(output[i]!=expect[i]) {
908 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
909 break;
910 }
911 }
912 }
913 if(length!=preflightLength) {
914 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
915 }
916
917 /* FCD */
918 u_memcpy(expect, input, hangulPrefixLength);
919 expectLength=hangulPrefixLength;
920
921 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
922 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
923 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
924 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
925 for(i=0; i<200; ++i) {
926 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
927 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
928 }
929 for(i=0; i<200; ++i) {
930 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
931 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
932 }
933
934 expect[expectLength++]=HANGUL_K_KIYEOK;
935 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
936
937 errorCode=U_ZERO_ERROR;
938 length=unorm_normalize(input, inLength,
939 UNORM_FCD, 0,
940 output, sizeof(output)/U_SIZEOF_UCHAR,
941 &errorCode);
942 if(U_FAILURE(errorCode)) {
729e4ab9 943 log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
944 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
945 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
946 for(i=0; i<length; ++i) {
947 if(output[i]!=expect[i]) {
948 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
949 break;
950 }
951 }
952 }
953}
954
955/* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
956static void
957TestConcatenate(void) {
958 /* "re + 'sume'" */
959 static const UChar
960 left[]={
961 0x72, 0x65, 0
962 },
963 right[]={
964 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
965 },
966 expect[]={
967 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
968 };
969
970 UChar buffer[100];
971 UErrorCode errorCode;
972 int32_t length;
973
974 /* left with length, right NUL-terminated */
975 errorCode=U_ZERO_ERROR;
976 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
977 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
729e4ab9 978 log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
979 }
980
981 /* preflighting */
982 errorCode=U_ZERO_ERROR;
983 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
984 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
729e4ab9 985 log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
986 }
987
988 buffer[2]=0x5555;
989 errorCode=U_ZERO_ERROR;
990 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
991 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
729e4ab9 992 log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
993 }
994
995 /* enter with U_FAILURE */
996 buffer[2]=0xaaaa;
997 errorCode=U_UNEXPECTED_TOKEN;
998 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
999 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1000 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1001 }
1002
1003 /* illegal arguments */
1004 buffer[2]=0xaaaa;
1005 errorCode=U_ZERO_ERROR;
1006 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1007 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
729e4ab9 1008 log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
1009 }
1010
1011 errorCode=U_ZERO_ERROR;
1012 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1013 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
729e4ab9 1014 log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
b75a7d8f
A
1015 }
1016}
1017
/* 0x2b is '+': the marker that separates expected output pieces in the iteration test tables */
enum { _PLUS=0x2b };
1021
1022static const char *const _modeString[UNORM_MODE_COUNT]={
1023 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1024};
1025
1026static void
1027_testIter(const UChar *src, int32_t srcLength,
1028 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1029 const UChar *out, int32_t outLength,
1030 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1031 UChar buffer[4];
1032 const UChar *expect, *outLimit, *in;
1033 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1034 UErrorCode errorCode;
1035 UBool neededToNormalize, expectNeeded;
1036
1037 errorCode=U_ZERO_ERROR;
1038 outLimit=out+outLength;
1039 if(forward) {
1040 expect=out;
1041 i=index=0;
1042 } else {
1043 expect=outLimit;
1044 i=srcIndexesLength-2;
1045 index=srcLength;
1046 }
1047
1048 for(;;) {
1049 prevIndex=index;
1050 if(forward) {
1051 if(!iter->hasNext(iter)) {
1052 return;
1053 }
1054 length=unorm_next(iter,
1055 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1056 mode, 0,
1057 (UBool)(out!=NULL), &neededToNormalize,
1058 &errorCode);
1059 expectIndex=srcIndexes[i+1];
1060 in=src+prevIndex;
1061 inLength=expectIndex-prevIndex;
1062
1063 if(out!=NULL) {
1064 /* get output piece from between plus signs */
1065 expectLength=0;
1066 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1067 ++expectLength;
1068 }
1069 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1070 } else {
1071 expect=in;
1072 expectLength=inLength;
1073 expectNeeded=FALSE;
1074 }
1075 } else {
1076 if(!iter->hasPrevious(iter)) {
1077 return;
1078 }
1079 length=unorm_previous(iter,
1080 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1081 mode, 0,
1082 (UBool)(out!=NULL), &neededToNormalize,
1083 &errorCode);
1084 expectIndex=srcIndexes[i];
1085 in=src+expectIndex;
1086 inLength=prevIndex-expectIndex;
1087
1088 if(out!=NULL) {
1089 /* get output piece from between plus signs */
1090 expectLength=0;
1091 while(expect!=out && expect[-1]!=_PLUS) {
1092 ++expectLength;
1093 --expect;
1094 }
1095 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1096 } else {
1097 expect=in;
1098 expectLength=inLength;
1099 expectNeeded=FALSE;
1100 }
1101 }
1102 index=iter->getIndex(iter, UITER_CURRENT);
1103
1104 if(U_FAILURE(errorCode)) {
729e4ab9 1105 log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
b75a7d8f
A
1106 forward, _modeString[mode], i, u_errorName(errorCode));
1107 return;
1108 }
1109 if(expectIndex!=index) {
1110 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1111 forward, _modeString[mode], i, index, expectIndex);
1112 return;
1113 }
1114 if(expectLength!=length) {
1115 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1116 forward, _modeString[mode], i, length, expectLength);
1117 return;
1118 }
1119 if(0!=u_memcmp(expect, buffer, length)) {
1120 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1121 forward, _modeString[mode], i);
1122 return;
1123 }
1124 if(neededToNormalize!=expectNeeded) {
1125 }
1126
1127 if(forward) {
1128 expect+=expectLength+1; /* go after the + */
1129 ++i;
1130 } else {
1131 --expect; /* go before the + */
1132 --i;
1133 }
1134 }
1135}
1136
1137static void
1138TestNextPrevious() {
1139 static const UChar
1140 src[]={ /* input string */
1141 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1142 },
1143 nfd[]={ /* + separates expected output pieces */
1144 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1145 },
1146 nfkd[]={
1147 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1148 },
1149 nfc[]={
1150 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1151 },
1152 nfkc[]={
1153 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1154 },
1155 fcd[]={
1156 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1157 };
1158
1159 /* expected iterator indexes in the source string for each iteration piece */
1160 static const int32_t
1161 nfdIndexes[]={
1162 0, 1, 2, 5, 6, 7
1163 },
1164 nfkdIndexes[]={
1165 0, 1, 2, 5, 6, 7
1166 },
1167 nfcIndexes[]={
1168 0, 1, 2, 5, 6, 7
1169 },
1170 nfkcIndexes[]={
1171 0, 1, 2, 5, 7
1172 },
1173 fcdIndexes[]={
1174 0, 1, 2, 5, 6, 7
1175 };
1176
1177 UCharIterator iter;
1178
1179 UChar buffer[4];
1180 int32_t length;
1181
1182 UBool neededToNormalize;
1183 UErrorCode errorCode;
1184
1185 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1186
1187 /* test iteration with doNormalize */
1188 iter.index=0;
1189 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1190 iter.index=0;
1191 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1192 iter.index=0;
1193 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1194 iter.index=0;
1195 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1196 iter.index=0;
1197 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1198
1199 iter.index=iter.length;
1200 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1201 iter.index=iter.length;
1202 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1203 iter.index=iter.length;
1204 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1205 iter.index=iter.length;
1206 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1207 iter.index=iter.length;
1208 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1209
1210 /* test iteration without doNormalize */
1211 iter.index=0;
1212 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1213 iter.index=0;
1214 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1215 iter.index=0;
1216 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1217 iter.index=0;
1218 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1219 iter.index=0;
1220 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1221
1222 iter.index=iter.length;
1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1224 iter.index=iter.length;
1225 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1226 iter.index=iter.length;
1227 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1228 iter.index=iter.length;
1229 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1230 iter.index=iter.length;
1231 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1232
1233 /* try without neededToNormalize */
1234 errorCode=U_ZERO_ERROR;
1235 buffer[0]=5;
1236 iter.index=1;
1237 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1238 UNORM_NFD, 0, TRUE, NULL,
1239 &errorCode);
1240 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
729e4ab9 1241 log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
b75a7d8f
A
1242 return;
1243 }
1244
1245 /* preflight */
1246 neededToNormalize=9;
1247 iter.index=1;
1248 length=unorm_next(&iter, NULL, 0,
1249 UNORM_NFD, 0, TRUE, &neededToNormalize,
1250 &errorCode);
1251 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1252 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1253 return;
1254 }
1255
1256 errorCode=U_ZERO_ERROR;
1257 buffer[0]=buffer[1]=5;
1258 neededToNormalize=9;
1259 iter.index=1;
1260 length=unorm_next(&iter, buffer, 1,
1261 UNORM_NFD, 0, TRUE, &neededToNormalize,
1262 &errorCode);
1263 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1264 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1265 return;
1266 }
1267
1268 /* no iterator */
1269 errorCode=U_ZERO_ERROR;
1270 buffer[0]=buffer[1]=5;
1271 neededToNormalize=9;
1272 iter.index=1;
1273 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1274 UNORM_NFD, 0, TRUE, &neededToNormalize,
1275 &errorCode);
1276 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1277 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1278 return;
1279 }
1280
1281 /* illegal mode */
1282 buffer[0]=buffer[1]=5;
1283 neededToNormalize=9;
1284 iter.index=1;
1285 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1286 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1287 &errorCode);
1288 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1289 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1290 return;
1291 }
1292
1293 /* error coming in */
1294 errorCode=U_MISPLACED_QUANTIFIER;
1295 buffer[0]=5;
1296 iter.index=1;
1297 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1298 UNORM_NFD, 0, TRUE, NULL,
1299 &errorCode);
1300 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1301 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1302 return;
1303 }
b75a7d8f
A
1304}
1305
1306static void
1307TestFCNFKCClosure(void) {
1308 static const struct {
1309 UChar32 c;
1310 const UChar s[6];
1311 } tests[]={
729e4ab9
A
1312 { 0x00C4, { 0 } },
1313 { 0x00E4, { 0 } },
b75a7d8f
A
1314 { 0x037A, { 0x0020, 0x03B9, 0 } },
1315 { 0x03D2, { 0x03C5, 0 } },
1316 { 0x20A8, { 0x0072, 0x0073, 0 } },
1317 { 0x210B, { 0x0068, 0 } },
1318 { 0x210C, { 0x0068, 0 } },
1319 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1320 { 0x2122, { 0x0074, 0x006D, 0 } },
1321 { 0x2128, { 0x007A, 0 } },
1322 { 0x1D5DB, { 0x0068, 0 } },
1323 { 0x1D5ED, { 0x007A, 0 } },
1324 { 0x0061, { 0 } }
1325 };
1326
1327 UChar buffer[8];
1328 UErrorCode errorCode;
1329 int32_t i, length;
1330
374ca955 1331 for(i=0; i<LENGTHOF(tests); ++i) {
b75a7d8f 1332 errorCode=U_ZERO_ERROR;
374ca955 1333 length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
b75a7d8f 1334 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
729e4ab9 1335 log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
b75a7d8f
A
1336 }
1337 }
1338
1339 /* error handling */
1340 errorCode=U_ZERO_ERROR;
374ca955 1341 length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
b75a7d8f
A
1342 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1343 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1344 }
1345
374ca955 1346 length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
b75a7d8f
A
1347 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1348 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1349 }
1350}
1351
374ca955
A
1352static void
1353TestQuickCheckPerCP() {
1354 UErrorCode errorCode;
1355 UChar32 c, lead, trail;
1356 UChar s[U16_MAX_LENGTH], nfd[16];
1357 int32_t length, lccc1, lccc2, tccc1, tccc2;
46f4442e 1358 int32_t qc1, qc2;
374ca955
A
1359
1360 if(
1361 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1362 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1363 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1364 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1365 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1366 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1367 ) {
1368 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1369 }
1370
1371 /*
1372 * compare the quick check property values for some code points
1373 * to the quick check results for checking same-code point strings
1374 */
1375 errorCode=U_ZERO_ERROR;
1376 c=0;
1377 while(c<0x110000) {
1378 length=0;
1379 U16_APPEND_UNSAFE(s, length, c);
1380
1381 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1382 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1383 if(qc1!=qc2) {
729e4ab9 1384 log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
374ca955
A
1385 }
1386
1387 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1388 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1389 if(qc1!=qc2) {
729e4ab9 1390 log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
374ca955
A
1391 }
1392
1393 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1394 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1395 if(qc1!=qc2) {
729e4ab9 1396 log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
374ca955
A
1397 }
1398
1399 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1400 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1401 if(qc1!=qc2) {
729e4ab9 1402 log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
374ca955
A
1403 }
1404
1405 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
46f4442e
A
1406 /* length-length == 0 is used to get around a compiler warning. */
1407 U16_GET(nfd, 0, length-length, length, lead);
374ca955
A
1408 U16_GET(nfd, 0, length-1, length, trail);
1409
1410 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1411 lccc2=u_getCombiningClass(lead);
1412 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1413 tccc2=u_getCombiningClass(trail);
1414
1415 if(lccc1!=lccc2) {
1416 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1417 lccc1, lccc2, c);
1418 }
1419 if(tccc1!=tccc2) {
1420 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1421 tccc1, tccc2, c);
1422 }
1423
1424 /* skip some code points */
1425 c=(20*c)/19+1;
1426 }
1427}
1428
1429static void
1430TestComposition(void) {
1431 static const struct {
1432 UNormalizationMode mode;
1433 uint32_t options;
1434 UChar input[12];
1435 UChar expect[12];
1436 } cases[]={
1437 /*
1438 * special cases for UAX #15 bug
729e4ab9
A
1439 * see Unicode Corrigendum #5: Normalization Idempotency
1440 * at http://unicode.org/versions/corrigendum5.html
1441 * (was Public Review Issue #29)
374ca955
A
1442 */
1443 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1444 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1445 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1446 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1447
374ca955
A
1448 /* TODO: add test cases for UNORM_FCC here (j2151) */
1449 };
1450
1451 UChar output[16];
1452 UErrorCode errorCode;
1453 int32_t i, length;
1454
1455 for(i=0; i<LENGTHOF(cases); ++i) {
1456 errorCode=U_ZERO_ERROR;
1457 length=unorm_normalize(
1458 cases[i].input, -1,
1459 cases[i].mode, cases[i].options,
1460 output, LENGTHOF(output),
1461 &errorCode);
1462 if( U_FAILURE(errorCode) ||
1463 length!=u_strlen(cases[i].expect) ||
1464 0!=u_memcmp(output, cases[i].expect, length)
1465 ) {
729e4ab9 1466 log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
374ca955
A
1467 }
1468 }
1469}
1470
729e4ab9
A
1471static void
1472TestGetDecomposition() {
1473 UChar decomp[32];
1474 int32_t length;
1475
1476 UErrorCode errorCode=U_ZERO_ERROR;
1477 const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1478 if(U_FAILURE(errorCode)) {
1479 log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1480 return;
1481 }
1482
1483 length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1484 if(U_FAILURE(errorCode) || length>=0) {
1485 log_err("unorm2_getDecomposition(space) failed\n");
1486 }
1487 errorCode=U_ZERO_ERROR;
1488 length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1489 if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1490 log_err("unorm2_getDecomposition(a-umlaut) failed\n");
1491 }
1492 errorCode=U_ZERO_ERROR;
1493 length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1494 if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1495 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) failed\n");
1496 }
1497 errorCode=U_ZERO_ERROR;
1498 length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1499 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1500 log_err("unorm2_getDecomposition(Hangul syllable U+AC01) overflow failed\n");
1501 }
1502 errorCode=U_ZERO_ERROR;
1503 length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1504 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1505 log_err("unorm2_getDecomposition(capacity<0) failed\n");
1506 }
1507 errorCode=U_ZERO_ERROR;
1508 length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1509 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1510 log_err("unorm2_getDecomposition(decomposition=NULL) failed\n");
1511 }
1512}
1513
b75a7d8f 1514#endif /* #if !UCONFIG_NO_NORMALIZATION */