]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cnormtst.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cnormtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CNORMTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "unicode/unorm.h"
19 #include "unormimp.h"
20 #include "cintltst.h"
21
22 #if UCONFIG_NO_NORMALIZATION
23
24 void addNormTest(TestNode** root) {
25 /* no normalization - nothing to do */
26 }
27
28 #else
29
30 #include <stdlib.h>
31 #include <time.h>
32 #include "unicode/uchar.h"
33 #include "unicode/ustring.h"
34 #include "unicode/unorm.h"
35 #include "cnormtst.h"
36
/*
 * Number of elements of a true array (not a pointer), as int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
38
/* Forward declarations of the file-local tests that addNormTest() registers. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
61
/*
 * Canonical normalization test data.
 * Each row is { input, expected canonical decomposition, expected canonical
 * composition }. The strings use \uXXXX escapes that are converted with
 * CharsToUChars() by the tests that consume this table.
 */
static const char* canonTests[][3] = {
    /* Input */               /* Decomposed */            /* Composed */
    { "cat",                  "cat",                      "cat" },
    { "\\u00e0ardvark",       "a\\u0300ardvark",          "\\u00e0ardvark" },

    { "\\u1e0a",              "D\\u0307",                 "\\u1e0a" },           /* D-dot_above */
    { "D\\u0307",             "D\\u0307",                 "\\u1e0a" },           /* D dot_above */

    { "\\u1e0c\\u0307",       "D\\u0323\\u0307",          "\\u1e0c\\u0307" },    /* D-dot_below dot_above */
    { "\\u1e0a\\u0323",       "D\\u0323\\u0307",          "\\u1e0c\\u0307" },    /* D-dot_above dot_below */
    { "D\\u0307\\u0323",      "D\\u0323\\u0307",          "\\u1e0c\\u0307" },    /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307",  "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below */

    { "\\u1E14",              "E\\u0304\\u0300",          "\\u1E14" },           /* E-macron-grave */
    { "\\u0112\\u0300",       "E\\u0304\\u0300",          "\\u1E14" },           /* E-macron + grave */
    { "\\u00c8\\u0304",       "E\\u0300\\u0304",          "\\u00c8\\u0304" },    /* E-grave + macron */

    { "\\u212b",              "A\\u030a",                 "\\u00c5" },           /* angstrom_sign */
    { "\\u00c5",              "A\\u030a",                 "\\u00c5" },           /* A-ring */

    { "\\u00C4ffin",          "A\\u0308ffin",             "\\u00C4ffin" },
    { "\\u00C4\\uFB03n",      "A\\u0308\\uFB03n",         "\\u00C4\\uFB03n" },

    { "Henry IV",             "Henry IV",                 "Henry IV" },
    { "Henry \\u2163",        "Henry \\u2163",            "Henry \\u2163" },

    { "\\u30AC",              "\\u30AB\\u3099",           "\\u30AC" },           /* ga (Katakana) */
    { "\\u30AB\\u3099",       "\\u30AB\\u3099",           "\\u30AC" },           /* ka + ten */
    { "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",           "\\uFF76\\uFF9E" },    /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",           "\\u30AB\\uFF9E" },    /* ka + hw_ten */
    { "\\uFF76\\u3099",       "\\uFF76\\u3099",           "\\uFF76\\u3099" },    /* hw_ka + ten */
    { "A\\u0300\\u0316",      "A\\u0316\\u0300",          "\\u00C0\\u0316" }     /* A grave_above grave_below: the marks get reordered */
};
97
/*
 * Compatibility normalization test data.
 * Each row is { input, expected compatibility decomposition, expected
 * compatibility composition }, using the same \uXXXX escape convention
 * as the canonical table.
 */
static const char* compatTests[][3] = {
    /* Input */           /* Decomposed */      /* Composed */
    { "cat",              "cat",                "cat" },

    { "\\uFB4f",          "\\u05D0\\u05DC",     "\\u05D0\\u05DC" },  /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin",      "A\\u0308ffin",       "\\u00C4ffin" },
    { "\\u00C4\\uFB03n",  "A\\u0308ffin",       "\\u00C4ffin" },     /* ffi ligature -> f + f + i */

    { "Henry IV",         "Henry IV",           "Henry IV" },
    { "Henry \\u2163",    "Henry IV",           "Henry IV" },

    { "\\u30AC",          "\\u30AB\\u3099",     "\\u30AC" },         /* ga (Katakana) */
    { "\\u30AB\\u3099",   "\\u30AB\\u3099",     "\\u30AC" },         /* ka + ten */

    { "\\uFF76\\u3099",   "\\u30AB\\u3099",     "\\u30AC" },         /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E",   "\\u30AB\\u3099",     "\\u30AC" },         /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",   "\\u30AB\\u3099",     "\\u30AC" }          /* ka + hw_ten */
};
120
121 void addNormTest(TestNode** root);
122
123 void addNormTest(TestNode** root)
124 {
125 addTest(root, &TestAPI, "tscoll/cnormtst/TestAPI");
126 addTest(root, &TestDecomp, "tscoll/cnormtst/TestDecomp");
127 addTest(root, &TestCompatDecomp, "tscoll/cnormtst/TestCompatDecomp");
128 addTest(root, &TestCanonDecompCompose, "tscoll/cnormtst/TestCanonDecompCompose");
129 addTest(root, &TestCompatDecompCompose, "tscoll/cnormtst/CompatDecompCompose");
130 addTest(root, &TestNull, "tscoll/cnormtst/TestNull");
131 addTest(root, &TestQuickCheck, "tscoll/cnormtst/TestQuickCheck");
132 addTest(root, &TestQuickCheckPerCP, "tscoll/cnormtst/TestQuickCheckPerCP");
133 addTest(root, &TestIsNormalized, "tscoll/cnormtst/TestIsNormalized");
134 addTest(root, &TestCheckFCD, "tscoll/cnormtst/TestCheckFCD");
135 addTest(root, &TestNormCoverage, "tscoll/cnormtst/TestNormCoverage");
136 addTest(root, &TestConcatenate, "tscoll/cnormtst/TestConcatenate");
137 addTest(root, &TestNextPrevious, "tscoll/cnormtst/TestNextPrevious");
138 addTest(root, &TestFCNFKCClosure, "tscoll/cnormtst/TestFCNFKCClosure");
139 addTest(root, &TestComposition, "tscoll/cnormtst/TestComposition");
140 }
141
142 void TestDecomp()
143 {
144 UErrorCode status = U_ZERO_ERROR;
145 int32_t x, neededLen, resLen;
146 UChar *source=NULL, *result=NULL;
147 status = U_ZERO_ERROR;
148 resLen=0;
149 log_verbose("Testing unorm_normalize with Decomp canonical\n");
150 for(x=0; x < LENGTHOF(canonTests); x++)
151 {
152 source=CharsToUChars(canonTests[x][0]);
153 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFD, 0, NULL, 0, &status);
154 if(status==U_BUFFER_OVERFLOW_ERROR)
155 {
156 status=U_ZERO_ERROR;
157 resLen=neededLen+1;
158 result=(UChar*)malloc(sizeof(UChar*) * resLen);
159 unorm_normalize(source, u_strlen(source), UNORM_NFD, 0, result, resLen, &status);
160 }
161 if(U_FAILURE(status)){
162 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
163 } else {
164 assertEqual(result, canonTests[x][1], x);
165 }
166 free(result);
167 free(source);
168 }
169 }
170
171 void TestCompatDecomp()
172 {
173 UErrorCode status = U_ZERO_ERROR;
174 int32_t x, neededLen, resLen;
175 UChar *source=NULL, *result=NULL;
176 status = U_ZERO_ERROR;
177 resLen=0;
178 log_verbose("Testing unorm_normalize with Decomp compat\n");
179 for(x=0; x < LENGTHOF(compatTests); x++)
180 {
181 source=CharsToUChars(compatTests[x][0]);
182 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKD, 0, NULL, 0, &status);
183 if(status==U_BUFFER_OVERFLOW_ERROR)
184 {
185 status=U_ZERO_ERROR;
186 resLen=neededLen+1;
187 result=(UChar*)malloc(sizeof(UChar*) * resLen);
188 unorm_normalize(source, u_strlen(source), UNORM_NFKD, 0, result, resLen, &status);
189 }
190 if(U_FAILURE(status)){
191 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
192 } else {
193 assertEqual(result, compatTests[x][1], x);
194 }
195 free(result);
196 free(source);
197 }
198 }
199
200 void TestCanonDecompCompose()
201 {
202 UErrorCode status = U_ZERO_ERROR;
203 int32_t x, neededLen, resLen;
204 UChar *source=NULL, *result=NULL;
205 status = U_ZERO_ERROR;
206 resLen=0;
207 log_verbose("Testing unorm_normalize with Decomp can compose compat\n");
208 for(x=0; x < LENGTHOF(canonTests); x++)
209 {
210 source=CharsToUChars(canonTests[x][0]);
211 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, NULL, 0, &status);
212 if(status==U_BUFFER_OVERFLOW_ERROR)
213 {
214 status=U_ZERO_ERROR;
215 resLen=neededLen+1;
216 result=(UChar*)malloc(sizeof(UChar*) * resLen);
217 unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, result, resLen, &status);
218 }
219 if(U_FAILURE(status)){
220 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source),myErrorName(status) );
221 } else {
222 assertEqual(result, canonTests[x][2], x);
223 }
224 free(result);
225 free(source);
226 }
227 }
228
229 void TestCompatDecompCompose()
230 {
231 UErrorCode status = U_ZERO_ERROR;
232 int32_t x, neededLen, resLen;
233 UChar *source=NULL, *result=NULL;
234 status = U_ZERO_ERROR;
235 resLen=0;
236 log_verbose("Testing unorm_normalize with compat decomp compose can\n");
237 for(x=0; x < LENGTHOF(compatTests); x++)
238 {
239 source=CharsToUChars(compatTests[x][0]);
240 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKC, 0, NULL, 0, &status);
241 if(status==U_BUFFER_OVERFLOW_ERROR)
242 {
243 status=U_ZERO_ERROR;
244 resLen=neededLen+1;
245 result=(UChar*)malloc(sizeof(UChar*) * resLen);
246 unorm_normalize(source, u_strlen(source), UNORM_NFKC, 0, result, resLen, &status);
247 }
248 if(U_FAILURE(status)){
249 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
250 } else {
251 assertEqual(result, compatTests[x][2], x);
252 }
253 free(result);
254 free(source);
255 }
256 }
257
258
259 /*
260 static void assertEqual(const UChar* result, const UChar* expected, int32_t index)
261 {
262 if(u_strcmp(result, expected)!=0){
263 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, austrdup(expected),
264 austrdup(result) );
265 }
266 }
267 */
268
269 static void assertEqual(const UChar* result, const char* expected, int32_t index)
270 {
271 UChar *expectedUni = CharsToUChars(expected);
272 if(u_strcmp(result, expectedUni)!=0){
273 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
274 austrdup(result) );
275 }
276 free(expectedUni);
277 }
278
279 static void TestNull_check(UChar *src, int32_t srcLen,
280 UChar *exp, int32_t expLen,
281 UNormalizationMode mode,
282 const char *name)
283 {
284 UErrorCode status = U_ZERO_ERROR;
285 int32_t len, i;
286
287 UChar result[50];
288
289
290 status = U_ZERO_ERROR;
291
292 for(i=0;i<50;i++)
293 {
294 result[i] = 0xFFFD;
295 }
296
297 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
298
299 if(U_FAILURE(status)) {
300 log_err("unorm_normalize(%s) with 0x0000 failed: %s\n", name, u_errorName(status));
301 } else if (len != expLen) {
302 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
303 }
304
305 {
306 for(i=0;i<len;i++){
307 if(exp[i] != result[i]) {
308 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
309 name,
310 i,
311 exp[i],
312 result[i]);
313 return;
314 }
315 log_verbose(" %d: \\u%04X\n", i, result[i]);
316 }
317 }
318
319 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
320 }
321
322 void TestNull()
323 {
324
325 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
326 int32_t source_comp_len = 4;
327 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
328 int32_t expect_comp_len = 3;
329
330 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
331 int32_t source_dcmp_len = 3;
332 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
333 int32_t expect_dcmp_len = 5;
334
335 TestNull_check(source_comp,
336 source_comp_len,
337 expect_comp,
338 expect_comp_len,
339 UNORM_NFC,
340 "UNORM_NFC");
341
342 TestNull_check(source_dcmp,
343 source_dcmp_len,
344 expect_dcmp,
345 expect_dcmp_len,
346 UNORM_NFD,
347 "UNORM_NFD");
348
349 TestNull_check(source_comp,
350 source_comp_len,
351 expect_comp,
352 expect_comp_len,
353 UNORM_NFKC,
354 "UNORM_NFKC");
355
356
357 }
358
359 static void TestQuickCheckResultNO()
360 {
361 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
362 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
363 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
364 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
365 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
366 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
367 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
368 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
369
370
371 const int SIZE = 10;
372
373 int count = 0;
374 UErrorCode error = U_ZERO_ERROR;
375
376 for (; count < SIZE; count ++)
377 {
378 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
379 UNORM_NO)
380 {
381 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
382 return;
383 }
384 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
385 UNORM_NO)
386 {
387 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
388 return;
389 }
390 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
391 UNORM_NO)
392 {
393 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
394 return;
395 }
396 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
397 UNORM_NO)
398 {
399 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
400 return;
401 }
402 }
403 }
404
405
406 static void TestQuickCheckResultYES()
407 {
408 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
409 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
410 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
411 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
412 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
413 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
414 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
415 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
416
417 const int SIZE = 10;
418 int count = 0;
419 UErrorCode error = U_ZERO_ERROR;
420
421 UChar cp = 0;
422 while (cp < 0xA0)
423 {
424 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
425 {
426 log_err("ERROR in NFD quick check at U+%04x\n", cp);
427 return;
428 }
429 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
430 UNORM_YES)
431 {
432 log_err("ERROR in NFC quick check at U+%04x\n", cp);
433 return;
434 }
435 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
436 {
437 log_err("ERROR in NFKD quick check at U+%04x\n", cp);
438 return;
439 }
440 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
441 UNORM_YES)
442 {
443 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
444 return;
445 }
446 cp ++;
447 }
448
449 for (; count < SIZE; count ++)
450 {
451 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
452 UNORM_YES)
453 {
454 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
455 return;
456 }
457 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
458 != UNORM_YES)
459 {
460 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
461 return;
462 }
463 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
464 UNORM_YES)
465 {
466 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
467 return;
468 }
469 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
470 UNORM_YES)
471 {
472 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
473 return;
474 }
475 }
476 }
477
478 static void TestQuickCheckResultMAYBE()
479 {
480 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
481 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
482 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
483 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
484
485
486 const int SIZE = 10;
487
488 int count = 0;
489 UErrorCode error = U_ZERO_ERROR;
490
491 /* NFD and NFKD does not have any MAYBE codepoints */
492 for (; count < SIZE; count ++)
493 {
494 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
495 UNORM_MAYBE)
496 {
497 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
498 return;
499 }
500 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
501 UNORM_MAYBE)
502 {
503 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
504 return;
505 }
506 }
507 }
508
509 static void TestQuickCheckStringResult()
510 {
511 int count;
512 UChar *d = NULL;
513 UChar *c = NULL;
514 UErrorCode error = U_ZERO_ERROR;
515
516 for (count = 0; count < LENGTHOF(canonTests); count ++)
517 {
518 d = CharsToUChars(canonTests[count][1]);
519 c = CharsToUChars(canonTests[count][2]);
520 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
521 UNORM_YES)
522 {
523 log_err("ERROR in NFD quick check for string at count %d\n", count);
524 return;
525 }
526
527 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
528 UNORM_NO)
529 {
530 log_err("ERROR in NFC quick check for string at count %d\n", count);
531 return;
532 }
533
534 free(d);
535 free(c);
536 }
537
538 for (count = 0; count < LENGTHOF(compatTests); count ++)
539 {
540 d = CharsToUChars(compatTests[count][1]);
541 c = CharsToUChars(compatTests[count][2]);
542 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
543 UNORM_YES)
544 {
545 log_err("ERROR in NFKD quick check for string at count %d\n", count);
546 return;
547 }
548
549 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
550 UNORM_YES)
551 {
552 log_err("ERROR in NFKC quick check for string at count %d\n", count);
553 return;
554 }
555
556 free(d);
557 free(c);
558 }
559 }
560
/*
 * Entry point for the unorm_quickCheck() tests: runs the single-code-point
 * NO / YES / MAYBE checks and then the whole-string checks, in that order.
 */
void TestQuickCheck(void)
{
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();

    TestQuickCheckStringResult();
}
568
569 /*
570 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
571 * normalized, and some that are not.
572 * Here we pick some specific cases and test the C API.
573 */
574 static void TestIsNormalized(void) {
575 static const UChar notNFC[][8]={ /* strings that are not in NFC */
576 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
577 { 0xfb1d, 0 }, /* excluded from composition */
578 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
579 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
580 };
581 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
582 { 0x1100, 0x1161, 0 }, /* Jamo compose */
583 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
584 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
585 };
586
587 int32_t i;
588 UErrorCode errorCode;
589
590 /* API test */
591
592 /* normal case with length>=0 (length -1 used for special cases below) */
593 errorCode=U_ZERO_ERROR;
594 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
595 log_err("error: !isNormalized(<U+0300>, NFC) (%s)\n", u_errorName(errorCode));
596 }
597
598 /* incoming U_FAILURE */
599 errorCode=U_TRUNCATED_CHAR_FOUND;
600 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
601 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
602 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
603 }
604
605 /* NULL source */
606 errorCode=U_ZERO_ERROR;
607 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
608 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
609 log_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode));
610 }
611
612 /* bad length */
613 errorCode=U_ZERO_ERROR;
614 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
615 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
616 log_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode));
617 }
618
619 /* specific cases */
620 for(i=0; i<LENGTHOF(notNFC); ++i) {
621 errorCode=U_ZERO_ERROR;
622 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
623 log_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s)\n", i, u_errorName(errorCode));
624 }
625 errorCode=U_ZERO_ERROR;
626 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
627 log_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
628 }
629 }
630 for(i=0; i<LENGTHOF(notNFKC); ++i) {
631 errorCode=U_ZERO_ERROR;
632 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
633 log_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
634 }
635 }
636 }
637
638 void TestCheckFCD()
639 {
640 UErrorCode status = U_ZERO_ERROR;
641 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
642 0x0A};
643 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
644 0x02B9, 0x0314, 0x0315, 0x0316};
645 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
646 0x0050, 0x0730, 0x09EE, 0x1E10};
647
648 static const UChar datastr[][5] =
649 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
650 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
651 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
652 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
653 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
654
655 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
656 0x6a,
657 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
658 0xea,
659 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
660 0x0307, 0x0308, 0x0309, 0x030a,
661 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
662 0x0327, 0x0328, 0x0329, 0x032a,
663 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
664 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
665
666 int count = 0;
667
668 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
669 log_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES\n");
670 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
671 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
672 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
673 log_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES\n");
674
675 if (U_FAILURE(status))
676 log_err("unorm_quickCheck(FCD) failed: %s\n", u_errorName(status));
677
678 while (count < 4)
679 {
680 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
681 if (U_FAILURE(status)) {
682 log_err("unorm_quickCheck(FCD) failed: exception occured at data set %d\n", count);
683 break;
684 }
685 else {
686 if (result[count] != fcdresult) {
687 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
688 result[count]);
689 }
690 }
691 count ++;
692 }
693
694 /* random checks of long strings */
695 status = U_ZERO_ERROR;
696 srand((unsigned)time( NULL ));
697
698 for (count = 0; count < 50; count ++)
699 {
700 int size = 0;
701 UBool testresult = UNORM_YES;
702 UChar data[20];
703 UChar norm[100];
704 UChar nfd[100];
705 int normsize = 0;
706 int nfdsize = 0;
707
708 while (size != 19) {
709 data[size] = datachar[(rand() * 50) / RAND_MAX];
710 log_verbose("0x%x", data[size]);
711 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
712 norm + normsize, 100 - normsize, &status);
713 if (U_FAILURE(status)) {
714 log_err("unorm_quickCheck(FCD) failed: exception occured at data generation\n");
715 break;
716 }
717 size ++;
718 }
719 log_verbose("\n");
720
721 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
722 nfd, 100, &status);
723 if (U_FAILURE(status)) {
724 log_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation\n");
725 }
726
727 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
728 testresult = UNORM_NO;
729 }
730 if (testresult == UNORM_YES) {
731 log_verbose("result UNORM_YES\n");
732 }
733 else {
734 log_verbose("result UNORM_NO\n");
735 }
736
737 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
738 log_err("unorm_quickCheck(FCD) failed: expected %d for random data\n", testresult);
739 }
740 }
741 }
742
743 static void
744 TestAPI() {
745 static const UChar in[]={ 0x68, 0xe4 };
746 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
747 UErrorCode errorCode;
748 int32_t length;
749
750 /* try preflighting */
751 errorCode=U_ZERO_ERROR;
752 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
753 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
754 log_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
755 return;
756 }
757
758 errorCode=U_ZERO_ERROR;
759 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
760 if(U_FAILURE(errorCode)) {
761 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
762 return;
763 }
764 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
765 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
766 return;
767 }
768 }
769
770 /* test cases to improve test code coverage */
/* Code points used by TestNormCoverage() to build its long test string. */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */
    HANGUL_K_WEO         = 0x315D,  /* NFKD->Jamo V U+116f */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116F,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11AA,  /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00          = 0xAC00,                  /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = HANGUL_AC00 + 14*28 + 3, /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1D157,
    MUSICAL_HALF_NOTE     = 0x1D15E,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1D165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1D17C     /* cc=220 */
};
788
789 static void
790 TestNormCoverage() {
791 static UChar input[2000], expect[3000], output[3000];
792 UErrorCode errorCode;
793 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
794
795 /* create a long and nasty string with NFKC-unsafe characters */
796 inLength=0;
797
798 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
799 input[inLength++]=HANGUL_KIYEOK;
800 input[inLength++]=HANGUL_WEO;
801 input[inLength++]=HANGUL_KIYEOK_SIOS;
802
803 input[inLength++]=HANGUL_KIYEOK;
804 input[inLength++]=HANGUL_WEO;
805 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
806
807 input[inLength++]=HANGUL_KIYEOK;
808 input[inLength++]=HANGUL_K_WEO;
809 input[inLength++]=HANGUL_KIYEOK_SIOS;
810
811 input[inLength++]=HANGUL_KIYEOK;
812 input[inLength++]=HANGUL_K_WEO;
813 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
814
815 input[inLength++]=HANGUL_K_KIYEOK;
816 input[inLength++]=HANGUL_WEO;
817 input[inLength++]=HANGUL_KIYEOK_SIOS;
818
819 input[inLength++]=HANGUL_K_KIYEOK;
820 input[inLength++]=HANGUL_WEO;
821 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
822
823 input[inLength++]=HANGUL_K_KIYEOK;
824 input[inLength++]=HANGUL_K_WEO;
825 input[inLength++]=HANGUL_KIYEOK_SIOS;
826
827 input[inLength++]=HANGUL_K_KIYEOK;
828 input[inLength++]=HANGUL_K_WEO;
829 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
830
831 /* Hangul LV with normal/compatibility Jamo T */
832 input[inLength++]=HANGUL_AC00;
833 input[inLength++]=HANGUL_KIYEOK_SIOS;
834
835 input[inLength++]=HANGUL_AC00;
836 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
837
838 /* compatibility Jamo L, V */
839 input[inLength++]=HANGUL_K_KIYEOK;
840 input[inLength++]=HANGUL_K_WEO;
841
842 hangulPrefixLength=inLength;
843
844 input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
845 input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
846 for(i=0; i<200; ++i) {
847 input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
848 input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
849 input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
850 input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
851 }
852
853 /* (compatibility) Jamo L, T do not compose */
854 input[inLength++]=HANGUL_K_KIYEOK;
855 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
856
857 /* quick checks */
858 errorCode=U_ZERO_ERROR;
859 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
860 log_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s)\n", u_errorName(errorCode));
861 }
862 errorCode=U_ZERO_ERROR;
863 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
864 log_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s)\n", u_errorName(errorCode));
865 }
866 errorCode=U_ZERO_ERROR;
867 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
868 log_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s)\n", u_errorName(errorCode));
869 }
870 errorCode=U_ZERO_ERROR;
871 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
872 log_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s)\n", u_errorName(errorCode));
873 }
874 errorCode=U_ZERO_ERROR;
875 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
876 log_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s)\n", u_errorName(errorCode));
877 }
878
879 /* NFKC */
880 expectLength=0;
881 expect[expectLength++]=HANGUL_SYLLABLE;
882
883 expect[expectLength++]=HANGUL_SYLLABLE;
884
885 expect[expectLength++]=HANGUL_SYLLABLE;
886
887 expect[expectLength++]=HANGUL_SYLLABLE;
888
889 expect[expectLength++]=HANGUL_SYLLABLE;
890
891 expect[expectLength++]=HANGUL_SYLLABLE;
892
893 expect[expectLength++]=HANGUL_SYLLABLE;
894
895 expect[expectLength++]=HANGUL_SYLLABLE;
896
897 expect[expectLength++]=HANGUL_AC00+3;
898
899 expect[expectLength++]=HANGUL_AC00+3;
900
901 expect[expectLength++]=HANGUL_AC00+14*28;
902
903 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
904 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
905 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
906 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
907 for(i=0; i<200; ++i) {
908 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
909 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
910 }
911 for(i=0; i<200; ++i) {
912 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
913 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
914 }
915
916 expect[expectLength++]=HANGUL_KIYEOK;
917 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
918
919 /* try destination overflow first */
920 errorCode=U_ZERO_ERROR;
921 preflightLength=unorm_normalize(input, inLength,
922 UNORM_NFKC, 0,
923 output, 100, /* too short */
924 &errorCode);
925 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
926 log_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s\n", u_errorName(errorCode));
927 }
928
929 /* real NFKC */
930 errorCode=U_ZERO_ERROR;
931 length=unorm_normalize(input, inLength,
932 UNORM_NFKC, 0,
933 output, sizeof(output)/U_SIZEOF_UCHAR,
934 &errorCode);
935 if(U_FAILURE(errorCode)) {
936 log_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s\n", u_errorName(errorCode));
937 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
938 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
939 for(i=0; i<length; ++i) {
940 if(output[i]!=expect[i]) {
941 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
942 break;
943 }
944 }
945 }
946 if(length!=preflightLength) {
947 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
948 }
949
950 /* FCD */
951 u_memcpy(expect, input, hangulPrefixLength);
952 expectLength=hangulPrefixLength;
953
954 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
955 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
956 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
957 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
958 for(i=0; i<200; ++i) {
959 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
960 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
961 }
962 for(i=0; i<200; ++i) {
963 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
964 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
965 }
966
967 expect[expectLength++]=HANGUL_K_KIYEOK;
968 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
969
970 errorCode=U_ZERO_ERROR;
971 length=unorm_normalize(input, inLength,
972 UNORM_FCD, 0,
973 output, sizeof(output)/U_SIZEOF_UCHAR,
974 &errorCode);
975 if(U_FAILURE(errorCode)) {
976 log_err("error unorm_normalize(long input, UNORM_FCD) failed with %s\n", u_errorName(errorCode));
977 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
978 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
979 for(i=0; i<length; ++i) {
980 if(output[i]!=expect[i]) {
981 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
982 break;
983 }
984 }
985 }
986 }
987
988 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
989 static void
990 TestConcatenate(void) {
991 /* "re + 'sume'" */
992 static const UChar
993 left[]={
994 0x72, 0x65, 0
995 },
996 right[]={
997 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
998 },
999 expect[]={
1000 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
1001 };
1002
1003 UChar buffer[100];
1004 UErrorCode errorCode;
1005 int32_t length;
1006
1007 /* left with length, right NUL-terminated */
1008 errorCode=U_ZERO_ERROR;
1009 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1010 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
1011 log_err("error: unorm_concatenate()=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1012 }
1013
1014 /* preflighting */
1015 errorCode=U_ZERO_ERROR;
1016 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
1017 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
1018 log_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1019 }
1020
1021 buffer[2]=0x5555;
1022 errorCode=U_ZERO_ERROR;
1023 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1024 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1025 log_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1026 }
1027
1028 /* enter with U_FAILURE */
1029 buffer[2]=0xaaaa;
1030 errorCode=U_UNEXPECTED_TOKEN;
1031 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1032 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1033 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1034 }
1035
1036 /* illegal arguments */
1037 buffer[2]=0xaaaa;
1038 errorCode=U_ZERO_ERROR;
1039 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1040 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1041 log_err("error: unorm_concatenate(left=NULL)=%ld failed with %s\n", length, u_errorName(errorCode));
1042 }
1043
1044 errorCode=U_ZERO_ERROR;
1045 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1046 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1047 log_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s\n", length, u_errorName(errorCode));
1048 }
1049 }
1050
/*
 * Code unit U+002B (plus sign): separates the expected output pieces
 * in the test strings used by the iteration tests.
 */
enum { _PLUS = 0x002b };
1054
1055 static const char *const _modeString[UNORM_MODE_COUNT]={
1056 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1057 };
1058
1059 static void
1060 _testIter(const UChar *src, int32_t srcLength,
1061 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1062 const UChar *out, int32_t outLength,
1063 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1064 UChar buffer[4];
1065 const UChar *expect, *outLimit, *in;
1066 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1067 UErrorCode errorCode;
1068 UBool neededToNormalize, expectNeeded;
1069
1070 errorCode=U_ZERO_ERROR;
1071 outLimit=out+outLength;
1072 if(forward) {
1073 expect=out;
1074 i=index=0;
1075 } else {
1076 expect=outLimit;
1077 i=srcIndexesLength-2;
1078 index=srcLength;
1079 }
1080
1081 for(;;) {
1082 prevIndex=index;
1083 if(forward) {
1084 if(!iter->hasNext(iter)) {
1085 return;
1086 }
1087 length=unorm_next(iter,
1088 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1089 mode, 0,
1090 (UBool)(out!=NULL), &neededToNormalize,
1091 &errorCode);
1092 expectIndex=srcIndexes[i+1];
1093 in=src+prevIndex;
1094 inLength=expectIndex-prevIndex;
1095
1096 if(out!=NULL) {
1097 /* get output piece from between plus signs */
1098 expectLength=0;
1099 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1100 ++expectLength;
1101 }
1102 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1103 } else {
1104 expect=in;
1105 expectLength=inLength;
1106 expectNeeded=FALSE;
1107 }
1108 } else {
1109 if(!iter->hasPrevious(iter)) {
1110 return;
1111 }
1112 length=unorm_previous(iter,
1113 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1114 mode, 0,
1115 (UBool)(out!=NULL), &neededToNormalize,
1116 &errorCode);
1117 expectIndex=srcIndexes[i];
1118 in=src+expectIndex;
1119 inLength=prevIndex-expectIndex;
1120
1121 if(out!=NULL) {
1122 /* get output piece from between plus signs */
1123 expectLength=0;
1124 while(expect!=out && expect[-1]!=_PLUS) {
1125 ++expectLength;
1126 --expect;
1127 }
1128 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1129 } else {
1130 expect=in;
1131 expectLength=inLength;
1132 expectNeeded=FALSE;
1133 }
1134 }
1135 index=iter->getIndex(iter, UITER_CURRENT);
1136
1137 if(U_FAILURE(errorCode)) {
1138 log_err("error unorm iteration (next/previous %d %s)[%d]: %s\n",
1139 forward, _modeString[mode], i, u_errorName(errorCode));
1140 return;
1141 }
1142 if(expectIndex!=index) {
1143 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1144 forward, _modeString[mode], i, index, expectIndex);
1145 return;
1146 }
1147 if(expectLength!=length) {
1148 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1149 forward, _modeString[mode], i, length, expectLength);
1150 return;
1151 }
1152 if(0!=u_memcmp(expect, buffer, length)) {
1153 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1154 forward, _modeString[mode], i);
1155 return;
1156 }
1157 if(neededToNormalize!=expectNeeded) {
1158 }
1159
1160 if(forward) {
1161 expect+=expectLength+1; /* go after the + */
1162 ++i;
1163 } else {
1164 --expect; /* go before the + */
1165 --i;
1166 }
1167 }
1168 }
1169
1170 static void
1171 TestNextPrevious() {
1172 static const UChar
1173 src[]={ /* input string */
1174 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1175 },
1176 nfd[]={ /* + separates expected output pieces */
1177 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1178 },
1179 nfkd[]={
1180 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1181 },
1182 nfc[]={
1183 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1184 },
1185 nfkc[]={
1186 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1187 },
1188 fcd[]={
1189 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1190 };
1191
1192 /* expected iterator indexes in the source string for each iteration piece */
1193 static const int32_t
1194 nfdIndexes[]={
1195 0, 1, 2, 5, 6, 7
1196 },
1197 nfkdIndexes[]={
1198 0, 1, 2, 5, 6, 7
1199 },
1200 nfcIndexes[]={
1201 0, 1, 2, 5, 6, 7
1202 },
1203 nfkcIndexes[]={
1204 0, 1, 2, 5, 7
1205 },
1206 fcdIndexes[]={
1207 0, 1, 2, 5, 6, 7
1208 };
1209
1210 UCharIterator iter;
1211
1212 UChar buffer[4];
1213 int32_t length;
1214
1215 UBool neededToNormalize;
1216 UErrorCode errorCode;
1217
1218 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1219
1220 /* test iteration with doNormalize */
1221 iter.index=0;
1222 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1223 iter.index=0;
1224 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1225 iter.index=0;
1226 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1227 iter.index=0;
1228 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1229 iter.index=0;
1230 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1231
1232 iter.index=iter.length;
1233 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1234 iter.index=iter.length;
1235 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1236 iter.index=iter.length;
1237 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1238 iter.index=iter.length;
1239 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1240 iter.index=iter.length;
1241 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1242
1243 /* test iteration without doNormalize */
1244 iter.index=0;
1245 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1246 iter.index=0;
1247 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1248 iter.index=0;
1249 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1250 iter.index=0;
1251 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1252 iter.index=0;
1253 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1254
1255 iter.index=iter.length;
1256 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1257 iter.index=iter.length;
1258 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1259 iter.index=iter.length;
1260 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1261 iter.index=iter.length;
1262 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1263 iter.index=iter.length;
1264 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1265
1266 /* try without neededToNormalize */
1267 errorCode=U_ZERO_ERROR;
1268 buffer[0]=5;
1269 iter.index=1;
1270 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1271 UNORM_NFD, 0, TRUE, NULL,
1272 &errorCode);
1273 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1274 log_err("error unorm_next(without needed) %s\n", u_errorName(errorCode));
1275 return;
1276 }
1277
1278 /* preflight */
1279 neededToNormalize=9;
1280 iter.index=1;
1281 length=unorm_next(&iter, NULL, 0,
1282 UNORM_NFD, 0, TRUE, &neededToNormalize,
1283 &errorCode);
1284 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1285 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1286 return;
1287 }
1288
1289 errorCode=U_ZERO_ERROR;
1290 buffer[0]=buffer[1]=5;
1291 neededToNormalize=9;
1292 iter.index=1;
1293 length=unorm_next(&iter, buffer, 1,
1294 UNORM_NFD, 0, TRUE, &neededToNormalize,
1295 &errorCode);
1296 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1297 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1298 return;
1299 }
1300
1301 /* no iterator */
1302 errorCode=U_ZERO_ERROR;
1303 buffer[0]=buffer[1]=5;
1304 neededToNormalize=9;
1305 iter.index=1;
1306 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1307 UNORM_NFD, 0, TRUE, &neededToNormalize,
1308 &errorCode);
1309 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1310 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1311 return;
1312 }
1313
1314 /* illegal mode */
1315 buffer[0]=buffer[1]=5;
1316 neededToNormalize=9;
1317 iter.index=1;
1318 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1319 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1320 &errorCode);
1321 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1322 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1323 return;
1324 }
1325
1326 /* error coming in */
1327 errorCode=U_MISPLACED_QUANTIFIER;
1328 buffer[0]=5;
1329 iter.index=1;
1330 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1331 UNORM_NFD, 0, TRUE, NULL,
1332 &errorCode);
1333 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1334 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1335 return;
1336 }
1337
1338 /* missing pErrorCode */
1339 buffer[0]=5;
1340 iter.index=1;
1341 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1342 UNORM_NFD, 0, TRUE, NULL,
1343 NULL);
1344 if(iter.index!=1 || buffer[0]!=5) {
1345 log_err("error unorm_next(pErrorCode==NULL) %s\n", u_errorName(errorCode));
1346 return;
1347 }
1348 }
1349
1350 static void
1351 TestFCNFKCClosure(void) {
1352 static const struct {
1353 UChar32 c;
1354 const UChar s[6];
1355 } tests[]={
1356 { 0x037A, { 0x0020, 0x03B9, 0 } },
1357 { 0x03D2, { 0x03C5, 0 } },
1358 { 0x20A8, { 0x0072, 0x0073, 0 } },
1359 { 0x210B, { 0x0068, 0 } },
1360 { 0x210C, { 0x0068, 0 } },
1361 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1362 { 0x2122, { 0x0074, 0x006D, 0 } },
1363 { 0x2128, { 0x007A, 0 } },
1364 { 0x1D5DB, { 0x0068, 0 } },
1365 { 0x1D5ED, { 0x007A, 0 } },
1366 { 0x0061, { 0 } }
1367 };
1368
1369 UChar buffer[8];
1370 UErrorCode errorCode;
1371 int32_t i, length;
1372
1373 for(i=0; i<LENGTHOF(tests); ++i) {
1374 errorCode=U_ZERO_ERROR;
1375 length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
1376 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1377 log_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s)\n", tests[i].c, u_errorName(errorCode));
1378 }
1379 }
1380
1381 /* error handling */
1382 errorCode=U_ZERO_ERROR;
1383 length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
1384 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1385 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1386 }
1387
1388 length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
1389 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1390 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1391 }
1392 }
1393
1394 static void
1395 TestQuickCheckPerCP() {
1396 UErrorCode errorCode;
1397 UChar32 c, lead, trail;
1398 UChar s[U16_MAX_LENGTH], nfd[16];
1399 int32_t length, lccc1, lccc2, tccc1, tccc2;
1400 UNormalizationCheckResult qc1, qc2;
1401
1402 if(
1403 u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1404 u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1405 u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1406 u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1407 u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1408 u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1409 ) {
1410 log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1411 }
1412
1413 /*
1414 * compare the quick check property values for some code points
1415 * to the quick check results for checking same-code point strings
1416 */
1417 errorCode=U_ZERO_ERROR;
1418 c=0;
1419 while(c<0x110000) {
1420 length=0;
1421 U16_APPEND_UNSAFE(s, length, c);
1422
1423 qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1424 qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1425 if(qc1!=qc2) {
1426 log_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x\n", qc1, qc2, c);
1427 }
1428
1429 qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1430 qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1431 if(qc1!=qc2) {
1432 log_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x\n", qc1, qc2, c);
1433 }
1434
1435 qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1436 qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1437 if(qc1!=qc2) {
1438 log_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x\n", qc1, qc2, c);
1439 }
1440
1441 qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1442 qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1443 if(qc1!=qc2) {
1444 log_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x\n", qc1, qc2, c);
1445 }
1446
1447 length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
1448 U16_GET(nfd, 0, 0, length, lead);
1449 U16_GET(nfd, 0, length-1, length, trail);
1450
1451 lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1452 lccc2=u_getCombiningClass(lead);
1453 tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1454 tccc2=u_getCombiningClass(trail);
1455
1456 if(lccc1!=lccc2) {
1457 log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1458 lccc1, lccc2, c);
1459 }
1460 if(tccc1!=tccc2) {
1461 log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1462 tccc1, tccc2, c);
1463 }
1464
1465 /* skip some code points */
1466 c=(20*c)/19+1;
1467 }
1468 }
1469
1470 static void
1471 TestComposition(void) {
1472 static const struct {
1473 UNormalizationMode mode;
1474 uint32_t options;
1475 UChar input[12];
1476 UChar expect[12];
1477 } cases[]={
1478 /*
1479 * special cases for UAX #15 bug
1480 * see Unicode Public Review Issue #29
1481 * at http://www.unicode.org/review/resolved-pri.html#pri29
1482 */
1483 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
1484 { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1485 { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1486 { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
1487
1488 { UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0xac00, 0x0300, 0x0327 } },
1489 { UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0xac01, 0x0300, 0x0327 } },
1490 { UNORM_NFC, UNORM_BEFORE_PRI_29, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac01, 0x0327, 0x0300 } },
1491 { UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b4b, 0x0300 } }
1492
1493 /* TODO: add test cases for UNORM_FCC here (j2151) */
1494 };
1495
1496 UChar output[16];
1497 UErrorCode errorCode;
1498 int32_t i, length;
1499
1500 for(i=0; i<LENGTHOF(cases); ++i) {
1501 errorCode=U_ZERO_ERROR;
1502 length=unorm_normalize(
1503 cases[i].input, -1,
1504 cases[i].mode, cases[i].options,
1505 output, LENGTHOF(output),
1506 &errorCode);
1507 if( U_FAILURE(errorCode) ||
1508 length!=u_strlen(cases[i].expect) ||
1509 0!=u_memcmp(output, cases[i].expect, length)
1510 ) {
1511 log_err("unexpected result for case %d\n", i);
1512 }
1513 }
1514 }
1515
1516 #endif /* #if !UCONFIG_NO_NORMALIZATION */