]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cnormtst.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cnormtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CNORMTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API
13 * synwee added test for quick check
14 * synwee added test for checkFCD
15 *********************************************************************************/
16 /*tests for u_normalization*/
17 #include "unicode/utypes.h"
18 #include "cintltst.h"
19
20 #if UCONFIG_NO_NORMALIZATION
21
22 void addNormTest(TestNode** root) {
23 /* no normalization - nothing to do */
24 }
25
26 #else
27
28 #include <stdlib.h>
29 #include <time.h>
30 #include "unicode/uchar.h"
31 #include "unicode/ustring.h"
32 #include "unicode/unorm.h"
33 #include "cnormtst.h"
34
/*
 * Number of elements in a true array (must not be applied to a pointer).
 * The argument is fully parenthesized, and the result is an int32_t so that
 * it compares cleanly against the signed loop counters used in this file.
 */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
36
/* Forward declarations of the file-local test cases registered by addNormTest(). */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
53
/*
 * Canonical normalization test vectors.
 * Each row holds { input, expected NFD result, expected NFC result }.
 * The strings are plain chars containing backslash-u escapes; the tests
 * convert them with CharsToUChars() before use.
 */
static const char* canonTests[][3] = {
    /* Input */  /* Decomposed */  /* Composed */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    { "\\u1e0a", "D\\u0307", "\\u1e0a" }, /* D-dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" }, /* D dot_above */

    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D dot_above ogonek dot_below */

    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" }, /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron */

    { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" } /* A grave grave_below: marks get reordered */
};
89
/*
 * Compatibility normalization test vectors.
 * Each row holds { input, expected NFKD result, expected NFKC result }.
 * The strings are plain chars containing backslash-u escapes; the tests
 * convert them with CharsToUChars() before use.
 */
static const char* compatTests[][3] = {
    /* Input */  /* Decomposed */  /* Composed */
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" }, /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" } /* ka + hw_ten */
};
112
113 void addNormTest(TestNode** root);
114
115 void addNormTest(TestNode** root)
116 {
117 addTest(root, &TestAPI, "tscoll/cnormtst/TestAPI");
118 addTest(root, &TestDecomp, "tscoll/cnormtst/TestDecomp");
119 addTest(root, &TestCompatDecomp, "tscoll/cnormtst/TestCompatDecomp");
120 addTest(root, &TestCanonDecompCompose, "tscoll/cnormtst/TestCanonDecompCompose");
121 addTest(root, &TestCompatDecompCompose, "tscoll/cnormtst/CompatDecompCompose");
122 addTest(root, &TestNull, "tscoll/cnormtst/TestNull");
123 addTest(root, &TestQuickCheck, "tscoll/cnormtst/TestQuickCheck");
124 addTest(root, &TestIsNormalized, "tscoll/cnormtst/TestIsNormalized");
125 addTest(root, &TestCheckFCD, "tscoll/cnormtst/TestCheckFCD");
126 addTest(root, &TestNormCoverage, "tscoll/cnormtst/TestNormCoverage");
127 addTest(root, &TestConcatenate, "tscoll/cnormtst/TestConcatenate");
128 addTest(root, &TestNextPrevious, "tscoll/cnormtst/TestNextPrevious");
129 addTest(root, &TestFCNFKCClosure, "tscoll/cnormtst/TestFCNFKCClosure");
130 }
131
132 void TestDecomp()
133 {
134 UErrorCode status = U_ZERO_ERROR;
135 int32_t x, neededLen, resLen;
136 UChar *source=NULL, *result=NULL;
137 status = U_ZERO_ERROR;
138 resLen=0;
139 log_verbose("Testing unorm_normalize with Decomp canonical\n");
140 for(x=0; x < ARRAY_LENGTH(canonTests); x++)
141 {
142 source=CharsToUChars(canonTests[x][0]);
143 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFD, 0, NULL, 0, &status);
144 if(status==U_BUFFER_OVERFLOW_ERROR)
145 {
146 status=U_ZERO_ERROR;
147 resLen=neededLen+1;
148 result=(UChar*)malloc(sizeof(UChar*) * resLen);
149 unorm_normalize(source, u_strlen(source), UNORM_NFD, 0, result, resLen, &status);
150 }
151 if(U_FAILURE(status)){
152 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
153 } else {
154 assertEqual(result, canonTests[x][1], x);
155 }
156 free(result);
157 free(source);
158 }
159 }
160
161 void TestCompatDecomp()
162 {
163 UErrorCode status = U_ZERO_ERROR;
164 int32_t x, neededLen, resLen;
165 UChar *source=NULL, *result=NULL;
166 status = U_ZERO_ERROR;
167 resLen=0;
168 log_verbose("Testing unorm_normalize with Decomp compat\n");
169 for(x=0; x < ARRAY_LENGTH(compatTests); x++)
170 {
171 source=CharsToUChars(compatTests[x][0]);
172 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKD, 0, NULL, 0, &status);
173 if(status==U_BUFFER_OVERFLOW_ERROR)
174 {
175 status=U_ZERO_ERROR;
176 resLen=neededLen+1;
177 result=(UChar*)malloc(sizeof(UChar*) * resLen);
178 unorm_normalize(source, u_strlen(source), UNORM_NFKD, 0, result, resLen, &status);
179 }
180 if(U_FAILURE(status)){
181 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
182 } else {
183 assertEqual(result, compatTests[x][1], x);
184 }
185 free(result);
186 free(source);
187 }
188 }
189
190 void TestCanonDecompCompose()
191 {
192 UErrorCode status = U_ZERO_ERROR;
193 int32_t x, neededLen, resLen;
194 UChar *source=NULL, *result=NULL;
195 status = U_ZERO_ERROR;
196 resLen=0;
197 log_verbose("Testing unorm_normalize with Decomp can compose compat\n");
198 for(x=0; x < ARRAY_LENGTH(canonTests); x++)
199 {
200 source=CharsToUChars(canonTests[x][0]);
201 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, NULL, 0, &status);
202 if(status==U_BUFFER_OVERFLOW_ERROR)
203 {
204 status=U_ZERO_ERROR;
205 resLen=neededLen+1;
206 result=(UChar*)malloc(sizeof(UChar*) * resLen);
207 unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, result, resLen, &status);
208 }
209 if(U_FAILURE(status)){
210 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source),myErrorName(status) );
211 } else {
212 assertEqual(result, canonTests[x][2], x);
213 }
214 free(result);
215 free(source);
216 }
217 }
218
219 void TestCompatDecompCompose()
220 {
221 UErrorCode status = U_ZERO_ERROR;
222 int32_t x, neededLen, resLen;
223 UChar *source=NULL, *result=NULL;
224 status = U_ZERO_ERROR;
225 resLen=0;
226 log_verbose("Testing unorm_normalize with compat decomp compose can\n");
227 for(x=0; x < ARRAY_LENGTH(compatTests); x++)
228 {
229 source=CharsToUChars(compatTests[x][0]);
230 neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKC, 0, NULL, 0, &status);
231 if(status==U_BUFFER_OVERFLOW_ERROR)
232 {
233 status=U_ZERO_ERROR;
234 resLen=neededLen+1;
235 result=(UChar*)malloc(sizeof(UChar*) * resLen);
236 unorm_normalize(source, u_strlen(source), UNORM_NFKC, 0, result, resLen, &status);
237 }
238 if(U_FAILURE(status)){
239 log_err("ERROR in unorm_normalize at %s: %s\n", austrdup(source), myErrorName(status) );
240 } else {
241 assertEqual(result, compatTests[x][2], x);
242 }
243 free(result);
244 free(source);
245 }
246 }
247
248
249 /*
250 static void assertEqual(const UChar* result, const UChar* expected, int32_t index)
251 {
252 if(u_strcmp(result, expected)!=0){
253 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, austrdup(expected),
254 austrdup(result) );
255 }
256 }
257 */
258
259 static void assertEqual(const UChar* result, const char* expected, int32_t index)
260 {
261 UChar *expectedUni = CharsToUChars(expected);
262 if(u_strcmp(result, expectedUni)!=0){
263 log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
264 austrdup(result) );
265 }
266 free(expectedUni);
267 }
268
269 static void TestNull_check(UChar *src, int32_t srcLen,
270 UChar *exp, int32_t expLen,
271 UNormalizationMode mode,
272 const char *name)
273 {
274 UErrorCode status = U_ZERO_ERROR;
275 int32_t len, i;
276
277 UChar result[50];
278
279
280 status = U_ZERO_ERROR;
281
282 for(i=0;i<50;i++)
283 {
284 result[i] = 0xFFFD;
285 }
286
287 len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
288
289 if(U_FAILURE(status)) {
290 log_err("unorm_normalize(%s) with 0x0000 failed: %s\n", name, u_errorName(status));
291 } else if (len != expLen) {
292 log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
293 }
294
295 {
296 for(i=0;i<len;i++){
297 if(exp[i] != result[i]) {
298 log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
299 name,
300 i,
301 exp[i],
302 result[i]);
303 return;
304 }
305 log_verbose(" %d: \\u%04X\n", i, result[i]);
306 }
307 }
308
309 log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
310 }
311
312 void TestNull()
313 {
314
315 UChar source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
316 int32_t source_comp_len = 4;
317 UChar expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
318 int32_t expect_comp_len = 3;
319
320 UChar source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
321 int32_t source_dcmp_len = 3;
322 UChar expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
323 int32_t expect_dcmp_len = 5;
324
325 TestNull_check(source_comp,
326 source_comp_len,
327 expect_comp,
328 expect_comp_len,
329 UNORM_NFC,
330 "UNORM_NFC");
331
332 TestNull_check(source_dcmp,
333 source_dcmp_len,
334 expect_dcmp,
335 expect_dcmp_len,
336 UNORM_NFD,
337 "UNORM_NFD");
338
339 TestNull_check(source_comp,
340 source_comp_len,
341 expect_comp,
342 expect_comp_len,
343 UNORM_NFKC,
344 "UNORM_NFKC");
345
346
347 }
348
349 static void TestQuickCheckResultNO()
350 {
351 const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
352 0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
353 const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
354 0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
355 const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
356 0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
357 const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
358 0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
359
360
361 const int SIZE = 10;
362
363 int count = 0;
364 UErrorCode error = U_ZERO_ERROR;
365
366 for (; count < SIZE; count ++)
367 {
368 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
369 UNORM_NO)
370 {
371 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
372 return;
373 }
374 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
375 UNORM_NO)
376 {
377 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
378 return;
379 }
380 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
381 UNORM_NO)
382 {
383 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
384 return;
385 }
386 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
387 UNORM_NO)
388 {
389 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
390 return;
391 }
392 }
393 }
394
395
396 static void TestQuickCheckResultYES()
397 {
398 const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
399 0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
400 const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
401 0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
402 const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
403 0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
404 const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
405 0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
406
407 const int SIZE = 10;
408 int count = 0;
409 UErrorCode error = U_ZERO_ERROR;
410
411 UChar cp = 0;
412 while (cp < 0xA0)
413 {
414 if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
415 {
416 log_err("ERROR in NFD quick check at U+%04x\n", cp);
417 return;
418 }
419 if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
420 UNORM_YES)
421 {
422 log_err("ERROR in NFC quick check at U+%04x\n", cp);
423 return;
424 }
425 if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
426 {
427 log_err("ERROR in NFKD quick check at U+%04x\n", cp);
428 return;
429 }
430 if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
431 UNORM_YES)
432 {
433 log_err("ERROR in NFKC quick check at U+%04x\n", cp);
434 return;
435 }
436 cp ++;
437 }
438
439 for (; count < SIZE; count ++)
440 {
441 if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
442 UNORM_YES)
443 {
444 log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
445 return;
446 }
447 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
448 != UNORM_YES)
449 {
450 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
451 return;
452 }
453 if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
454 UNORM_YES)
455 {
456 log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
457 return;
458 }
459 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
460 UNORM_YES)
461 {
462 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
463 return;
464 }
465 }
466 }
467
468 static void TestQuickCheckResultMAYBE()
469 {
470 const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
471 0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
472 const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
473 0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
474
475
476 const int SIZE = 10;
477
478 int count = 0;
479 UErrorCode error = U_ZERO_ERROR;
480
481 /* NFD and NFKD does not have any MAYBE codepoints */
482 for (; count < SIZE; count ++)
483 {
484 if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
485 UNORM_MAYBE)
486 {
487 log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
488 return;
489 }
490 if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
491 UNORM_MAYBE)
492 {
493 log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
494 return;
495 }
496 }
497 }
498
499 static void TestQuickCheckStringResult()
500 {
501 int count;
502 UChar *d = NULL;
503 UChar *c = NULL;
504 UErrorCode error = U_ZERO_ERROR;
505
506 for (count = 0; count < ARRAY_LENGTH(canonTests); count ++)
507 {
508 d = CharsToUChars(canonTests[count][1]);
509 c = CharsToUChars(canonTests[count][2]);
510 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
511 UNORM_YES)
512 {
513 log_err("ERROR in NFD quick check for string at count %d\n", count);
514 return;
515 }
516
517 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
518 UNORM_NO)
519 {
520 log_err("ERROR in NFC quick check for string at count %d\n", count);
521 return;
522 }
523
524 free(d);
525 free(c);
526 }
527
528 for (count = 0; count < ARRAY_LENGTH(compatTests); count ++)
529 {
530 d = CharsToUChars(compatTests[count][1]);
531 c = CharsToUChars(compatTests[count][2]);
532 if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
533 UNORM_YES)
534 {
535 log_err("ERROR in NFKD quick check for string at count %d\n", count);
536 return;
537 }
538
539 if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
540 UNORM_YES)
541 {
542 log_err("ERROR in NFKC quick check for string at count %d\n", count);
543 return;
544 }
545
546 free(d);
547 free(c);
548 }
549 }
550
/*
 * Entry point for the unorm_quickCheck() tests: single code points expected
 * to yield NO, YES and MAYBE, followed by whole-string checks.
 */
void TestQuickCheck()
{
    /* per-code-point expectations */
    TestQuickCheckResultNO();
    TestQuickCheckResultYES();
    TestQuickCheckResultMAYBE();

    /* whole strings taken from the normalization test tables */
    TestQuickCheckStringResult();
}
558
559 /*
560 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
561 * normalized, and some that are not.
562 * Here we pick some specific cases and test the C API.
563 */
564 static void TestIsNormalized(void) {
565 static const UChar notNFC[][8]={ /* strings that are not in NFC */
566 { 0x62, 0x61, 0x300, 0x63, 0 }, /* 0061 0300 compose */
567 { 0xfb1d, 0 }, /* excluded from composition */
568 { 0x0627, 0x0653, 0 }, /* 0627 0653 compose */
569 { 0x3071, 0x306f, 0x309a, 0x3073, 0 } /* 306F 309A compose */
570 };
571 static const UChar notNFKC[][8]={ /* strings that are not in NFKC */
572 { 0x1100, 0x1161, 0 }, /* Jamo compose */
573 { 0x1100, 0x314f, 0 }, /* compatibility Jamo compose */
574 { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 } /* 1F00 0345 compose */
575 };
576
577 int32_t i;
578 UErrorCode errorCode;
579
580 /* API test */
581
582 /* normal case with length>=0 (length -1 used for special cases below) */
583 errorCode=U_ZERO_ERROR;
584 if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
585 log_err("error: !isNormalized(<U+0300>, NFC) (%s)\n", u_errorName(errorCode));
586 }
587
588 /* incoming U_FAILURE */
589 errorCode=U_TRUNCATED_CHAR_FOUND;
590 (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
591 if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
592 log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
593 }
594
595 /* NULL source */
596 errorCode=U_ZERO_ERROR;
597 (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
598 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
599 log_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode));
600 }
601
602 /* bad length */
603 errorCode=U_ZERO_ERROR;
604 (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
605 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
606 log_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s\n", u_errorName(errorCode));
607 }
608
609 /* specific cases */
610 for(i=0; i<ARRAY_LENGTH(notNFC); ++i) {
611 errorCode=U_ZERO_ERROR;
612 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
613 log_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s)\n", i, u_errorName(errorCode));
614 }
615 errorCode=U_ZERO_ERROR;
616 if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
617 log_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
618 }
619 }
620 for(i=0; i<ARRAY_LENGTH(notNFKC); ++i) {
621 errorCode=U_ZERO_ERROR;
622 if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
623 log_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
624 }
625 }
626 }
627
628 void TestCheckFCD()
629 {
630 UErrorCode status = U_ZERO_ERROR;
631 static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
632 0x0A};
633 static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
634 0x02B9, 0x0314, 0x0315, 0x0316};
635 static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
636 0x0050, 0x0730, 0x09EE, 0x1E10};
637
638 static const UChar datastr[][5] =
639 { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
640 {0x0061, 0x030A, 0x00E2, 0x0323, 0},
641 {0x0061, 0x0323, 0x00E2, 0x0323, 0},
642 {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
643 static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
644
645 static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
646 0x6a,
647 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
648 0xea,
649 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
650 0x0307, 0x0308, 0x0309, 0x030a,
651 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
652 0x0327, 0x0328, 0x0329, 0x032a,
653 0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
654 0x1e07, 0x1e08, 0x1e09, 0x1e0a};
655
656 int count = 0;
657
658 if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
659 log_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES\n");
660 if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
661 log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
662 if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
663 log_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES\n");
664
665 if (U_FAILURE(status))
666 log_err("unorm_quickCheck(FCD) failed: %s\n", u_errorName(status));
667
668 while (count < 4)
669 {
670 UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
671 if (U_FAILURE(status)) {
672 log_err("unorm_quickCheck(FCD) failed: exception occured at data set %d\n", count);
673 break;
674 }
675 else {
676 if (result[count] != fcdresult) {
677 log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
678 result[count]);
679 }
680 }
681 count ++;
682 }
683
684 /* random checks of long strings */
685 status = U_ZERO_ERROR;
686 srand((unsigned)time( NULL ));
687
688 for (count = 0; count < 50; count ++)
689 {
690 int size = 0;
691 UBool testresult = UNORM_YES;
692 UChar data[20];
693 UChar norm[100];
694 UChar nfd[100];
695 int normsize = 0;
696 int nfdsize = 0;
697
698 while (size != 19) {
699 data[size] = datachar[(rand() * 50) / RAND_MAX];
700 log_verbose("0x%x", data[size]);
701 normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
702 norm + normsize, 100 - normsize, &status);
703 if (U_FAILURE(status)) {
704 log_err("unorm_quickCheck(FCD) failed: exception occured at data generation\n");
705 break;
706 }
707 size ++;
708 }
709 log_verbose("\n");
710
711 nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
712 nfd, 100, &status);
713 if (U_FAILURE(status)) {
714 log_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation\n");
715 }
716
717 if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
718 testresult = UNORM_NO;
719 }
720 if (testresult == UNORM_YES) {
721 log_verbose("result UNORM_YES\n");
722 }
723 else {
724 log_verbose("result UNORM_NO\n");
725 }
726
727 if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
728 log_err("unorm_quickCheck(FCD) failed: expected %d for random data\n", testresult);
729 }
730 }
731 }
732
733 static void
734 TestAPI() {
735 static const UChar in[]={ 0x68, 0xe4 };
736 UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
737 UErrorCode errorCode;
738 int32_t length;
739
740 /* try preflighting */
741 errorCode=U_ZERO_ERROR;
742 length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
743 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
744 log_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
745 return;
746 }
747
748 errorCode=U_ZERO_ERROR;
749 length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
750 if(U_FAILURE(errorCode)) {
751 log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
752 return;
753 }
754 if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
755 log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
756 return;
757 }
758 }
759
/*
 * Code points used by the coverage test below: compatibility and conjoining
 * Hangul Jamo, precomposed Hangul syllables, and supplementary musical
 * symbols with their combining marks.
 */
enum {
    /* compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed syllables */
    HANGUL_AC00          = 0xac00,               /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = 0xac00 + 14*28 + 3,   /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c   /* cc=220 */
};
778
779 static void
780 TestNormCoverage() {
781 static UChar input[2000], expect[3000], output[3000];
782 UErrorCode errorCode;
783 int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
784
785 /* create a long and nasty string with NFKC-unsafe characters */
786 inLength=0;
787
788 /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
789 input[inLength++]=HANGUL_KIYEOK;
790 input[inLength++]=HANGUL_WEO;
791 input[inLength++]=HANGUL_KIYEOK_SIOS;
792
793 input[inLength++]=HANGUL_KIYEOK;
794 input[inLength++]=HANGUL_WEO;
795 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
796
797 input[inLength++]=HANGUL_KIYEOK;
798 input[inLength++]=HANGUL_K_WEO;
799 input[inLength++]=HANGUL_KIYEOK_SIOS;
800
801 input[inLength++]=HANGUL_KIYEOK;
802 input[inLength++]=HANGUL_K_WEO;
803 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
804
805 input[inLength++]=HANGUL_K_KIYEOK;
806 input[inLength++]=HANGUL_WEO;
807 input[inLength++]=HANGUL_KIYEOK_SIOS;
808
809 input[inLength++]=HANGUL_K_KIYEOK;
810 input[inLength++]=HANGUL_WEO;
811 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
812
813 input[inLength++]=HANGUL_K_KIYEOK;
814 input[inLength++]=HANGUL_K_WEO;
815 input[inLength++]=HANGUL_KIYEOK_SIOS;
816
817 input[inLength++]=HANGUL_K_KIYEOK;
818 input[inLength++]=HANGUL_K_WEO;
819 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
820
821 /* Hangul LV with normal/compatibility Jamo T */
822 input[inLength++]=HANGUL_AC00;
823 input[inLength++]=HANGUL_KIYEOK_SIOS;
824
825 input[inLength++]=HANGUL_AC00;
826 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
827
828 /* compatibility Jamo L, V */
829 input[inLength++]=HANGUL_K_KIYEOK;
830 input[inLength++]=HANGUL_K_WEO;
831
832 hangulPrefixLength=inLength;
833
834 input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
835 input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
836 for(i=0; i<200; ++i) {
837 input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
838 input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
839 input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
840 input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
841 }
842
843 /* (compatibility) Jamo L, T do not compose */
844 input[inLength++]=HANGUL_K_KIYEOK;
845 input[inLength++]=HANGUL_K_KIYEOK_SIOS;
846
847 /* quick checks */
848 errorCode=U_ZERO_ERROR;
849 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
850 log_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s)\n", u_errorName(errorCode));
851 }
852 errorCode=U_ZERO_ERROR;
853 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
854 log_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s)\n", u_errorName(errorCode));
855 }
856 errorCode=U_ZERO_ERROR;
857 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
858 log_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s)\n", u_errorName(errorCode));
859 }
860 errorCode=U_ZERO_ERROR;
861 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
862 log_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s)\n", u_errorName(errorCode));
863 }
864 errorCode=U_ZERO_ERROR;
865 if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
866 log_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s)\n", u_errorName(errorCode));
867 }
868
869 /* NFKC */
870 expectLength=0;
871 expect[expectLength++]=HANGUL_SYLLABLE;
872
873 expect[expectLength++]=HANGUL_SYLLABLE;
874
875 expect[expectLength++]=HANGUL_SYLLABLE;
876
877 expect[expectLength++]=HANGUL_SYLLABLE;
878
879 expect[expectLength++]=HANGUL_SYLLABLE;
880
881 expect[expectLength++]=HANGUL_SYLLABLE;
882
883 expect[expectLength++]=HANGUL_SYLLABLE;
884
885 expect[expectLength++]=HANGUL_SYLLABLE;
886
887 expect[expectLength++]=HANGUL_AC00+3;
888
889 expect[expectLength++]=HANGUL_AC00+3;
890
891 expect[expectLength++]=HANGUL_AC00+14*28;
892
893 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
894 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
895 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
896 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
897 for(i=0; i<200; ++i) {
898 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
899 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
900 }
901 for(i=0; i<200; ++i) {
902 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
903 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
904 }
905
906 expect[expectLength++]=HANGUL_KIYEOK;
907 expect[expectLength++]=HANGUL_KIYEOK_SIOS;
908
909 /* try destination overflow first */
910 errorCode=U_ZERO_ERROR;
911 preflightLength=unorm_normalize(input, inLength,
912 UNORM_NFKC, 0,
913 output, 100, /* too short */
914 &errorCode);
915 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
916 log_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s\n", u_errorName(errorCode));
917 }
918
919 /* real NFKC */
920 errorCode=U_ZERO_ERROR;
921 length=unorm_normalize(input, inLength,
922 UNORM_NFKC, 0,
923 output, sizeof(output)/U_SIZEOF_UCHAR,
924 &errorCode);
925 if(U_FAILURE(errorCode)) {
926 log_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s\n", u_errorName(errorCode));
927 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
928 log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
929 for(i=0; i<length; ++i) {
930 if(output[i]!=expect[i]) {
931 log_err(" NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
932 break;
933 }
934 }
935 }
936 if(length!=preflightLength) {
937 log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
938 }
939
940 /* FCD */
941 u_memcpy(expect, input, hangulPrefixLength);
942 expectLength=hangulPrefixLength;
943
944 expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
945 expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
946 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
947 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
948 for(i=0; i<200; ++i) {
949 expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
950 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
951 }
952 for(i=0; i<200; ++i) {
953 expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
954 expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
955 }
956
957 expect[expectLength++]=HANGUL_K_KIYEOK;
958 expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
959
960 errorCode=U_ZERO_ERROR;
961 length=unorm_normalize(input, inLength,
962 UNORM_FCD, 0,
963 output, sizeof(output)/U_SIZEOF_UCHAR,
964 &errorCode);
965 if(U_FAILURE(errorCode)) {
966 log_err("error unorm_normalize(long input, UNORM_FCD) failed with %s\n", u_errorName(errorCode));
967 } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
968 log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
969 for(i=0; i<length; ++i) {
970 if(output[i]!=expect[i]) {
971 log_err(" FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
972 break;
973 }
974 }
975 }
976 }
977
978 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
979 static void
980 TestConcatenate(void) {
981 /* "re + 'sume'" */
982 static const UChar
983 left[]={
984 0x72, 0x65, 0
985 },
986 right[]={
987 0x301, 0x73, 0x75, 0x6d, 0xe9, 0
988 },
989 expect[]={
990 0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
991 };
992
993 UChar buffer[100];
994 UErrorCode errorCode;
995 int32_t length;
996
997 /* left with length, right NUL-terminated */
998 errorCode=U_ZERO_ERROR;
999 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1000 if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
1001 log_err("error: unorm_concatenate()=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1002 }
1003
1004 /* preflighting */
1005 errorCode=U_ZERO_ERROR;
1006 length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
1007 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
1008 log_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1009 }
1010
1011 buffer[2]=0x5555;
1012 errorCode=U_ZERO_ERROR;
1013 length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
1014 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
1015 log_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s\n", length, u_errorName(errorCode));
1016 }
1017
1018 /* enter with U_FAILURE */
1019 buffer[2]=0xaaaa;
1020 errorCode=U_UNEXPECTED_TOKEN;
1021 length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1022 if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1023 log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1024 }
1025
1026 /* illegal arguments */
1027 buffer[2]=0xaaaa;
1028 errorCode=U_ZERO_ERROR;
1029 length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1030 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1031 log_err("error: unorm_concatenate(left=NULL)=%ld failed with %s\n", length, u_errorName(errorCode));
1032 }
1033
1034 errorCode=U_ZERO_ERROR;
1035 length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1036 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1037 log_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s\n", length, u_errorName(errorCode));
1038 }
1039 }
1040
/*
 * Code unit 0x2b, used as the separator between expected output pieces
 * in the test strings of TestNextPrevious() and recognized by _testIter().
 */
enum { _PLUS = 0x2b };
1044
1045 static const char *const _modeString[UNORM_MODE_COUNT]={
1046 "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1047 };
1048
1049 static void
1050 _testIter(const UChar *src, int32_t srcLength,
1051 UCharIterator *iter, UNormalizationMode mode, UBool forward,
1052 const UChar *out, int32_t outLength,
1053 const int32_t *srcIndexes, int32_t srcIndexesLength) {
1054 UChar buffer[4];
1055 const UChar *expect, *outLimit, *in;
1056 int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1057 UErrorCode errorCode;
1058 UBool neededToNormalize, expectNeeded;
1059
1060 errorCode=U_ZERO_ERROR;
1061 outLimit=out+outLength;
1062 if(forward) {
1063 expect=out;
1064 i=index=0;
1065 } else {
1066 expect=outLimit;
1067 i=srcIndexesLength-2;
1068 index=srcLength;
1069 }
1070
1071 for(;;) {
1072 prevIndex=index;
1073 if(forward) {
1074 if(!iter->hasNext(iter)) {
1075 return;
1076 }
1077 length=unorm_next(iter,
1078 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1079 mode, 0,
1080 (UBool)(out!=NULL), &neededToNormalize,
1081 &errorCode);
1082 expectIndex=srcIndexes[i+1];
1083 in=src+prevIndex;
1084 inLength=expectIndex-prevIndex;
1085
1086 if(out!=NULL) {
1087 /* get output piece from between plus signs */
1088 expectLength=0;
1089 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1090 ++expectLength;
1091 }
1092 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1093 } else {
1094 expect=in;
1095 expectLength=inLength;
1096 expectNeeded=FALSE;
1097 }
1098 } else {
1099 if(!iter->hasPrevious(iter)) {
1100 return;
1101 }
1102 length=unorm_previous(iter,
1103 buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1104 mode, 0,
1105 (UBool)(out!=NULL), &neededToNormalize,
1106 &errorCode);
1107 expectIndex=srcIndexes[i];
1108 in=src+expectIndex;
1109 inLength=prevIndex-expectIndex;
1110
1111 if(out!=NULL) {
1112 /* get output piece from between plus signs */
1113 expectLength=0;
1114 while(expect!=out && expect[-1]!=_PLUS) {
1115 ++expectLength;
1116 --expect;
1117 }
1118 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1119 } else {
1120 expect=in;
1121 expectLength=inLength;
1122 expectNeeded=FALSE;
1123 }
1124 }
1125 index=iter->getIndex(iter, UITER_CURRENT);
1126
1127 if(U_FAILURE(errorCode)) {
1128 log_err("error unorm iteration (next/previous %d %s)[%d]: %s\n",
1129 forward, _modeString[mode], i, u_errorName(errorCode));
1130 return;
1131 }
1132 if(expectIndex!=index) {
1133 log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1134 forward, _modeString[mode], i, index, expectIndex);
1135 return;
1136 }
1137 if(expectLength!=length) {
1138 log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1139 forward, _modeString[mode], i, length, expectLength);
1140 return;
1141 }
1142 if(0!=u_memcmp(expect, buffer, length)) {
1143 log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1144 forward, _modeString[mode], i);
1145 return;
1146 }
1147 if(neededToNormalize!=expectNeeded) {
1148 }
1149
1150 if(forward) {
1151 expect+=expectLength+1; /* go after the + */
1152 ++i;
1153 } else {
1154 --expect; /* go before the + */
1155 --i;
1156 }
1157 }
1158 }
1159
1160 static void
1161 TestNextPrevious() {
1162 static const UChar
1163 src[]={ /* input string */
1164 0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1165 },
1166 nfd[]={ /* + separates expected output pieces */
1167 0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1168 },
1169 nfkd[]={
1170 0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1171 },
1172 nfc[]={
1173 0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1174 },
1175 nfkc[]={
1176 0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1177 },
1178 fcd[]={
1179 0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1180 };
1181
1182 /* expected iterator indexes in the source string for each iteration piece */
1183 static const int32_t
1184 nfdIndexes[]={
1185 0, 1, 2, 5, 6, 7
1186 },
1187 nfkdIndexes[]={
1188 0, 1, 2, 5, 6, 7
1189 },
1190 nfcIndexes[]={
1191 0, 1, 2, 5, 6, 7
1192 },
1193 nfkcIndexes[]={
1194 0, 1, 2, 5, 7
1195 },
1196 fcdIndexes[]={
1197 0, 1, 2, 5, 6, 7
1198 };
1199
1200 UCharIterator iter;
1201
1202 UChar buffer[4];
1203 int32_t length;
1204
1205 UBool neededToNormalize;
1206 UErrorCode errorCode;
1207
1208 uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1209
1210 /* test iteration with doNormalize */
1211 iter.index=0;
1212 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1213 iter.index=0;
1214 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1215 iter.index=0;
1216 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1217 iter.index=0;
1218 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1219 iter.index=0;
1220 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1221
1222 iter.index=iter.length;
1223 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1224 iter.index=iter.length;
1225 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1226 iter.index=iter.length;
1227 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1228 iter.index=iter.length;
1229 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1230 iter.index=iter.length;
1231 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1232
1233 /* test iteration without doNormalize */
1234 iter.index=0;
1235 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1236 iter.index=0;
1237 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1238 iter.index=0;
1239 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1240 iter.index=0;
1241 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1242 iter.index=0;
1243 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1244
1245 iter.index=iter.length;
1246 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1247 iter.index=iter.length;
1248 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1249 iter.index=iter.length;
1250 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1251 iter.index=iter.length;
1252 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1253 iter.index=iter.length;
1254 _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1255
1256 /* try without neededToNormalize */
1257 errorCode=U_ZERO_ERROR;
1258 buffer[0]=5;
1259 iter.index=1;
1260 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1261 UNORM_NFD, 0, TRUE, NULL,
1262 &errorCode);
1263 if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1264 log_err("error unorm_next(without needed) %s\n", u_errorName(errorCode));
1265 return;
1266 }
1267
1268 /* preflight */
1269 neededToNormalize=9;
1270 iter.index=1;
1271 length=unorm_next(&iter, NULL, 0,
1272 UNORM_NFD, 0, TRUE, &neededToNormalize,
1273 &errorCode);
1274 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1275 log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1276 return;
1277 }
1278
1279 errorCode=U_ZERO_ERROR;
1280 buffer[0]=buffer[1]=5;
1281 neededToNormalize=9;
1282 iter.index=1;
1283 length=unorm_next(&iter, buffer, 1,
1284 UNORM_NFD, 0, TRUE, &neededToNormalize,
1285 &errorCode);
1286 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1287 log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1288 return;
1289 }
1290
1291 /* no iterator */
1292 errorCode=U_ZERO_ERROR;
1293 buffer[0]=buffer[1]=5;
1294 neededToNormalize=9;
1295 iter.index=1;
1296 length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1297 UNORM_NFD, 0, TRUE, &neededToNormalize,
1298 &errorCode);
1299 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1300 log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1301 return;
1302 }
1303
1304 /* illegal mode */
1305 buffer[0]=buffer[1]=5;
1306 neededToNormalize=9;
1307 iter.index=1;
1308 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1309 (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1310 &errorCode);
1311 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1312 log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1313 return;
1314 }
1315
1316 /* error coming in */
1317 errorCode=U_MISPLACED_QUANTIFIER;
1318 buffer[0]=5;
1319 iter.index=1;
1320 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1321 UNORM_NFD, 0, TRUE, NULL,
1322 &errorCode);
1323 if(errorCode!=U_MISPLACED_QUANTIFIER) {
1324 log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1325 return;
1326 }
1327
1328 /* missing pErrorCode */
1329 buffer[0]=5;
1330 iter.index=1;
1331 length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1332 UNORM_NFD, 0, TRUE, NULL,
1333 NULL);
1334 if(iter.index!=1 || buffer[0]!=5) {
1335 log_err("error unorm_next(pErrorCode==NULL) %s\n", u_errorName(errorCode));
1336 return;
1337 }
1338 }
1339
1340 static void
1341 TestFCNFKCClosure(void) {
1342 static const struct {
1343 UChar32 c;
1344 const UChar s[6];
1345 } tests[]={
1346 { 0x037A, { 0x0020, 0x03B9, 0 } },
1347 { 0x03D2, { 0x03C5, 0 } },
1348 { 0x20A8, { 0x0072, 0x0073, 0 } },
1349 { 0x210B, { 0x0068, 0 } },
1350 { 0x210C, { 0x0068, 0 } },
1351 { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1352 { 0x2122, { 0x0074, 0x006D, 0 } },
1353 { 0x2128, { 0x007A, 0 } },
1354 { 0x1D5DB, { 0x0068, 0 } },
1355 { 0x1D5ED, { 0x007A, 0 } },
1356 { 0x0061, { 0 } }
1357 };
1358
1359 UChar buffer[8];
1360 UErrorCode errorCode;
1361 int32_t i, length;
1362
1363 for(i=0; i<ARRAY_LENGTH(tests); ++i) {
1364 errorCode=U_ZERO_ERROR;
1365 length=u_getFC_NFKC_Closure(tests[i].c, buffer, ARRAY_LENGTH(buffer), &errorCode);
1366 if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1367 log_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s)\n", tests[i].c, u_errorName(errorCode));
1368 }
1369 }
1370
1371 /* error handling */
1372 errorCode=U_ZERO_ERROR;
1373 length=u_getFC_NFKC_Closure(0x5c, NULL, ARRAY_LENGTH(buffer), &errorCode);
1374 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1375 log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1376 }
1377
1378 length=u_getFC_NFKC_Closure(0x5c, buffer, ARRAY_LENGTH(buffer), &errorCode);
1379 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1380 log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1381 }
1382 }
1383
1384 #endif /* #if !UCONFIG_NO_NORMALIZATION */