]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nucnvtst.c
baecb4586d9e3894d19722859c5821aabf4acac2
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File CCONVTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 ********************************************************************************
14 */
15 #include <stdio.h>
16 #include "cstring.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "cintltst.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
24 #include "cmemory.h"
25
26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
30 #endif
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
47
48 #if !UCONFIG_NO_LEGACY_CONVERSION
49 static void TestSBCS(void);
50 static void TestDBCS(void);
51 static void TestMBCS(void);
52
53 #ifdef U_ENABLE_GENERIC_ISO_2022
54 static void TestISO_2022(void);
55 #endif
56
57 static void TestISO_2022_JP(void);
58 static void TestISO_2022_JP_1(void);
59 static void TestISO_2022_JP_2(void);
60 static void TestISO_2022_KR(void);
61 static void TestISO_2022_KR_1(void);
62 static void TestISO_2022_CN(void);
63 static void TestISO_2022_CN_EXT(void);
64 static void TestJIS(void);
65 static void TestHZ(void);
66 #endif
67
68 static void TestSCSU(void);
69
70 #if !UCONFIG_NO_LEGACY_CONVERSION
71 static void TestEBCDIC_STATEFUL(void);
72 static void TestGB18030(void);
73 static void TestLMBCS(void);
74 static void TestJitterbug255(void);
75 static void TestEBCDICUS4XML(void);
76 static void TestJitterbug915(void);
77 static void TestISCII(void);
78
79 static void TestCoverageMBCS(void);
80 static void TestJitterbug2346(void);
81 static void TestJitterbug2411(void);
82 #endif
83
84 static void TestRoundTrippingAllUTF(void);
85 static void TestConv(const uint16_t in[],
86 int len,
87 const char* conv,
88 const char* lang,
89 char byteArr[],
90 int byteArrLen);
91 void addTestNewConvert(TestNode** root);
92
93 /* open a converter, using test data if it begins with '@' */
94 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
95
96
/* Number of elements in the scratch output/offset buffers of the conversion helpers. */
#define NEW_MAX_BUFFER 999

/*
 * Chunk sizes used by testConvertFromU()/testConvertToU() when feeding the
 * converter; TestNewConvertWithBufferSizes() overwrites both before each run.
 */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;

/* Label describing the conversion under test; written by setNuConvTestName(). */
static char gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Both arguments are fully parenthesized so that expressions built from
 * low-precedence operators (e.g. `a & 1`) compare and expand as a unit.
 * Arguments may be evaluated twice, so do not pass expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
104
105 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
106 {
107 if(cnv && cnv[0] == '@') {
108 return ucnv_openPackage(loadTestData(err), cnv+1, err);
109 } else {
110 return ucnv_open(cnv, err);
111 }
112 }
113
/* Logs (verbose level) the first len bytes of a as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
122
123 static void printUSeq(const UChar* a, int len)
124 {
125 int i=0;
126 log_verbose("{U+");
127 while (i<len) log_verbose("0x%04x ", a[i++]);
128 log_verbose("}\n");
129 }
130
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }" followed by a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
139
140 static void printUSeqErr(const UChar* a, int len)
141 {
142 int i=0;
143 fprintf(stderr, "{U+");
144 while (i<len)
145 fprintf(stderr, "0x%04x ", a[i++]);
146 fprintf(stderr,"}\n");
147 }
148
149 static void
150 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
151 {
152 const char* s0;
153 const char* s=(char*)source;
154 const int32_t *r=results;
155 UErrorCode errorCode=U_ZERO_ERROR;
156 UChar32 c;
157
158 while(s<limit) {
159 s0=s;
160 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
161 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
162 break; /* no more significant input */
163 } else if(U_FAILURE(errorCode)) {
164 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
165 break;
166 } else if(
167 /* test the expected number of input bytes only if >=0 */
168 (*r>=0 && (int32_t)(s-s0)!=*r) ||
169 c!=*(r+1)
170 ) {
171 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
172 message, c, (s-s0), *(r+1), *r);
173 break;
174 }
175 r+=2;
176 }
177 }
178
179 static void
180 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
181 {
182 const char* s=(char*)source;
183 UErrorCode errorCode=U_ZERO_ERROR;
184 uint32_t c;
185 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
186 if(errorCode != expected){
187 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
188 }
189 if(c != 0xFFFD && c != 0xffff){
190 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
191 }
192
193 }
194
195 static void TestInBufSizes(void)
196 {
197 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
198 #if 1
199 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
200 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
201 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
202 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
203 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
204 TestNewConvertWithBufferSizes(1,1);
205 TestNewConvertWithBufferSizes(2,3);
206 TestNewConvertWithBufferSizes(3,2);
207 #endif
208 }
209
210 static void TestOutBufSizes(void)
211 {
212 #if 1
213 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
214 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
215 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
216 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
217 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
218 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
219
220 #endif
221 }
222
223
224 void addTestNewConvert(TestNode** root)
225 {
226 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
227 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
228 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
229 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
230 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
231 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
232 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
233 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
234
235 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
236 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
237 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
238 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
239 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
240 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
241 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
242 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
243
244 #if !UCONFIG_NO_LEGACY_CONVERSION
245 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
246 #endif
247
248 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
249
250 #if !UCONFIG_NO_LEGACY_CONVERSION
251 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
252 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
253 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
254
255 #ifdef U_ENABLE_GENERIC_ISO_2022
256 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
257 #endif
258
259 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
260 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
261 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
262 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
263 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
264 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
265 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
266 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
267 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
268 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
269 #endif
270
271 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
272
273 #if !UCONFIG_NO_LEGACY_CONVERSION
274 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
275 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
276 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
277 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
278 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
279
280 #if !UCONFIG_NO_COLLATION
281 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
282 #endif
283
284 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
285 #endif
286
287
288 #if !UCONFIG_NO_LEGACY_CONVERSION
289 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
290 #endif
291
292 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
293
294 #if !UCONFIG_NO_LEGACY_CONVERSION
295 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
296 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
297 #endif
298
299 }
300
301
302 /* Note that this test already makes use of statics, so it's not really
303 multithread safe.
304 This convenience function lets us make the error messages actually useful.
305 */
306
307 static void setNuConvTestName(const char *codepage, const char *direction)
308 {
309 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
310 codepage,
311 direction,
312 (int)gInBufferSize,
313 (int)gOutBufferSize);
314 }
315
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    TC_OK,       /* 0: the test was OK */
    TC_MISMATCH, /* 1: the match failed; an error was printed */
    TC_FAIL      /* 2: the test failed; do not print another error, one was already printed */
} ETestConvertResult;
322
323 /* Note: This function uses global variables and it will not do offset
324 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
325 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
326 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
327 {
328 UErrorCode status = U_ZERO_ERROR;
329 UConverter *conv = 0;
330 char junkout[NEW_MAX_BUFFER]; /* FIX */
331 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
332 char *p;
333 const UChar *src;
334 char *end;
335 char *targ;
336 int32_t *offs;
337 int i;
338 int32_t realBufferSize;
339 char *realBufferEnd;
340 const UChar *realSourceEnd;
341 const UChar *sourceLimit;
342 UBool checkOffsets = TRUE;
343 UBool doFlush;
344
345 for(i=0;i<NEW_MAX_BUFFER;i++)
346 junkout[i] = (char)0xF0;
347 for(i=0;i<NEW_MAX_BUFFER;i++)
348 junokout[i] = 0xFF;
349
350 setNuConvTestName(codepage, "FROM");
351
352 log_verbose("\n========= %s\n", gNuConvTestName);
353
354 conv = my_ucnv_open(codepage, &status);
355
356 if(U_FAILURE(status))
357 {
358 log_data_err("Couldn't open converter %s\n",codepage);
359 return TC_FAIL;
360 }
361 if(useFallback){
362 ucnv_setFallback(conv,useFallback);
363 }
364
365 log_verbose("Converter opened..\n");
366
367 src = source;
368 targ = junkout;
369 offs = junokout;
370
371 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
372 realBufferEnd = junkout + realBufferSize;
373 realSourceEnd = source + sourceLen;
374
375 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
376 checkOffsets = FALSE;
377
378 do
379 {
380 end = nct_min(targ + gOutBufferSize, realBufferEnd);
381 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
382
383 doFlush = (UBool)(sourceLimit == realSourceEnd);
384
385 if(targ == realBufferEnd) {
386 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
387 return TC_FAIL;
388 }
389 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
390
391
392 status = U_ZERO_ERROR;
393
394 ucnv_fromUnicode (conv,
395 &targ,
396 end,
397 &src,
398 sourceLimit,
399 checkOffsets ? offs : NULL,
400 doFlush, /* flush if we're at the end of the input data */
401 &status);
402 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
403
404 if(U_FAILURE(status)) {
405 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
406 return TC_FAIL;
407 }
408
409 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
410 sourceLen, targ-junkout);
411
412 if(VERBOSITY)
413 {
414 char junk[9999];
415 char offset_str[9999];
416 char *ptr;
417
418 junk[0] = 0;
419 offset_str[0] = 0;
420 for(ptr = junkout;ptr<targ;ptr++) {
421 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
422 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
423 }
424
425 log_verbose(junk);
426 printSeq((const uint8_t *)expect, expectLen);
427 if ( checkOffsets ) {
428 log_verbose("\nOffsets:");
429 log_verbose(offset_str);
430 }
431 log_verbose("\n");
432 }
433 ucnv_close(conv);
434
435 if(expectLen != targ-junkout) {
436 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
437 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
438 printf("\nGot:");
439 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
440 printf("\nExpected:");
441 printSeqErr((const unsigned char*)expect, expectLen);
442 return TC_MISMATCH;
443 }
444
445 if (checkOffsets && (expectOffsets != 0) ) {
446 log_verbose("comparing %d offsets..\n", targ-junkout);
447 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
448 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
449 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
450 log_err("\n");
451 log_err("Got : ");
452 for(p=junkout;p<targ;p++) {
453 log_err("%d,", junokout[p-junkout]);
454 }
455 log_err("\n");
456 log_err("Expected: ");
457 for(i=0; i<(targ-junkout); i++) {
458 log_err("%d,", expectOffsets[i]);
459 }
460 log_err("\n");
461 }
462 }
463
464 log_verbose("comparing..\n");
465 if(!memcmp(junkout, expect, expectLen)) {
466 log_verbose("Matches!\n");
467 return TC_OK;
468 } else {
469 log_err("String does not match u->%s\n", gNuConvTestName);
470 printUSeqErr(source, sourceLen);
471 printf("\nGot:");
472 printSeqErr((const unsigned char *)junkout, expectLen);
473 printf("\nExpected:");
474 printSeqErr((const unsigned char *)expect, expectLen);
475
476 return TC_MISMATCH;
477 }
478 }
479
480 /* Note: This function uses global variables and it will not do offset
481 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
482 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
483 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
484 {
485 UErrorCode status = U_ZERO_ERROR;
486 UConverter *conv = 0;
487 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
488 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
489 const char *src;
490 const char *realSourceEnd;
491 const char *srcLimit;
492 UChar *p;
493 UChar *targ;
494 UChar *end;
495 int32_t *offs;
496 int i;
497 UBool checkOffsets = TRUE;
498
499 int32_t realBufferSize;
500 UChar *realBufferEnd;
501
502
503 for(i=0;i<NEW_MAX_BUFFER;i++)
504 junkout[i] = 0xFFFE;
505
506 for(i=0;i<NEW_MAX_BUFFER;i++)
507 junokout[i] = -1;
508
509 setNuConvTestName(codepage, "TO");
510
511 log_verbose("\n========= %s\n", gNuConvTestName);
512
513 conv = my_ucnv_open(codepage, &status);
514
515 if(U_FAILURE(status))
516 {
517 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
518 return TC_FAIL;
519 }
520 if(useFallback){
521 ucnv_setFallback(conv,useFallback);
522 }
523 log_verbose("Converter opened..\n");
524
525 src = (const char *)source;
526 targ = junkout;
527 offs = junokout;
528
529 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
530 realBufferEnd = junkout + realBufferSize;
531 realSourceEnd = src + sourcelen;
532
533 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
534 checkOffsets = FALSE;
535
536 do
537 {
538 end = nct_min( targ + gOutBufferSize, realBufferEnd);
539 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
540
541 if(targ == realBufferEnd)
542 {
543 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
544 return TC_FAIL;
545 }
546 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
547
548 /* oldTarg = targ; */
549
550 status = U_ZERO_ERROR;
551
552 ucnv_toUnicode (conv,
553 &targ,
554 end,
555 &src,
556 srcLimit,
557 checkOffsets ? offs : NULL,
558 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
559 &status);
560
561 /* offs += (targ-oldTarg); */
562
563 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
564
565 if(U_FAILURE(status))
566 {
567 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
568 return TC_FAIL;
569 }
570
571 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
572 sourcelen, targ-junkout);
573 if(VERBOSITY)
574 {
575 char junk[9999];
576 char offset_str[9999];
577 UChar *ptr;
578
579 junk[0] = 0;
580 offset_str[0] = 0;
581
582 for(ptr = junkout;ptr<targ;ptr++)
583 {
584 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
585 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
586 }
587
588 log_verbose(junk);
589 printUSeq(expect, expectlen);
590 if ( checkOffsets )
591 {
592 log_verbose("\nOffsets:");
593 log_verbose(offset_str);
594 }
595 log_verbose("\n");
596 }
597 ucnv_close(conv);
598
599 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
600
601 if (checkOffsets && (expectOffsets != 0))
602 {
603 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
604 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
605 log_err("Got: ");
606 for(p=junkout;p<targ;p++) {
607 log_err("%d,", junokout[p-junkout]);
608 }
609 log_err("\n");
610 log_err("Expected: ");
611 for(i=0; i<(targ-junkout); i++) {
612 log_err("%d,", expectOffsets[i]);
613 }
614 log_err("\n");
615 log_err("output: ");
616 for(i=0; i<(targ-junkout); i++) {
617 log_err("%X,", junkout[i]);
618 }
619 log_err("\n");
620 log_err("input: ");
621 for(i=0; i<(src-(const char *)source); i++) {
622 log_err("%X,", (unsigned char)source[i]);
623 }
624 log_err("\n");
625 }
626 }
627
628 if(!memcmp(junkout, expect, expectlen*2))
629 {
630 log_verbose("Matches!\n");
631 return TC_OK;
632 }
633 else
634 {
635 log_err("String does not match. %s\n", gNuConvTestName);
636 log_verbose("String does not match. %s\n", gNuConvTestName);
637 printf("\nGot:");
638 printUSeqErr(junkout, expectlen);
639 printf("\nExpected:");
640 printUSeqErr(expect, expectlen);
641 return TC_MISMATCH;
642 }
643 }
644
645
646 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
647 {
648 /** test chars #1 */
649 /* 1 2 3 1Han 2Han 3Han . */
650 static const UChar sampleText[] =
651 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
652
653
654 static const uint8_t expectedUTF8[] =
655 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
656 static const int32_t toUTF8Offs[] =
657 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
658 static const int32_t fmUTF8Offs[] =
659 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
660
661 #ifdef U_ENABLE_GENERIC_ISO_2022
662 /* Same as UTF8, but with ^[%B preceeding */
663 static const const uint8_t expectedISO2022[] =
664 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
665 static const int32_t toISO2022Offs[] =
666 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
667 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
668 static const int32_t fmISO2022Offs[] =
669 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
670 #endif
671
672 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
673 static const uint8_t expectedIBM930[] =
674 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
675 static const int32_t toIBM930Offs[] =
676 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
677 static const int32_t fmIBM930Offs[] =
678 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
679
680 /* 1 2 3 0 h1 h2 h3 . MBCS*/
681 static const uint8_t expectedIBM943[] =
682 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
683 static const int32_t toIBM943Offs [] =
684 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
685 static const int32_t fmIBM943Offs[] =
686 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
687
688 /* 1 2 3 0 h1 h2 h3 . DBCS*/
689 static const uint8_t expectedIBM9027[] =
690 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
691 static const int32_t toIBM9027Offs [] =
692 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
693
694 /* 1 2 3 0 <?> <?> <?> . SBCS*/
695 static const uint8_t expectedIBM920[] =
696 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
697 static const int32_t toIBM920Offs [] =
698 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
699
700 /* 1 2 3 0 <?> <?> <?> . SBCS*/
701 static const uint8_t expectedISO88593[] =
702 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
703 static const int32_t toISO88593Offs[] =
704 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
705
706 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
707 static const uint8_t expectedLATIN1[] =
708 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
709 static const int32_t toLATIN1Offs[] =
710 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
711
712
713 /* etc */
714 static const uint8_t expectedUTF16BE[] =
715 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
716 static const int32_t toUTF16BEOffs[]=
717 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
718 static const int32_t fmUTF16BEOffs[] =
719 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
720
721 static const uint8_t expectedUTF16LE[] =
722 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
723 static const int32_t toUTF16LEOffs[]=
724 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
725 static const int32_t fmUTF16LEOffs[] =
726 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
727
728 static const uint8_t expectedUTF32BE[] =
729 { 0x00, 0x00, 0x00, 0x31,
730 0x00, 0x00, 0x00, 0x32,
731 0x00, 0x00, 0x00, 0x33,
732 0x00, 0x00, 0x00, 0x00,
733 0x00, 0x00, 0x4e, 0x00,
734 0x00, 0x00, 0x4e, 0x8c,
735 0x00, 0x00, 0x4e, 0x09,
736 0x00, 0x00, 0x00, 0x2e };
737 static const int32_t toUTF32BEOffs[]=
738 { 0x00, 0x00, 0x00, 0x00,
739 0x01, 0x01, 0x01, 0x01,
740 0x02, 0x02, 0x02, 0x02,
741 0x03, 0x03, 0x03, 0x03,
742 0x04, 0x04, 0x04, 0x04,
743 0x05, 0x05, 0x05, 0x05,
744 0x06, 0x06, 0x06, 0x06,
745 0x07, 0x07, 0x07, 0x07,
746 0x08, 0x08, 0x08, 0x08 };
747 static const int32_t fmUTF32BEOffs[] =
748 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
749
750 static const uint8_t expectedUTF32LE[] =
751 { 0x31, 0x00, 0x00, 0x00,
752 0x32, 0x00, 0x00, 0x00,
753 0x33, 0x00, 0x00, 0x00,
754 0x00, 0x00, 0x00, 0x00,
755 0x00, 0x4e, 0x00, 0x00,
756 0x8c, 0x4e, 0x00, 0x00,
757 0x09, 0x4e, 0x00, 0x00,
758 0x2e, 0x00, 0x00, 0x00 };
759 static const int32_t toUTF32LEOffs[]=
760 { 0x00, 0x00, 0x00, 0x00,
761 0x01, 0x01, 0x01, 0x01,
762 0x02, 0x02, 0x02, 0x02,
763 0x03, 0x03, 0x03, 0x03,
764 0x04, 0x04, 0x04, 0x04,
765 0x05, 0x05, 0x05, 0x05,
766 0x06, 0x06, 0x06, 0x06,
767 0x07, 0x07, 0x07, 0x07,
768 0x08, 0x08, 0x08, 0x08 };
769 static const int32_t fmUTF32LEOffs[] =
770 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
771
772
773
774
775 /** Test chars #2 **/
776
777 /* Sahha [health], slashed h's */
778 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
779 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
780
781 /* LMBCS */
782 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
783 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
784 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
785 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
786 /*********************************** START OF CODE finally *************/
787
788 gInBufferSize = insize;
789 gOutBufferSize = outsize;
790
791 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
792
793
794 /*UTF-8*/
795 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
796 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
797
798 log_verbose("Test surrogate behaviour for UTF8\n");
799 {
800 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
801 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
802 0xf0, 0x90, 0x90, 0x81,
803 0xef, 0xbf, 0xbd
804 };
805 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
806 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
807 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
808
809
810 }
811
812 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
813 /*ISO-2022*/
814 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
815 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
816 #endif
817
818 /*UTF16 LE*/
819 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
820 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
821 /*UTF16 BE*/
822 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
823 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
824 /*UTF32 LE*/
825 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
826 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
827 /*UTF32 BE*/
828 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
829 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
830
831 /*LATIN_1*/
832 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
833 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
834
835 #if !UCONFIG_NO_LEGACY_CONVERSION
836 /*EBCDIC_STATEFUL*/
837 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
838 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
839
840 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
841 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
842
843 /*MBCS*/
844
845 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
846 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
847 /*DBCS*/
848 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
849 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
850 /*SBCS*/
851 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
852 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
853 /*SBCS*/
854 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
855 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
856 #endif
857
858
859 /****/
860
861 /*UTF-8*/
862 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
863 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
864 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
865 /*ISO-2022*/
866 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
867 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
868 #endif
869
870 /*UTF16 LE*/
871 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
872 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
873 /*UTF16 BE*/
874 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
875 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
876 /*UTF32 LE*/
877 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
878 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
879 /*UTF32 BE*/
880 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
881 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
882
883 #if !UCONFIG_NO_LEGACY_CONVERSION
884 /*EBCDIC_STATEFUL*/
885 testConvertToU(expectedIBM930, sizeof(expectedIBM930),
886 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
887 /*MBCS*/
888 testConvertToU(expectedIBM943, sizeof(expectedIBM943),
889 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
890 #endif
891
892 /* Try it again to make sure it still works */
893 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
894 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
895
896 #if !UCONFIG_NO_LEGACY_CONVERSION
897 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
898 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
899
900 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
901 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
902
903 /*LMBCS*/
904 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
905 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
906 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
907 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
908 #endif
909
910 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
911 {
912 /* encode directly set D and set O */
913 static const uint8_t utf7[] = {
914 /*
915 Hi Mom -+Jjo--!
916 A+ImIDkQ.
917 +-
918 +ZeVnLIqe
919 */
920 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
921 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
922 0x2b, 0x2d,
923 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
924 };
925 static const UChar unicode[] = {
926 /*
927 Hi Mom -<WHITE SMILING FACE>-!
928 A<NOT IDENTICAL TO><ALPHA>.
929 +
930 [Japanese word "nihongo"]
931 */
932 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
933 0x41, 0x2262, 0x0391, 0x2e,
934 0x2b,
935 0x65e5, 0x672c, 0x8a9e
936 };
937 static const int32_t toUnicodeOffsets[] = {
938 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
939 15, 17, 19, 23,
940 24,
941 27, 29, 32
942 };
943 static const int32_t fromUnicodeOffsets[] = {
944 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
945 11, 12, 12, 12, 13, 13, 13, 13, 14,
946 15, 15,
947 16, 16, 16, 17, 17, 17, 18, 18, 18
948 };
949
950 /* same but escaping set O (the exclamation mark) */
951 static const uint8_t utf7Restricted[] = {
952 /*
953 Hi Mom -+Jjo--+ACE-
954 A+ImIDkQ.
955 +-
956 +ZeVnLIqe
957 */
958 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
959 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
960 0x2b, 0x2d,
961 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
962 };
963 static const int32_t toUnicodeOffsetsR[] = {
964 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
965 19, 21, 23, 27,
966 28,
967 31, 33, 36
968 };
969 static const int32_t fromUnicodeOffsetsR[] = {
970 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
971 11, 12, 12, 12, 13, 13, 13, 13, 14,
972 15, 15,
973 16, 16, 16, 17, 17, 17, 18, 18, 18
974 };
975
976 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
977
978 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
979
980 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
981
982 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
983 }
984
985 /*
986 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
987 * modified according to RFC 2060,
988 * and supplemented with the one example in RFC 2060 itself.
989 */
990 {
991 static const uint8_t imap[] = {
992 /* Hi Mom -&Jjo--!
993 A&ImIDkQ-.
994 &-
995 &ZeVnLIqe-
996 \
997 ~peter
998 /mail
999 /&ZeVnLIqe-
1000 /&U,BTFw-
1001 */
1002 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1003 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1004 0x26, 0x2d,
1005 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1006 0x5c,
1007 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1008 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1009 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1010 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1011 };
1012 static const UChar unicode[] = {
1013 /* Hi Mom -<WHITE SMILING FACE>-!
1014 A<NOT IDENTICAL TO><ALPHA>.
1015 &
1016 [Japanese word "nihongo"]
1017 \
1018 ~peter
1019 /mail
1020 /<65e5, 672c, 8a9e>
1021 /<53f0, 5317>
1022 */
1023 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1024 0x41, 0x2262, 0x0391, 0x2e,
1025 0x26,
1026 0x65e5, 0x672c, 0x8a9e,
1027 0x5c,
1028 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1029 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1030 0x2f, 0x65e5, 0x672c, 0x8a9e,
1031 0x2f, 0x53f0, 0x5317
1032 };
1033 static const int32_t toUnicodeOffsets[] = {
1034 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1035 15, 17, 19, 24,
1036 25,
1037 28, 30, 33,
1038 37,
1039 38, 39, 40, 41, 42, 43,
1040 44, 45, 46, 47, 48,
1041 49, 51, 53, 56,
1042 60, 62, 64
1043 };
1044 static const int32_t fromUnicodeOffsets[] = {
1045 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1046 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1047 15, 15,
1048 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1049 19,
1050 20, 21, 22, 23, 24, 25,
1051 26, 27, 28, 29, 30,
1052 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1053 35, 36, 36, 36, 37, 37, 37, 37, 37
1054 };
1055
1056 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1057
1058 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1059 }
1060
1061 /* Test UTF-8 bad data handling*/
1062 {
1063 static const uint8_t utf8[]={
1064 0x61,
1065 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1066 0x00,
1067 0x62,
1068 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1069 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1070 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1071 0xdf, 0xbf, /* 7ff */
1072 0xbf, /* truncated tail */
1073 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1074 0x02
1075 };
1076
1077 static const uint16_t utf8Expected[]={
1078 0x0061,
1079 0xfffd,
1080 0x0000,
1081 0x0062,
1082 0xfffd,
1083 0xfffd,
1084 0xdbff, 0xdfff,
1085 0x07ff,
1086 0xfffd,
1087 0xfffd,
1088 0x0002
1089 };
1090
1091 static const int32_t utf8Offsets[]={
1092 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1093 };
1094 testConvertToU(utf8, sizeof(utf8),
1095 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1096
1097 }
1098
1099 /* Test UTF-32BE bad data handling*/
1100 {
1101 static const uint8_t utf32[]={
1102 0x00, 0x00, 0x00, 0x61,
1103 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1104 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1105 0x00, 0x00, 0x00, 0x62,
1106 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1107 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1108 0x00, 0x00, 0x01, 0x62,
1109 0x00, 0x00, 0x02, 0x62
1110 };
1111 static const uint16_t utf32Expected[]={
1112 0x0061,
1113 0xfffd, /* 0x110000 out of range */
1114 0xDBFF, /* 0x10FFFF in range */
1115 0xDFFF,
1116 0x0062,
1117 0xfffd, /* 0xffffffff out of range */
1118 0xfffd, /* 0x7fffffff out of range */
1119 0x0162,
1120 0x0262
1121 };
1122 static const int32_t utf32Offsets[]={
1123 0, 4, 8, 8, 12, 16, 20, 24, 28
1124 };
1125 static const uint8_t utf32ExpectedBack[]={
1126 0x00, 0x00, 0x00, 0x61,
1127 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1128 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1129 0x00, 0x00, 0x00, 0x62,
1130 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1131 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1132 0x00, 0x00, 0x01, 0x62,
1133 0x00, 0x00, 0x02, 0x62
1134 };
1135 static const int32_t utf32OffsetsBack[]={
1136 0,0,0,0,
1137 1,1,1,1,
1138 2,2,2,2,
1139 4,4,4,4,
1140 5,5,5,5,
1141 6,6,6,6,
1142 7,7,7,7,
1143 8,8,8,8
1144 };
1145
1146 testConvertToU(utf32, sizeof(utf32),
1147 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1148 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1149 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1150 }
1151
1152 /* Test UTF-32LE bad data handling*/
1153 {
1154 static const uint8_t utf32[]={
1155 0x61, 0x00, 0x00, 0x00,
1156 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1157 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1158 0x62, 0x00, 0x00, 0x00,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1161 0x62, 0x01, 0x00, 0x00,
1162 0x62, 0x02, 0x00, 0x00,
1163 };
1164
1165 static const uint16_t utf32Expected[]={
1166 0x0061,
1167 0xfffd, /* 0x110000 out of range */
1168 0xDBFF, /* 0x10FFFF in range */
1169 0xDFFF,
1170 0x0062,
1171 0xfffd, /* 0xffffffff out of range */
1172 0xfffd, /* 0x7fffffff out of range */
1173 0x0162,
1174 0x0262
1175 };
1176 static const int32_t utf32Offsets[]={
1177 0, 4, 8, 8, 12, 16, 20, 24, 28
1178 };
1179 static const uint8_t utf32ExpectedBack[]={
1180 0x61, 0x00, 0x00, 0x00,
1181 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1182 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1183 0x62, 0x00, 0x00, 0x00,
1184 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1185 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1186 0x62, 0x01, 0x00, 0x00,
1187 0x62, 0x02, 0x00, 0x00
1188 };
1189 static const int32_t utf32OffsetsBack[]={
1190 0,0,0,0,
1191 1,1,1,1,
1192 2,2,2,2,
1193 4,4,4,4,
1194 5,5,5,5,
1195 6,6,6,6,
1196 7,7,7,7,
1197 8,8,8,8
1198 };
1199 testConvertToU(utf32, sizeof(utf32),
1200 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1201 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1202 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1203 }
1204 }
1205
1206 static void TestCoverageMBCS(){
1207 #if 0
1208 UErrorCode status = U_ZERO_ERROR;
1209 const char *directory = loadTestData(&status);
1210 char* tdpath = NULL;
1211 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1212 int len = strlen(directory);
1213 char* index=NULL;
1214
1215 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1216 uprv_strcpy(saveDirectory,u_getDataDirectory());
1217 log_verbose("Retrieved data directory %s \n",saveDirectory);
1218 uprv_strcpy(tdpath,directory);
1219 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1220
1221 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1222 *(index+1)=0;
1223 }
1224 u_setDataDirectory(tdpath);
1225 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1226 #endif
1227
1228 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1229 which is test file for MBCS conversion with single-byte codepage data.*/
1230 {
1231
1232 /* MBCS with single byte codepage data test1.ucm*/
1233 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1234 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1235 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1236
1237 /*from Unicode*/
1238 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1239 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1240 }
1241
1242 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1243 which is test file for MBCS conversion with three-byte codepage data.*/
1244 {
1245
1246 /* MBCS with three byte codepage data test3.ucm*/
1247 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1248 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1249 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1250
1251 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1252 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1253 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1254
1255 /*from Unicode*/
1256 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1257 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1258
1259 /*to Unicode*/
1260 testConvertToU(test3input, sizeof(test3input),
1261 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1262
1263 }
1264
1265 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1266 which is test file for MBCS conversion with four-byte codepage data.*/
1267 {
1268
1269 /* MBCS with three byte codepage data test4.ucm*/
1270 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1271 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1272 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1273
1274 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1275 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1276 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1277
1278 /*from Unicode*/
1279 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1280 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1281
1282 /*to Unicode*/
1283 testConvertToU(test4input, sizeof(test4input),
1284 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1285
1286 }
1287 #if 0
1288 free(tdpath);
1289 /* restore the original data directory */
1290 log_verbose("Setting the data directory to %s \n", saveDirectory);
1291 u_setDataDirectory(saveDirectory);
1292 free(saveDirectory);
1293 #endif
1294
1295 }
1296
1297 static void TestConverterType(const char *convName, UConverterType convType) {
1298 UConverter* myConverter;
1299 UErrorCode err = U_ZERO_ERROR;
1300
1301 myConverter = my_ucnv_open(convName, &err);
1302
1303 if (U_FAILURE(err)) {
1304 log_data_err("Failed to create an %s converter\n", convName);
1305 return;
1306 }
1307 else
1308 {
1309 if (ucnv_getType(myConverter)!=convType) {
1310 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1311 convName, convType);
1312 }
1313 else {
1314 log_verbose("ucnv_getType %s ok\n", convName);
1315 }
1316 }
1317 ucnv_close(myConverter);
1318 }
1319
1320 static void TestConverterTypesAndStarters()
1321 {
1322 #if !UCONFIG_NO_LEGACY_CONVERSION
1323 UConverter* myConverter;
1324 UErrorCode err = U_ZERO_ERROR;
1325 UBool mystarters[256];
1326
1327 /* const UBool expectedKSCstarters[256] = {
1328 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1329 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1330 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1331 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1332 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1333 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1334 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1335 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1336 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1337 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1338 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1339 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1340 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1341 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1342 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1343 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1344 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1345 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1346 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1347 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1348 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1349 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1350 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1351 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1352 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1353 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1354
1355
1356 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1357
1358 myConverter = ucnv_open("ksc", &err);
1359 if (U_FAILURE(err)) {
1360 log_data_err("Failed to create an ibm-ksc converter\n");
1361 return;
1362 }
1363 else
1364 {
1365 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1366 log_err("ucnv_getType Failed for ibm-949\n");
1367 else
1368 log_verbose("ucnv_getType ibm-949 ok\n");
1369
1370 if(myConverter!=NULL)
1371 ucnv_getStarters(myConverter, mystarters, &err);
1372
1373 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1374 log_err("Failed ucnv_getStarters for ksc\n");
1375 else
1376 log_verbose("ucnv_getStarters ok\n");*/
1377
1378 }
1379 ucnv_close(myConverter);
1380
1381 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1382 TestConverterType("ibm-878", UCNV_SBCS);
1383 #endif
1384
1385 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1386
1387 TestConverterType("ibm-1208", UCNV_UTF8);
1388
1389 TestConverterType("utf-8", UCNV_UTF8);
1390 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1391 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1392 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1393 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1394
1395 #if !UCONFIG_NO_LEGACY_CONVERSION
1396
1397 #if defined(U_ENABLE_GENERIC_ISO_2022)
1398 TestConverterType("iso-2022", UCNV_ISO_2022);
1399 #endif
1400
1401 TestConverterType("hz", UCNV_HZ);
1402 #endif
1403
1404 TestConverterType("scsu", UCNV_SCSU);
1405
1406 #if !UCONFIG_NO_LEGACY_CONVERSION
1407 TestConverterType("x-iscii-de", UCNV_ISCII);
1408 #endif
1409
1410 TestConverterType("ascii", UCNV_US_ASCII);
1411 TestConverterType("utf-7", UCNV_UTF7);
1412 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1413 TestConverterType("bocu-1", UCNV_BOCU1);
1414 }
1415
1416 static void
1417 TestAmbiguousConverter(UConverter *cnv) {
1418 static const char inBytes[2]={ 0x61, 0x5c };
1419 UChar outUnicode[20]={ 0, 0, 0, 0 };
1420
1421 const char *s;
1422 UChar *u;
1423 UErrorCode errorCode;
1424 UBool isAmbiguous;
1425
1426 /* try to convert an 'a' and a US-ASCII backslash */
1427 errorCode=U_ZERO_ERROR;
1428 s=inBytes;
1429 u=outUnicode;
1430 ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1431 if(U_FAILURE(errorCode)) {
1432 /* we do not care about general failures in this test; the input may just not be mappable */
1433 return;
1434 }
1435
1436 if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1437 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1438 return;
1439 }
1440
1441 isAmbiguous=ucnv_isAmbiguous(cnv);
1442
1443 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1444 if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1445 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1446 ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1447 return;
1448 }
1449
1450 if(outUnicode[1]!=0x5c) {
1451 /* needs fixup, fix it */
1452 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1453 if(outUnicode[1]!=0x5c) {
1454 /* the fix failed */
1455 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1456 return;
1457 }
1458 }
1459 }
1460
1461 static void TestAmbiguous()
1462 {
1463 UErrorCode status = U_ZERO_ERROR;
1464 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1465 static const char target[] = {
1466 /* "\\usr\\local\\share\\data\\icutest.txt" */
1467 0x5c, 0x75, 0x73, 0x72,
1468 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1469 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1470 0x5c, 0x64, 0x61, 0x74, 0x61,
1471 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1472 0
1473 };
1474 UChar asciiResult[200], sjisResult[200];
1475 int32_t asciiLength = 0, sjisLength = 0, i;
1476 const char *name;
1477
1478 /* enumerate all converters */
1479 status=U_ZERO_ERROR;
1480 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1481 cnv=ucnv_open(name, &status);
1482 if(U_SUCCESS(status)) {
1483 TestAmbiguousConverter(cnv);
1484 ucnv_close(cnv);
1485 } else {
1486 log_err("error: unable to open available converter \"%s\"\n", name);
1487 status=U_ZERO_ERROR;
1488 }
1489 }
1490
1491 #if !UCONFIG_NO_LEGACY_CONVERSION
1492 sjis_cnv = ucnv_open("ibm-943", &status);
1493 if (U_FAILURE(status))
1494 {
1495 log_data_err("Failed to create a SJIS converter\n");
1496 return;
1497 }
1498 ascii_cnv = ucnv_open("LATIN-1", &status);
1499 if (U_FAILURE(status))
1500 {
1501 log_data_err("Failed to create a LATIN-1 converter\n");
1502 ucnv_close(sjis_cnv);
1503 return;
1504 }
1505 /* convert target from SJIS to Unicode */
1506 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1507 if (U_FAILURE(status))
1508 {
1509 log_err("Failed to convert the SJIS string.\n");
1510 ucnv_close(sjis_cnv);
1511 ucnv_close(ascii_cnv);
1512 return;
1513 }
1514 /* convert target from Latin-1 to Unicode */
1515 asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1516 if (U_FAILURE(status))
1517 {
1518 log_err("Failed to convert the Latin-1 string.\n");
1519 free(sjisResult);
1520 ucnv_close(sjis_cnv);
1521 ucnv_close(ascii_cnv);
1522 return;
1523 }
1524 if (!ucnv_isAmbiguous(sjis_cnv))
1525 {
1526 log_err("SJIS converter should contain ambiguous character mappings.\n");
1527 free(sjisResult);
1528 free(asciiResult);
1529 ucnv_close(sjis_cnv);
1530 ucnv_close(ascii_cnv);
1531 return;
1532 }
1533 if (u_strcmp(sjisResult, asciiResult) == 0)
1534 {
1535 log_err("File separators for SJIS don't need to be fixed.\n");
1536 }
1537 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1538 if (u_strcmp(sjisResult, asciiResult) != 0)
1539 {
1540 log_err("Fixing file separator for SJIS failed.\n");
1541 }
1542 ucnv_close(sjis_cnv);
1543 ucnv_close(ascii_cnv);
1544 #endif
1545 }
1546
1547 static void
1548 TestSignatureDetection(){
1549 /* with null terminated strings */
1550 {
1551 static const char* data[] = {
1552 "\xFE\xFF\x00\x00", /* UTF-16BE */
1553 "\xFF\xFE\x00\x00", /* UTF-16LE */
1554 "\xEF\xBB\xBF\x00", /* UTF-8 */
1555 "\x0E\xFE\xFF\x00", /* SCSU */
1556
1557 "\xFE\xFF", /* UTF-16BE */
1558 "\xFF\xFE", /* UTF-16LE */
1559 "\xEF\xBB\xBF", /* UTF-8 */
1560 "\x0E\xFE\xFF", /* SCSU */
1561
1562 "\xFE\xFF\x41\x42", /* UTF-16BE */
1563 "\xFF\xFE\x41\x41", /* UTF-16LE */
1564 "\xEF\xBB\xBF\x41", /* UTF-8 */
1565 "\x0E\xFE\xFF\x41", /* SCSU */
1566
1567 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1568 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1569 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1570 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1571 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1572
1573 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1574 };
1575 static const char* expected[] = {
1576 "UTF-16BE",
1577 "UTF-16LE",
1578 "UTF-8",
1579 "SCSU",
1580
1581 "UTF-16BE",
1582 "UTF-16LE",
1583 "UTF-8",
1584 "SCSU",
1585
1586 "UTF-16BE",
1587 "UTF-16LE",
1588 "UTF-8",
1589 "SCSU",
1590
1591 "UTF-7",
1592 "UTF-7",
1593 "UTF-7",
1594 "UTF-7",
1595 "UTF-7",
1596 "UTF-EBCDIC"
1597 };
1598 static const int32_t expectedLength[] ={
1599 2,
1600 2,
1601 3,
1602 3,
1603
1604 2,
1605 2,
1606 3,
1607 3,
1608
1609 2,
1610 2,
1611 3,
1612 3,
1613
1614 5,
1615 4,
1616 4,
1617 4,
1618 4,
1619 4
1620 };
1621 int i=0;
1622 UErrorCode err;
1623 int32_t signatureLength = -1;
1624 const char* source = NULL;
1625 const char* enc = NULL;
1626 for( ; i<sizeof(data)/sizeof(char*); i++){
1627 err = U_ZERO_ERROR;
1628 source = data[i];
1629 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1630 if(U_FAILURE(err)){
1631 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1632 continue;
1633 }
1634 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1635 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1636 continue;
1637 }
1638 if(signatureLength != expectedLength[i]){
1639 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1640 }
1641 }
1642 }
1643 {
1644 static const char* data[] = {
1645 "\xFE\xFF\x00", /* UTF-16BE */
1646 "\xFF\xFE\x00", /* UTF-16LE */
1647 "\xEF\xBB\xBF\x00", /* UTF-8 */
1648 "\x0E\xFE\xFF\x00", /* SCSU */
1649 "\x00\x00\xFE\xFF", /* UTF-32BE */
1650 "\xFF\xFE\x00\x00", /* UTF-32LE */
1651 "\xFE\xFF", /* UTF-16BE */
1652 "\xFF\xFE", /* UTF-16LE */
1653 "\xEF\xBB\xBF", /* UTF-8 */
1654 "\x0E\xFE\xFF", /* SCSU */
1655 "\x00\x00\xFE\xFF", /* UTF-32BE */
1656 "\xFF\xFE\x00\x00", /* UTF-32LE */
1657 "\xFE\xFF\x41\x42", /* UTF-16BE */
1658 "\xFF\xFE\x41\x41", /* UTF-16LE */
1659 "\xEF\xBB\xBF\x41", /* UTF-8 */
1660 "\x0E\xFE\xFF\x41", /* SCSU */
1661 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1662 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1663 "\xFB\xEE\x28", /* BOCU-1 */
1664 "\xFF\x41\x42" /* NULL */
1665 };
1666 static const int len[] = {
1667 3,
1668 3,
1669 4,
1670 4,
1671 4,
1672 4,
1673 2,
1674 2,
1675 3,
1676 3,
1677 4,
1678 4,
1679 4,
1680 4,
1681 4,
1682 4,
1683 5,
1684 5,
1685 3,
1686 3
1687 };
1688
1689 static const char* expected[] = {
1690 "UTF-16BE",
1691 "UTF-16LE",
1692 "UTF-8",
1693 "SCSU",
1694 "UTF-32BE",
1695 "UTF-32LE",
1696 "UTF-16BE",
1697 "UTF-16LE",
1698 "UTF-8",
1699 "SCSU",
1700 "UTF-32BE",
1701 "UTF-32LE",
1702 "UTF-16BE",
1703 "UTF-16LE",
1704 "UTF-8",
1705 "SCSU",
1706 "UTF-32BE",
1707 "UTF-32LE",
1708 "BOCU-1",
1709 NULL
1710 };
1711 static const int32_t expectedLength[] ={
1712 2,
1713 2,
1714 3,
1715 3,
1716 4,
1717 4,
1718 2,
1719 2,
1720 3,
1721 3,
1722 4,
1723 4,
1724 2,
1725 2,
1726 3,
1727 3,
1728 4,
1729 4,
1730 3,
1731 0
1732 };
1733 int i=0;
1734 UErrorCode err;
1735 int32_t signatureLength = -1;
1736 int32_t sourceLength=-1;
1737 const char* source = NULL;
1738 const char* enc = NULL;
1739 for( ; i<sizeof(data)/sizeof(char*); i++){
1740 err = U_ZERO_ERROR;
1741 source = data[i];
1742 sourceLength = len[i];
1743 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1744 if(U_FAILURE(err)){
1745 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1746 continue;
1747 }
1748 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1749 if(expected[i] !=NULL){
1750 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1751 continue;
1752 }
1753 }
1754 if(signatureLength != expectedLength[i]){
1755 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1756 }
1757 }
1758 }
1759 }
1760
1761 void
1762 static TestUTF7() {
1763 /* test input */
1764 static const uint8_t in[]={
1765 /* H - +Jjo- - ! +- +2AHcAQ */
1766 0x48,
1767 0x2d,
1768 0x2b, 0x4a, 0x6a, 0x6f,
1769 0x2d, 0x2d,
1770 0x21,
1771 0x2b, 0x2d,
1772 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1773 };
1774
1775 /* expected test results */
1776 static const int32_t results[]={
1777 /* number of bytes read, code point */
1778 1, 0x48,
1779 1, 0x2d,
1780 4, 0x263a, /* <WHITE SMILING FACE> */
1781 2, 0x2d,
1782 1, 0x21,
1783 2, 0x2b,
1784 7, 0x10401
1785 };
1786
1787 const char *cnvName;
1788 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1789 UErrorCode errorCode=U_ZERO_ERROR;
1790 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1791 if(U_FAILURE(errorCode)) {
1792 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1793 return;
1794 }
1795 TestNextUChar(cnv, source, limit, results, "UTF-7");
1796 /* Test the condition when source >= sourceLimit */
1797 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1798 cnvName = ucnv_getName(cnv, &errorCode);
1799 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1800 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1801 }
1802 ucnv_close(cnv);
1803 }
1804
1805 void
1806 static TestIMAP() {
1807 /* test input */
1808 static const uint8_t in[]={
1809 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1810 0x48,
1811 0x2d,
1812 0x26, 0x4a, 0x6a, 0x6f,
1813 0x2d, 0x2d,
1814 0x21,
1815 0x26, 0x2d,
1816 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1817 };
1818
1819 /* expected test results */
1820 static const int32_t results[]={
1821 /* number of bytes read, code point */
1822 1, 0x48,
1823 1, 0x2d,
1824 4, 0x263a, /* <WHITE SMILING FACE> */
1825 2, 0x2d,
1826 1, 0x21,
1827 2, 0x26,
1828 7, 0x10401
1829 };
1830
1831 const char *cnvName;
1832 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1833 UErrorCode errorCode=U_ZERO_ERROR;
1834 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1835 if(U_FAILURE(errorCode)) {
1836 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1837 return;
1838 }
1839 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1840 /* Test the condition when source >= sourceLimit */
1841 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1842 cnvName = ucnv_getName(cnv, &errorCode);
1843 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1844 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1845 }
1846 ucnv_close(cnv);
1847 }
1848
1849 void
1850 static TestUTF8() {
1851 /* test input */
1852 static const uint8_t in[]={
1853 0x61,
1854 0xc2, 0x80,
1855 0xe0, 0xa0, 0x80,
1856 0xf0, 0x90, 0x80, 0x80,
1857 0xf4, 0x84, 0x8c, 0xa1,
1858 0xf0, 0x90, 0x90, 0x81
1859 };
1860
1861 /* expected test results */
1862 static const int32_t results[]={
1863 /* number of bytes read, code point */
1864 1, 0x61,
1865 2, 0x80,
1866 3, 0x800,
1867 4, 0x10000,
1868 4, 0x104321,
1869 4, 0x10401
1870 };
1871
1872 /* error test input */
1873 static const uint8_t in2[]={
1874 0x61,
1875 0xc0, 0x80, /* illegal non-shortest form */
1876 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1877 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1878 0xc0, 0xc0, /* illegal trail byte */
1879 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1880 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1881 0xfe, /* illegal byte altogether */
1882 0x62
1883 };
1884
1885 /* expected error test results */
1886 static const int32_t results2[]={
1887 /* number of bytes read, code point */
1888 1, 0x61,
1889 22, 0x62
1890 };
1891
1892 UConverterToUCallback cb;
1893 const void *p;
1894
1895 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1896 UErrorCode errorCode=U_ZERO_ERROR;
1897 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1898 if(U_FAILURE(errorCode)) {
1899 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1900 return;
1901 }
1902 TestNextUChar(cnv, source, limit, results, "UTF-8");
1903 /* Test the condition when source >= sourceLimit */
1904 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1905
1906 /* test error behavior with a skip callback */
1907 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1908 source=(const char *)in2;
1909 limit=(const char *)(in2+sizeof(in2));
1910 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1911
1912 ucnv_close(cnv);
1913 }
1914
1915 void
1916 static TestCESU8() {
1917 /* test input */
1918 static const uint8_t in[]={
1919 0x61,
1920 0xc2, 0x80,
1921 0xe0, 0xa0, 0x80,
1922 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1923 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1924 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1925 0xef, 0xbf, 0xbc
1926 };
1927
1928 /* expected test results */
1929 static const int32_t results[]={
1930 /* number of bytes read, code point */
1931 1, 0x61,
1932 2, 0x80,
1933 3, 0x800,
1934 6, 0x10000,
1935 3, 0xdc01,
1936 -1,0xd802, /* may read 3 or 6 bytes */
1937 -1,0x10ffff,/* may read 0 or 3 bytes */
1938 3, 0xfffc
1939 };
1940
1941 /* error test input */
1942 static const uint8_t in2[]={
1943 0x61,
1944 0xc0, 0x80, /* illegal non-shortest form */
1945 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1946 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1947 0xc0, 0xc0, /* illegal trail byte */
1948 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1949 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1950 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1951 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1952 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1953 0xfe, /* illegal byte altogether */
1954 0x62
1955 };
1956
1957 /* expected error test results */
1958 static const int32_t results2[]={
1959 /* number of bytes read, code point */
1960 1, 0x61,
1961 34, 0x62
1962 };
1963
1964 UConverterToUCallback cb;
1965 const void *p;
1966
1967 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1968 UErrorCode errorCode=U_ZERO_ERROR;
1969 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1970 if(U_FAILURE(errorCode)) {
1971 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1972 return;
1973 }
1974 TestNextUChar(cnv, source, limit, results, "CESU-8");
1975 /* Test the condition when source >= sourceLimit */
1976 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1977
1978 /* test error behavior with a skip callback */
1979 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1980 source=(const char *)in2;
1981 limit=(const char *)(in2+sizeof(in2));
1982 TestNextUChar(cnv, source, limit, results2, "CESU-8");
1983
1984 ucnv_close(cnv);
1985 }
1986
1987 void
1988 static TestUTF16() {
1989 /* test input */
1990 static const uint8_t in1[]={
1991 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1992 };
1993 static const uint8_t in2[]={
1994 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1995 };
1996 static const uint8_t in3[]={
1997 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1998 };
1999
2000 /* expected test results */
2001 static const int32_t results1[]={
2002 /* number of bytes read, code point */
2003 4, 0x4e00,
2004 2, 0xfeff
2005 };
2006 static const int32_t results2[]={
2007 /* number of bytes read, code point */
2008 4, 0x004e,
2009 2, 0xfffe
2010 };
2011 static const int32_t results3[]={
2012 /* number of bytes read, code point */
2013 2, 0xfefe,
2014 2, 0x4e00,
2015 2, 0xfeff,
2016 4, 0x20001
2017 };
2018
2019 const char *source, *limit;
2020
2021 UErrorCode errorCode=U_ZERO_ERROR;
2022 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2023 if(U_FAILURE(errorCode)) {
2024 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2025 return;
2026 }
2027
2028 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2029 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2030
2031 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2032 ucnv_resetToUnicode(cnv);
2033 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2034
2035 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2036 ucnv_resetToUnicode(cnv);
2037 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2038
2039 /* Test the condition when source >= sourceLimit */
2040 ucnv_resetToUnicode(cnv);
2041 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2042
2043 ucnv_close(cnv);
2044 }
2045
2046 void
2047 static TestUTF16BE() {
2048 /* test input */
2049 static const uint8_t in[]={
2050 0x00, 0x61,
2051 0x00, 0xc0,
2052 0x00, 0x31,
2053 0x00, 0xf4,
2054 0xce, 0xfe,
2055 0xd8, 0x01, 0xdc, 0x01
2056 };
2057
2058 /* expected test results */
2059 static const int32_t results[]={
2060 /* number of bytes read, code point */
2061 2, 0x61,
2062 2, 0xc0,
2063 2, 0x31,
2064 2, 0xf4,
2065 2, 0xcefe,
2066 4, 0x10401
2067 };
2068
2069 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2070 UErrorCode errorCode=U_ZERO_ERROR;
2071 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2072 if(U_FAILURE(errorCode)) {
2073 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2074 return;
2075 }
2076 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2077 /* Test the condition when source >= sourceLimit */
2078 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079 /*Test for the condition where there is an invalid character*/
2080 {
2081 static const uint8_t source2[]={0x61};
2082 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2083 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2084 }
2085 #if 0
2086 /*
2087 * Test disabled because currently the UTF-16BE/LE converters are supposed
2088 * to not set errors for unpaired surrogates.
2089 * This may change with
2090 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2091 */
2092
2093 /*Test for the condition where there is a surrogate pair*/
2094 {
2095 const uint8_t source2[]={0xd8, 0x01};
2096 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2097 }
2098 #endif
2099 ucnv_close(cnv);
2100 }
2101
2102 static void
2103 TestUTF16LE() {
2104 /* test input */
2105 static const uint8_t in[]={
2106 0x61, 0x00,
2107 0x31, 0x00,
2108 0x4e, 0x2e,
2109 0x4e, 0x00,
2110 0x01, 0xd8, 0x01, 0xdc
2111 };
2112
2113 /* expected test results */
2114 static const int32_t results[]={
2115 /* number of bytes read, code point */
2116 2, 0x61,
2117 2, 0x31,
2118 2, 0x2e4e,
2119 2, 0x4e,
2120 4, 0x10401
2121 };
2122
2123 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2124 UErrorCode errorCode=U_ZERO_ERROR;
2125 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2126 if(U_FAILURE(errorCode)) {
2127 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2128 return;
2129 }
2130 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2131 /* Test the condition when source >= sourceLimit */
2132 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2133 /*Test for the condition where there is an invalid character*/
2134 {
2135 static const uint8_t source2[]={0x61};
2136 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2137 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2138 }
2139 #if 0
2140 /*
2141 * Test disabled because currently the UTF-16BE/LE converters are supposed
2142 * to not set errors for unpaired surrogates.
2143 * This may change with
2144 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2145 */
2146
2147 /*Test for the condition where there is a surrogate character*/
2148 {
2149 static const uint8_t source2[]={0x01, 0xd8};
2150 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2151 }
2152 #endif
2153
2154 ucnv_close(cnv);
2155 }
2156
2157 void
2158 static TestUTF32() {
2159 /* test input */
2160 static const uint8_t in1[]={
2161 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2162 };
2163 static const uint8_t in2[]={
2164 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2165 };
2166 static const uint8_t in3[]={
2167 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2168 };
2169
2170 /* expected test results */
2171 static const int32_t results1[]={
2172 /* number of bytes read, code point */
2173 8, 0x100f00,
2174 4, 0xfeff
2175 };
2176 static const int32_t results2[]={
2177 /* number of bytes read, code point */
2178 8, 0x0f1000,
2179 4, 0xfffe
2180 };
2181 static const int32_t results3[]={
2182 /* number of bytes read, code point */
2183 4, 0xfefe,
2184 4, 0x100f00,
2185 4, 0xfffd, /* unmatched surrogate */
2186 4, 0xfffd /* unmatched surrogate */
2187 };
2188
2189 const char *source, *limit;
2190
2191 UErrorCode errorCode=U_ZERO_ERROR;
2192 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2193 if(U_FAILURE(errorCode)) {
2194 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2195 return;
2196 }
2197
2198 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2199 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2200
2201 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2202 ucnv_resetToUnicode(cnv);
2203 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2204
2205 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2206 ucnv_resetToUnicode(cnv);
2207 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2208
2209 /* Test the condition when source >= sourceLimit */
2210 ucnv_resetToUnicode(cnv);
2211 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2212
2213 ucnv_close(cnv);
2214 }
2215
2216 static void
2217 TestUTF32BE() {
2218 /* test input */
2219 static const uint8_t in[]={
2220 0x00, 0x00, 0x00, 0x61,
2221 0x00, 0x00, 0x30, 0x61,
2222 0x00, 0x00, 0xdc, 0x00,
2223 0x00, 0x00, 0xd8, 0x00,
2224 0x00, 0x00, 0xdf, 0xff,
2225 0x00, 0x00, 0xff, 0xfe,
2226 0x00, 0x10, 0xab, 0xcd,
2227 0x00, 0x10, 0xff, 0xff
2228 };
2229
2230 /* expected test results */
2231 static const int32_t results[]={
2232 /* number of bytes read, code point */
2233 4, 0x61,
2234 4, 0x3061,
2235 4, 0xfffd,
2236 4, 0xfffd,
2237 4, 0xfffd,
2238 4, 0xfffe,
2239 4, 0x10abcd,
2240 4, 0x10ffff
2241 };
2242
2243 /* error test input */
2244 static const uint8_t in2[]={
2245 0x00, 0x00, 0x00, 0x61,
2246 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2247 0x00, 0x00, 0x00, 0x62,
2248 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2249 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2250 0x00, 0x00, 0x01, 0x62,
2251 0x00, 0x00, 0x02, 0x62
2252 };
2253
2254 /* expected error test results */
2255 static const int32_t results2[]={
2256 /* number of bytes read, code point */
2257 4, 0x61,
2258 8, 0x62,
2259 12, 0x162,
2260 4, 0x262
2261 };
2262
2263 UConverterToUCallback cb;
2264 const void *p;
2265
2266 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2267 UErrorCode errorCode=U_ZERO_ERROR;
2268 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2269 if(U_FAILURE(errorCode)) {
2270 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2271 return;
2272 }
2273 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2274
2275 /* Test the condition when source >= sourceLimit */
2276 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2277
2278 /* test error behavior with a skip callback */
2279 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2280 source=(const char *)in2;
2281 limit=(const char *)(in2+sizeof(in2));
2282 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2283
2284 ucnv_close(cnv);
2285 }
2286
2287 static void
2288 TestUTF32LE() {
2289 /* test input */
2290 static const uint8_t in[]={
2291 0x61, 0x00, 0x00, 0x00,
2292 0x61, 0x30, 0x00, 0x00,
2293 0x00, 0xdc, 0x00, 0x00,
2294 0x00, 0xd8, 0x00, 0x00,
2295 0xff, 0xdf, 0x00, 0x00,
2296 0xfe, 0xff, 0x00, 0x00,
2297 0xcd, 0xab, 0x10, 0x00,
2298 0xff, 0xff, 0x10, 0x00
2299 };
2300
2301 /* expected test results */
2302 static const int32_t results[]={
2303 /* number of bytes read, code point */
2304 4, 0x61,
2305 4, 0x3061,
2306 4, 0xfffd,
2307 4, 0xfffd,
2308 4, 0xfffd,
2309 4, 0xfffe,
2310 4, 0x10abcd,
2311 4, 0x10ffff
2312 };
2313
2314 /* error test input */
2315 static const uint8_t in2[]={
2316 0x61, 0x00, 0x00, 0x00,
2317 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2318 0x62, 0x00, 0x00, 0x00,
2319 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2320 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2321 0x62, 0x01, 0x00, 0x00,
2322 0x62, 0x02, 0x00, 0x00,
2323 };
2324
2325 /* expected error test results */
2326 static const int32_t results2[]={
2327 /* number of bytes read, code point */
2328 4, 0x61,
2329 8, 0x62,
2330 12, 0x162,
2331 4, 0x262,
2332 };
2333
2334 UConverterToUCallback cb;
2335 const void *p;
2336
2337 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2338 UErrorCode errorCode=U_ZERO_ERROR;
2339 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2340 if(U_FAILURE(errorCode)) {
2341 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2342 return;
2343 }
2344 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2345
2346 /* Test the condition when source >= sourceLimit */
2347 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2348
2349 /* test error behavior with a skip callback */
2350 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2351 source=(const char *)in2;
2352 limit=(const char *)(in2+sizeof(in2));
2353 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2354
2355 ucnv_close(cnv);
2356 }
2357
2358 static void
2359 TestLATIN1() {
2360 /* test input */
2361 static const uint8_t in[]={
2362 0x61,
2363 0x31,
2364 0x32,
2365 0xc0,
2366 0xf0,
2367 0xf4,
2368 };
2369
2370 /* expected test results */
2371 static const int32_t results[]={
2372 /* number of bytes read, code point */
2373 1, 0x61,
2374 1, 0x31,
2375 1, 0x32,
2376 1, 0xc0,
2377 1, 0xf0,
2378 1, 0xf4,
2379 };
2380 static const uint16_t in1[] = {
2381 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2382 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2383 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2384 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2385 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2386 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2387 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2388 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2389 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2390 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2391 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2392 0xcb, 0x82
2393 };
2394 static const uint8_t out1[] = {
2395 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2396 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2397 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2398 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2399 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2400 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2401 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2402 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2403 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2404 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2405 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2406 0xcb, 0x82
2407 };
2408 static const uint16_t in2[]={
2409 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2410 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2411 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2412 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2413 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2414 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2415 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2416 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2417 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2418 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2419 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2420 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2421 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2422 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2423 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2424 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2425 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2426 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2427 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2428 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2429 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2430 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2431 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2432 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2433 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2434 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2435 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2436 0x37, 0x20, 0x2A, 0x2F,
2437 };
2438 static const unsigned char out2[]={
2439 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2440 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2441 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2442 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2443 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2444 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2445 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2446 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2447 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2448 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2449 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2450 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2451 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2452 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2453 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2454 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2455 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2456 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2457 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2458 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2459 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2460 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2461 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2462 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2463 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2464 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2465 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2466 0x37, 0x20, 0x2A, 0x2F,
2467 };
2468 const char *source=(const char *)in;
2469 const char *limit=(const char *)in+sizeof(in);
2470
2471 UErrorCode errorCode=U_ZERO_ERROR;
2472 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2473 if(U_FAILURE(errorCode)) {
2474 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2475 return;
2476 }
2477 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2478 /* Test the condition when source >= sourceLimit */
2479 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2480 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2481 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2482
2483 ucnv_close(cnv);
2484 }
2485
2486 static void
2487 TestSBCS() {
2488 /* test input */
2489 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2490 /* expected test results */
2491 static const int32_t results[]={
2492 /* number of bytes read, code point */
2493 1, 0x61,
2494 1, 0xbf,
2495 1, 0xc4,
2496 1, 0x2021,
2497 1, 0xf8ff,
2498 1, 0x00d9
2499 };
2500
2501 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2502 UErrorCode errorCode=U_ZERO_ERROR;
2503 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2504 if(U_FAILURE(errorCode)) {
2505 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2506 return;
2507 }
2508 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2509 /* Test the condition when source >= sourceLimit */
2510 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2511 /*Test for Illegal character */ /*
2512 {
2513 static const uint8_t input1[]={ 0xA1 };
2514 const char* illegalsource=(const char*)input1;
2515 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2516 }
2517 */
2518 ucnv_close(cnv);
2519 }
2520
2521 static void
2522 TestDBCS() {
2523 /* test input */
2524 static const uint8_t in[]={
2525 0x44, 0x6a,
2526 0xc4, 0x9c,
2527 0x7a, 0x74,
2528 0x46, 0xab,
2529 0x42, 0x5b,
2530
2531 };
2532
2533 /* expected test results */
2534 static const int32_t results[]={
2535 /* number of bytes read, code point */
2536 2, 0x00a7,
2537 2, 0xe1d2,
2538 2, 0x6962,
2539 2, 0xf842,
2540 2, 0xffe5,
2541 };
2542
2543 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2544 UErrorCode errorCode=U_ZERO_ERROR;
2545
2546 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2547 if(U_FAILURE(errorCode)) {
2548 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2549 return;
2550 }
2551 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2552 /* Test the condition when source >= sourceLimit */
2553 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2554 /*Test for the condition where there is an invalid character*/
2555 {
2556 static const uint8_t source2[]={0x1a, 0x1b};
2557 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2558 }
2559 /*Test for the condition where we have a truncated char*/
2560 {
2561 static const uint8_t source1[]={0xc4};
2562 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2563 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2564 }
2565 ucnv_close(cnv);
2566 }
2567
2568 static void
2569 TestMBCS() {
2570 /* test input */
2571 static const uint8_t in[]={
2572 0x01,
2573 0xa6, 0xa3,
2574 0x00,
2575 0xa6, 0xa1,
2576 0x08,
2577 0xc2, 0x76,
2578 0xc2, 0x78,
2579
2580 };
2581
2582 /* expected test results */
2583 static const int32_t results[]={
2584 /* number of bytes read, code point */
2585 1, 0x0001,
2586 2, 0x250c,
2587 1, 0x0000,
2588 2, 0x2500,
2589 1, 0x0008,
2590 2, 0xd60c,
2591 2, 0xd60e,
2592 };
2593
2594 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2595 UErrorCode errorCode=U_ZERO_ERROR;
2596
2597 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2598 if(U_FAILURE(errorCode)) {
2599 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2600 return;
2601 }
2602 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2603 /* Test the condition when source >= sourceLimit */
2604 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2605 /*Test for the condition where there is an invalid character*/
2606 {
2607 static const uint8_t source2[]={0xa1, 0x01};
2608 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2609 }
2610 /*Test for the condition where we have a truncated char*/
2611 {
2612 static const uint8_t source1[]={0xc4};
2613 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2614 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2615 }
2616 ucnv_close(cnv);
2617
2618 }
2619
2620 #ifdef U_ENABLE_GENERIC_ISO_2022
2621
2622 static void
2623 TestISO_2022() {
2624 /* test input */
2625 static const uint8_t in[]={
2626 0x1b, 0x25, 0x42,
2627 0x31,
2628 0x32,
2629 0x61,
2630 0xc2, 0x80,
2631 0xe0, 0xa0, 0x80,
2632 0xf0, 0x90, 0x80, 0x80
2633 };
2634
2635
2636
2637 /* expected test results */
2638 static const int32_t results[]={
2639 /* number of bytes read, code point */
2640 4, 0x0031, /* 4 bytes including the escape sequence */
2641 1, 0x0032,
2642 1, 0x61,
2643 2, 0x80,
2644 3, 0x800,
2645 4, 0x10000
2646 };
2647
2648 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2649 UErrorCode errorCode=U_ZERO_ERROR;
2650 UConverter *cnv;
2651
2652 cnv=ucnv_open("ISO_2022", &errorCode);
2653 if(U_FAILURE(errorCode)) {
2654 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2655 return;
2656 }
2657 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2658
2659 /* Test the condition when source >= sourceLimit */
2660 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2661 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2662 /*Test for the condition where we have a truncated char*/
2663 {
2664 static const uint8_t source1[]={0xc4};
2665 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2666 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2667 }
2668 /*Test for the condition where there is an invalid character*/
2669 {
2670 static const uint8_t source2[]={0xa1, 0x01};
2671 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2672 }
2673 ucnv_close(cnv);
2674 }
2675
2676 #endif
2677
2678 static void
2679 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2680 const UChar* uSource;
2681 const UChar* uSourceLimit;
2682 const char* cSource;
2683 const char* cSourceLimit;
2684 UChar *uTargetLimit =NULL;
2685 UChar *uTarget;
2686 char *cTarget;
2687 const char *cTargetLimit;
2688 char *cBuf;
2689 UChar *uBuf,*test;
2690 int32_t uBufSize = 120;
2691 int len=0;
2692 int i=2;
2693 UErrorCode errorCode=U_ZERO_ERROR;
2694 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2695 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2696 ucnv_reset(cnv);
2697 for(;--i>0; ){
2698 uSource = (UChar*) source;
2699 uSourceLimit=(const UChar*)sourceLimit;
2700 cTarget = cBuf;
2701 uTarget = uBuf;
2702 cSource = cBuf;
2703 cTargetLimit = cBuf;
2704 uTargetLimit = uBuf;
2705
2706 do{
2707
2708 cTargetLimit = cTargetLimit+ i;
2709 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2710 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2711 errorCode=U_ZERO_ERROR;
2712 continue;
2713 }
2714
2715 if(U_FAILURE(errorCode)){
2716 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2717 return;
2718 }
2719
2720 }while (uSource<uSourceLimit);
2721
2722 cSourceLimit =cTarget;
2723 do{
2724 uTargetLimit=uTargetLimit+i;
2725 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2726 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2727 errorCode=U_ZERO_ERROR;
2728 continue;
2729 }
2730 if(U_FAILURE(errorCode)){
2731 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2732 return;
2733 }
2734 }while(cSource<cSourceLimit);
2735
2736 uSource = source;
2737 test =uBuf;
2738 for(len=0;len<(int)(source - sourceLimit);len++){
2739 if(uBuf[len]!=uSource[len]){
2740 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2741 }
2742 }
2743 }
2744 free(uBuf);
2745 free(cBuf);
2746 }
2747 /* Test for Jitterbug 778 */
2748 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2749 const UChar* uSource;
2750 const UChar* uSourceLimit;
2751 const char* cSource;
2752 UChar *uTargetLimit =NULL;
2753 UChar *uTarget;
2754 char *cTarget;
2755 const char *cTargetLimit;
2756 char *cBuf;
2757 UChar *uBuf,*test;
2758 int32_t uBufSize = 120;
2759 int numCharsInTarget=0;
2760 UErrorCode errorCode=U_ZERO_ERROR;
2761 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2762 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2763 uSource = source;
2764 uSourceLimit=sourceLimit;
2765 cTarget = cBuf;
2766 cTargetLimit = cBuf +uBufSize*5;
2767 uTarget = uBuf;
2768 uTargetLimit = uBuf+ uBufSize*5;
2769 ucnv_reset(cnv);
2770 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2771 if(U_FAILURE(errorCode)){
2772 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2773 return;
2774 }
2775 cSource = cBuf;
2776 test =uBuf;
2777 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2778 if(U_FAILURE(errorCode)){
2779 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2780 return;
2781 }
2782 uSource = source;
2783 while(uSource<uSourceLimit){
2784 if(*test!=*uSource){
2785
2786 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2787 }
2788 uSource++;
2789 test++;
2790 }
2791 free(uBuf);
2792 free(cBuf);
2793 }
2794
2795 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2796 const UChar* uSource;
2797 const UChar* uSourceLimit;
2798 const char* cSource;
2799 const char* cSourceLimit;
2800 UChar *uTargetLimit =NULL;
2801 UChar *uTarget;
2802 char *cTarget;
2803 const char *cTargetLimit;
2804 char *cBuf;
2805 UChar *uBuf,*test;
2806 int32_t uBufSize = 120;
2807 int len=0;
2808 int i=2;
2809 const UChar *temp = sourceLimit;
2810 UErrorCode errorCode=U_ZERO_ERROR;
2811 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2812 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2813
2814 ucnv_reset(cnv);
2815 for(;--i>0;){
2816 uSource = (UChar*) source;
2817 cTarget = cBuf;
2818 uTarget = uBuf;
2819 cSource = cBuf;
2820 cTargetLimit = cBuf;
2821 uTargetLimit = uBuf+uBufSize*5;
2822 cTargetLimit = cTargetLimit+uBufSize*10;
2823 uSourceLimit=uSource;
2824 do{
2825
2826 if (uSourceLimit < sourceLimit) {
2827 uSourceLimit = uSourceLimit+1;
2828 }
2829 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2830 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2831 errorCode=U_ZERO_ERROR;
2832 continue;
2833 }
2834
2835 if(U_FAILURE(errorCode)){
2836 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2837 return;
2838 }
2839
2840 }while (uSource<temp);
2841
2842 cSourceLimit =cBuf;
2843 do{
2844 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2845 cSourceLimit = cSourceLimit+1;
2846 }
2847 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2848 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2849 errorCode=U_ZERO_ERROR;
2850 continue;
2851 }
2852 if(U_FAILURE(errorCode)){
2853 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2854 return;
2855 }
2856 }while(cSource<cTarget);
2857
2858 uSource = source;
2859 test =uBuf;
2860 for(;len<(int)(source - sourceLimit);len++){
2861 if(uBuf[len]!=uSource[len]){
2862 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2863 }
2864 }
2865 }
2866 free(uBuf);
2867 free(cBuf);
2868 }
2869 static void
2870 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2871 const uint16_t results[], const char* message){
2872 const char* s0;
2873 const char* s=(char*)source;
2874 const uint16_t *r=results;
2875 UErrorCode errorCode=U_ZERO_ERROR;
2876 uint32_t c,exC;
2877 ucnv_reset(cnv);
2878 while(s<limit) {
2879 s0=s;
2880 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2881 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2882 break; /* no more significant input */
2883 } else if(U_FAILURE(errorCode)) {
2884 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2885 break;
2886 } else {
2887 if(UTF_IS_FIRST_SURROGATE(*r)){
2888 int i =0, len = 2;
2889 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2890 r++;
2891 }else{
2892 exC = *r;
2893 }
2894 if(c!=(uint32_t)(exC))
2895 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2896 }
2897 r++;
2898 }
2899 }
2900
2901 static int TestJitterbug930(const char* enc){
2902 UErrorCode err = U_ZERO_ERROR;
2903 UConverter*converter;
2904 char out[80];
2905 char*target = out;
2906 UChar in[4];
2907 const UChar*source = in;
2908 int32_t off[80];
2909 int32_t* offsets = off;
2910 int numOffWritten=0;
2911 UBool flush = 0;
2912 converter = my_ucnv_open(enc, &err);
2913
2914 in[0] = 0x41; /* 0x4E00;*/
2915 in[1] = 0x4E01;
2916 in[2] = 0x4E02;
2917 in[3] = 0x4E03;
2918
2919 memset(off, '*', sizeof(off));
2920
2921 ucnv_fromUnicode (converter,
2922 &target,
2923 target+2,
2924 &source,
2925 source+3,
2926 offsets,
2927 flush,
2928 &err);
2929
2930 /* writes three bytes into the output buffer: 41 1B 24
2931 * but offsets contains 0 1 1
2932 */
2933 while(*offsets< off[10]){
2934 numOffWritten++;
2935 offsets++;
2936 }
2937 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2938 if(numOffWritten!= (int)(target-out)){
2939 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2940 }
2941
2942 err = U_ZERO_ERROR;
2943
2944 memset(off,'*' , sizeof(off));
2945
2946 flush = 1;
2947 offsets=off;
2948 ucnv_fromUnicode (converter,
2949 &target,
2950 target+4,
2951 &source,
2952 source,
2953 offsets,
2954 flush,
2955 &err);
2956 numOffWritten=0;
2957 while(*offsets< off[10]){
2958 numOffWritten++;
2959 if(*offsets!= -1){
2960 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2961 }
2962 offsets++;
2963 }
2964
2965 /* writes 42 43 7A into output buffer,
2966 * offsets contains -1 -1 -1
2967 */
2968 ucnv_close(converter);
2969 return 0;
2970 }
2971
2972 static void
2973 TestHZ() {
2974 /* test input */
2975 static const uint16_t in[]={
2976 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2977 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2978 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2979 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2980 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2981 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2982 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2983 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2984 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2985 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2986 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2987 0x005A, 0x005B, 0x005C, 0x000A
2988 };
2989 const UChar* uSource;
2990 const UChar* uSourceLimit;
2991 const char* cSource;
2992 const char* cSourceLimit;
2993 UChar *uTargetLimit =NULL;
2994 UChar *uTarget;
2995 char *cTarget;
2996 const char *cTargetLimit;
2997 char *cBuf;
2998 UChar *uBuf,*test;
2999 int32_t uBufSize = 120;
3000 UErrorCode errorCode=U_ZERO_ERROR;
3001 UConverter *cnv;
3002 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3003 int32_t* myOff= offsets;
3004 cnv=ucnv_open("HZ", &errorCode);
3005 if(U_FAILURE(errorCode)) {
3006 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3007 return;
3008 }
3009
3010 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3011 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3012 uSource = (const UChar*)in;
3013 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3014 cTarget = cBuf;
3015 cTargetLimit = cBuf +uBufSize*5;
3016 uTarget = uBuf;
3017 uTargetLimit = uBuf+ uBufSize*5;
3018 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3019 if(U_FAILURE(errorCode)){
3020 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3021 return;
3022 }
3023 cSource = cBuf;
3024 cSourceLimit =cTarget;
3025 test =uBuf;
3026 myOff=offsets;
3027 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3028 if(U_FAILURE(errorCode)){
3029 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3030 return;
3031 }
3032 uSource = (const UChar*)in;
3033 while(uSource<uSourceLimit){
3034 if(*test!=*uSource){
3035
3036 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3037 }
3038 uSource++;
3039 test++;
3040 }
3041 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3042 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3043 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3044 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3045 TestJitterbug930("csISO2022JP");
3046 ucnv_close(cnv);
3047 free(offsets);
3048 free(uBuf);
3049 free(cBuf);
3050 }
3051
3052 static void
3053 TestISCII(){
3054 /* test input */
3055 static const uint16_t in[]={
3056 /* test full range of Devanagari */
3057 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3058 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3059 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3060 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3061 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3062 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3063 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3064 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3065 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3066 0x096D,0x096E,0x096F,
3067 /* test Soft halant*/
3068 0x0915,0x094d, 0x200D,
3069 /* test explicit halant */
3070 0x0915,0x094d, 0x200c,
3071 /* test double danda */
3072 0x965,
3073 /* test ASCII */
3074 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3075 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3076 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3077 /* tests from Lotus */
3078 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3079 0x0930,0x094D,0x200D,
3080 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3081 0x0915,0x0921,0x002B,0x095F,
3082 /* tamil range */
3083 0x0B86, 0xB87, 0xB88,
3084 /* telugu range */
3085 0x0C05, 0x0C02, 0x0C03,0x0c31,
3086 /* kannada range */
3087 0x0C85, 0xC82, 0x0C83,
3088 /* test Abbr sign and Anudatta */
3089 0x0970, 0x952,
3090 /* 0x0958,
3091 0x0959,
3092 0x095A,
3093 0x095B,
3094 0x095C,
3095 0x095D,
3096 0x095E,
3097 0x095F,*/
3098 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3099 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3100 0x090C ,
3101 0x0962,
3102 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3103 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3104 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3105 0x093D /* Avagraha 0xEA, 0xE9*/,
3106 0x0958,
3107 0x0959,
3108 0x095A,
3109 0x095B,
3110 0x095C,
3111 0x095D,
3112 0x095E,
3113 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3114 };
3115 static const unsigned char byteArr[]={
3116
3117 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3118 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3119 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3120 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3121 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3122 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3123 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3124 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3125 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3126 0xf8,0xf9,0xfa,
3127 /* test soft halant */
3128 0xb3, 0xE8, 0xE9,
3129 /* test explicit halant */
3130 0xb3, 0xE8, 0xE8,
3131 /* test double danda */
3132 0xea, 0xea,
3133 /* test ASCII */
3134 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3135 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3136 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3137 /* test ATR code */
3138
3139 /* tests from Lotus */
3140 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3141 0xEF,0x42,0xCF,0xE8,0xD9,
3142 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3143 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3144 /* tamil range */
3145 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3146 /* telugu range */
3147 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3148 /* kannada range */
3149 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3150 /* anudatta and abbreviation sign */
3151 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3152
3153
3154 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3155
3156 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3157
3158 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3159
3160 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3161
3162 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3163
3164 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3165
3166 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3167
3168 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3169
3170 0xB3, 0xE9, /* Ka + NUKTA */
3171
3172 0xB4, 0xE9, /* Kha + NUKTA */
3173
3174 0xB5, 0xE9, /* Ga + NUKTA */
3175
3176 0xBA, 0xE9,
3177
3178 0xBF, 0xE9,
3179
3180 0xC0, 0xE9,
3181
3182 0xC9, 0xE9,
3183 /* INV halant RA */
3184 0xD9, 0xE8, 0xCF,
3185 0x00, 0x00A0,
3186 /* just consume unhandled codepoints */
3187 0xEF, 0x30,
3188
3189 };
3190 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3191 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3192
3193 }
3194
3195 static void
3196 TestISO_2022_JP() {
3197 /* test input */
3198 static const uint16_t in[]={
3199 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3200 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3201 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3202 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3203 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3204 0x201D, 0x3014, 0x000D, 0x000A,
3205 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3206 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3207 };
3208 const UChar* uSource;
3209 const UChar* uSourceLimit;
3210 const char* cSource;
3211 const char* cSourceLimit;
3212 UChar *uTargetLimit =NULL;
3213 UChar *uTarget;
3214 char *cTarget;
3215 const char *cTargetLimit;
3216 char *cBuf;
3217 UChar *uBuf,*test;
3218 int32_t uBufSize = 120;
3219 UErrorCode errorCode=U_ZERO_ERROR;
3220 UConverter *cnv;
3221 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3222 int32_t* myOff= offsets;
3223 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3224 if(U_FAILURE(errorCode)) {
3225 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3226 return;
3227 }
3228
3229 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3230 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3231 uSource = (const UChar*)in;
3232 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3233 cTarget = cBuf;
3234 cTargetLimit = cBuf +uBufSize*5;
3235 uTarget = uBuf;
3236 uTargetLimit = uBuf+ uBufSize*5;
3237 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3238 if(U_FAILURE(errorCode)){
3239 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3240 return;
3241 }
3242 cSource = cBuf;
3243 cSourceLimit =cTarget;
3244 test =uBuf;
3245 myOff=offsets;
3246 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3247 if(U_FAILURE(errorCode)){
3248 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3249 return;
3250 }
3251
3252 uSource = (const UChar*)in;
3253 while(uSource<uSourceLimit){
3254 if(*test!=*uSource){
3255
3256 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3257 }
3258 uSource++;
3259 test++;
3260 }
3261
3262 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3263 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3264 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3265 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3266 TestJitterbug930("csISO2022JP");
3267 ucnv_close(cnv);
3268 free(uBuf);
3269 free(cBuf);
3270 free(offsets);
3271 }
3272
3273 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3274 const UChar* uSource;
3275 const UChar* uSourceLimit;
3276 const char* cSource;
3277 const char* cSourceLimit;
3278 UChar *uTargetLimit =NULL;
3279 UChar *uTarget;
3280 char *cTarget;
3281 const char *cTargetLimit;
3282 char *cBuf;
3283 UChar *uBuf,*test;
3284 int32_t uBufSize = 120*10;
3285 UErrorCode errorCode=U_ZERO_ERROR;
3286 UConverter *cnv;
3287 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3288 int32_t* myOff= offsets;
3289 cnv=my_ucnv_open(conv, &errorCode);
3290 if(U_FAILURE(errorCode)) {
3291 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3292 return;
3293 }
3294
3295 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3296 cBuf =(char*)malloc(uBufSize * sizeof(char));
3297 uSource = (const UChar*)in;
3298 uSourceLimit=uSource+len;
3299 cTarget = cBuf;
3300 cTargetLimit = cBuf +uBufSize;
3301 uTarget = uBuf;
3302 uTargetLimit = uBuf+ uBufSize;
3303 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3304 if(U_FAILURE(errorCode)){
3305 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3306 return;
3307 }
3308 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3309 cSource = cBuf;
3310 cSourceLimit =cTarget;
3311 test =uBuf;
3312 myOff=offsets;
3313 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3314 if(U_FAILURE(errorCode)){
3315 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3316 return;
3317 }
3318
3319 uSource = (const UChar*)in;
3320 while(uSource<uSourceLimit){
3321 if(*test!=*uSource){
3322 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3323 }
3324 uSource++;
3325 test++;
3326 }
3327 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3328 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3329 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3330 if(byteArr && byteArrLen!=0){
3331 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3332 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3333 {
3334 cSource = byteArr;
3335 cSourceLimit = cSource+byteArrLen;
3336 test=uBuf;
3337 myOff = offsets;
3338 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3339 if(U_FAILURE(errorCode)){
3340 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3341 return;
3342 }
3343
3344 uSource = (const UChar*)in;
3345 while(uSource<uSourceLimit){
3346 if(*test!=*uSource){
3347 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3348 }
3349 uSource++;
3350 test++;
3351 }
3352 }
3353 }
3354
3355 ucnv_close(cnv);
3356 free(uBuf);
3357 free(cBuf);
3358 free(offsets);
3359 }
3360 static UChar U_CALLCONV
3361 _charAt(int32_t offset, void *context) {
3362 return ((char*)context)[offset];
3363 }
3364
3365 static int32_t
3366 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3367 int32_t srcIndex=0;
3368 int32_t dstIndex=0;
3369 if(U_FAILURE(*status)){
3370 return 0;
3371 }
3372 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3373 *status = U_ILLEGAL_ARGUMENT_ERROR;
3374 return 0;
3375 }
3376 if(srcLen==-1){
3377 srcLen = (int32_t)uprv_strlen(src);
3378 }
3379
3380 for (; srcIndex<srcLen; ) {
3381 UChar32 c = src[srcIndex++];
3382 if (c == 0x005C /*'\\'*/) {
3383 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3384 if (c == (UChar32)0xFFFFFFFF) {
3385 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3386 break; /* invalid escape sequence */
3387 }
3388 }
3389 if(dstIndex < dstLen){
3390 if(c>0xFFFF){
3391 dst[dstIndex++] = UTF16_LEAD(c);
3392 if(dstIndex<dstLen){
3393 dst[dstIndex]=UTF16_TRAIL(c);
3394 }else{
3395 *status=U_BUFFER_OVERFLOW_ERROR;
3396 }
3397 }else{
3398 dst[dstIndex]=(UChar)c;
3399 }
3400
3401 }else{
3402 *status = U_BUFFER_OVERFLOW_ERROR;
3403 }
3404 dstIndex++; /* for preflighting */
3405 }
3406 return dstIndex;
3407 }
3408
3409 static void
3410 TestFullRoundtrip(const char* cp){
3411 UChar usource[10] ={0};
3412 UChar nsrc[10] = {0};
3413 uint32_t i=1;
3414 int len=0, ulen;
3415 nsrc[0]=0x0061;
3416 /* Test codepoint 0 */
3417 TestConv(usource,1,cp,"",NULL,0);
3418 TestConv(usource,2,cp,"",NULL,0);
3419 nsrc[2]=0x5555;
3420 TestConv(nsrc,3,cp,"",NULL,0);
3421
3422 for(;i<=0x10FFFF;i++){
3423 if(i==0xD800){
3424 i=0xDFFF;
3425 continue;
3426 }
3427 if(i<=0xFFFF){
3428 usource[0] =(UChar) i;
3429 len=1;
3430 }else{
3431 usource[0]=UTF16_LEAD(i);
3432 usource[1]=UTF16_TRAIL(i);
3433 len=2;
3434 }
3435 ulen=len;
3436 if(i==0x80) {
3437 usource[2]=0;
3438 }
3439 /* Test only single code points */
3440 TestConv(usource,ulen,cp,"",NULL,0);
3441 /* Test codepoint repeated twice */
3442 usource[ulen]=usource[0];
3443 usource[ulen+1]=usource[1];
3444 ulen+=len;
3445 TestConv(usource,ulen,cp,"",NULL,0);
3446 /* Test codepoint repeated 3 times */
3447 usource[ulen]=usource[0];
3448 usource[ulen+1]=usource[1];
3449 ulen+=len;
3450 TestConv(usource,ulen,cp,"",NULL,0);
3451 /* Test codepoint in between 2 codepoints */
3452 nsrc[1]=usource[0];
3453 nsrc[2]=usource[1];
3454 nsrc[len+1]=0x5555;
3455 TestConv(nsrc,len+2,cp,"",NULL,0);
3456 uprv_memset(usource,0,sizeof(UChar)*10);
3457 }
3458 }
3459
3460 static void
3461 TestRoundTrippingAllUTF(void){
3462 if(!QUICK){
3463 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3464 TestFullRoundtrip("BOCU-1");
3465 log_verbose("Running exhaustive round trip test for SCSU\n");
3466 TestFullRoundtrip("SCSU");
3467 log_verbose("Running exhaustive round trip test for UTF-8\n");
3468 TestFullRoundtrip("UTF-8");
3469 log_verbose("Running exhaustive round trip test for CESU-8\n");
3470 TestFullRoundtrip("CESU-8");
3471 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3472 TestFullRoundtrip("UTF-16BE");
3473 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3474 TestFullRoundtrip("UTF-16LE");
3475 log_verbose("Running exhaustive round trip test for UTF-16\n");
3476 TestFullRoundtrip("UTF-16");
3477 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3478 TestFullRoundtrip("UTF-32BE");
3479 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3480 TestFullRoundtrip("UTF-32LE");
3481 log_verbose("Running exhaustive round trip test for UTF-32\n");
3482 TestFullRoundtrip("UTF-32");
3483 log_verbose("Running exhaustive round trip test for UTF-7\n");
3484 TestFullRoundtrip("UTF-7");
3485 log_verbose("Running exhaustive round trip test for UTF-7\n");
3486 TestFullRoundtrip("UTF-7,version=1");
3487 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3488 TestFullRoundtrip("IMAP-mailbox-name");
3489 log_verbose("Running exhaustive round trip test for GB18030\n");
3490 TestFullRoundtrip("GB18030");
3491 }
3492 }
3493
3494 static void
3495 TestSCSU() {
3496
3497 static const uint16_t germanUTF16[]={
3498 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3499 };
3500
3501 static const uint8_t germanSCSU[]={
3502 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3503 };
3504
3505 static const uint16_t russianUTF16[]={
3506 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3507 };
3508
3509 static const uint8_t russianSCSU[]={
3510 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3511 };
3512
3513 static const uint16_t japaneseUTF16[]={
3514 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3515 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3516 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3517 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3518 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3519 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3520 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3521 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3522 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3523 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3524 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3525 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3526 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3527 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3528 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3529 };
3530
3531 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3532 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3533 static const uint8_t japaneseSCSU[]={
3534 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3535 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3536 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3537 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3538 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3539 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3540 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3541 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3542 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3543 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3544 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3545 0xcb, 0x82
3546 };
3547
3548 static const uint16_t allFeaturesUTF16[]={
3549 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3550 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3551 0x01df, 0xf000, 0xdbff, 0xdfff
3552 };
3553
3554 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3555 * result here (34B vs. 35B)
3556 */
3557 static const uint8_t allFeaturesSCSU[]={
3558 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3559 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3560 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3561 0xdf, 0x14, 0x80, 0x15, 0xff
3562 };
3563 static const uint16_t monkeyIn[]={
3564 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3565 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3566 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3567 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3568 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3569 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3570 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3571 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3572 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3573 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3574 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3575 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3576 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3577 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3578 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3579 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3580 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3581 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3582 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3583 /* test non-BMP code points */
3584 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3585 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3586 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3587 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3588 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3589 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3590 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3591 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3592 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3593 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3594 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3595
3596
3597 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3598 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3599 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3600 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3601 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3602 };
3603 static const char *fTestCases [] = {
3604 "\\ud800\\udc00", /* smallest surrogate*/
3605 "\\ud8ff\\udcff",
3606 "\\udBff\\udFff", /* largest surrogate pair*/
3607 "\\ud834\\udc00",
3608 "\\U0010FFFF",
3609 "Hello \\u9292 \\u9192 World!",
3610 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3611 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3612
3613 "\\u0648\\u06c8", /* catch missing reset*/
3614 "\\u0648\\u06c8",
3615
3616 "\\u4444\\uE001", /* lowest quotable*/
3617 "\\u4444\\uf2FF", /* highest quotable*/
3618 "\\u4444\\uf188\\u4444",
3619 "\\u4444\\uf188\\uf288",
3620 "\\u4444\\uf188abc\\u0429\\uf288",
3621 "\\u9292\\u2222",
3622 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3623 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3624 "Hello World!123456",
3625 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3626
3627 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3628 "abc\\u4411d", /* uses SQU*/
3629 "abc\\u4411\\u4412d",/* uses SCU*/
3630 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3631 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3632 "\\u9292\\u2222",
3633 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3634 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3635 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3636
3637 "", /* empty input*/
3638 "\\u0000", /* smallest BMP character*/
3639 "\\uFFFF", /* largest BMP character*/
3640
3641 /* regression tests*/
3642 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3643 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3644 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3645 "\\u0041\\u00df\\u0401\\u015f",
3646 "\\u9066\\u2123abc",
3647 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3648 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3649 };
3650 int i=0;
3651 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3652 const char* cSrc = fTestCases[i];
3653 UErrorCode status = U_ZERO_ERROR;
3654 int32_t cSrcLen,srcLen;
3655 UChar* src;
3656 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3657 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3658 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3659 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3660 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3661 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3662 free(src);
3663 }
3664 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3665 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3666 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3667 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3668 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3669 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3670 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3671 }
3672
3673 #if !UCONFIG_NO_LEGACY_CONVERSION
3674 static void TestJitterbug2346(){
3675 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3676 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3677 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3678
3679 UChar uTarget[500]={'\0'};
3680 UChar* utarget=uTarget;
3681 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3682
3683 char cTarget[500]={'\0'};
3684 char* ctarget=cTarget;
3685 char* ctargetLimit=cTarget+sizeof(cTarget);
3686 const char* csource=source;
3687 UChar* temp = expected;
3688 UErrorCode err=U_ZERO_ERROR;
3689
3690 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3691 if(U_FAILURE(err)) {
3692 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3693 return;
3694 }
3695 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3696 if(U_FAILURE(err)) {
3697 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3698 return;
3699 }
3700 utargetLimit=utarget;
3701 utarget = uTarget;
3702 while(utarget<utargetLimit){
3703 if(*temp!=*utarget){
3704
3705 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3706 }
3707 utarget++;
3708 temp++;
3709 }
3710 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3711 if(U_FAILURE(err)) {
3712 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3713 return;
3714 }
3715 ctargetLimit=ctarget;
3716 ctarget =cTarget;
3717 ucnv_close(conv);
3718
3719
3720 }
3721
3722 static void
3723 TestISO_2022_JP_1() {
3724 /* test input */
3725 static const uint16_t in[]={
3726 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3727 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3728 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3729 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3730 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3731 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3732 0x201D, 0x000D, 0x000A,
3733 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3734 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3735 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3736 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3737 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3738 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3739 };
3740 const UChar* uSource;
3741 const UChar* uSourceLimit;
3742 const char* cSource;
3743 const char* cSourceLimit;
3744 UChar *uTargetLimit =NULL;
3745 UChar *uTarget;
3746 char *cTarget;
3747 const char *cTargetLimit;
3748 char *cBuf;
3749 UChar *uBuf,*test;
3750 int32_t uBufSize = 120;
3751 UErrorCode errorCode=U_ZERO_ERROR;
3752 UConverter *cnv;
3753
3754 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3755 if(U_FAILURE(errorCode)) {
3756 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3757 return;
3758 }
3759
3760 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3761 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3762 uSource = (const UChar*)in;
3763 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3764 cTarget = cBuf;
3765 cTargetLimit = cBuf +uBufSize*5;
3766 uTarget = uBuf;
3767 uTargetLimit = uBuf+ uBufSize*5;
3768 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3769 if(U_FAILURE(errorCode)){
3770 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3771 return;
3772 }
3773 cSource = cBuf;
3774 cSourceLimit =cTarget;
3775 test =uBuf;
3776 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3777 if(U_FAILURE(errorCode)){
3778 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3779 return;
3780 }
3781 uSource = (const UChar*)in;
3782 while(uSource<uSourceLimit){
3783 if(*test!=*uSource){
3784
3785 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3786 }
3787 uSource++;
3788 test++;
3789 }
3790 /*ucnv_close(cnv);
3791 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3792 /*Test for the condition where there is an invalid character*/
3793 ucnv_reset(cnv);
3794 {
3795 static const uint8_t source2[]={0x0e,0x24,0x053};
3796 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3797 }
3798 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3799 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3800 ucnv_close(cnv);
3801 free(uBuf);
3802 free(cBuf);
3803 }
3804
3805 static void
3806 TestISO_2022_JP_2() {
3807 /* test input */
3808 static const uint16_t in[]={
3809 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3810 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3811 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3812 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3813 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3814 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3815 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3816 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3817 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3818 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3819 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3820 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3821 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3822 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3823 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3824 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3825 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3826 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3827 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3828 };
3829 const UChar* uSource;
3830 const UChar* uSourceLimit;
3831 const char* cSource;
3832 const char* cSourceLimit;
3833 UChar *uTargetLimit =NULL;
3834 UChar *uTarget;
3835 char *cTarget;
3836 const char *cTargetLimit;
3837 char *cBuf;
3838 UChar *uBuf,*test;
3839 int32_t uBufSize = 120;
3840 UErrorCode errorCode=U_ZERO_ERROR;
3841 UConverter *cnv;
3842 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3843 int32_t* myOff= offsets;
3844 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3845 if(U_FAILURE(errorCode)) {
3846 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3847 return;
3848 }
3849
3850 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3851 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3852 uSource = (const UChar*)in;
3853 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3854 cTarget = cBuf;
3855 cTargetLimit = cBuf +uBufSize*5;
3856 uTarget = uBuf;
3857 uTargetLimit = uBuf+ uBufSize*5;
3858 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3859 if(U_FAILURE(errorCode)){
3860 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3861 return;
3862 }
3863 cSource = cBuf;
3864 cSourceLimit =cTarget;
3865 test =uBuf;
3866 myOff=offsets;
3867 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3868 if(U_FAILURE(errorCode)){
3869 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3870 return;
3871 }
3872 uSource = (const UChar*)in;
3873 while(uSource<uSourceLimit){
3874 if(*test!=*uSource){
3875
3876 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3877 }
3878 uSource++;
3879 test++;
3880 }
3881 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3882 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3883 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3884 /*Test for the condition where there is an invalid character*/
3885 ucnv_reset(cnv);
3886 {
3887 static const uint8_t source2[]={0x0e,0x24,0x053};
3888 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3889 }
3890 ucnv_close(cnv);
3891 free(uBuf);
3892 free(cBuf);
3893 free(offsets);
3894 }
3895
3896 static void
3897 TestISO_2022_KR() {
3898 /* test input */
3899 static const uint16_t in[]={
3900 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3901 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3902 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3903 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3904 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3905 ,0x53E3,0x53E4,0x000A,0x000D};
3906 const UChar* uSource;
3907 const UChar* uSourceLimit;
3908 const char* cSource;
3909 const char* cSourceLimit;
3910 UChar *uTargetLimit =NULL;
3911 UChar *uTarget;
3912 char *cTarget;
3913 const char *cTargetLimit;
3914 char *cBuf;
3915 UChar *uBuf,*test;
3916 int32_t uBufSize = 120;
3917 UErrorCode errorCode=U_ZERO_ERROR;
3918 UConverter *cnv;
3919 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3920 int32_t* myOff= offsets;
3921 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3922 if(U_FAILURE(errorCode)) {
3923 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3924 return;
3925 }
3926
3927 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3928 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3929 uSource = (const UChar*)in;
3930 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3931 cTarget = cBuf;
3932 cTargetLimit = cBuf +uBufSize*5;
3933 uTarget = uBuf;
3934 uTargetLimit = uBuf+ uBufSize*5;
3935 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3936 if(U_FAILURE(errorCode)){
3937 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3938 return;
3939 }
3940 cSource = cBuf;
3941 cSourceLimit =cTarget;
3942 test =uBuf;
3943 myOff=offsets;
3944 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3945 if(U_FAILURE(errorCode)){
3946 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3947 return;
3948 }
3949 uSource = (const UChar*)in;
3950 while(uSource<uSourceLimit){
3951 if(*test!=*uSource){
3952 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3953 }
3954 uSource++;
3955 test++;
3956 }
3957 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3958 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3959 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3960 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3961 TestJitterbug930("csISO2022KR");
3962 /*Test for the condition where there is an invalid character*/
3963 ucnv_reset(cnv);
3964 {
3965 static const uint8_t source2[]={0x1b,0x24,0x053};
3966 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3967 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3968 }
3969 ucnv_close(cnv);
3970 free(uBuf);
3971 free(cBuf);
3972 free(offsets);
3973 }
3974
3975 static void
3976 TestISO_2022_KR_1() {
3977 /* test input */
3978 static const uint16_t in[]={
3979 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3980 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3981 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3982 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3983 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3984 ,0x53E3,0x53E4,0x000A,0x000D};
3985 const UChar* uSource;
3986 const UChar* uSourceLimit;
3987 const char* cSource;
3988 const char* cSourceLimit;
3989 UChar *uTargetLimit =NULL;
3990 UChar *uTarget;
3991 char *cTarget;
3992 const char *cTargetLimit;
3993 char *cBuf;
3994 UChar *uBuf,*test;
3995 int32_t uBufSize = 120;
3996 UErrorCode errorCode=U_ZERO_ERROR;
3997 UConverter *cnv;
3998 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3999 int32_t* myOff= offsets;
4000 cnv=ucnv_open("ibm-25546", &errorCode);
4001 if(U_FAILURE(errorCode)) {
4002 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4003 return;
4004 }
4005
4006 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4007 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4008 uSource = (const UChar*)in;
4009 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4010 cTarget = cBuf;
4011 cTargetLimit = cBuf +uBufSize*5;
4012 uTarget = uBuf;
4013 uTargetLimit = uBuf+ uBufSize*5;
4014 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4015 if(U_FAILURE(errorCode)){
4016 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4017 return;
4018 }
4019 cSource = cBuf;
4020 cSourceLimit =cTarget;
4021 test =uBuf;
4022 myOff=offsets;
4023 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4024 if(U_FAILURE(errorCode)){
4025 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4026 return;
4027 }
4028 uSource = (const UChar*)in;
4029 while(uSource<uSourceLimit){
4030 if(*test!=*uSource){
4031 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4032 }
4033 uSource++;
4034 test++;
4035 }
4036 ucnv_reset(cnv);
4037 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4038 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4039 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4040 ucnv_reset(cnv);
4041 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4042 /*Test for the condition where there is an invalid character*/
4043 ucnv_reset(cnv);
4044 {
4045 static const uint8_t source2[]={0x1b,0x24,0x053};
4046 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4047 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4048 }
4049 ucnv_close(cnv);
4050 free(uBuf);
4051 free(cBuf);
4052 free(offsets);
4053 }
4054
4055 static void TestJitterbug2411(){
4056 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4057 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4058 UConverter* kr=NULL, *kr1=NULL;
4059 UErrorCode errorCode = U_ZERO_ERROR;
4060 UChar tgt[100]={'\0'};
4061 UChar* target = tgt;
4062 UChar* targetLimit = target+100;
4063 kr=ucnv_open("iso-2022-kr", &errorCode);
4064 if(U_FAILURE(errorCode)) {
4065 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4066 return;
4067 }
4068 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4069 if(U_FAILURE(errorCode)) {
4070 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4071 return;
4072 }
4073 kr1 = ucnv_open("ibm-25546", &errorCode);
4074 if(U_FAILURE(errorCode)) {
4075 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4076 return;
4077 }
4078 target = tgt;
4079 targetLimit = target+100;
4080 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4081
4082 if(U_FAILURE(errorCode)) {
4083 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4084 return;
4085 }
4086
4087 ucnv_close(kr);
4088 ucnv_close(kr1);
4089
4090 }
4091
4092 static void
4093 TestJIS(){
4094 /* From Unicode moved to testdata/conversion.txt */
4095 /*To Unicode*/
4096 {
4097 static const uint8_t sampleTextJIS[] = {
4098 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4099 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4100 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4101 };
4102 static const uint16_t expectedISO2022JIS[] = {
4103 0x0041, 0x0042,
4104 0xFF81, 0xFF82,
4105 0x3000
4106 };
4107 static const int32_t toISO2022JISOffs[]={
4108 3,4,
4109 8,9,
4110 16
4111 };
4112
4113 static const uint8_t sampleTextJIS7[] = {
4114 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4115 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4116 0x1b,0x24,0x42,0x21,0x21,
4117 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4118 0x21,0x22,
4119 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4120 };
4121 static const uint16_t expectedISO2022JIS7[] = {
4122 0x0041, 0x0042,
4123 0xFF81, 0xFF82,
4124 0x3000,
4125 0xFF81, 0xFF82,
4126 0x3001,
4127 0x3000
4128 };
4129 static const int32_t toISO2022JIS7Offs[]={
4130 3,4,
4131 8,9,
4132 13,16,
4133 17,
4134 19,27
4135 };
4136 static const uint8_t sampleTextJIS8[] = {
4137 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4138 0xa1,0xc8,0xd9,/*Katakana Set*/
4139 0x1b,0x28,0x42,
4140 0x41,0x42,
4141 0xb1,0xc3, /*Katakana Set*/
4142 0x1b,0x24,0x42,0x21,0x21
4143 };
4144 static const uint16_t expectedISO2022JIS8[] = {
4145 0x0041, 0x0042,
4146 0xff61, 0xff88, 0xff99,
4147 0x0041, 0x0042,
4148 0xff71, 0xff83,
4149 0x3000
4150 };
4151 static const int32_t toISO2022JIS8Offs[]={
4152 3, 4, 5, 6,
4153 7, 11, 12, 13,
4154 14, 18,
4155 };
4156
4157 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4158 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4159 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4160 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4161 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4162 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4163 }
4164
4165 }
4166
4167 static void TestJitterbug915(){
4168 /* tests for roundtripping of the below sequence
4169 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4170 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4171 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4172 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4173 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4174 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4175 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4176 */
4177 static const char cSource[]={
4178 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4179 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4180 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4181 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4182 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4183 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4184 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4185 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4186 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4187 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4188 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4189 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4190 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4191 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4192 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4193 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4194 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4195 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4196 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4197 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4198 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4199 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4200 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4201 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4202 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4203 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4204 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4205 0x37, 0x20, 0x2A, 0x2F
4206 };
4207 UChar uTarget[500]={'\0'};
4208 UChar* utarget=uTarget;
4209 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4210
4211 char cTarget[500]={'\0'};
4212 char* ctarget=cTarget;
4213 char* ctargetLimit=cTarget+sizeof(cTarget);
4214 const char* csource=cSource;
4215 const char* tempSrc = cSource;
4216 UErrorCode err=U_ZERO_ERROR;
4217
4218 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4219 if(U_FAILURE(err)) {
4220 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4221 return;
4222 }
4223 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4224 if(U_FAILURE(err)) {
4225 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4226 return;
4227 }
4228 utargetLimit=utarget;
4229 utarget = uTarget;
4230 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4231 if(U_FAILURE(err)) {
4232 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4233 return;
4234 }
4235 ctargetLimit=ctarget;
4236 ctarget =cTarget;
4237 while(ctarget<ctargetLimit){
4238 if(*ctarget != *tempSrc){
4239 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4240 }
4241 ++ctarget;
4242 ++tempSrc;
4243 }
4244
4245 ucnv_close(conv);
4246 }
4247
4248 static void
4249 TestISO_2022_CN_EXT() {
4250 /* test input */
4251 static const uint16_t in[]={
4252 /* test Non-BMP code points */
4253 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4254 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4255 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4256 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4257 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4258 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4259 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4260 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4261 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4262 0xD869, 0xDED5,
4263
4264 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4265 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4266 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4267 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4272 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4273 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4274 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4275 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4276 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4277 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4278 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4279 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4280 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4281 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4282
4283 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4284
4285 };
4286
4287 const UChar* uSource;
4288 const UChar* uSourceLimit;
4289 const char* cSource;
4290 const char* cSourceLimit;
4291 UChar *uTargetLimit =NULL;
4292 UChar *uTarget;
4293 char *cTarget;
4294 const char *cTargetLimit;
4295 char *cBuf;
4296 UChar *uBuf,*test;
4297 int32_t uBufSize = 180;
4298 UErrorCode errorCode=U_ZERO_ERROR;
4299 UConverter *cnv;
4300 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4301 int32_t* myOff= offsets;
4302 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4303 if(U_FAILURE(errorCode)) {
4304 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4305 return;
4306 }
4307
4308 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4309 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4310 uSource = (const UChar*)in;
4311 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4312 cTarget = cBuf;
4313 cTargetLimit = cBuf +uBufSize*5;
4314 uTarget = uBuf;
4315 uTargetLimit = uBuf+ uBufSize*5;
4316 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4317 if(U_FAILURE(errorCode)){
4318 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4319 return;
4320 }
4321 cSource = cBuf;
4322 cSourceLimit =cTarget;
4323 test =uBuf;
4324 myOff=offsets;
4325 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4326 if(U_FAILURE(errorCode)){
4327 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4328 return;
4329 }
4330 uSource = (const UChar*)in;
4331 while(uSource<uSourceLimit){
4332 if(*test!=*uSource){
4333 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4334 }
4335 else{
4336 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4337 }
4338 uSource++;
4339 test++;
4340 }
4341 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4342 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4343 /*Test for the condition where there is an invalid character*/
4344 ucnv_reset(cnv);
4345 {
4346 static const uint8_t source2[]={0x0e,0x24,0x053};
4347 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4348 }
4349 ucnv_close(cnv);
4350 free(uBuf);
4351 free(cBuf);
4352 free(offsets);
4353 }
4354
4355 static void
4356 TestISO_2022_CN() {
4357 /* test input */
4358 static const uint16_t in[]={
4359 /* jitterbug 951 */
4360 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4361 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4362 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4363 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4364 0x0020, 0x0045, 0x004e, 0x0044,
4365 /**/
4366 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4367 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4368 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4369 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4370 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4371 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4372 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4373 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4374 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4375 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4376 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4377 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4378 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4379 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4380 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4381 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4382 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4383
4384 };
4385 const UChar* uSource;
4386 const UChar* uSourceLimit;
4387 const char* cSource;
4388 const char* cSourceLimit;
4389 UChar *uTargetLimit =NULL;
4390 UChar *uTarget;
4391 char *cTarget;
4392 const char *cTargetLimit;
4393 char *cBuf;
4394 UChar *uBuf,*test;
4395 int32_t uBufSize = 180;
4396 UErrorCode errorCode=U_ZERO_ERROR;
4397 UConverter *cnv;
4398 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4399 int32_t* myOff= offsets;
4400 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4401 if(U_FAILURE(errorCode)) {
4402 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4403 return;
4404 }
4405
4406 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4407 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4408 uSource = (const UChar*)in;
4409 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4410 cTarget = cBuf;
4411 cTargetLimit = cBuf +uBufSize*5;
4412 uTarget = uBuf;
4413 uTargetLimit = uBuf+ uBufSize*5;
4414 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4415 if(U_FAILURE(errorCode)){
4416 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4417 return;
4418 }
4419 cSource = cBuf;
4420 cSourceLimit =cTarget;
4421 test =uBuf;
4422 myOff=offsets;
4423 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4424 if(U_FAILURE(errorCode)){
4425 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4426 return;
4427 }
4428 uSource = (const UChar*)in;
4429 while(uSource<uSourceLimit){
4430 if(*test!=*uSource){
4431 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4432 }
4433 else{
4434 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4435 }
4436 uSource++;
4437 test++;
4438 }
4439 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4440 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4441 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4442 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4443 TestJitterbug930("csISO2022CN");
4444 /*Test for the condition where there is an invalid character*/
4445 ucnv_reset(cnv);
4446 {
4447 static const uint8_t source2[]={0x0e,0x24,0x053};
4448 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4449 }
4450
4451 ucnv_close(cnv);
4452 free(uBuf);
4453 free(cBuf);
4454 free(offsets);
4455 }
4456
4457 static void
4458 TestEBCDIC_STATEFUL() {
4459 /* test input */
4460 static const uint8_t in[]={
4461 0x61,
4462 0x1a,
4463 0x0f, 0x4b,
4464 0x42,
4465 0x40,
4466 0x36,
4467 };
4468
4469 /* expected test results */
4470 static const int32_t results[]={
4471 /* number of bytes read, code point */
4472 1, 0x002f,
4473 1, 0x0092,
4474 2, 0x002e,
4475 1, 0xff62,
4476 1, 0x0020,
4477 1, 0x0096,
4478
4479 };
4480 static const uint8_t in2[]={
4481 0x0f,
4482 0xa1,
4483 0x01
4484 };
4485
4486 /* expected test results */
4487 static const int32_t results2[]={
4488 /* number of bytes read, code point */
4489 2, 0x203E,
4490 1, 0x0001,
4491 };
4492
4493 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4494 UErrorCode errorCode=U_ZERO_ERROR;
4495 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4496 if(U_FAILURE(errorCode)) {
4497 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4498 return;
4499 }
4500 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4501 ucnv_reset(cnv);
4502 /* Test the condition when source >= sourceLimit */
4503 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4504 ucnv_reset(cnv);
4505 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4506 {
4507 static const uint8_t source1[]={0x0f};
4508 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4509 }
4510 /*Test for the condition where there is an invalid character*/
4511 ucnv_reset(cnv);
4512 {
4513 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4514 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4515 }
4516 ucnv_reset(cnv);
4517 source=(const char*)in2;
4518 limit=(const char*)in2+sizeof(in2);
4519 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4520 ucnv_close(cnv);
4521
4522 }
4523
4524 static void
4525 TestGB18030() {
4526 /* test input */
4527 static const uint8_t in[]={
4528 0x24,
4529 0x7f,
4530 0x81, 0x30, 0x81, 0x30,
4531 0xa8, 0xbf,
4532 0xa2, 0xe3,
4533 0xd2, 0xbb,
4534 0x82, 0x35, 0x8f, 0x33,
4535 0x84, 0x31, 0xa4, 0x39,
4536 0x90, 0x30, 0x81, 0x30,
4537 0xe3, 0x32, 0x9a, 0x35
4538 #if 0
4539 /*
4540 * Feature removed markus 2000-oct-26
4541 * Only some codepages must match surrogate pairs into supplementary code points -
4542 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4543 * GB 18030 provides direct encodings for supplementary code points, therefore
4544 * it must not combine two single-encoded surrogates into one code point.
4545 */
4546 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4547 #endif
4548 };
4549
4550 /* expected test results */
4551 static const int32_t results[]={
4552 /* number of bytes read, code point */
4553 1, 0x24,
4554 1, 0x7f,
4555 4, 0x80,
4556 2, 0x1f9,
4557 2, 0x20ac,
4558 2, 0x4e00,
4559 4, 0x9fa6,
4560 4, 0xffff,
4561 4, 0x10000,
4562 4, 0x10ffff
4563 #if 0
4564 /* Feature removed. See comment above. */
4565 8, 0x10000
4566 #endif
4567 };
4568
4569 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4570 UErrorCode errorCode=U_ZERO_ERROR;
4571 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4572 if(U_FAILURE(errorCode)) {
4573 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4574 return;
4575 }
4576 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4577 ucnv_close(cnv);
4578 }
4579
4580 static void
4581 TestLMBCS() {
4582 /* LMBCS-1 string */
4583 static const uint8_t pszLMBCS[]={
4584 0x61,
4585 0x01, 0x29,
4586 0x81,
4587 0xA0,
4588 0x0F, 0x27,
4589 0x0F, 0x91,
4590 0x14, 0x0a, 0x74,
4591 0x14, 0xF6, 0x02,
4592 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4593 0x10, 0x88, 0xA0,
4594 };
4595
4596 /* Unicode UChar32 equivalents */
4597 static const UChar32 pszUnicode32[]={
4598 /* code point */
4599 0x00000061,
4600 0x00002013,
4601 0x000000FC,
4602 0x000000E1,
4603 0x00000007,
4604 0x00000091,
4605 0x00000a74,
4606 0x00000200,
4607 0x00023456, /* code point for surrogate pair */
4608 0x00005516
4609 };
4610
4611 /* Unicode UChar equivalents */
4612 static const UChar pszUnicode[]={
4613 /* code point */
4614 0x0061,
4615 0x2013,
4616 0x00FC,
4617 0x00E1,
4618 0x0007,
4619 0x0091,
4620 0x0a74,
4621 0x0200,
4622 0xD84D, /* low surrogate */
4623 0xDC56, /* high surrogate */
4624 0x5516
4625 };
4626
4627 /* expected test results */
4628 static const int offsets32[]={
4629 /* number of bytes read, code point */
4630 0,
4631 1,
4632 3,
4633 4,
4634 5,
4635 7,
4636 9,
4637 12,
4638 15,
4639 21,
4640 24
4641 };
4642
4643 /* expected test results */
4644 static const int offsets[]={
4645 /* number of bytes read, code point */
4646 0,
4647 1,
4648 3,
4649 4,
4650 5,
4651 7,
4652 9,
4653 12,
4654 15,
4655 18,
4656 21,
4657 24
4658 };
4659
4660
4661 UConverter *cnv;
4662
4663 #define NAME_LMBCS_1 "LMBCS-1"
4664 #define NAME_LMBCS_2 "LMBCS-2"
4665
4666
4667 /* Some basic open/close/property tests on some LMBCS converters */
4668 {
4669
4670 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4671 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4672 char get_subchars [1];
4673 const char * get_name;
4674 UConverter *cnv1;
4675 UConverter *cnv2;
4676
4677 int8_t len = sizeof(get_subchars);
4678
4679 UErrorCode errorCode=U_ZERO_ERROR;
4680
4681 /* Open */
4682 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4683 if(U_FAILURE(errorCode)) {
4684 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4685 return;
4686 }
4687 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4688 if(U_FAILURE(errorCode)) {
4689 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4690 return;
4691 }
4692
4693 /* Name */
4694 get_name = ucnv_getName (cnv1, &errorCode);
4695 if (strcmp(NAME_LMBCS_1,get_name)){
4696 log_err("Unexpected converter name: %s\n", get_name);
4697 }
4698 get_name = ucnv_getName (cnv2, &errorCode);
4699 if (strcmp(NAME_LMBCS_2,get_name)){
4700 log_err("Unexpected converter name: %s\n", get_name);
4701 }
4702
4703 /* substitution chars */
4704 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4705 if(U_FAILURE(errorCode)) {
4706 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4707 }
4708 if (len!=1){
4709 log_err("Unexpected length of sub chars\n");
4710 }
4711 if (get_subchars[0] != expected_subchars[0]){
4712 log_err("Unexpected value of sub chars\n");
4713 }
4714 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4715 if(U_FAILURE(errorCode)) {
4716 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4717 }
4718 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4719 if(U_FAILURE(errorCode)) {
4720 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4721 }
4722 if (len!=1){
4723 log_err("Unexpected length of sub chars\n");
4724 }
4725 if (get_subchars[0] != new_subchars[0]){
4726 log_err("Unexpected value of sub chars\n");
4727 }
4728 ucnv_close(cnv1);
4729 ucnv_close(cnv2);
4730
4731 }
4732
4733 /* LMBCS to Unicode - offsets */
4734 {
4735 UErrorCode errorCode=U_ZERO_ERROR;
4736
4737 const char * pSource = (const char *)pszLMBCS;
4738 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4739
4740 UChar Out [sizeof(pszUnicode) + 1];
4741 UChar * pOut = Out;
4742 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4743
4744 int32_t off [sizeof(offsets)];
4745
4746 /* last 'offset' in expected results is just the final size.
4747 (Makes other tests easier). Compensate here: */
4748
4749 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4750
4751
4752
4753 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4754 if(U_FAILURE(errorCode)) {
4755 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4756 return;
4757 }
4758
4759
4760
4761 ucnv_toUnicode (cnv,
4762 &pOut,
4763 OutLimit,
4764 &pSource,
4765 sourceLimit,
4766 off,
4767 TRUE,
4768 &errorCode);
4769
4770
4771 if (memcmp(off,offsets,sizeof(offsets)))
4772 {
4773 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4774 }
4775 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4776 {
4777 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4778 }
4779 ucnv_close(cnv);
4780 }
4781 {
4782 /* LMBCS to Unicode - getNextUChar */
4783 const char * sourceStart;
4784 const char *source=(const char *)pszLMBCS;
4785 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4786 const UChar32 *results= pszUnicode32;
4787 const int *off = offsets32;
4788
4789 UErrorCode errorCode=U_ZERO_ERROR;
4790 UChar32 uniChar;
4791
4792 cnv=ucnv_open("LMBCS-1", &errorCode);
4793 if(U_FAILURE(errorCode)) {
4794 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4795 return;
4796 }
4797 else
4798 {
4799
4800 while(source<limit) {
4801 sourceStart=source;
4802 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4803 if(U_FAILURE(errorCode)) {
4804 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4805 break;
4806 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4807 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4808 uniChar, (source-sourceStart), *results, *off);
4809 break;
4810 }
4811 results++;
4812 off++;
4813 }
4814 }
4815 ucnv_close(cnv);
4816 }
4817 { /* test locale & optimization group operations: Unicode to LMBCS */
4818
4819 UErrorCode errorCode=U_ZERO_ERROR;
4820 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4821 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4822 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4823 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4824 const UChar * pUniOut = uniString;
4825 UChar * pUniIn = uniString;
4826 uint8_t lmbcsString [4];
4827 const char * pLMBCSOut = (const char *)lmbcsString;
4828 char * pLMBCSIn = (char *)lmbcsString;
4829
4830 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4831 ucnv_fromUnicode (cnv16he,
4832 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4833 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4834 NULL, 1, &errorCode);
4835
4836 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4837 {
4838 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4839 }
4840
4841 pLMBCSIn= (char *)lmbcsString;
4842 pUniOut = uniString;
4843 ucnv_fromUnicode (cnv01us,
4844 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4845 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4846 NULL, 1, &errorCode);
4847
4848 if (lmbcsString[0] != 0x9F)
4849 {
4850 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4851 }
4852
4853 /* single byte char from mbcs char set */
4854 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4855 pLMBCSOut = (const char *)lmbcsString;
4856 pUniIn = uniString;
4857 ucnv_toUnicode (cnv16jp,
4858 &pUniIn, pUniIn + 1,
4859 &pLMBCSOut, (pLMBCSOut + 1),
4860 NULL, 1, &errorCode);
4861 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4862 {
4863 log_err("Unexpected results from LMBCS-16 single byte char\n");
4864 }
4865 /* convert to group 1: should be 3 bytes */
4866 pLMBCSIn = (char *)lmbcsString;
4867 pUniOut = uniString;
4868 ucnv_fromUnicode (cnv01us,
4869 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
4870 &pUniOut, pUniOut + 1,
4871 NULL, 1, &errorCode);
4872 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
4873 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4874 {
4875 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4876 }
4877 pLMBCSOut = (const char *)lmbcsString;
4878 pUniIn = uniString;
4879 ucnv_toUnicode (cnv01us,
4880 &pUniIn, pUniIn + 1,
4881 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
4882 NULL, 1, &errorCode);
4883 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4884 {
4885 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4886 }
4887 pLMBCSIn = (char *)lmbcsString;
4888 pUniOut = uniString;
4889 ucnv_fromUnicode (cnv16jp,
4890 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
4891 &pUniOut, pUniOut + 1,
4892 NULL, 1, &errorCode);
4893 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4894 {
4895 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4896 }
4897 ucnv_close(cnv16he);
4898 ucnv_close(cnv16jp);
4899 ucnv_close(cnv01us);
4900 }
4901 {
4902 /* Small source buffer testing, LMBCS -> Unicode */
4903
4904 UErrorCode errorCode=U_ZERO_ERROR;
4905
4906 const char * pSource = (const char *)pszLMBCS;
4907 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4908 int codepointCount = 0;
4909
4910 UChar Out [sizeof(pszUnicode) + 1];
4911 UChar * pOut = Out;
4912 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4913
4914
4915 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4916 if(U_FAILURE(errorCode)) {
4917 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4918 return;
4919 }
4920
4921
4922 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4923 {
4924 ucnv_toUnicode (cnv,
4925 &pOut,
4926 OutLimit,
4927 &pSource,
4928 (pSource+1), /* claim that this is a 1- byte buffer */
4929 NULL,
4930 FALSE, /* FALSE means there might be more chars in the next buffer */
4931 &errorCode);
4932
4933 if (U_SUCCESS (errorCode))
4934 {
4935 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
4936 {
4937 /* we are on to the next code point: check value */
4938
4939 if (Out[0] != pszUnicode[codepointCount]){
4940 log_err("LMBCS->Uni result %lx should have been %lx \n",
4941 Out[0], pszUnicode[codepointCount]);
4942 }
4943
4944 pOut = Out; /* reset for accumulating next code point */
4945 codepointCount++;
4946 }
4947 }
4948 else
4949 {
4950 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4951 }
4952 }
4953 {
4954 /* limits & surrogate error testing */
4955 char LIn [sizeof(pszLMBCS)];
4956 const char * pLIn = LIn;
4957
4958 char LOut [sizeof(pszLMBCS)];
4959 char * pLOut = LOut;
4960
4961 UChar UOut [sizeof(pszUnicode)];
4962 UChar * pUOut = UOut;
4963
4964 UChar UIn [sizeof(pszUnicode)];
4965 const UChar * pUIn = UIn;
4966
4967 int32_t off [sizeof(offsets)];
4968 UChar32 uniChar;
4969
4970 errorCode=U_ZERO_ERROR;
4971
4972 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4973 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4974 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4975 {
4976 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4977 }
4978 errorCode=U_ZERO_ERROR;
4979 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4980 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4981 {
4982 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4983 }
4984 errorCode=U_ZERO_ERROR;
4985
4986 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4987 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4988 {
4989 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4990 }
4991 errorCode=U_ZERO_ERROR;
4992
4993 /* 0 byte source request - no error, no pointer movement */
4994 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4995 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4996 if(U_FAILURE(errorCode)) {
4997 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4998 }
4999 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5000 {
5001 log_err("Unexpected pointer move in 0 byte source request \n");
5002 }
5003 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5004 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5005 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5006 {
5007 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5008 }
5009 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5010 {
5011 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5012 }
5013 errorCode = U_ZERO_ERROR;
5014
5015 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5016
5017 pUIn = pszUnicode;
5018 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5019 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5020 {
5021 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5022 }
5023
5024 errorCode = U_ZERO_ERROR;
5025
5026 pLIn = (const char *)pszLMBCS;
5027 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5028 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5029 {
5030 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5031 }
5032
5033 /* unpaired or chopped LMBCS surrogates */
5034
5035 /* OK high surrogate, Low surrogate is chopped */
5036 LIn [0] = (char)0x14;
5037 LIn [1] = (char)0xD8;
5038 LIn [2] = (char)0x01;
5039 LIn [3] = (char)0x14;
5040 LIn [4] = (char)0xDC;
5041 pLIn = LIn;
5042 errorCode = U_ZERO_ERROR;
5043 pUOut = UOut;
5044
5045 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5046 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5047 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5048 {
5049 log_err("Unexpected results on chopped low surrogate\n");
5050 }
5051
5052 /* chopped at surrogate boundary */
5053 LIn [0] = (char)0x14;
5054 LIn [1] = (char)0xD8;
5055 LIn [2] = (char)0x01;
5056 pLIn = LIn;
5057 errorCode = U_ZERO_ERROR;
5058 pUOut = UOut;
5059
5060 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5061 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5062 {
5063 log_err("Unexpected results on chopped at surrogate boundary \n");
5064 }
5065
5066 /* unpaired surrogate plus valid Unichar */
5067 LIn [0] = (char)0x14;
5068 LIn [1] = (char)0xD8;
5069 LIn [2] = (char)0x01;
5070 LIn [3] = (char)0x14;
5071 LIn [4] = (char)0xC9;
5072 LIn [5] = (char)0xD0;
5073 pLIn = LIn;
5074 errorCode = U_ZERO_ERROR;
5075 pUOut = UOut;
5076
5077 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5078 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5079 {
5080 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5081 }
5082
5083 /* unpaired surrogate plus chopped Unichar */
5084 LIn [0] = (char)0x14;
5085 LIn [1] = (char)0xD8;
5086 LIn [2] = (char)0x01;
5087 LIn [3] = (char)0x14;
5088 LIn [4] = (char)0xC9;
5089
5090 pLIn = LIn;
5091 errorCode = U_ZERO_ERROR;
5092 pUOut = UOut;
5093
5094 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5095 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5096 {
5097 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5098 }
5099
5100 /* unpaired surrogate plus valid non-Unichar */
5101 LIn [0] = (char)0x14;
5102 LIn [1] = (char)0xD8;
5103 LIn [2] = (char)0x01;
5104 LIn [3] = (char)0x0F;
5105 LIn [4] = (char)0x3B;
5106
5107 pLIn = LIn;
5108 errorCode = U_ZERO_ERROR;
5109 pUOut = UOut;
5110
5111 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5112 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5113 {
5114 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5115 }
5116
5117 /* unpaired surrogate plus chopped non-Unichar */
5118 LIn [0] = (char)0x14;
5119 LIn [1] = (char)0xD8;
5120 LIn [2] = (char)0x01;
5121 LIn [3] = (char)0x0F;
5122
5123 pLIn = LIn;
5124 errorCode = U_ZERO_ERROR;
5125 pUOut = UOut;
5126
5127 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5128
5129 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5130 {
5131 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5132 }
5133 }
5134 }
5135 ucnv_close(cnv); /* final cleanup */
5136 }
5137
5138
5139 static void TestJitterbug255()
5140 {
5141 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5142 const char *testBuffer = (const char *)testBytes;
5143 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5144 UErrorCode status = U_ZERO_ERROR;
5145 UChar32 result;
5146 UConverter *cnv = 0;
5147
5148 cnv = ucnv_open("shift-jis", &status);
5149 if (U_FAILURE(status) || cnv == 0) {
5150 log_data_err("Failed to open the converter for SJIS.\n");
5151 return;
5152 }
5153 while (testBuffer != testEnd)
5154 {
5155 result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5156 if (U_FAILURE(status))
5157 {
5158 log_err("Failed to convert the next UChar for SJIS.\n");
5159 break;
5160 }
5161 }
5162 ucnv_close(cnv);
5163 }
5164
5165 static void TestEBCDICUS4XML()
5166 {
5167 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5168 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5169 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5170 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5171 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5172 UChar *unicodes = unicodes_x;
5173 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5174 char *target = target_x;
5175 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5176 UErrorCode status = U_ZERO_ERROR;
5177 UConverter *cnv = 0;
5178
5179 cnv = ucnv_open("ebcdic-xml-us", &status);
5180 if (U_FAILURE(status) || cnv == 0) {
5181 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5182 return;
5183 }
5184 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5185 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5186 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5187 u_errorName(status));
5188 printUSeqErr(unicodes_x, 3);
5189 printUSeqErr(toUnicodeMaps, 3);
5190 }
5191 status = U_ZERO_ERROR;
5192 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5193 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5194 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5195 u_errorName(status));
5196 printSeqErr((const unsigned char*)target_x, 3);
5197 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5198 }
5199 ucnv_close(cnv);
5200 }
5201 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5202
5203 #if !UCONFIG_NO_COLLATION
5204
5205 static void TestJitterbug981(){
5206 const UChar* rules;
5207 int32_t rules_length, target_cap, bytes_needed, buff_size;
5208 UErrorCode status = U_ZERO_ERROR;
5209 UConverter *utf8cnv;
5210 UCollator* myCollator;
5211 char *buff;
5212 int numNeeded=0;
5213 utf8cnv = ucnv_open ("utf8", &status);
5214 if(U_FAILURE(status)){
5215 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5216 return;
5217 }
5218 myCollator = ucol_open("zh", &status);
5219 if(U_FAILURE(status)){
5220 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5221 return;
5222 }
5223
5224 rules = ucol_getRules(myCollator, &rules_length);
5225 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5226 buff = malloc(buff_size);
5227
5228 target_cap = 0;
5229 do {
5230 ucnv_reset(utf8cnv);
5231 status = U_ZERO_ERROR;
5232 if(target_cap >= buff_size) {
5233 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5234 return;
5235 }
5236 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5237 rules, rules_length, &status);
5238 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5239 if(numNeeded!=0 && numNeeded!= bytes_needed){
5240 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5241 }
5242 numNeeded = bytes_needed;
5243 } while (status == U_BUFFER_OVERFLOW_ERROR);
5244 ucol_close(myCollator);
5245 ucnv_close(utf8cnv);
5246 free(buff);
5247 }
5248
5249 #endif
5250
5251 static void TestJitterbug1293(){
5252 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5253 char target[256];
5254 UErrorCode status = U_ZERO_ERROR;
5255 UConverter* conv=NULL;
5256 int32_t target_cap, bytes_needed, numNeeded = 0;
5257 conv = ucnv_open("shift-jis",&status);
5258 if(U_FAILURE(status)){
5259 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5260 return;
5261 }
5262
5263 do{
5264 target_cap =0;
5265 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5266 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5267 if(numNeeded!=0 && numNeeded!= bytes_needed){
5268 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5269 }
5270 numNeeded = bytes_needed;
5271 } while (status == U_BUFFER_OVERFLOW_ERROR);
5272 if(U_FAILURE(status)){
5273 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5274 return;
5275 }
5276 ucnv_close(conv);
5277 }
5278