]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nucnvtst.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CCONVTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 *********************************************************************************
14 */
15 #include <stdio.h>
16 #include "cstring.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "cintltst.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
24 #include "cmemory.h"
25
26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
30 #endif
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
47 static void TestSBCS(void);
48 static void TestDBCS(void);
49 static void TestMBCS(void);
50 static void TestISO_2022(void);
51 static void TestISO_2022_JP(void);
52 static void TestISO_2022_JP_1(void);
53 static void TestISO_2022_JP_2(void);
54 static void TestISO_2022_KR(void);
55 static void TestISO_2022_KR_1(void);
56 static void TestISO_2022_CN(void);
57 static void TestISO_2022_CN_EXT(void);
58 static void TestJIS(void);
59 static void TestHZ(void);
60 static void TestSCSU(void);
61 static void TestEBCDIC_STATEFUL(void);
62 static void TestGB18030(void);
63 static void TestLMBCS(void);
64 static void TestJitterbug255(void);
65 static void TestEBCDICUS4XML(void);
66 static void TestJitterbug915(void);
67 static void TestISCII(void);
68 static void TestConv(const uint16_t in[],
69 int len,
70 const char* conv,
71 const char* lang,
72 char byteArr[],
73 int byteArrLen);
74 static void TestRoundTrippingAllUTF(void);
75 static void TestCoverageMBCS(void);
76 static void TestJitterbug2346(void);
77 static void TestJitterbug2411(void);
78 void addTestNewConvert(TestNode** root);
79
80 /* open a converter, using test data if it begins with '@' */
81 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
82
83
/* Capacity, in elements, of the scratch buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/*
 * Chunk sizes handed to the converter on each call by testConvertFromU() and
 * testConvertToU(); TestNewConvertWithBufferSizes() overwrites both.
 */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;

/* Label describing the conversion under test; filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];

/*
 * Smaller of x and y.  Both arguments are parenthesized so that operands
 * containing low-precedence operators (e.g. `a & b`) compare correctly.
 * The selected argument is evaluated twice, so avoid side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
91
92 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
93 {
94 if(cnv && cnv[0] == '@') {
95 return ucnv_openPackage("testdata", cnv+1, err);
96 } else {
97 return ucnv_open(cnv, err);
98 }
99 }
100
/* Write len bytes of a to the verbose log as a brace-enclosed hex list. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
109
110 static void printUSeq(const UChar* a, int len)
111 {
112 int i=0;
113 log_verbose("{U+");
114 while (i<len) log_verbose("0x%04x ", a[i++]);
115 log_verbose("}\n");
116 }
117
/* Write len bytes of a to stderr as a brace-enclosed hex list. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
126
127 static void printUSeqErr(const UChar* a, int len)
128 {
129 int i=0;
130 fprintf(stderr, "{U+");
131 while (i<len)
132 fprintf(stderr, "0x%04x ", a[i++]);
133 fprintf(stderr,"}\n");
134 }
135
136 static void
137 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
138 {
139 const char* s0;
140 const char* s=(char*)source;
141 const uint32_t *r=results;
142 UErrorCode errorCode=U_ZERO_ERROR;
143 uint32_t c;
144
145 while(s<limit) {
146 s0=s;
147 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
148 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
149 break; /* no more significant input */
150 } else if(U_FAILURE(errorCode)) {
151 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
152 break;
153 } else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
154 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
155 message, c, (s-s0), *(r+1), *r);
156 break;
157 }
158 r+=2;
159 }
160 }
161
162 static void
163 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
164 {
165 const char* s=(char*)source;
166 UErrorCode errorCode=U_ZERO_ERROR;
167 uint32_t c;
168 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
169 if(errorCode != expected){
170 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
171 }
172 if(c != 0xFFFD && c != 0xffff){
173 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
174 }
175
176 }
177
178 static void TestInBufSizes(void)
179 {
180 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
181 #if 1
182 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
183 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
184 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
185 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
186 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
187 TestNewConvertWithBufferSizes(1,1);
188 TestNewConvertWithBufferSizes(2,3);
189 TestNewConvertWithBufferSizes(3,2);
190 #endif
191 }
192
193 static void TestOutBufSizes(void)
194 {
195 #if 1
196 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
197 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
198 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
199 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
200 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
201 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
202
203 #endif
204 }
205
206
207 void addTestNewConvert(TestNode** root)
208 {
209 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
210 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
211 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
212 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
213 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
214 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
215 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
216 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
217 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
218 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
219 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
220 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
221 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
222 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
223 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
224 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
225 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
226 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
227 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
228 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
229 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
230 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
231 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
232 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
233 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
234 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
235 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
236 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
237 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
238 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
239 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
240 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
241 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
242 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
243 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
244 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
245 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
246 #if !UCONFIG_NO_COLLATION
247 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
248 #endif
249 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
250 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
251 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
252 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
253 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
254
255 }
256
257
258 /* Note that this test already makes use of statics, so it's not really
259 multithread safe.
260 This convenience function lets us make the error messages actually useful.
261 */
262
263 static void setNuConvTestName(const char *codepage, const char *direction)
264 {
265 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
266 codepage,
267 direction,
268 gInBufferSize,
269 gOutBufferSize);
270 }
271
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    TC_OK,          /* 0: test was OK */
    TC_MISMATCH,    /* 1: match failed; an error was printed */
    TC_FAIL         /* 2: test failed; the error was already printed, don't print another */
} ETestConvertResult;
278
279 /* Note: This function uses global variables and it will not do offset
280 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
281 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
282 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
283 {
284 UErrorCode status = U_ZERO_ERROR;
285 UConverter *conv = 0;
286 uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */
287 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
288 uint8_t *p;
289 const UChar *src;
290 uint8_t *end;
291 uint8_t *targ;
292 int32_t *offs;
293 int i;
294 int32_t realBufferSize;
295 uint8_t *realBufferEnd;
296 const UChar *realSourceEnd;
297 const UChar *sourceLimit;
298 UBool checkOffsets = TRUE;
299 UBool doFlush;
300
301 for(i=0;i<NEW_MAX_BUFFER;i++)
302 junkout[i] = 0xF0;
303 for(i=0;i<NEW_MAX_BUFFER;i++)
304 junokout[i] = 0xFF;
305
306 setNuConvTestName(codepage, "FROM");
307
308 log_verbose("\n========= %s\n", gNuConvTestName);
309
310 conv = my_ucnv_open(codepage, &status);
311
312 if(U_FAILURE(status))
313 {
314 log_data_err("Couldn't open converter %s\n",codepage);
315 return TC_FAIL;
316 }
317 if(useFallback){
318 ucnv_setFallback(conv,useFallback);
319 }
320
321 log_verbose("Converter opened..\n");
322
323 src = source;
324 targ = junkout;
325 offs = junokout;
326
327 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
328 realBufferEnd = junkout + realBufferSize;
329 realSourceEnd = source + sourceLen;
330
331 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
332 checkOffsets = FALSE;
333
334 do
335 {
336 end = nct_min(targ + gOutBufferSize, realBufferEnd);
337 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
338
339 doFlush = (UBool)(sourceLimit == realSourceEnd);
340
341 if(targ == realBufferEnd) {
342 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
343 return TC_FAIL;
344 }
345 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
346
347
348 status = U_ZERO_ERROR;
349
350 ucnv_fromUnicode (conv,
351 (char **)&targ,
352 (const char*)end,
353 &src,
354 sourceLimit,
355 checkOffsets ? offs : NULL,
356 doFlush, /* flush if we're at the end of the input data */
357 &status);
358 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
359
360 if(U_FAILURE(status)) {
361 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
362 return TC_FAIL;
363 }
364
365 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
366 sourceLen, targ-junkout);
367
368 if(VERBOSITY)
369 {
370 char junk[9999];
371 char offset_str[9999];
372 uint8_t *ptr;
373
374 junk[0] = 0;
375 offset_str[0] = 0;
376 for(ptr = junkout;ptr<targ;ptr++) {
377 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
378 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
379 }
380
381 log_verbose(junk);
382 printSeq((const uint8_t *)expect, expectLen);
383 if ( checkOffsets ) {
384 log_verbose("\nOffsets:");
385 log_verbose(offset_str);
386 }
387 log_verbose("\n");
388 }
389 ucnv_close(conv);
390
391 if(expectLen != targ-junkout) {
392 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
393 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
394 printf("\nGot:");
395 printSeqErr((const unsigned char*)junkout, targ-junkout);
396 printf("\nExpected:");
397 printSeqErr((const unsigned char*)expect, expectLen);
398 return TC_MISMATCH;
399 }
400
401 if (checkOffsets && (expectOffsets != 0) ) {
402 log_verbose("comparing %d offsets..\n", targ-junkout);
403 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
404 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
405 printSeqErr((const unsigned char*)junkout, targ-junkout);
406 log_err("\n");
407 log_err("Got : ");
408 for(p=junkout;p<targ;p++) {
409 log_err("%d,", junokout[p-junkout]);
410 }
411 log_err("\n");
412 log_err("Expected: ");
413 for(i=0; i<(targ-junkout); i++) {
414 log_err("%d,", expectOffsets[i]);
415 }
416 log_err("\n");
417 }
418 }
419
420 log_verbose("comparing..\n");
421 if(!memcmp(junkout, expect, expectLen)) {
422 log_verbose("Matches!\n");
423 return TC_OK;
424 } else {
425 log_err("String does not match u->%s\n", gNuConvTestName);
426 printUSeqErr(source, sourceLen);
427 printf("\nGot:");
428 printSeqErr((const unsigned char *)junkout, expectLen);
429 printf("\nExpected:");
430 printSeqErr((const unsigned char *)expect, expectLen);
431
432 return TC_MISMATCH;
433 }
434 }
435
436 /* Note: This function uses global variables and it will not do offset
437 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
438 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
439 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
440 {
441 UErrorCode status = U_ZERO_ERROR;
442 UConverter *conv = 0;
443 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
444 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
445 const uint8_t *src;
446 const uint8_t *realSourceEnd;
447 const uint8_t *srcLimit;
448 UChar *p;
449 UChar *targ;
450 UChar *end;
451 int32_t *offs;
452 int i;
453 UBool checkOffsets = TRUE;
454
455 int32_t realBufferSize;
456 UChar *realBufferEnd;
457
458
459 for(i=0;i<NEW_MAX_BUFFER;i++)
460 junkout[i] = 0xFFFE;
461
462 for(i=0;i<NEW_MAX_BUFFER;i++)
463 junokout[i] = -1;
464
465 setNuConvTestName(codepage, "TO");
466
467 log_verbose("\n========= %s\n", gNuConvTestName);
468
469 conv = my_ucnv_open(codepage, &status);
470
471 if(U_FAILURE(status))
472 {
473 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
474 return TC_FAIL;
475 }
476 if(useFallback){
477 ucnv_setFallback(conv,useFallback);
478 }
479 log_verbose("Converter opened..\n");
480
481 src = source;
482 targ = junkout;
483 offs = junokout;
484
485 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
486 realBufferEnd = junkout + realBufferSize;
487 realSourceEnd = src + sourcelen;
488
489 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
490 checkOffsets = FALSE;
491
492 do
493 {
494 end = nct_min( targ + gOutBufferSize, realBufferEnd);
495 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
496
497 if(targ == realBufferEnd)
498 {
499 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
500 return TC_FAIL;
501 }
502 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
503
504 /* oldTarg = targ; */
505
506 status = U_ZERO_ERROR;
507
508 ucnv_toUnicode (conv,
509 &targ,
510 end,
511 (const char **)&src,
512 (const char *)srcLimit,
513 checkOffsets ? offs : NULL,
514 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
515 &status);
516
517 /* offs += (targ-oldTarg); */
518
519 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
520
521 if(U_FAILURE(status))
522 {
523 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
524 return TC_FAIL;
525 }
526
527 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
528 sourcelen, targ-junkout);
529 if(VERBOSITY)
530 {
531 char junk[9999];
532 char offset_str[9999];
533 UChar *ptr;
534
535 junk[0] = 0;
536 offset_str[0] = 0;
537
538 for(ptr = junkout;ptr<targ;ptr++)
539 {
540 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
541 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
542 }
543
544 log_verbose(junk);
545 printUSeq(expect, expectlen);
546 if ( checkOffsets )
547 {
548 log_verbose("\nOffsets:");
549 log_verbose(offset_str);
550 }
551 log_verbose("\n");
552 }
553 ucnv_close(conv);
554
555 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
556
557 if (checkOffsets && (expectOffsets != 0))
558 {
559 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
560 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
561 log_err("Got: ");
562 for(p=junkout;p<targ;p++) {
563 log_err("%d,", junokout[p-junkout]);
564 }
565 log_err("\n");
566 log_err("Expected: ");
567 for(i=0; i<(targ-junkout); i++) {
568 log_err("%d,", expectOffsets[i]);
569 }
570 log_err("\n");
571 log_err("output: ");
572 for(i=0; i<(targ-junkout); i++) {
573 log_err("%X,", junkout[i]);
574 }
575 log_err("\n");
576 log_err("input: ");
577 for(i=0; i<(src-source); i++) {
578 log_err("%X,", (unsigned char)source[i]);
579 }
580 log_err("\n");
581 }
582 }
583
584 if(!memcmp(junkout, expect, expectlen*2))
585 {
586 log_verbose("Matches!\n");
587 return TC_OK;
588 }
589 else
590 {
591 log_err("String does not match. %s\n", gNuConvTestName);
592 log_verbose("String does not match. %s\n", gNuConvTestName);
593 printf("\nGot:");
594 printUSeqErr(junkout, expectlen);
595 printf("\nExpected:");
596 printUSeqErr(expect, expectlen);
597 return TC_MISMATCH;
598 }
599 }
600
601
602 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
603 {
604 /** test chars #1 */
605 /* 1 2 3 1Han 2Han 3Han . */
606 UChar sampleText[] =
607 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
608
609
610 const uint8_t expectedUTF8[] =
611 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
612 int32_t toUTF8Offs[] =
613 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
614 int32_t fmUTF8Offs[] =
615 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
616
617 /* Same as UTF8, but with ^[%B preceding */
618 const uint8_t expectedISO2022[] =
619 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
620 int32_t toISO2022Offs[] =
621 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
622 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
623 int32_t fmISO2022Offs[] =
624 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
625
626 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
627 const uint8_t expectedIBM930[] =
628 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
629 int32_t toIBM930Offs[] =
630 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
631 int32_t fmIBM930Offs[] =
632 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
633
634 /* 1 2 3 0 h1 h2 h3 . MBCS*/
635 const uint8_t expectedIBM943[] =
636 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
637 int32_t toIBM943Offs [] =
638 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
639 int32_t fmIBM943Offs[] =
640 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
641
642 /* 1 2 3 0 h1 h2 h3 . DBCS*/
643 const uint8_t expectedIBM9027[] =
644 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
645 int32_t toIBM9027Offs [] =
646 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
647
648 /* 1 2 3 0 <?> <?> <?> . SBCS*/
649 const uint8_t expectedIBM920[] =
650 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
651 int32_t toIBM920Offs [] =
652 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
653
654 /* 1 2 3 0 <?> <?> <?> . SBCS*/
655 const uint8_t expectedISO88593[] =
656 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
657 int32_t toISO88593Offs[] =
658 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
659
660 /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
661 const uint8_t expectedLATIN1[] =
662 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
663 int32_t toLATIN1Offs[] =
664 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
665
666
667 /* etc */
668 const uint8_t expectedUTF16BE[] =
669 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
670 int32_t toUTF16BEOffs[]=
671 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
672 int32_t fmUTF16BEOffs[] =
673 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
674
675 const uint8_t expectedUTF16LE[] =
676 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
677 int32_t toUTF16LEOffs[]=
678 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
679 int32_t fmUTF16LEOffs[] =
680 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
681
682 const uint8_t expectedUTF32BE[] =
683 { 0x00, 0x00, 0x00, 0x31,
684 0x00, 0x00, 0x00, 0x32,
685 0x00, 0x00, 0x00, 0x33,
686 0x00, 0x00, 0x00, 0x00,
687 0x00, 0x00, 0x4e, 0x00,
688 0x00, 0x00, 0x4e, 0x8c,
689 0x00, 0x00, 0x4e, 0x09,
690 0x00, 0x00, 0x00, 0x2e };
691 int32_t toUTF32BEOffs[]=
692 { 0x00, 0x00, 0x00, 0x00,
693 0x01, 0x01, 0x01, 0x01,
694 0x02, 0x02, 0x02, 0x02,
695 0x03, 0x03, 0x03, 0x03,
696 0x04, 0x04, 0x04, 0x04,
697 0x05, 0x05, 0x05, 0x05,
698 0x06, 0x06, 0x06, 0x06,
699 0x07, 0x07, 0x07, 0x07,
700 0x08, 0x08, 0x08, 0x08 };
701 int32_t fmUTF32BEOffs[] =
702 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
703
704 const uint8_t expectedUTF32LE[] =
705 { 0x31, 0x00, 0x00, 0x00,
706 0x32, 0x00, 0x00, 0x00,
707 0x33, 0x00, 0x00, 0x00,
708 0x00, 0x00, 0x00, 0x00,
709 0x00, 0x4e, 0x00, 0x00,
710 0x8c, 0x4e, 0x00, 0x00,
711 0x09, 0x4e, 0x00, 0x00,
712 0x2e, 0x00, 0x00, 0x00 };
713 int32_t toUTF32LEOffs[]=
714 { 0x00, 0x00, 0x00, 0x00,
715 0x01, 0x01, 0x01, 0x01,
716 0x02, 0x02, 0x02, 0x02,
717 0x03, 0x03, 0x03, 0x03,
718 0x04, 0x04, 0x04, 0x04,
719 0x05, 0x05, 0x05, 0x05,
720 0x06, 0x06, 0x06, 0x06,
721 0x07, 0x07, 0x07, 0x07,
722 0x08, 0x08, 0x08, 0x08 };
723 int32_t fmUTF32LEOffs[] =
724 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
725
726
727
728
729 /** Test chars #2 **/
730
731 /* Sahha [health], slashed h's */
732 const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
733 const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
734
735 /* LMBCS */
736 const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
737 const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
738 int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
739 int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
740 /*********************************** START OF CODE finally *************/
741
742 gInBufferSize = insize;
743 gOutBufferSize = outsize;
744
745 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
746
747
748 #if 1
749 /*UTF-8*/
750 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
751 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
752
753 log_verbose("Test surrogate behaviour for UTF8\n");
754 {
755 const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
756 const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
757 0xf0, 0x90, 0x90, 0x81,
758 0xef, 0xbf, 0xbd
759 };
760 int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
761 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
762 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
763
764
765 }
766 /*ISO-2022*/
767 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
768 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
769 /*UTF16 LE*/
770 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
771 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
772 /*UTF16 BE*/
773 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
774 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
775 /*UTF32 LE*/
776 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
777 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
778 /*UTF32 BE*/
779 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
780 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
781 /*LATIN_1*/
782 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
783 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
784 /*EBCDIC_STATEFUL*/
785 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
786 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
787
788 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
789 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
790
791 /*MBCS*/
792
793 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
794 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
795 /*DBCS*/
796 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
797 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
798 /*SBCS*/
799 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
800 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
801 /*SBCS*/
802 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
803 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
804
805
806 /****/
807 #endif
808
809 #if 1
810 /*UTF-8*/
811 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
812 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
813 /*ISO-2022*/
814 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
815 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
816 /*UTF16 LE*/
817 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
818 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
819 /*UTF16 BE*/
820 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
821 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
822 /*UTF32 LE*/
823 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
824 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
825 /*UTF32 BE*/
826 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
827 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
828 /*EBCDIC_STATEFUL*/
829 testConvertToU(expectedIBM930, sizeof(expectedIBM930),
830 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
831 /*MBCS*/
832 testConvertToU(expectedIBM943, sizeof(expectedIBM943),
833 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
834
835 /* Try it again to make sure it still works */
836 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
837 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
838
839 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
840 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
841
842 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
843 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
844
845 /*LMBCS*/
846 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
847 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
848 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
849 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
850
851 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
852 {
853 /* encode directly set D and set O */
854 static const uint8_t utf7[] = {
855 /*
856 Hi Mom -+Jjo--!
857 A+ImIDkQ.
858 +-
859 +ZeVnLIqe
860 */
861 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
862 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
863 0x2b, 0x2d,
864 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
865 };
866 static const UChar unicode[] = {
867 /*
868 Hi Mom -<WHITE SMILING FACE>-!
869 A<NOT IDENTICAL TO><ALPHA>.
870 +
871 [Japanese word "nihongo"]
872 */
873 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
874 0x41, 0x2262, 0x0391, 0x2e,
875 0x2b,
876 0x65e5, 0x672c, 0x8a9e
877 };
878 static const int32_t toUnicodeOffsets[] = {
879 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
880 15, 17, 19, 23,
881 24,
882 27, 29, 32
883 };
884 static const int32_t fromUnicodeOffsets[] = {
885 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
886 11, 12, 12, 12, 13, 13, 13, 13, 14,
887 15, 15,
888 16, 16, 16, 17, 17, 17, 18, 18, 18
889 };
890
891 /* same but escaping set O (the exclamation mark) */
892 static const uint8_t utf7Restricted[] = {
893 /*
894 Hi Mom -+Jjo--+ACE-
895 A+ImIDkQ.
896 +-
897 +ZeVnLIqe
898 */
899 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
900 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
901 0x2b, 0x2d,
902 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
903 };
904 static const int32_t toUnicodeOffsetsR[] = {
905 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
906 19, 21, 23, 27,
907 28,
908 31, 33, 36
909 };
910 static const int32_t fromUnicodeOffsetsR[] = {
911 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
912 11, 12, 12, 12, 13, 13, 13, 13, 14,
913 15, 15,
914 16, 16, 16, 17, 17, 17, 18, 18, 18
915 };
916
917 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
918
919 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
920
921 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
922
923 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
924 }
925
926 /*
927 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
928 * modified according to RFC 2060,
929 * and supplemented with the one example in RFC 2060 itself.
930 */
931 {
932 static const uint8_t imap[] = {
933 /* Hi Mom -&Jjo--!
934 A&ImIDkQ-.
935 &-
936 &ZeVnLIqe-
937 \
938 ~peter
939 /mail
940 /&ZeVnLIqe-
941 /&U,BTFw-
942 */
943 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
944 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
945 0x26, 0x2d,
946 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
947 0x5c,
948 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
949 0x2f, 0x6d, 0x61, 0x69, 0x6c,
950 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
951 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
952 };
953 static const UChar unicode[] = {
954 /* Hi Mom -<WHITE SMILING FACE>-!
955 A<NOT IDENTICAL TO><ALPHA>.
956 &
957 [Japanese word "nihongo"]
958 \
959 ~peter
960 /mail
961 /<65e5, 672c, 8a9e>
962 /<53f0, 5317>
963 */
964 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
965 0x41, 0x2262, 0x0391, 0x2e,
966 0x26,
967 0x65e5, 0x672c, 0x8a9e,
968 0x5c,
969 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
970 0x2f, 0x6d, 0x61, 0x69, 0x6c,
971 0x2f, 0x65e5, 0x672c, 0x8a9e,
972 0x2f, 0x53f0, 0x5317
973 };
974 static const int32_t toUnicodeOffsets[] = {
975 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
976 15, 17, 19, 24,
977 25,
978 28, 30, 33,
979 37,
980 38, 39, 40, 41, 42, 43,
981 44, 45, 46, 47, 48,
982 49, 51, 53, 56,
983 60, 62, 64
984 };
985 static const int32_t fromUnicodeOffsets[] = {
986 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
988 15, 15,
989 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
990 19,
991 20, 21, 22, 23, 24, 25,
992 26, 27, 28, 29, 30,
993 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
994 35, 36, 36, 36, 37, 37, 37, 37, 37
995 };
996
997 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
998
999 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1000 }
1001
1002 /* Test UTF-8 bad data handling*/
1003 {
1004 static const uint8_t utf8[]={
1005 0x61,
1006 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1007 0x00,
1008 0x62,
1009 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1010 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1011 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1012 0xdf, 0xbf, /* 7ff */
1013 0xbf, /* truncated tail */
1014 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1015 0x02
1016 };
1017
1018 static const uint16_t utf8Expected[]={
1019 0x0061,
1020 0xfffd,
1021 0x0000,
1022 0x0062,
1023 0xfffd,
1024 0xfffd,
1025 0xdbff, 0xdfff,
1026 0x07ff,
1027 0xfffd,
1028 0xfffd,
1029 0x0002
1030 };
1031
1032 static const int32_t utf8Offsets[]={
1033 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1034 };
1035 testConvertToU(utf8, sizeof(utf8),
1036 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1037
1038 }
1039
1040 /* Test UTF-32BE bad data handling*/
1041 {
1042 static const uint8_t utf32[]={
1043 0x00, 0x00, 0x00, 0x61,
1044 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1045 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1046 0x00, 0x00, 0x00, 0x62,
1047 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1048 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1049 0x00, 0x00, 0x01, 0x62,
1050 0x00, 0x00, 0x02, 0x62
1051 };
1052
1053 static const uint16_t utf32Expected[]={
1054 0x0061,
1055 0xfffd, /* 0x110000 out of range */
1056 0xDBFF, /* 0x10FFFF in range */
1057 0xDFFF,
1058 0x0062,
1059 0xfffd, /* 0xffffffff out of range */
1060 0xfffd, /* 0x7fffffff out of range */
1061 0x0162,
1062 0x0262
1063 };
1064
1065 static const int32_t utf32Offsets[]={
1066 0, 4, 8, 8, 12, 16, 20, 24, 28
1067 };
1068 testConvertToU(utf32, sizeof(utf32),
1069 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1070
1071 }
1072
1073 /* Test UTF-32LE bad data handling*/
1074 {
1075 static const uint8_t utf32[]={
1076 0x61, 0x00, 0x00, 0x00,
1077 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1078 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1079 0x62, 0x00, 0x00, 0x00,
1080 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1081 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1082 0x62, 0x01, 0x00, 0x00,
1083 0x62, 0x02, 0x00, 0x00,
1084 };
1085
1086 static const uint16_t utf32Expected[]={
1087 0x0061,
1088 0xfffd, /* 0x110000 out of range */
1089 0xDBFF, /* 0x10FFFF in range */
1090 0xDFFF,
1091 0x0062,
1092 0xfffd, /* 0xffffffff out of range */
1093 0xfffd, /* 0x7fffffff out of range */
1094 0x0162,
1095 0x0262
1096 };
1097
1098 static const int32_t utf32Offsets[]={
1099 0, 4, 8, 8, 12, 16, 20, 24, 28
1100 };
1101 testConvertToU(utf32, sizeof(utf32),
1102 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1103
1104 }
1105 }
1106
1107 static void TestCoverageMBCS(){
1108 #if 0
1109 UErrorCode status = U_ZERO_ERROR;
1110 const char *directory = loadTestData(&status);
1111 char* tdpath = NULL;
1112 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1113 int len = strlen(directory);
1114 char* index=NULL;
1115
1116 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1117 uprv_strcpy(saveDirectory,u_getDataDirectory());
1118 log_verbose("Retrieved data directory %s \n",saveDirectory);
1119 uprv_strcpy(tdpath,directory);
1120 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1121
1122 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1123 *(index+1)=0;
1124 }
1125 u_setDataDirectory(tdpath);
1126 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1127 #endif
1128
1129 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1130 which is test file for MBCS conversion with single-byte codepage data.*/
1131 {
1132
1133 /* MBCS with single byte codepage data test1.ucm*/
1134 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1135 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1136 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1137
1138 const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
1139 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
1140 int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4, 5};
1141
1142 /*from Unicode*/
1143 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1144 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1145
1146 /*to Unicode*/
1147 testConvertToU(test1input, sizeof(test1input),
1148 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
1149
1150 }
1151
1152 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1153 which is test file for MBCS conversion with three-byte codepage data.*/
1154 {
1155
1156 /* MBCS with three byte codepage data test3.ucm*/
1157 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1158 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1159 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1160
1161 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1162 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1163 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1164
1165 /*from Unicode*/
1166 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1167 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1168
1169 /*to Unicode*/
1170 testConvertToU(test3input, sizeof(test3input),
1171 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1172
1173 }
1174
1175 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1176 which is test file for MBCS conversion with four-byte codepage data.*/
1177 {
1178
1179 /* MBCS with three byte codepage data test4.ucm*/
1180 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1181 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1182 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1183
1184 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1185 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1186 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1187
1188 /*from Unicode*/
1189 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1190 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1191
1192 /*to Unicode*/
1193 testConvertToU(test4input, sizeof(test4input),
1194 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1195
1196 }
1197 #if 0
1198 free(tdpath);
1199 /* restore the original data directory */
1200 log_verbose("Setting the data directory to %s \n", saveDirectory);
1201 u_setDataDirectory(saveDirectory);
1202 free(saveDirectory);
1203 #endif
1204
1205 }
1206
1207 static void TestConverterType(const char *convName, UConverterType convType) {
1208 UConverter* myConverter;
1209 UErrorCode err = U_ZERO_ERROR;
1210
1211 myConverter = my_ucnv_open(convName, &err);
1212
1213 if (U_FAILURE(err)) {
1214 log_data_err("Failed to create an %s converter\n", convName);
1215 return;
1216 }
1217 else
1218 {
1219 if (ucnv_getType(myConverter)!=convType) {
1220 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1221 convName, convType);
1222 }
1223 else {
1224 log_verbose("ucnv_getType %s ok\n", convName);
1225 }
1226 }
1227 ucnv_close(myConverter);
1228 }
1229
1230 static void TestConverterTypesAndStarters()
1231 {
1232 UConverter* myConverter;
1233 UErrorCode err = U_ZERO_ERROR;
1234 UBool mystarters[256];
1235
1236 /* const UBool expectedKSCstarters[256] = {
1237 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1238 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1239 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1240 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1241 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1242 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1243 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1244 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1245 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1252 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1253 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1254 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1255 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1256 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1257 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1258 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1259 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1263
1264
1265 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1266
1267 myConverter = ucnv_open("ksc", &err);
1268 if (U_FAILURE(err)) {
1269 log_data_err("Failed to create an ibm-ksc converter\n");
1270 return;
1271 }
1272 else
1273 {
1274 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1275 log_err("ucnv_getType Failed for ibm-949\n");
1276 else
1277 log_verbose("ucnv_getType ibm-949 ok\n");
1278
1279 if(myConverter!=NULL)
1280 ucnv_getStarters(myConverter, mystarters, &err);
1281
1282 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1283 log_err("Failed ucnv_getStarters for ksc\n");
1284 else
1285 log_verbose("ucnv_getStarters ok\n");*/
1286
1287 }
1288 ucnv_close(myConverter);
1289
1290 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1291 TestConverterType("ibm-878", UCNV_SBCS);
1292 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1293 TestConverterType("ibm-1208", UCNV_UTF8);
1294 TestConverterType("utf-8", UCNV_UTF8);
1295 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1296 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1297 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1298 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1299 TestConverterType("iso-2022", UCNV_ISO_2022);
1300 TestConverterType("hz", UCNV_HZ);
1301 TestConverterType("scsu", UCNV_SCSU);
1302 TestConverterType("x-iscii-de", UCNV_ISCII);
1303 TestConverterType("ascii", UCNV_US_ASCII);
1304 TestConverterType("utf-7", UCNV_UTF7);
1305 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1306 TestConverterType("bocu-1", UCNV_BOCU1);
1307 }
1308
1309 static void
1310 TestAmbiguousConverter(UConverter *cnv) {
1311 static const char inBytes[2]={ 0x61, 0x5c };
1312 UChar outUnicode[20]={ 0, 0, 0, 0 };
1313
1314 const char *s;
1315 UChar *u;
1316 UErrorCode errorCode;
1317 UBool isAmbiguous;
1318
1319 /* try to convert an 'a' and a US-ASCII backslash */
1320 errorCode=U_ZERO_ERROR;
1321 s=inBytes;
1322 u=outUnicode;
1323 ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1324 if(U_FAILURE(errorCode)) {
1325 /* we do not care about general failures in this test; the input may just not be mappable */
1326 return;
1327 }
1328
1329 if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1330 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1331 return;
1332 }
1333
1334 isAmbiguous=ucnv_isAmbiguous(cnv);
1335
1336 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1337 if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1338 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1339 ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1340 return;
1341 }
1342
1343 if(outUnicode[1]!=0x5c) {
1344 /* needs fixup, fix it */
1345 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1346 if(outUnicode[1]!=0x5c) {
1347 /* the fix failed */
1348 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1349 return;
1350 }
1351 }
1352 }
1353
1354 static void TestAmbiguous()
1355 {
1356 UErrorCode status = U_ZERO_ERROR;
1357 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1358 const char target[] = {
1359 /* "\\usr\\local\\share\\data\\icutest.txt" */
1360 0x5c, 0x75, 0x73, 0x72,
1361 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1362 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1363 0x5c, 0x64, 0x61, 0x74, 0x61,
1364 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1365 0
1366 };
1367 UChar asciiResult[200], sjisResult[200];
1368 int32_t asciiLength = 0, sjisLength = 0, i;
1369 const char *name;
1370
1371 /* enumerate all converters */
1372 status=U_ZERO_ERROR;
1373 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1374 cnv=ucnv_open(name, &status);
1375 if(U_SUCCESS(status)) {
1376 TestAmbiguousConverter(cnv);
1377 ucnv_close(cnv);
1378 } else {
1379 log_err("error: unable to open available converter \"%s\"\n", name);
1380 status=U_ZERO_ERROR;
1381 }
1382 }
1383
1384 sjis_cnv = ucnv_open("ibm-943", &status);
1385 if (U_FAILURE(status))
1386 {
1387 log_data_err("Failed to create a SJIS converter\n");
1388 return;
1389 }
1390 ascii_cnv = ucnv_open("LATIN-1", &status);
1391 if (U_FAILURE(status))
1392 {
1393 log_data_err("Failed to create a LATIN-1 converter\n");
1394 ucnv_close(sjis_cnv);
1395 return;
1396 }
1397 /* convert target from SJIS to Unicode */
1398 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1399 if (U_FAILURE(status))
1400 {
1401 log_err("Failed to convert the SJIS string.\n");
1402 ucnv_close(sjis_cnv);
1403 ucnv_close(ascii_cnv);
1404 return;
1405 }
1406 /* convert target from Latin-1 to Unicode */
1407 asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1408 if (U_FAILURE(status))
1409 {
1410 log_err("Failed to convert the Latin-1 string.\n");
1411 free(sjisResult);
1412 ucnv_close(sjis_cnv);
1413 ucnv_close(ascii_cnv);
1414 return;
1415 }
1416 if (!ucnv_isAmbiguous(sjis_cnv))
1417 {
1418 log_err("SJIS converter should contain ambiguous character mappings.\n");
1419 free(sjisResult);
1420 free(asciiResult);
1421 ucnv_close(sjis_cnv);
1422 ucnv_close(ascii_cnv);
1423 return;
1424 }
1425 if (u_strcmp(sjisResult, asciiResult) == 0)
1426 {
1427 log_err("File separators for SJIS don't need to be fixed.\n");
1428 }
1429 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1430 if (u_strcmp(sjisResult, asciiResult) != 0)
1431 {
1432 log_err("Fixing file separator for SJIS failed.\n");
1433 }
1434 ucnv_close(sjis_cnv);
1435 ucnv_close(ascii_cnv);
1436 }
1437
1438 static void
1439 TestSignatureDetection(){
1440 /* with null terminated strings */
1441 {
1442 static const char* data[] = {
1443 "\xFE\xFF\x00\x00", /* UTF-16BE */
1444 "\xFF\xFE\x00\x00", /* UTF-16LE */
1445 "\xEF\xBB\xBF\x00", /* UTF-8 */
1446 "\x0E\xFE\xFF\x00", /* SCSU */
1447
1448 "\xFE\xFF", /* UTF-16BE */
1449 "\xFF\xFE", /* UTF-16LE */
1450 "\xEF\xBB\xBF", /* UTF-8 */
1451 "\x0E\xFE\xFF", /* SCSU */
1452
1453 "\xFE\xFF\x41\x42", /* UTF-16BE */
1454 "\xFF\xFE\x41\x41", /* UTF-16LE */
1455 "\xEF\xBB\xBF\x41", /* UTF-8 */
1456 "\x0E\xFE\xFF\x41", /* SCSU */
1457
1458 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1459 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1460 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1461 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1462 "\x2B\x2F\x76\x2F\x41" /* UTF-7 */
1463 };
1464 static const char* expected[] = {
1465 "UTF-16BE",
1466 "UTF-16LE",
1467 "UTF-8",
1468 "SCSU",
1469
1470 "UTF-16BE",
1471 "UTF-16LE",
1472 "UTF-8",
1473 "SCSU",
1474
1475 "UTF-16BE",
1476 "UTF-16LE",
1477 "UTF-8",
1478 "SCSU",
1479
1480 "UTF-7",
1481 "UTF-7",
1482 "UTF-7",
1483 "UTF-7",
1484 "UTF-7"
1485 };
1486 static const int32_t expectedLength[] ={
1487 2,
1488 2,
1489 3,
1490 3,
1491
1492 2,
1493 2,
1494 3,
1495 3,
1496
1497 2,
1498 2,
1499 3,
1500 3,
1501
1502 5,
1503 4,
1504 4,
1505 4,
1506 4
1507 };
1508 int i=0;
1509 UErrorCode err;
1510 int32_t signatureLength = -1;
1511 const char* source = NULL;
1512 const char* enc = NULL;
1513 for( ; i<sizeof(data)/sizeof(char*); i++){
1514 err = U_ZERO_ERROR;
1515 source = data[i];
1516 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1517 if(U_FAILURE(err)){
1518 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1519 continue;
1520 }
1521 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1522 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1523 continue;
1524 }
1525 if(signatureLength != expectedLength[i]){
1526 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1527 }
1528 }
1529 }
1530 {
1531 static const char* data[] = {
1532 "\xFE\xFF\x00", /* UTF-16BE */
1533 "\xFF\xFE\x00", /* UTF-16LE */
1534 "\xEF\xBB\xBF\x00", /* UTF-8 */
1535 "\x0E\xFE\xFF\x00", /* SCSU */
1536 "\x00\x00\xFE\xFF", /* UTF-32BE */
1537 "\xFF\xFE\x00\x00", /* UTF-32LE */
1538 "\xFE\xFF", /* UTF-16BE */
1539 "\xFF\xFE", /* UTF-16LE */
1540 "\xEF\xBB\xBF", /* UTF-8 */
1541 "\x0E\xFE\xFF", /* SCSU */
1542 "\x00\x00\xFE\xFF", /* UTF-32BE */
1543 "\xFF\xFE\x00\x00", /* UTF-32LE */
1544 "\xFE\xFF\x41\x42", /* UTF-16BE */
1545 "\xFF\xFE\x41\x41", /* UTF-16LE */
1546 "\xEF\xBB\xBF\x41", /* UTF-8 */
1547 "\x0E\xFE\xFF\x41", /* SCSU */
1548 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1549 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1550 "\xFB\xEE\x28", /* BOCU-1 */
1551 "\xFF\x41\x42" /* NULL */
1552 };
1553 static const int len[] = {
1554 3,
1555 3,
1556 4,
1557 4,
1558 4,
1559 4,
1560 2,
1561 2,
1562 3,
1563 3,
1564 4,
1565 4,
1566 4,
1567 4,
1568 4,
1569 4,
1570 5,
1571 5,
1572 3,
1573 3
1574 };
1575
1576 static const char* expected[] = {
1577 "UTF-16BE",
1578 "UTF-16LE",
1579 "UTF-8",
1580 "SCSU",
1581 "UTF-32BE",
1582 "UTF-32LE",
1583 "UTF-16BE",
1584 "UTF-16LE",
1585 "UTF-8",
1586 "SCSU",
1587 "UTF-32BE",
1588 "UTF-32LE",
1589 "UTF-16BE",
1590 "UTF-16LE",
1591 "UTF-8",
1592 "SCSU",
1593 "UTF-32BE",
1594 "UTF-32LE",
1595 "BOCU-1",
1596 NULL
1597 };
1598 static const int32_t expectedLength[] ={
1599 2,
1600 2,
1601 3,
1602 3,
1603 4,
1604 4,
1605 2,
1606 2,
1607 3,
1608 3,
1609 4,
1610 4,
1611 2,
1612 2,
1613 3,
1614 3,
1615 4,
1616 4,
1617 3,
1618 0
1619 };
1620 int i=0;
1621 UErrorCode err;
1622 int32_t signatureLength = -1;
1623 int32_t sourceLength=-1;
1624 const char* source = NULL;
1625 const char* enc = NULL;
1626 for( ; i<sizeof(data)/sizeof(char*); i++){
1627 err = U_ZERO_ERROR;
1628 source = data[i];
1629 sourceLength = len[i];
1630 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1631 if(U_FAILURE(err)){
1632 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1633 continue;
1634 }
1635 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1636 if(expected[i] !=NULL){
1637 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1638 continue;
1639 }
1640 }
1641 if(signatureLength != expectedLength[i]){
1642 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1643 }
1644 }
1645 }
1646 }
1647
1648 void
1649 static TestUTF7() {
1650 /* test input */
1651 static const uint8_t in[]={
1652 /* H - +Jjo- - ! +- +2AHcAQ */
1653 0x48,
1654 0x2d,
1655 0x2b, 0x4a, 0x6a, 0x6f,
1656 0x2d, 0x2d,
1657 0x21,
1658 0x2b, 0x2d,
1659 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1660 };
1661
1662 /* expected test results */
1663 static const uint32_t results[]={
1664 /* number of bytes read, code point */
1665 1, 0x48,
1666 1, 0x2d,
1667 4, 0x263a, /* <WHITE SMILING FACE> */
1668 2, 0x2d,
1669 1, 0x21,
1670 2, 0x2b,
1671 7, 0x10401
1672 };
1673
1674 const char *cnvName;
1675 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1676 UErrorCode errorCode=U_ZERO_ERROR;
1677 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1678 if(U_FAILURE(errorCode)) {
1679 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1680 return;
1681 }
1682 TestNextUChar(cnv, source, limit, results, "UTF-7");
1683 /* Test the condition when source >= sourceLimit */
1684 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1685 cnvName = ucnv_getName(cnv, &errorCode);
1686 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1687 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1688 }
1689 ucnv_close(cnv);
1690 }
1691
1692 void
1693 static TestIMAP() {
1694 /* test input */
1695 static const uint8_t in[]={
1696 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1697 0x48,
1698 0x2d,
1699 0x26, 0x4a, 0x6a, 0x6f,
1700 0x2d, 0x2d,
1701 0x21,
1702 0x26, 0x2d,
1703 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1704 };
1705
1706 /* expected test results */
1707 static const uint32_t results[]={
1708 /* number of bytes read, code point */
1709 1, 0x48,
1710 1, 0x2d,
1711 4, 0x263a, /* <WHITE SMILING FACE> */
1712 2, 0x2d,
1713 1, 0x21,
1714 2, 0x26,
1715 7, 0x10401
1716 };
1717
1718 const char *cnvName;
1719 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1720 UErrorCode errorCode=U_ZERO_ERROR;
1721 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1722 if(U_FAILURE(errorCode)) {
1723 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1724 return;
1725 }
1726 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1727 /* Test the condition when source >= sourceLimit */
1728 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1729 cnvName = ucnv_getName(cnv, &errorCode);
1730 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1731 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1732 }
1733 ucnv_close(cnv);
1734 }
1735
1736 void
1737 static TestUTF8() {
1738 /* test input */
1739 static const uint8_t in[]={
1740 0x61,
1741 0xc2, 0x80,
1742 0xe0, 0xa0, 0x80,
1743 0xf0, 0x90, 0x80, 0x80,
1744 0xf4, 0x84, 0x8c, 0xa1,
1745 0xf0, 0x90, 0x90, 0x81
1746 };
1747
1748 /* expected test results */
1749 static const uint32_t results[]={
1750 /* number of bytes read, code point */
1751 1, 0x61,
1752 2, 0x80,
1753 3, 0x800,
1754 4, 0x10000,
1755 4, 0x104321,
1756 4, 0x10401
1757 };
1758
1759 /* error test input */
1760 static const uint8_t in2[]={
1761 0x61,
1762 0xc0, 0x80, /* illegal non-shortest form */
1763 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1764 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1765 0xc0, 0xc0, /* illegal trail byte */
1766 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1767 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1768 0xfe, /* illegal byte altogether */
1769 0x62
1770 };
1771
1772 /* expected error test results */
1773 static const uint32_t results2[]={
1774 /* number of bytes read, code point */
1775 1, 0x61,
1776 22, 0x62
1777 };
1778
1779 UConverterToUCallback cb;
1780 const void *p;
1781
1782 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1783 UErrorCode errorCode=U_ZERO_ERROR;
1784 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1785 if(U_FAILURE(errorCode)) {
1786 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1787 return;
1788 }
1789 TestNextUChar(cnv, source, limit, results, "UTF-8");
1790 /* Test the condition when source >= sourceLimit */
1791 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1792
1793 /* test error behavior with a skip callback */
1794 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1795 source=(const char *)in2;
1796 limit=(const char *)(in2+sizeof(in2));
1797 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1798
1799 ucnv_close(cnv);
1800 }
1801
1802 void
1803 static TestCESU8() {
1804 /* test input */
1805 static const uint8_t in[]={
1806 0x61,
1807 0xc2, 0x80,
1808 0xe0, 0xa0, 0x80,
1809 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1810 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1811 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1812 0xef, 0xbf, 0xbc
1813 };
1814
1815 /* expected test results */
1816 static const uint32_t results[]={
1817 /* number of bytes read, code point */
1818 1, 0x61,
1819 2, 0x80,
1820 3, 0x800,
1821 6, 0x10000,
1822 3, 0xdc01,
1823 3, 0xd802,
1824 6, 0x10ffff,
1825 3, 0xfffc
1826 };
1827
1828 /* error test input */
1829 static const uint8_t in2[]={
1830 0x61,
1831 0xc0, 0x80, /* illegal non-shortest form */
1832 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1833 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1834 0xc0, 0xc0, /* illegal trail byte */
1835 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1836 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1837 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1838 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1839 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1840 0xfe, /* illegal byte altogether */
1841 0x62
1842 };
1843
1844 /* expected error test results */
1845 static const uint32_t results2[]={
1846 /* number of bytes read, code point */
1847 1, 0x61,
1848 34, 0x62
1849 };
1850
1851 UConverterToUCallback cb;
1852 const void *p;
1853
1854 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1855 UErrorCode errorCode=U_ZERO_ERROR;
1856 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1857 if(U_FAILURE(errorCode)) {
1858 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1859 return;
1860 }
1861 TestNextUChar(cnv, source, limit, results, "CESU-8");
1862 /* Test the condition when source >= sourceLimit */
1863 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1864
1865 /* test error behavior with a skip callback */
1866 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1867 source=(const char *)in2;
1868 limit=(const char *)(in2+sizeof(in2));
1869 TestNextUChar(cnv, source, limit, results2, "CESU-8");
1870
1871 ucnv_close(cnv);
1872 }
1873
1874 void
1875 static TestUTF16() {
1876 /* test input */
1877 static const uint8_t in1[]={
1878 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1879 };
1880 static const uint8_t in2[]={
1881 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1882 };
1883 static const uint8_t in3[]={
1884 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1885 };
1886
1887 /* expected test results */
1888 static const uint32_t results1[]={
1889 /* number of bytes read, code point */
1890 4, 0x4e00,
1891 2, 0xfeff
1892 };
1893 static const uint32_t results2[]={
1894 /* number of bytes read, code point */
1895 4, 0x004e,
1896 2, 0xfffe
1897 };
1898 static const uint32_t results3[]={
1899 /* number of bytes read, code point */
1900 2, 0xfefe,
1901 2, 0x4e00,
1902 2, 0xfeff,
1903 4, 0x20001
1904 };
1905
1906 const char *source, *limit;
1907
1908 UErrorCode errorCode=U_ZERO_ERROR;
1909 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
1910 if(U_FAILURE(errorCode)) {
1911 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
1912 return;
1913 }
1914
1915 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
1916 TestNextUChar(cnv, source, limit, results1, "UTF-16");
1917
1918 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
1919 ucnv_resetToUnicode(cnv);
1920 TestNextUChar(cnv, source, limit, results2, "UTF-16");
1921
1922 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
1923 ucnv_resetToUnicode(cnv);
1924 TestNextUChar(cnv, source, limit, results3, "UTF-16");
1925
1926 /* Test the condition when source >= sourceLimit */
1927 ucnv_resetToUnicode(cnv);
1928 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1929
1930 ucnv_close(cnv);
1931 }
1932
1933 void
1934 static TestUTF16BE() {
1935 /* test input */
1936 static const uint8_t in[]={
1937 0x00, 0x61,
1938 0x00, 0xc0,
1939 0x00, 0x31,
1940 0x00, 0xf4,
1941 0xce, 0xfe,
1942 0xd8, 0x01, 0xdc, 0x01
1943 };
1944
1945 /* expected test results */
1946 static const uint32_t results[]={
1947 /* number of bytes read, code point */
1948 2, 0x61,
1949 2, 0xc0,
1950 2, 0x31,
1951 2, 0xf4,
1952 2, 0xcefe,
1953 4, 0x10401
1954 };
1955
1956 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1957 UErrorCode errorCode=U_ZERO_ERROR;
1958 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
1959 if(U_FAILURE(errorCode)) {
1960 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
1961 return;
1962 }
1963 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
1964 /* Test the condition when source >= sourceLimit */
1965 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1966 /*Test for the condition where there is an invalid character*/
1967 {
1968 static const uint8_t source2[]={0x61};
1969 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
1970 }
1971 /*Test for the condition where there is a surrogate pair*/
1972 {
1973 const uint8_t source2[]={0xd8, 0x01};
1974 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
1975 }
1976 ucnv_close(cnv);
1977 }
1978
1979 static void
1980 TestUTF16LE() {
1981 /* test input */
1982 static const uint8_t in[]={
1983 0x61, 0x00,
1984 0x31, 0x00,
1985 0x4e, 0x2e,
1986 0x4e, 0x00,
1987 0x01, 0xd8, 0x01, 0xdc
1988 };
1989
1990 /* expected test results */
1991 static const uint32_t results[]={
1992 /* number of bytes read, code point */
1993 2, 0x61,
1994 2, 0x31,
1995 2, 0x2e4e,
1996 2, 0x4e,
1997 4, 0x10401
1998 };
1999
2000 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2001 UErrorCode errorCode=U_ZERO_ERROR;
2002 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2003 if(U_FAILURE(errorCode)) {
2004 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2005 return;
2006 }
2007 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2008 /* Test the condition when source >= sourceLimit */
2009 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2010 /*Test for the condition where there is an invalid character*/
2011 {
2012 static const uint8_t source2[]={0x61};
2013 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2014 }
2015 /*Test for the condition where there is a surrogate character*/
2016 {
2017 static const uint8_t source2[]={0x01, 0xd8};
2018 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2019 }
2020
2021 ucnv_close(cnv);
2022 }
2023
2024 void
2025 static TestUTF32() {
2026 /* test input */
2027 static const uint8_t in1[]={
2028 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2029 };
2030 static const uint8_t in2[]={
2031 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2032 };
2033 static const uint8_t in3[]={
2034 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2035 };
2036
2037 /* expected test results */
2038 static const uint32_t results1[]={
2039 /* number of bytes read, code point */
2040 8, 0x100f00,
2041 4, 0xfeff
2042 };
2043 static const uint32_t results2[]={
2044 /* number of bytes read, code point */
2045 8, 0x0f1000,
2046 4, 0xfffe
2047 };
2048 static const uint32_t results3[]={
2049 /* number of bytes read, code point */
2050 4, 0xfefe,
2051 4, 0x100f00,
2052 4, 0xd840,
2053 4, 0xdc01
2054 };
2055
2056 const char *source, *limit;
2057
2058 UErrorCode errorCode=U_ZERO_ERROR;
2059 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2060 if(U_FAILURE(errorCode)) {
2061 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2062 return;
2063 }
2064
2065 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2066 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2067
2068 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2069 ucnv_resetToUnicode(cnv);
2070 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2071
2072 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2073 ucnv_resetToUnicode(cnv);
2074 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2075
2076 /* Test the condition when source >= sourceLimit */
2077 ucnv_resetToUnicode(cnv);
2078 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079
2080 ucnv_close(cnv);
2081 }
2082
2083 static void
2084 TestUTF32BE() {
2085 /* test input */
2086 static const uint8_t in[]={
2087 0x00, 0x00, 0x00, 0x61,
2088 0x00, 0x00, 0xdc, 0x00,
2089 0x00, 0x00, 0xd8, 0x00,
2090 0x00, 0x00, 0xdf, 0xff,
2091 0x00, 0x00, 0xff, 0xfd,
2092 0x00, 0x10, 0xab, 0xcd,
2093 0x00, 0x10, 0xff, 0xff
2094 };
2095
2096 /* expected test results */
2097 static const uint32_t results[]={
2098 /* number of bytes read, code point */
2099 4, 0x61,
2100 4, 0xdc00,
2101 4, 0xd800,
2102 4, 0xdfff,
2103 4, 0xfffd,
2104 4, 0x10abcd,
2105 4, 0x10ffff
2106 };
2107
2108 /* error test input */
2109 static const uint8_t in2[]={
2110 0x00, 0x00, 0x00, 0x61,
2111 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2112 0x00, 0x00, 0x00, 0x62,
2113 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2114 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2115 0x00, 0x00, 0x01, 0x62,
2116 0x00, 0x00, 0x02, 0x62
2117 };
2118
2119 /* expected error test results */
2120 static const uint32_t results2[]={
2121 /* number of bytes read, code point */
2122 4, 0x61,
2123 8, 0x62,
2124 12, 0x162,
2125 4, 0x262
2126 };
2127
2128 UConverterToUCallback cb;
2129 const void *p;
2130
2131 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2132 UErrorCode errorCode=U_ZERO_ERROR;
2133 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2134 if(U_FAILURE(errorCode)) {
2135 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2136 return;
2137 }
2138 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2139
2140 /* Test the condition when source >= sourceLimit */
2141 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2142
2143 /* test error behavior with a skip callback */
2144 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2145 source=(const char *)in2;
2146 limit=(const char *)(in2+sizeof(in2));
2147 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2148
2149 ucnv_close(cnv);
2150 }
2151
2152 static void
2153 TestUTF32LE() {
2154 /* test input */
2155 static const uint8_t in[]={
2156 0x61, 0x00, 0x00, 0x00,
2157 0x00, 0xdc, 0x00, 0x00,
2158 0x00, 0xd8, 0x00, 0x00,
2159 0xff, 0xdf, 0x00, 0x00,
2160 0xfd, 0xff, 0x00, 0x00,
2161 0xcd, 0xab, 0x10, 0x00,
2162 0xff, 0xff, 0x10, 0x00
2163 };
2164
2165 /* expected test results */
2166 static const uint32_t results[]={
2167 /* number of bytes read, code point */
2168 4, 0x61,
2169 4, 0xdc00,
2170 4, 0xd800,
2171 4, 0xdfff,
2172 4, 0xfffd,
2173 4, 0x10abcd,
2174 4, 0x10ffff
2175 };
2176
2177 /* error test input */
2178 static const uint8_t in2[]={
2179 0x61, 0x00, 0x00, 0x00,
2180 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2181 0x62, 0x00, 0x00, 0x00,
2182 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2183 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2184 0x62, 0x01, 0x00, 0x00,
2185 0x62, 0x02, 0x00, 0x00,
2186 };
2187
2188 /* expected error test results */
2189 static const uint32_t results2[]={
2190 /* number of bytes read, code point */
2191 4, 0x61,
2192 8, 0x62,
2193 12, 0x162,
2194 4, 0x262,
2195 };
2196
2197 UConverterToUCallback cb;
2198 const void *p;
2199
2200 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2201 UErrorCode errorCode=U_ZERO_ERROR;
2202 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2203 if(U_FAILURE(errorCode)) {
2204 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2205 return;
2206 }
2207 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2208
2209 /* Test the condition when source >= sourceLimit */
2210 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2211
2212 /* test error behavior with a skip callback */
2213 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2214 source=(const char *)in2;
2215 limit=(const char *)(in2+sizeof(in2));
2216 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2217
2218 ucnv_close(cnv);
2219 }
2220
2221 static void
2222 TestLATIN1() {
2223 /* test input */
2224 static const uint8_t in[]={
2225 0x61,
2226 0x31,
2227 0x32,
2228 0xc0,
2229 0xf0,
2230 0xf4,
2231 };
2232
2233 /* expected test results */
2234 static const uint32_t results[]={
2235 /* number of bytes read, code point */
2236 1, 0x61,
2237 1, 0x31,
2238 1, 0x32,
2239 1, 0xc0,
2240 1, 0xf0,
2241 1, 0xf4,
2242 };
2243 static const uint16_t in1[] = {
2244 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2245 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2246 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2247 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2248 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2249 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2250 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2251 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2252 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2253 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2254 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2255 0xcb, 0x82
2256 };
2257 static const uint8_t out1[] = {
2258 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2259 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2260 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2261 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2262 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2263 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2264 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2265 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2266 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2267 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2268 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2269 0xcb, 0x82
2270 };
2271 static const uint16_t in2[]={
2272 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2273 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2274 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2275 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2276 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2277 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2278 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2279 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2280 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2281 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2282 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2283 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2284 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2285 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2286 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2287 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2288 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2289 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2290 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2291 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2292 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2293 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2294 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2295 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2296 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2297 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2298 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2299 0x37, 0x20, 0x2A, 0x2F,
2300 };
2301 static const unsigned char out2[]={
2302 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2303 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2304 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2305 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2306 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2307 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2308 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2309 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2310 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2311 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2312 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2313 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2314 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2315 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2316 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2317 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2318 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2319 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2320 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2321 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2322 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2323 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2324 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2325 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2326 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2327 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2328 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2329 0x37, 0x20, 0x2A, 0x2F,
2330 };
2331 const char *source=(const char *)in;
2332 const char *limit=(const char *)in+sizeof(in);
2333
2334 UErrorCode errorCode=U_ZERO_ERROR;
2335 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2336 if(U_FAILURE(errorCode)) {
2337 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2338 return;
2339 }
2340 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2341 /* Test the condition when source >= sourceLimit */
2342 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2343 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2344 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2345
2346 ucnv_close(cnv);
2347 }
2348
2349 static void
2350 TestSBCS() {
2351 /* test input */
2352 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2353 /* expected test results */
2354 static const uint32_t results[]={
2355 /* number of bytes read, code point */
2356 1, 0x61,
2357 1, 0xbf,
2358 1, 0xc4,
2359 1, 0x2021,
2360 1, 0xf8ff,
2361 1, 0x00d9
2362 };
2363
2364 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2365 UErrorCode errorCode=U_ZERO_ERROR;
2366 UConverter *cnv=ucnv_open("ibm-1281", &errorCode);
2367 if(U_FAILURE(errorCode)) {
2368 log_data_err("Unable to open a SBCS(ibm-1281) converter: %s\n", u_errorName(errorCode));
2369 return;
2370 }
2371 TestNextUChar(cnv, source, limit, results, "SBCS(ibm-1281)");
2372 /* Test the condition when source >= sourceLimit */
2373 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2374 /*Test for Illegal character */ /*
2375 {
2376 static const uint8_t input1[]={ 0xA1 };
2377 const char* illegalsource=(const char*)input1;
2378 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2379 }
2380 */
2381 ucnv_close(cnv);
2382 }
2383
2384 static void
2385 TestDBCS() {
2386 /* test input */
2387 static const uint8_t in[]={
2388 0x44, 0x6a,
2389 0xc4, 0x9c,
2390 0x7a, 0x74,
2391 0x46, 0xab,
2392 0x42, 0x5b,
2393
2394 };
2395
2396 /* expected test results */
2397 static const uint32_t results[]={
2398 /* number of bytes read, code point */
2399 2, 0x00a7,
2400 2, 0xe1d2,
2401 2, 0x6962,
2402 2, 0xf842,
2403 2, 0xffe5,
2404 };
2405
2406 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2407 UErrorCode errorCode=U_ZERO_ERROR;
2408
2409 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2410 if(U_FAILURE(errorCode)) {
2411 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2412 return;
2413 }
2414 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2415 /* Test the condition when source >= sourceLimit */
2416 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2417 /*Test for the condition where we have a truncated char*/
2418 {
2419 static const uint8_t source1[]={0xc4};
2420 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2421 }
2422 /*Test for the condition where there is an invalid character*/
2423 {
2424 static const uint8_t source2[]={0x1a, 0x1b};
2425 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2426 }
2427 ucnv_close(cnv);
2428 }
2429
2430 static void
2431 TestMBCS() {
2432 /* test input */
2433 static const uint8_t in[]={
2434 0x01,
2435 0xa6, 0xa3,
2436 0x00,
2437 0xa6, 0xa1,
2438 0x08,
2439 0xc2, 0x76,
2440 0xc2, 0x78,
2441
2442 };
2443
2444 /* expected test results */
2445 static const uint32_t results[]={
2446 /* number of bytes read, code point */
2447 1, 0x0001,
2448 2, 0x250c,
2449 1, 0x0000,
2450 2, 0x2500,
2451 1, 0x0008,
2452 2, 0xd60c,
2453 2, 0xd60e,
2454 };
2455
2456 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2457 UErrorCode errorCode=U_ZERO_ERROR;
2458
2459 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2460 if(U_FAILURE(errorCode)) {
2461 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2462 return;
2463 }
2464 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2465 /* Test the condition when source >= sourceLimit */
2466 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2467 /*Test for the condition where we have a truncated char*/
2468 {
2469 static const uint8_t source1[]={0xc4};
2470 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2471 }
2472 /*Test for the condition where there is an invalid character*/
2473 {
2474 static const uint8_t source2[]={0xa1, 0x01};
2475 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2476 }
2477 ucnv_close(cnv);
2478
2479 }
2480
2481 static void
2482 TestISO_2022() {
2483 /* test input */
2484 static const uint8_t in[]={
2485 0x1b, 0x25, 0x42, 0x31,
2486 0x32,
2487 0x61,
2488 0xc2, 0x80,
2489 0xe0, 0xa0, 0x80,
2490 0xf0, 0x90, 0x80, 0x80
2491 };
2492
2493
2494
2495 /* expected test results */
2496 static const uint32_t results[]={
2497 /* number of bytes read, code point */
2498 4, 0x0031,
2499 1, 0x0032,
2500 1, 0x61,
2501 2, 0x80,
2502 3, 0x800,
2503 4, 0x10000,
2504
2505 };
2506
2507 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2508 UErrorCode errorCode=U_ZERO_ERROR;
2509 UConverter *cnv;
2510
2511 cnv=ucnv_open("ISO_2022", &errorCode);
2512 if(U_FAILURE(errorCode)) {
2513 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2514 return;
2515 }
2516 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2517
2518 /* Test the condition when source >= sourceLimit */
2519 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2520 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2521 /*Test for the condition where we have a truncated char*/
2522 {
2523 static const uint8_t source1[]={0xc4};
2524 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2525 }
2526 /*Test for the condition where there is an invalid character*/
2527 {
2528 static const uint8_t source2[]={0xa1, 0x01};
2529 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2530 }
2531 ucnv_close(cnv);
2532 }
2533
2534 static void
2535 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2536 const UChar* uSource;
2537 const UChar* uSourceLimit;
2538 const char* cSource;
2539 const char* cSourceLimit;
2540 UChar *uTargetLimit =NULL;
2541 UChar *uTarget;
2542 char *cTarget;
2543 const char *cTargetLimit;
2544 char *cBuf;
2545 UChar *uBuf,*test;
2546 int32_t uBufSize = 120;
2547 int len=0;
2548 int i=2;
2549 UErrorCode errorCode=U_ZERO_ERROR;
2550 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2551 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2552 ucnv_reset(cnv);
2553 for(;--i>0; ){
2554 uSource = (UChar*) source;
2555 uSourceLimit=(const UChar*)sourceLimit;
2556 cTarget = cBuf;
2557 uTarget = uBuf;
2558 cSource = cBuf;
2559 cTargetLimit = cBuf;
2560 uTargetLimit = uBuf;
2561
2562 do{
2563
2564 cTargetLimit = cTargetLimit+ i;
2565 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2566 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2567 errorCode=U_ZERO_ERROR;
2568 continue;
2569 }
2570
2571 if(U_FAILURE(errorCode)){
2572 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2573 return;
2574 }
2575
2576 }while (uSource<uSourceLimit);
2577
2578 cSourceLimit =cTarget;
2579 do{
2580 uTargetLimit=uTargetLimit+i;
2581 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2582 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2583 errorCode=U_ZERO_ERROR;
2584 continue;
2585 }
2586 if(U_FAILURE(errorCode)){
2587 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2588 return;
2589 }
2590 }while(cSource<cSourceLimit);
2591
2592 uSource = source;
2593 test =uBuf;
2594 for(len=0;len<(int)(source - sourceLimit);len++){
2595 if(uBuf[len]!=uSource[len]){
2596 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2597 }
2598 }
2599 }
2600 free(uBuf);
2601 free(cBuf);
2602 }
2603 /* Test for Jitterbug 778 */
2604 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2605 const UChar* uSource;
2606 const UChar* uSourceLimit;
2607 const char* cSource;
2608 UChar *uTargetLimit =NULL;
2609 UChar *uTarget;
2610 char *cTarget;
2611 const char *cTargetLimit;
2612 char *cBuf;
2613 UChar *uBuf,*test;
2614 int32_t uBufSize = 120;
2615 int numCharsInTarget=0;
2616 UErrorCode errorCode=U_ZERO_ERROR;
2617 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2618 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2619 uSource = source;
2620 uSourceLimit=sourceLimit;
2621 cTarget = cBuf;
2622 cTargetLimit = cBuf +uBufSize*5;
2623 uTarget = uBuf;
2624 uTargetLimit = uBuf+ uBufSize*5;
2625 ucnv_reset(cnv);
2626 numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (cTargetLimit-cTarget),uSource,(uSourceLimit-uSource), &errorCode);
2627 if(U_FAILURE(errorCode)){
2628 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2629 return;
2630 }
2631 cSource = cBuf;
2632 test =uBuf;
2633 ucnv_toUChars(cnv,uTarget,(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2634 if(U_FAILURE(errorCode)){
2635 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2636 return;
2637 }
2638 uSource = source;
2639 while(uSource<uSourceLimit){
2640 if(*test!=*uSource){
2641
2642 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2643 }
2644 uSource++;
2645 test++;
2646 }
2647 free(uBuf);
2648 free(cBuf);
2649 }
2650
2651 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2652 const UChar* uSource;
2653 const UChar* uSourceLimit;
2654 const char* cSource;
2655 const char* cSourceLimit;
2656 UChar *uTargetLimit =NULL;
2657 UChar *uTarget;
2658 char *cTarget;
2659 const char *cTargetLimit;
2660 char *cBuf;
2661 UChar *uBuf,*test;
2662 int32_t uBufSize = 120;
2663 int len=0;
2664 int i=2;
2665 const UChar *temp = sourceLimit;
2666 UErrorCode errorCode=U_ZERO_ERROR;
2667 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2668 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2669
2670 ucnv_reset(cnv);
2671 for(;--i>0;){
2672 uSource = (UChar*) source;
2673 cTarget = cBuf;
2674 uTarget = uBuf;
2675 cSource = cBuf;
2676 cTargetLimit = cBuf;
2677 uTargetLimit = uBuf+uBufSize*5;
2678 cTargetLimit = cTargetLimit+uBufSize*10;
2679 uSourceLimit=uSource;
2680 do{
2681
2682 if (uSourceLimit < sourceLimit) {
2683 uSourceLimit = uSourceLimit+1;
2684 }
2685 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2686 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2687 errorCode=U_ZERO_ERROR;
2688 continue;
2689 }
2690
2691 if(U_FAILURE(errorCode)){
2692 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2693 return;
2694 }
2695
2696 }while (uSource<temp);
2697
2698 cSourceLimit =cBuf;
2699 do{
2700 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2701 cSourceLimit = cSourceLimit+1;
2702 }
2703 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2704 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2705 errorCode=U_ZERO_ERROR;
2706 continue;
2707 }
2708 if(U_FAILURE(errorCode)){
2709 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2710 return;
2711 }
2712 }while(cSource<cTarget);
2713
2714 uSource = source;
2715 test =uBuf;
2716 for(;len<(int)(source - sourceLimit);len++){
2717 if(uBuf[len]!=uSource[len]){
2718 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2719 }
2720 }
2721 }
2722 free(uBuf);
2723 free(cBuf);
2724 }
2725 static void
2726 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2727 const uint16_t results[], const char* message){
2728 const char* s0;
2729 const char* s=(char*)source;
2730 const uint16_t *r=results;
2731 UErrorCode errorCode=U_ZERO_ERROR;
2732 uint32_t c,exC;
2733 ucnv_reset(cnv);
2734 while(s<limit) {
2735 s0=s;
2736 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2737 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2738 break; /* no more significant input */
2739 } else if(U_FAILURE(errorCode)) {
2740 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2741 break;
2742 } else {
2743 if(UTF_IS_FIRST_SURROGATE(*r)){
2744 int i =0, len = 2;
2745 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2746 r++;
2747 }else{
2748 exC = *r;
2749 }
2750 if(c!=(uint32_t)(exC))
2751 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2752 }
2753 r++;
2754 }
2755 }
2756
2757 static int TestJitterbug930(const char* enc){
2758 UErrorCode err = U_ZERO_ERROR;
2759 UConverter*converter;
2760 char out[80];
2761 char*target = out;
2762 UChar in[4];
2763 const UChar*source = in;
2764 int32_t off[80];
2765 int32_t* offsets = off;
2766 int numOffWritten=0;
2767 UBool flush = 0;
2768 converter = my_ucnv_open(enc, &err);
2769
2770 in[0] = 0x41; /* 0x4E00;*/
2771 in[1] = 0x4E01;
2772 in[2] = 0x4E02;
2773 in[3] = 0x4E03;
2774
2775 memset(off, '*', sizeof(off));
2776
2777 ucnv_fromUnicode (converter,
2778 &target,
2779 target+2,
2780 &source,
2781 source+3,
2782 offsets,
2783 flush,
2784 &err);
2785
2786 /* writes three bytes into the output buffer: 41 1B 24
2787 * but offsets contains 0 1 1
2788 */
2789 while(*offsets< off[10]){
2790 numOffWritten++;
2791 offsets++;
2792 }
2793 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2794 if(numOffWritten!= (int)(target-out)){
2795 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2796 }
2797
2798 err = U_ZERO_ERROR;
2799
2800 memset(off,'*' , sizeof(off));
2801
2802 flush = 1;
2803 offsets=off;
2804 ucnv_fromUnicode (converter,
2805 &target,
2806 target+4,
2807 &source,
2808 source,
2809 offsets,
2810 flush,
2811 &err);
2812 numOffWritten=0;
2813 while(*offsets< off[10]){
2814 numOffWritten++;
2815 if(*offsets!= -1){
2816 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2817 }
2818 offsets++;
2819 }
2820
2821 /* writes 42 43 7A into output buffer,
2822 * offsets contains -1 -1 -1
2823 */
2824 ucnv_close(converter);
2825 return 0;
2826 }
2827
2828 static void
2829 TestHZ() {
2830 /* test input */
2831 static const uint16_t in[]={
2832 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2833 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2834 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2835 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2836 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2837 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2838 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2839 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2840 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2841 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2842 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2843 0x005A, 0x005B, 0x005C, 0x000A
2844 };
2845 const UChar* uSource;
2846 const UChar* uSourceLimit;
2847 const char* cSource;
2848 const char* cSourceLimit;
2849 UChar *uTargetLimit =NULL;
2850 UChar *uTarget;
2851 char *cTarget;
2852 const char *cTargetLimit;
2853 char *cBuf;
2854 UChar *uBuf,*test;
2855 int32_t uBufSize = 120;
2856 UErrorCode errorCode=U_ZERO_ERROR;
2857 UConverter *cnv;
2858 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
2859 int32_t* myOff= offsets;
2860 cnv=ucnv_open("HZ", &errorCode);
2861 if(U_FAILURE(errorCode)) {
2862 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
2863 return;
2864 }
2865
2866 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2867 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2868 uSource = (const UChar*)&in[0];
2869 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
2870 cTarget = cBuf;
2871 cTargetLimit = cBuf +uBufSize*5;
2872 uTarget = uBuf;
2873 uTargetLimit = uBuf+ uBufSize*5;
2874 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
2875 if(U_FAILURE(errorCode)){
2876 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2877 return;
2878 }
2879 cSource = cBuf;
2880 cSourceLimit =cTarget;
2881 test =uBuf;
2882 myOff=offsets;
2883 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
2884 if(U_FAILURE(errorCode)){
2885 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2886 return;
2887 }
2888 uSource = (const UChar*)&in[0];
2889 while(uSource<uSourceLimit){
2890 if(*test!=*uSource){
2891
2892 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893 }
2894 uSource++;
2895 test++;
2896 }
2897 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
2898 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2899 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2900 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2901 TestJitterbug930("csISO2022JP");
2902 ucnv_close(cnv);
2903 free(offsets);
2904 free(uBuf);
2905 free(cBuf);
2906 }
2907
2908 static void
2909 TestISCII(){
2910 /* test input */
2911 static const uint16_t in[]={
2912 /* test full range of Devanagari */
2913 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
2914 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
2915 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
2916 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
2917 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
2918 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2919 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2920 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2921 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2922 0x096D,0x096E,0x096F,
2923 /* test Soft halant*/
2924 0x0915,0x094d, 0x200D,
2925 /* test explicit halant */
2926 0x0915,0x094d, 0x200c,
2927 /* test double danda */
2928 0x965,
2929 /* test ASCII */
2930 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2931 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2932 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2933 /* tests from Lotus */
2934 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
2935 0x0930,0x094D,0x200D,
2936 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
2937 0x0915,0x0921,0x002B,0x095F,
2938 /* tamil range */
2939 0x0B86, 0xB87, 0xB88,
2940 /* telugu range */
2941 0x0C05, 0x0C02, 0x0C03,0x0c31,
2942 /* kannada range */
2943 0x0C85, 0xC82, 0x0C83,
2944 /* test Abbr sign and Anudatta */
2945 0x0970, 0x952,
2946 /* 0x0958,
2947 0x0959,
2948 0x095A,
2949 0x095B,
2950 0x095C,
2951 0x095D,
2952 0x095E,
2953 0x095F,*/
2954 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
2955 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
2956 0x090C ,
2957 0x0962,
2958 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
2959 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
2960 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
2961 0x093D /* Avagraha 0xEA, 0xE9*/,
2962 0x0958,
2963 0x0959,
2964 0x095A,
2965 0x095B,
2966 0x095C,
2967 0x095D,
2968 0x095E,
2969 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
2970 };
2971 static const unsigned char byteArr[]={
2972
2973 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
2974 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
2975 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
2976 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
2977 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
2978 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
2979 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
2980 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
2981 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
2982 0xf8,0xf9,0xfa,
2983 /* test soft halant */
2984 0xb3, 0xE8, 0xE9,
2985 /* test explicit halant */
2986 0xb3, 0xE8, 0xE8,
2987 /* test double danda */
2988 0xea, 0xea,
2989 /* test ASCII */
2990 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2991 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2992 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2993 /* test ATR code */
2994
2995 /* tests from Lotus */
2996 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
2997 0xEF,0x42,0xCF,0xE8,0xD9,
2998 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
2999 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3000 /* tamil range */
3001 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3002 /* telugu range */
3003 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3004 /* kannada range */
3005 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3006 /* anudatta and abbreviation sign */
3007 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3008
3009
3010 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3011
3012 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3013
3014 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3015
3016 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3017
3018 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3019
3020 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3021
3022 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3023
3024 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3025
3026 0xB3, 0xE9, /* Ka + NUKTA */
3027
3028 0xB4, 0xE9, /* Kha + NUKTA */
3029
3030 0xB5, 0xE9, /* Ga + NUKTA */
3031
3032 0xBA, 0xE9,
3033
3034 0xBF, 0xE9,
3035
3036 0xC0, 0xE9,
3037
3038 0xC9, 0xE9,
3039 /* INV halant RA */
3040 0xD9, 0xE8, 0xCF,
3041 0x00, 0x00A0,
3042 /* just consume unhandled codepoints */
3043 0xEF, 0x30,
3044
3045 };
3046 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3047 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3048
3049 }
3050
3051 static void
3052 TestISO_2022_JP() {
3053 /* test input */
3054 static const uint16_t in[]={
3055 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3056 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3057 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3058 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3059 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3060 0x201D, 0x3014, 0x000D, 0x000A,
3061 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3062 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3063 };
3064 const UChar* uSource;
3065 const UChar* uSourceLimit;
3066 const char* cSource;
3067 const char* cSourceLimit;
3068 UChar *uTargetLimit =NULL;
3069 UChar *uTarget;
3070 char *cTarget;
3071 const char *cTargetLimit;
3072 char *cBuf;
3073 UChar *uBuf,*test;
3074 int32_t uBufSize = 120;
3075 UErrorCode errorCode=U_ZERO_ERROR;
3076 UConverter *cnv;
3077 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3078 int32_t* myOff= offsets;
3079 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3080 if(U_FAILURE(errorCode)) {
3081 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3082 return;
3083 }
3084
3085 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3086 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3087 uSource = (const UChar*)&in[0];
3088 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3089 cTarget = cBuf;
3090 cTargetLimit = cBuf +uBufSize*5;
3091 uTarget = uBuf;
3092 uTargetLimit = uBuf+ uBufSize*5;
3093 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3094 if(U_FAILURE(errorCode)){
3095 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3096 return;
3097 }
3098 cSource = cBuf;
3099 cSourceLimit =cTarget;
3100 test =uBuf;
3101 myOff=offsets;
3102 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3103 if(U_FAILURE(errorCode)){
3104 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3105 return;
3106 }
3107
3108 uSource = (const UChar*)&in[0];
3109 while(uSource<uSourceLimit){
3110 if(*test!=*uSource){
3111
3112 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3113 }
3114 uSource++;
3115 test++;
3116 }
3117
3118 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3119 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3120 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3121 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3122 TestJitterbug930("csISO2022JP");
3123 ucnv_close(cnv);
3124 free(uBuf);
3125 free(cBuf);
3126 free(offsets);
3127 }
3128
3129 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3130 const UChar* uSource;
3131 const UChar* uSourceLimit;
3132 const char* cSource;
3133 const char* cSourceLimit;
3134 UChar *uTargetLimit =NULL;
3135 UChar *uTarget;
3136 char *cTarget;
3137 const char *cTargetLimit;
3138 char *cBuf;
3139 UChar *uBuf,*test;
3140 int32_t uBufSize = 120*10;
3141 UErrorCode errorCode=U_ZERO_ERROR;
3142 UConverter *cnv;
3143 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3144 int32_t* myOff= offsets;
3145 cnv=my_ucnv_open(conv, &errorCode);
3146 if(U_FAILURE(errorCode)) {
3147 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3148 return;
3149 }
3150
3151 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3152 cBuf =(char*)malloc(uBufSize * sizeof(char));
3153 uSource = (const UChar*)&in[0];
3154 uSourceLimit=uSource+len;
3155 cTarget = cBuf;
3156 cTargetLimit = cBuf +uBufSize;
3157 uTarget = uBuf;
3158 uTargetLimit = uBuf+ uBufSize;
3159 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3160 if(U_FAILURE(errorCode)){
3161 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3162 return;
3163 }
3164 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3165 cSource = cBuf;
3166 cSourceLimit =cTarget;
3167 test =uBuf;
3168 myOff=offsets;
3169 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3170 if(U_FAILURE(errorCode)){
3171 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3172 return;
3173 }
3174
3175 uSource = (const UChar*)&in[0];
3176 while(uSource<uSourceLimit){
3177 if(*test!=*uSource){
3178 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3179 }
3180 uSource++;
3181 test++;
3182 }
3183 TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
3184 TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
3185 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3186 if(byteArr && byteArrLen!=0){
3187 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3188 TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
3189 {
3190 cSource = byteArr;
3191 cSourceLimit = cSource+byteArrLen;
3192 test=uBuf;
3193 myOff = offsets;
3194 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3195 if(U_FAILURE(errorCode)){
3196 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3197 return;
3198 }
3199
3200 uSource = (const UChar*)&in[0];
3201 while(uSource<uSourceLimit){
3202 if(*test!=*uSource){
3203 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3204 }
3205 uSource++;
3206 test++;
3207 }
3208 }
3209 }
3210
3211 ucnv_close(cnv);
3212 free(uBuf);
3213 free(cBuf);
3214 free(offsets);
3215 }
3216 static UChar U_CALLCONV
3217 _charAt(int32_t offset, void *context) {
3218 return ((char*)context)[offset];
3219 }
3220
3221 static int32_t
3222 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3223 int32_t srcIndex=0;
3224 int32_t dstIndex=0;
3225 if(U_FAILURE(*status)){
3226 return 0;
3227 }
3228 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3229 *status = U_ILLEGAL_ARGUMENT_ERROR;
3230 return 0;
3231 }
3232 if(srcLen==-1){
3233 srcLen = uprv_strlen(src);
3234 }
3235
3236 for (; srcIndex<srcLen; ) {
3237 UChar32 c = src[srcIndex++];
3238 if (c == 0x005C /*'\\'*/) {
3239 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3240 if (c == (UChar32)0xFFFFFFFF) {
3241 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3242 break; /* invalid escape sequence */
3243 }
3244 }
3245 if(dstIndex < dstLen){
3246 if(c>0xFFFF){
3247 dst[dstIndex++] = UTF16_LEAD(c);
3248 if(dstIndex<dstLen){
3249 dst[dstIndex]=UTF16_TRAIL(c);
3250 }else{
3251 *status=U_BUFFER_OVERFLOW_ERROR;
3252 }
3253 }else{
3254 dst[dstIndex]=(UChar)c;
3255 }
3256
3257 }else{
3258 *status = U_BUFFER_OVERFLOW_ERROR;
3259 }
3260 dstIndex++; /* for preflighting */
3261 }
3262 return dstIndex;
3263 }
3264
3265 static void
3266 TestFullRoundtrip(const char* cp){
3267 UChar usource[10] ={0};
3268 UChar nsrc[10] = {0};
3269 uint32_t i=1;
3270 int len=0, ulen;
3271 nsrc[0]=0x0061;
3272 /* Test codepoint 0 */
3273 TestConv(usource,1,cp,"",NULL,0);
3274 TestConv(usource,2,cp,"",NULL,0);
3275 nsrc[2]=0x5555;
3276 TestConv(nsrc,3,cp,"",NULL,0);
3277
3278 for(;i<=0x10FFFF;i++){
3279 if(i==0xD800){
3280 i=0xDFFF;
3281 continue;
3282 }
3283 if(i<=0xFFFF){
3284 usource[0] =(UChar) i;
3285 len=1;
3286 }else{
3287 usource[0]=UTF16_LEAD(i);
3288 usource[1]=UTF16_TRAIL(i);
3289 len=2;
3290 }
3291 ulen=len;
3292 if(i==0x80) {
3293 usource[2]=0;
3294 }
3295 /* Test only single code points */
3296 TestConv(usource,ulen,cp,"",NULL,0);
3297 /* Test codepoint repeated twice */
3298 usource[ulen]=usource[0];
3299 usource[ulen+1]=usource[1];
3300 ulen+=len;
3301 TestConv(usource,ulen,cp,"",NULL,0);
3302 /* Test codepoint repeated 3 times */
3303 usource[ulen]=usource[0];
3304 usource[ulen+1]=usource[1];
3305 ulen+=len;
3306 TestConv(usource,ulen,cp,"",NULL,0);
3307 /* Test codepoint in between 2 codepoints */
3308 nsrc[1]=usource[0];
3309 nsrc[2]=usource[1];
3310 nsrc[len+1]=0x5555;
3311 TestConv(nsrc,len+2,cp,"",NULL,0);
3312 uprv_memset(usource,0,sizeof(UChar)*10);
3313 }
3314 }
3315
3316 static void
3317 TestRoundTrippingAllUTF(void){
3318 if(!QUICK){
3319 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3320 TestFullRoundtrip("BOCU-1");
3321 log_verbose("Running exhaustive round trip test for SCSU\n");
3322 TestFullRoundtrip("SCSU");
3323 log_verbose("Running exhaustive round trip test for UTF-8\n");
3324 TestFullRoundtrip("UTF-8");
3325 log_verbose("Running exhaustive round trip test for CESU-8\n");
3326 TestFullRoundtrip("CESU-8");
3327 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3328 TestFullRoundtrip("UTF-16BE");
3329 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3330 TestFullRoundtrip("UTF-16LE");
3331 log_verbose("Running exhaustive round trip test for UTF-16\n");
3332 TestFullRoundtrip("UTF-16");
3333 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3334 TestFullRoundtrip("UTF-32BE");
3335 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3336 TestFullRoundtrip("UTF-32LE");
3337 log_verbose("Running exhaustive round trip test for UTF-32\n");
3338 TestFullRoundtrip("UTF-32");
3339 log_verbose("Running exhaustive round trip test for UTF-7\n");
3340 TestFullRoundtrip("UTF-7");
3341 log_verbose("Running exhaustive round trip test for UTF-7\n");
3342 TestFullRoundtrip("UTF-7,version=1");
3343 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3344 TestFullRoundtrip("IMAP-mailbox-name");
3345 log_verbose("Running exhaustive round trip test for GB18030\n");
3346 TestFullRoundtrip("GB18030");
3347 }
3348 }
3349
3350 static void
3351 TestSCSU() {
3352
3353 static const uint16_t germanUTF16[]={
3354 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3355 };
3356
3357 static const uint8_t germanSCSU[]={
3358 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3359 };
3360
3361 static const uint16_t russianUTF16[]={
3362 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3363 };
3364
3365 static const uint8_t russianSCSU[]={
3366 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3367 };
3368
3369 static const uint16_t japaneseUTF16[]={
3370 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3371 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3372 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3373 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3374 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3375 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3376 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3377 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3378 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3379 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3380 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3381 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3382 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3383 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3384 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3385 };
3386
3387 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3388 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3389 static const uint8_t japaneseSCSU[]={
3390 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3391 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3392 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3393 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3394 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3395 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3396 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3397 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3398 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3399 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3400 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3401 0xcb, 0x82
3402 };
3403
3404 static const uint16_t allFeaturesUTF16[]={
3405 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3406 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3407 0x01df, 0xf000, 0xdbff, 0xdfff
3408 };
3409
3410 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3411 * result here (34B vs. 35B)
3412 */
3413 static const uint8_t allFeaturesSCSU[]={
3414 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3415 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3416 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3417 0xdf, 0x14, 0x80, 0x15, 0xff
3418 };
3419 static const uint16_t monkeyIn[]={
3420 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3421 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3422 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3423 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3424 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3425 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3426 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3427 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3428 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3429 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3430 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3431 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3432 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3433 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3434 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3435 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3436 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3437 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3438 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3439 /* test non-BMP code points */
3440 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3441 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3442 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3443 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3444 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3445 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3446 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3447 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3448 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3449 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3450 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3451
3452
3453 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3454 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3455 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3456 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3457 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3458 };
3459 static const char *fTestCases [] = {
3460 "\\ud800\\udc00", /* smallest surrogate*/
3461 "\\ud8ff\\udcff",
3462 "\\udBff\\udFff", /* largest surrogate pair*/
3463 "\\ud834\\udc00",
3464 "\\U0010FFFF",
3465 "Hello \\u9292 \\u9192 World!",
3466 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3467 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3468
3469 "\\u0648\\u06c8", /* catch missing reset*/
3470 "\\u0648\\u06c8",
3471
3472 "\\u4444\\uE001", /* lowest quotable*/
3473 "\\u4444\\uf2FF", /* highest quotable*/
3474 "\\u4444\\uf188\\u4444",
3475 "\\u4444\\uf188\\uf288",
3476 "\\u4444\\uf188abc\\u0429\\uf288",
3477 "\\u9292\\u2222",
3478 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3479 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3480 "Hello World!123456",
3481 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3482
3483 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3484 "abc\\u4411d", /* uses SQU*/
3485 "abc\\u4411\\u4412d",/* uses SCU*/
3486 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3487 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3488 "\\u9292\\u2222",
3489 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3490 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3491 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3492
3493 "", /* empty input*/
3494 "\\u0000", /* smallest BMP character*/
3495 "\\uFFFF", /* largest BMP character*/
3496
3497 /* regression tests*/
3498 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3499 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3500 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3501 "\\u0041\\u00df\\u0401\\u015f",
3502 "\\u9066\\u2123abc",
3503 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3504 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3505 };
3506 int i=0;
3507 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3508 const char* cSrc = fTestCases[i];
3509 UErrorCode status = U_ZERO_ERROR;
3510 int32_t cSrcLen,srcLen;
3511 UChar* src;
3512 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3513 cSrcLen= srcLen = uprv_strlen(fTestCases[i]);
3514 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3515 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3516 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3517 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3518 free(src);
3519 }
3520 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3521 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3522 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3523 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3524 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3525 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3526 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3527 }
3528 static void TestJitterbug2346(){
3529 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3530 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3531 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3532
3533 UChar uTarget[500]={'\0'};
3534 UChar* utarget=uTarget;
3535 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3536
3537 char cTarget[500]={'\0'};
3538 char* ctarget=cTarget;
3539 char* ctargetLimit=cTarget+sizeof(cTarget);
3540 const char* csource=source;
3541 UChar* temp = expected;
3542 UErrorCode err=U_ZERO_ERROR;
3543
3544 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3545 if(U_FAILURE(err)) {
3546 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3547 return;
3548 }
3549 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3550 if(U_FAILURE(err)) {
3551 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3552 return;
3553 }
3554 utargetLimit=utarget;
3555 utarget = uTarget;
3556 while(utarget<utargetLimit){
3557 if(*temp!=*utarget){
3558
3559 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3560 }
3561 utarget++;
3562 temp++;
3563 }
3564 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3565 if(U_FAILURE(err)) {
3566 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3567 return;
3568 }
3569 ctargetLimit=ctarget;
3570 ctarget =cTarget;
3571 ucnv_close(conv);
3572
3573
3574 }
3575 static void
3576 TestISO_2022_JP_1() {
3577 /* test input */
3578 static const uint16_t in[]={
3579 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3580 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3581 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3582 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3583 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3584 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3585 0x201D, 0x000D, 0x000A,
3586 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3587 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3588 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3589 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3590 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3591 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3592 };
3593 const UChar* uSource;
3594 const UChar* uSourceLimit;
3595 const char* cSource;
3596 const char* cSourceLimit;
3597 UChar *uTargetLimit =NULL;
3598 UChar *uTarget;
3599 char *cTarget;
3600 const char *cTargetLimit;
3601 char *cBuf;
3602 UChar *uBuf,*test;
3603 int32_t uBufSize = 120;
3604 UErrorCode errorCode=U_ZERO_ERROR;
3605 UConverter *cnv;
3606
3607 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3608 if(U_FAILURE(errorCode)) {
3609 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3610 return;
3611 }
3612
3613 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3614 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3615 uSource = (const UChar*)&in[0];
3616 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3617 cTarget = cBuf;
3618 cTargetLimit = cBuf +uBufSize*5;
3619 uTarget = uBuf;
3620 uTargetLimit = uBuf+ uBufSize*5;
3621 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3622 if(U_FAILURE(errorCode)){
3623 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3624 return;
3625 }
3626 cSource = cBuf;
3627 cSourceLimit =cTarget;
3628 test =uBuf;
3629 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3630 if(U_FAILURE(errorCode)){
3631 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3632 return;
3633 }
3634 uSource = (const UChar*)&in[0];
3635 while(uSource<uSourceLimit){
3636 if(*test!=*uSource){
3637
3638 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3639 }
3640 uSource++;
3641 test++;
3642 }
3643 /*ucnv_close(cnv);
3644 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3645 /*Test for the condition where there is an invalid character*/
3646 ucnv_reset(cnv);
3647 {
3648 static const uint8_t source2[]={0x0e,0x24,0x053};
3649 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3650 }
3651 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3652 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3653 ucnv_close(cnv);
3654 free(uBuf);
3655 free(cBuf);
3656 }
3657
3658 static void
3659 TestISO_2022_JP_2() {
3660 /* test input */
3661 static const uint16_t in[]={
3662 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3663 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3664 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3665 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3666 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3667 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3668 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3669 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3670 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3671 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3672 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3673 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3674 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3675 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3676 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3677 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3678 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3679 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3680 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3681 };
3682 const UChar* uSource;
3683 const UChar* uSourceLimit;
3684 const char* cSource;
3685 const char* cSourceLimit;
3686 UChar *uTargetLimit =NULL;
3687 UChar *uTarget;
3688 char *cTarget;
3689 const char *cTargetLimit;
3690 char *cBuf;
3691 UChar *uBuf,*test;
3692 int32_t uBufSize = 120;
3693 UErrorCode errorCode=U_ZERO_ERROR;
3694 UConverter *cnv;
3695 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3696 int32_t* myOff= offsets;
3697 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3698 if(U_FAILURE(errorCode)) {
3699 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3700 return;
3701 }
3702
3703 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3704 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3705 uSource = (const UChar*)&in[0];
3706 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3707 cTarget = cBuf;
3708 cTargetLimit = cBuf +uBufSize*5;
3709 uTarget = uBuf;
3710 uTargetLimit = uBuf+ uBufSize*5;
3711 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3712 if(U_FAILURE(errorCode)){
3713 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3714 return;
3715 }
3716 cSource = cBuf;
3717 cSourceLimit =cTarget;
3718 test =uBuf;
3719 myOff=offsets;
3720 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3721 if(U_FAILURE(errorCode)){
3722 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3723 return;
3724 }
3725 uSource = (const UChar*)&in[0];
3726 while(uSource<uSourceLimit){
3727 if(*test!=*uSource){
3728
3729 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3730 }
3731 uSource++;
3732 test++;
3733 }
3734 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3735 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3736 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3737 /*Test for the condition where there is an invalid character*/
3738 ucnv_reset(cnv);
3739 {
3740 static const uint8_t source2[]={0x0e,0x24,0x053};
3741 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3742 }
3743 ucnv_close(cnv);
3744 free(uBuf);
3745 free(cBuf);
3746 free(offsets);
3747 }
3748
3749 static void
3750 TestISO_2022_KR() {
3751 /* test input */
3752 static const uint16_t in[]={
3753 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3754 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3755 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3756 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3757 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3758 ,0x53E3,0x53E4,0x000A,0x000D};
3759 const UChar* uSource;
3760 const UChar* uSourceLimit;
3761 const char* cSource;
3762 const char* cSourceLimit;
3763 UChar *uTargetLimit =NULL;
3764 UChar *uTarget;
3765 char *cTarget;
3766 const char *cTargetLimit;
3767 char *cBuf;
3768 UChar *uBuf,*test;
3769 int32_t uBufSize = 120;
3770 UErrorCode errorCode=U_ZERO_ERROR;
3771 UConverter *cnv;
3772 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3773 int32_t* myOff= offsets;
3774 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3775 if(U_FAILURE(errorCode)) {
3776 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3777 return;
3778 }
3779
3780 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3781 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3782 uSource = (const UChar*)&in[0];
3783 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3784 cTarget = cBuf;
3785 cTargetLimit = cBuf +uBufSize*5;
3786 uTarget = uBuf;
3787 uTargetLimit = uBuf+ uBufSize*5;
3788 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3789 if(U_FAILURE(errorCode)){
3790 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3791 return;
3792 }
3793 cSource = cBuf;
3794 cSourceLimit =cTarget;
3795 test =uBuf;
3796 myOff=offsets;
3797 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3798 if(U_FAILURE(errorCode)){
3799 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3800 return;
3801 }
3802 uSource = (const UChar*)&in[0];
3803 while(uSource<uSourceLimit){
3804 if(*test!=*uSource){
3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3806 }
3807 uSource++;
3808 test++;
3809 }
3810 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3811 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3812 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3813 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3814 TestJitterbug930("csISO2022KR");
3815 /*Test for the condition where there is an invalid character*/
3816 ucnv_reset(cnv);
3817 {
3818 static const uint8_t source2[]={0x1b,0x24,0x053};
3819 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3820 }
3821 ucnv_close(cnv);
3822 free(uBuf);
3823 free(cBuf);
3824 free(offsets);
3825 }
3826
3827 static void
3828 TestISO_2022_KR_1() {
3829 /* test input */
3830 static const uint16_t in[]={
3831 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3832 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3833 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3834 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3835 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3836 ,0x53E3,0x53E4,0x000A,0x000D};
3837 const UChar* uSource;
3838 const UChar* uSourceLimit;
3839 const char* cSource;
3840 const char* cSourceLimit;
3841 UChar *uTargetLimit =NULL;
3842 UChar *uTarget;
3843 char *cTarget;
3844 const char *cTargetLimit;
3845 char *cBuf;
3846 UChar *uBuf,*test;
3847 int32_t uBufSize = 120;
3848 UErrorCode errorCode=U_ZERO_ERROR;
3849 UConverter *cnv;
3850 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3851 int32_t* myOff= offsets;
3852 cnv=ucnv_open("ibm-25546", &errorCode);
3853 if(U_FAILURE(errorCode)) {
3854 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3855 return;
3856 }
3857
3858 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3859 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3860 uSource = (const UChar*)&in[0];
3861 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3862 cTarget = cBuf;
3863 cTargetLimit = cBuf +uBufSize*5;
3864 uTarget = uBuf;
3865 uTargetLimit = uBuf+ uBufSize*5;
3866 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3867 if(U_FAILURE(errorCode)){
3868 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3869 return;
3870 }
3871 cSource = cBuf;
3872 cSourceLimit =cTarget;
3873 test =uBuf;
3874 myOff=offsets;
3875 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3876 if(U_FAILURE(errorCode)){
3877 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3878 return;
3879 }
3880 uSource = (const UChar*)&in[0];
3881 while(uSource<uSourceLimit){
3882 if(*test!=*uSource){
3883 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3884 }
3885 uSource++;
3886 test++;
3887 }
3888 ucnv_reset(cnv);
3889 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3890 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3891 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3892 ucnv_reset(cnv);
3893 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3894 /*Test for the condition where there is an invalid character*/
3895 ucnv_reset(cnv);
3896 {
3897 static const uint8_t source2[]={0x1b,0x24,0x053};
3898 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3899 }
3900 ucnv_close(cnv);
3901 free(uBuf);
3902 free(cBuf);
3903 free(offsets);
3904 }
3905
3906 static void TestJitterbug2411(){
3907 const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3908 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3909 UConverter* kr=NULL, *kr1=NULL;
3910 UErrorCode errorCode = U_ZERO_ERROR;
3911 UChar tgt[100]={'\0'};
3912 UChar* target = tgt;
3913 UChar* targetLimit = target+100;
3914 kr=ucnv_open("iso-2022-kr", &errorCode);
3915 if(U_FAILURE(errorCode)) {
3916 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
3917 return;
3918 }
3919 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3920 if(U_FAILURE(errorCode)) {
3921 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3922 return;
3923 }
3924 kr1 = ucnv_open("ibm-25546", &errorCode);
3925 if(U_FAILURE(errorCode)) {
3926 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
3927 return;
3928 }
3929 target = tgt;
3930 targetLimit = target+100;
3931 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3932
3933 if(U_FAILURE(errorCode)) {
3934 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3935 return;
3936 }
3937
3938 ucnv_close(kr);
3939 ucnv_close(kr1);
3940
3941 }
3942
3943 static void
3944 TestJIS(){
3945 /* From Unicode */
3946 {
3947 /* JIS Encoding */
3948 UChar sampleTextJIS[] ={
3949 0xFF81, 0xFF82,
3950 0x30EC, 0x30ED,
3951 0x30EE, 0x30EF,
3952 0xFF93, 0xFF94,
3953 0xFF95, 0xFF96,
3954 0xFF97, 0xFF98
3955 };
3956 const uint8_t expectedISO2022JIS[] ={
3957 0x1b, 0x24, 0x42,
3958 0x25, 0x41, 0x25, 0x44,
3959 0x25, 0x6c, 0x25, 0x6d,
3960 0x25, 0x6e, 0x25, 0x6F,
3961 0x25, 0x62, 0x25, 0x64,
3962 0x25, 0x66, 0x25, 0x68,
3963 0x25, 0x69, 0x25, 0x6a
3964
3965 };
3966 int32_t fmISO2022JISOffs[] ={
3967 0,0,0,
3968 0,0,1,1,
3969 2,2,3,3,
3970 4,4,5,5,
3971 6,6,7,7,
3972 8,8,9,9,
3973 10,10,11,11
3974
3975 };
3976
3977 /* JIS7 Encoding */
3978 const uint8_t expectedISO2022JIS7[] ={
3979 0x1b, 0x28, 0x49,
3980 0x41, 0x42,
3981 0x1b, 0x24, 0x42,
3982 0x25, 0x6c, 0x25, 0x6d,
3983 0x25, 0x6e, 0x25, 0x6F,
3984 0x1b, 0x28, 0x49,
3985 0x53, 0x54,
3986 0x55, 0x56,
3987 0x57, 0x58
3988
3989 };
3990 int32_t fmISO2022JIS7Offs[] ={
3991 0,0,0,
3992 0,1,
3993 2,2,2,
3994 2,2,3,3,
3995 4,4,5,5,
3996 6,6,6,
3997 6,7,
3998 8,9,
3999 10,11
4000
4001 };
4002
4003 /* JIS8 Encoding */
4004 const uint8_t expectedISO2022JIS8[] ={
4005 0x1b, 0x28, 0x4A,
4006 0xC1, 0xC2,
4007 0x1b, 0x24, 0x42,
4008 0x25, 0x6c, 0x25, 0x6d,
4009 0x25, 0x6e, 0x25, 0x6F,
4010 0x1b, 0x28, 0x4A,
4011 0xD3, 0xD4,
4012 0xD5, 0xD6,
4013 0xD7, 0xD8
4014
4015 };
4016 int32_t fmISO2022JIS8Offs[] ={
4017 0,0,0,
4018 0,1,
4019 2,2,2,
4020 2,2,3,3,
4021 4,4,5,5,
4022 6,6,6,
4023 6,7,
4024 8,9,
4025 10,11
4026
4027 };
4028 testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4029 expectedISO2022JIS, sizeof(expectedISO2022JIS), "JIS", fmISO2022JISOffs,TRUE );
4030 testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4031 expectedISO2022JIS7, sizeof(expectedISO2022JIS7), "JIS7", fmISO2022JIS7Offs,FALSE );
4032 testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4033 expectedISO2022JIS8, sizeof(expectedISO2022JIS8), "JIS8", fmISO2022JIS8Offs,FALSE );
4034
4035
4036 }
4037 /*To Unicode*/
4038 {
4039 const uint8_t sampleTextJIS[] = {
4040 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4041 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4042 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4043 };
4044 const uint16_t expectedISO2022JIS[] = {
4045 0x0041, 0x0042,
4046 0xFF81, 0xFF82,
4047 0x3000
4048 };
4049 int32_t toISO2022JISOffs[]={
4050 3,4,
4051 8,9,
4052 16
4053 };
4054
4055 const uint8_t sampleTextJIS7[] = {
4056 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4057 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4058 0x1b,0x24,0x42,0x21,0x21,
4059 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4060 0x21,0x22,
4061 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4062 };
4063 const uint16_t expectedISO2022JIS7[] = {
4064 0x0041, 0x0042,
4065 0xFF81, 0xFF82,
4066 0x3000,
4067 0xFF81, 0xFF82,
4068 0x3001,
4069 0x3000
4070 };
4071 int32_t toISO2022JIS7Offs[]={
4072 3,4,
4073 8,9,
4074 13,16,
4075 17,
4076 19,27
4077 };
4078 const uint8_t sampleTextJIS8[] = {
4079 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4080 0xa1,0xc8,0xd9,/*Katakana Set*/
4081 0x1b,0x28,0x42,
4082 0x41,0x42,
4083 0xb1,0xc3, /*Katakana Set*/
4084 0x1b,0x24,0x42,0x21,0x21
4085 };
4086 const uint16_t expectedISO2022JIS8[] = {
4087 0x0041, 0x0042,
4088 0xff61, 0xff88, 0xff99,
4089 0x0041, 0x0042,
4090 0xff71, 0xff83,
4091 0x3000
4092 };
4093 int32_t toISO2022JIS8Offs[]={
4094 3, 4, 5, 6,
4095 7, 11, 12, 13,
4096 14, 18,
4097 };
4098
4099 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4100 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4101 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4102 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4103 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4104 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4105 }
4106
4107 }
4108
4109 static void TestJitterbug915(){
4110 /* tests for roundtripping of the below sequence
4111 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4112 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4113 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4114 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4115 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4116 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4117 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4118 */
4119 static char cSource[]={
4120 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4121 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4122 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4123 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4124 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4125 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4126 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
4127 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4128 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4129 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4130 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4131 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4132 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4133 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4134 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4135 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4136 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4137 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4138 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4139 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4140 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4141 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4142 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4143 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4144 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4145 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4146 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4147 0x37, 0x20, 0x2A, 0x2F,
4148 };
4149 UChar uTarget[500]={'\0'};
4150 UChar* utarget=uTarget;
4151 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4152
4153 char cTarget[500]={'\0'};
4154 char* ctarget=cTarget;
4155 char* ctargetLimit=cTarget+sizeof(cTarget);
4156 const char* csource=cSource;
4157 char* tempSrc = cSource;
4158 UErrorCode err=U_ZERO_ERROR;
4159
4160 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4161 if(U_FAILURE(err)) {
4162 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4163 return;
4164 }
4165 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4166 if(U_FAILURE(err)) {
4167 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4168 return;
4169 }
4170 utargetLimit=utarget;
4171 utarget = uTarget;
4172 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4173 if(U_FAILURE(err)) {
4174 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4175 return;
4176 }
4177 ctargetLimit=ctarget;
4178 ctarget =cTarget;
4179 while(ctarget<ctargetLimit){
4180 if(*(ctarget++) != *(tempSrc++)){
4181 log_err("Expected : \\x%02X \t Got: \\x%02X\n",*ctarget,(int)*tempSrc) ;
4182 }
4183 }
4184
4185 ucnv_close(conv);
4186 }
4187
4188 static void
4189 TestISO_2022_CN_EXT() {
4190 /* test input */
4191 static const uint16_t in[]={
4192 /* test Non-BMP code points */
4193 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4194 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4195 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4196 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4197 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4198 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4199 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4200 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4201 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4202 0xD869, 0xDED5,
4203
4204 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4205 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4206 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4207 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4208 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4209 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4210 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4211 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4212 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4213 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4214 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4215 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4216 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4217 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4218 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4219 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4220 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4221 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4222
4223 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4224
4225 };
4226
4227 const UChar* uSource;
4228 const UChar* uSourceLimit;
4229 const char* cSource;
4230 const char* cSourceLimit;
4231 UChar *uTargetLimit =NULL;
4232 UChar *uTarget;
4233 char *cTarget;
4234 const char *cTargetLimit;
4235 char *cBuf;
4236 UChar *uBuf,*test;
4237 int32_t uBufSize = 180;
4238 UErrorCode errorCode=U_ZERO_ERROR;
4239 UConverter *cnv;
4240 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4241 int32_t* myOff= offsets;
4242 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4243 if(U_FAILURE(errorCode)) {
4244 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4245 return;
4246 }
4247
4248 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4249 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4250 uSource = (const UChar*)&in[0];
4251 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4252 cTarget = cBuf;
4253 cTargetLimit = cBuf +uBufSize*5;
4254 uTarget = uBuf;
4255 uTargetLimit = uBuf+ uBufSize*5;
4256 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4257 if(U_FAILURE(errorCode)){
4258 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4259 return;
4260 }
4261 cSource = cBuf;
4262 cSourceLimit =cTarget;
4263 test =uBuf;
4264 myOff=offsets;
4265 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4266 if(U_FAILURE(errorCode)){
4267 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4268 return;
4269 }
4270 uSource = (const UChar*)&in[0];
4271 while(uSource<uSourceLimit){
4272 if(*test!=*uSource){
4273 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4274 }
4275 else{
4276 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4277 }
4278 uSource++;
4279 test++;
4280 }
4281 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4282 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4283 /*Test for the condition where there is an invalid character*/
4284 ucnv_reset(cnv);
4285 {
4286 static const uint8_t source2[]={0x0e,0x24,0x053};
4287 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4288 }
4289 ucnv_close(cnv);
4290 free(uBuf);
4291 free(cBuf);
4292 free(offsets);
4293 }
4294
4295 static void
4296 TestISO_2022_CN() {
4297 /* test input */
4298 static const uint16_t in[]={
4299 /* jitterbug 951 */
4300 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4301 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4302 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4303 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4304 0x0020, 0x0045, 0x004e, 0x0044,
4305 /**/
4306 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4307 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4308 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4309 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4310 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4311 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4312 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4313 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4314 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4315 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4316 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4317 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4318 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4319 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4320 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4321 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4322 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4323
4324 };
4325 const UChar* uSource;
4326 const UChar* uSourceLimit;
4327 const char* cSource;
4328 const char* cSourceLimit;
4329 UChar *uTargetLimit =NULL;
4330 UChar *uTarget;
4331 char *cTarget;
4332 const char *cTargetLimit;
4333 char *cBuf;
4334 UChar *uBuf,*test;
4335 int32_t uBufSize = 180;
4336 UErrorCode errorCode=U_ZERO_ERROR;
4337 UConverter *cnv;
4338 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4339 int32_t* myOff= offsets;
4340 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4341 if(U_FAILURE(errorCode)) {
4342 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4343 return;
4344 }
4345
4346 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4347 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4348 uSource = (const UChar*)&in[0];
4349 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4350 cTarget = cBuf;
4351 cTargetLimit = cBuf +uBufSize*5;
4352 uTarget = uBuf;
4353 uTargetLimit = uBuf+ uBufSize*5;
4354 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4355 if(U_FAILURE(errorCode)){
4356 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4357 return;
4358 }
4359 cSource = cBuf;
4360 cSourceLimit =cTarget;
4361 test =uBuf;
4362 myOff=offsets;
4363 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4364 if(U_FAILURE(errorCode)){
4365 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4366 return;
4367 }
4368 uSource = (const UChar*)&in[0];
4369 while(uSource<uSourceLimit){
4370 if(*test!=*uSource){
4371 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4372 }
4373 else{
4374 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4375 }
4376 uSource++;
4377 test++;
4378 }
4379 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4380 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4381 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4382 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4383 TestJitterbug930("csISO2022CN");
4384 /*Test for the condition where there is an invalid character*/
4385 ucnv_reset(cnv);
4386 {
4387 static const uint8_t source2[]={0x0e,0x24,0x053};
4388 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4389 }
4390
4391 ucnv_close(cnv);
4392 free(uBuf);
4393 free(cBuf);
4394 free(offsets);
4395 }
4396
4397 static void
4398 TestEBCDIC_STATEFUL() {
4399 /* test input */
4400 static const uint8_t in[]={
4401 0x61,
4402 0x1a,
4403 0x0f, 0x4b,
4404 0x42,
4405 0x40,
4406 0x36,
4407 };
4408
4409 /* expected test results */
4410 static const uint32_t results[]={
4411 /* number of bytes read, code point */
4412 1, 0x002f,
4413 1, 0x0092,
4414 2, 0x002e,
4415 1, 0xff62,
4416 1, 0x0020,
4417 1, 0x0096,
4418
4419 };
4420 static const uint8_t in2[]={
4421 0x0f,
4422 0xa1,
4423 0x01
4424 };
4425
4426 /* expected test results */
4427 static const uint32_t results2[]={
4428 /* number of bytes read, code point */
4429 2, 0x203E,
4430 1, 0x0001,
4431 };
4432
4433 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4434 UErrorCode errorCode=U_ZERO_ERROR;
4435 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4436 if(U_FAILURE(errorCode)) {
4437 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4438 return;
4439 }
4440 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4441 ucnv_reset(cnv);
4442 /* Test the condition when source >= sourceLimit */
4443 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4444 ucnv_reset(cnv);
4445 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4446 {
4447 static const uint8_t source1[]={0x0f};
4448 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4449 }
4450 /*Test for the condition where there is an invalid character*/
4451 ucnv_reset(cnv);
4452 {
4453 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4454 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4455 }
4456 ucnv_reset(cnv);
4457 source=(const char*)in2;
4458 limit=(const char*)in2+sizeof(in2);
4459 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4460 ucnv_close(cnv);
4461
4462 }
4463
4464 static void
4465 TestGB18030() {
4466 /* test input */
4467 static const uint8_t in[]={
4468 0x24,
4469 0x7f,
4470 0x81, 0x30, 0x81, 0x30,
4471 0xa8, 0xbf,
4472 0xa2, 0xe3,
4473 0xd2, 0xbb,
4474 0x82, 0x35, 0x8f, 0x33,
4475 0x84, 0x31, 0xa4, 0x39,
4476 0x90, 0x30, 0x81, 0x30,
4477 0xe3, 0x32, 0x9a, 0x35
4478 #if 0
4479 /*
4480 * Feature removed markus 2000-oct-26
4481 * Only some codepages must match surrogate pairs into supplementary code points -
4482 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4483 * GB 18030 provides direct encodings for supplementary code points, therefore
4484 * it must not combine two single-encoded surrogates into one code point.
4485 */
4486 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4487 #endif
4488 };
4489
4490 /* expected test results */
4491 static const uint32_t results[]={
4492 /* number of bytes read, code point */
4493 1, 0x24,
4494 1, 0x7f,
4495 4, 0x80,
4496 2, 0x1f9,
4497 2, 0x20ac,
4498 2, 0x4e00,
4499 4, 0x9fa6,
4500 4, 0xffff,
4501 4, 0x10000,
4502 4, 0x10ffff
4503 #if 0
4504 /* Feature removed. See comment above. */
4505 8, 0x10000
4506 #endif
4507 };
4508
4509 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4510 UErrorCode errorCode=U_ZERO_ERROR;
4511 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4512 if(U_FAILURE(errorCode)) {
4513 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4514 return;
4515 }
4516 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4517 ucnv_close(cnv);
4518 }
4519
4520 static void
4521 TestLMBCS() {
4522 /* LMBCS-1 string */
4523 static const uint8_t pszLMBCS[]={
4524 0x61,
4525 0x01, 0x29,
4526 0x81,
4527 0xA0,
4528 0x0F, 0x27,
4529 0x0F, 0x91,
4530 0x14, 0x0a, 0x74,
4531 0x14, 0xF6, 0x02,
4532 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4533 0x10, 0x88, 0xA0,
4534 };
4535
4536 /* Unicode UChar32 equivalents */
4537 static const UChar32 pszUnicode32[]={
4538 /* code point */
4539 0x00000061,
4540 0x00002013,
4541 0x000000FC,
4542 0x000000E1,
4543 0x00000007,
4544 0x00000091,
4545 0x00000a74,
4546 0x00000200,
4547 0x00023456, /* code point for surrogate pair */
4548 0x00005516
4549 };
4550
4551 /* Unicode UChar equivalents */
4552 static const UChar pszUnicode[]={
4553 /* code point */
4554 0x0061,
4555 0x2013,
4556 0x00FC,
4557 0x00E1,
4558 0x0007,
4559 0x0091,
4560 0x0a74,
4561 0x0200,
4562 0xD84D, /* low surrogate */
4563 0xDC56, /* high surrogate */
4564 0x5516
4565 };
4566
4567 /* expected test results */
4568 static const int offsets32[]={
4569 /* number of bytes read, code point */
4570 0,
4571 1,
4572 3,
4573 4,
4574 5,
4575 7,
4576 9,
4577 12,
4578 15,
4579 21,
4580 24
4581 };
4582
4583 /* expected test results */
4584 static const int offsets[]={
4585 /* number of bytes read, code point */
4586 0,
4587 1,
4588 3,
4589 4,
4590 5,
4591 7,
4592 9,
4593 12,
4594 15,
4595 18,
4596 21,
4597 24
4598 };
4599
4600
4601 UConverter *cnv;
4602
4603 #define NAME_LMBCS_1 "LMBCS-1"
4604 #define NAME_LMBCS_2 "LMBCS-2"
4605
4606
4607 /* Some basic open/close/property tests on some LMBCS converters */
4608 {
4609
4610 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4611 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4612 char get_subchars [1];
4613 const char * get_name;
4614 UConverter *cnv1;
4615 UConverter *cnv2;
4616
4617 int8_t len = sizeof(get_subchars);
4618
4619 UErrorCode errorCode=U_ZERO_ERROR;
4620
4621 /* Open */
4622 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4623 if(U_FAILURE(errorCode)) {
4624 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4625 return;
4626 }
4627 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4628 if(U_FAILURE(errorCode)) {
4629 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4630 return;
4631 }
4632
4633 /* Name */
4634 get_name = ucnv_getName (cnv1, &errorCode);
4635 if (strcmp(NAME_LMBCS_1,get_name)){
4636 log_err("Unexpected converter name: %s\n", get_name);
4637 }
4638 get_name = ucnv_getName (cnv2, &errorCode);
4639 if (strcmp(NAME_LMBCS_2,get_name)){
4640 log_err("Unexpected converter name: %s\n", get_name);
4641 }
4642
4643 /* substitution chars */
4644 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4645 if(U_FAILURE(errorCode)) {
4646 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4647 }
4648 if (len!=1){
4649 log_err("Unexpected length of sub chars\n");
4650 }
4651 if (get_subchars[0] != expected_subchars[0]){
4652 log_err("Unexpected value of sub chars\n");
4653 }
4654 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4655 if(U_FAILURE(errorCode)) {
4656 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4657 }
4658 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4659 if(U_FAILURE(errorCode)) {
4660 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4661 }
4662 if (len!=1){
4663 log_err("Unexpected length of sub chars\n");
4664 }
4665 if (get_subchars[0] != new_subchars[0]){
4666 log_err("Unexpected value of sub chars\n");
4667 }
4668 ucnv_close(cnv1);
4669 ucnv_close(cnv2);
4670
4671 }
4672
4673 /* LMBCS to Unicode - offsets */
4674 {
4675 UErrorCode errorCode=U_ZERO_ERROR;
4676
4677 const uint8_t * pSource = pszLMBCS;
4678 const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4679
4680 UChar Out [sizeof(pszUnicode) + 1];
4681 UChar * pOut = Out;
4682 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4683
4684 int32_t off [sizeof(offsets)];
4685
4686 /* last 'offset' in expected results is just the final size.
4687 (Makes other tests easier). Compensate here: */
4688
4689 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4690
4691
4692
4693 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4694 if(U_FAILURE(errorCode)) {
4695 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4696 return;
4697 }
4698
4699
4700
4701 ucnv_toUnicode (cnv,
4702 &pOut,
4703 OutLimit,
4704 (const char **)&pSource,
4705 (const char *)sourceLimit,
4706 off,
4707 TRUE,
4708 &errorCode);
4709
4710
4711 if (memcmp(off,offsets,sizeof(offsets)))
4712 {
4713 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4714 }
4715 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4716 {
4717 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4718 }
4719 ucnv_close(cnv);
4720 }
4721 {
4722 /* LMBCS to Unicode - getNextUChar */
4723 const char * sourceStart;
4724 const char *source=(const char *)pszLMBCS;
4725 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4726 const UChar32 *results= pszUnicode32;
4727 const int *off = offsets32;
4728
4729 UErrorCode errorCode=U_ZERO_ERROR;
4730 UChar32 uniChar;
4731
4732 cnv=ucnv_open("LMBCS-1", &errorCode);
4733 if(U_FAILURE(errorCode)) {
4734 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4735 return;
4736 }
4737 else
4738 {
4739
4740 while(source<limit) {
4741 sourceStart=source;
4742 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4743 if(U_FAILURE(errorCode)) {
4744 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4745 break;
4746 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4747 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4748 uniChar, (source-sourceStart), *results, *off);
4749 break;
4750 }
4751 results++;
4752 off++;
4753 }
4754 }
4755 ucnv_close(cnv);
4756 }
4757 { /* test locale & optimization group operations: Unicode to LMBCS */
4758
4759 UErrorCode errorCode=U_ZERO_ERROR;
4760 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4761 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4762 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4763 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4764 const UChar * pUniOut = uniString;
4765 UChar * pUniIn = uniString;
4766 uint8_t lmbcsString [4];
4767 const uint8_t * pLMBCSOut = lmbcsString;
4768 uint8_t * pLMBCSIn = lmbcsString;
4769
4770 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4771 ucnv_fromUnicode (cnv16he,
4772 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4773 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4774 NULL, 1, &errorCode);
4775
4776 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4777 {
4778 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4779 }
4780
4781 pLMBCSIn=lmbcsString;
4782 pUniOut = uniString;
4783 ucnv_fromUnicode (cnv01us,
4784 (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4785 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4786 NULL, 1, &errorCode);
4787
4788 if (lmbcsString[0] != 0x9F)
4789 {
4790 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4791 }
4792
4793 /* single byte char from mbcs char set */
4794 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4795 pLMBCSOut = lmbcsString;
4796 pUniIn = uniString;
4797 ucnv_toUnicode (cnv16jp,
4798 &pUniIn, pUniIn + 1,
4799 (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
4800 NULL, 1, &errorCode);
4801 if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4802 {
4803 log_err("Unexpected results from LMBCS-16 single byte char\n");
4804 }
4805 /* convert to group 1: should be 3 bytes */
4806 pLMBCSIn = lmbcsString;
4807 pUniOut = uniString;
4808 ucnv_fromUnicode (cnv01us,
4809 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
4810 &pUniOut, pUniOut + 1,
4811 NULL, 1, &errorCode);
4812 if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
4813 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4814 {
4815 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4816 }
4817 pLMBCSOut = lmbcsString;
4818 pUniIn = uniString;
4819 ucnv_toUnicode (cnv01us,
4820 &pUniIn, pUniIn + 1,
4821 (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
4822 NULL, 1, &errorCode);
4823 if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4824 {
4825 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4826 }
4827 pLMBCSIn = lmbcsString;
4828 pUniOut = uniString;
4829 ucnv_fromUnicode (cnv16jp,
4830 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
4831 &pUniOut, pUniOut + 1,
4832 NULL, 1, &errorCode);
4833 if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4834 {
4835 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4836 }
4837 ucnv_close(cnv16he);
4838 ucnv_close(cnv16jp);
4839 ucnv_close(cnv01us);
4840 }
4841 {
4842 /* Small source buffer testing, LMBCS -> Unicode */
4843
4844 UErrorCode errorCode=U_ZERO_ERROR;
4845
4846 const uint8_t * pSource = pszLMBCS;
4847 const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4848 int codepointCount = 0;
4849
4850 UChar Out [sizeof(pszUnicode) + 1];
4851 UChar * pOut = Out;
4852 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4853
4854
4855 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4856 if(U_FAILURE(errorCode)) {
4857 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4858 return;
4859 }
4860
4861
4862 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4863 {
4864 ucnv_toUnicode (cnv,
4865 &pOut,
4866 OutLimit,
4867 (const char **)&pSource,
4868 (const char *)(pSource+1), /* claim that this is a 1- byte buffer */
4869 NULL,
4870 FALSE, /* FALSE means there might be more chars in the next buffer */
4871 &errorCode);
4872
4873 if (U_SUCCESS (errorCode))
4874 {
4875 if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
4876 {
4877 /* we are on to the next code point: check value */
4878
4879 if (Out[0] != pszUnicode[codepointCount]){
4880 log_err("LMBCS->Uni result %lx should have been %lx \n",
4881 Out[0], pszUnicode[codepointCount]);
4882 }
4883
4884 pOut = Out; /* reset for accumulating next code point */
4885 codepointCount++;
4886 }
4887 }
4888 else
4889 {
4890 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4891 }
4892 }
4893 {
4894 /* limits & surrogate error testing */
4895 uint8_t LIn [sizeof(pszLMBCS)];
4896 const uint8_t * pLIn = LIn;
4897
4898 char LOut [sizeof(pszLMBCS)];
4899 char * pLOut = LOut;
4900
4901 UChar UOut [sizeof(pszUnicode)];
4902 UChar * pUOut = UOut;
4903
4904 UChar UIn [sizeof(pszUnicode)];
4905 const UChar * pUIn = UIn;
4906
4907 int32_t off [sizeof(offsets)];
4908 UChar32 uniChar;
4909
4910 errorCode=U_ZERO_ERROR;
4911
4912 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4913 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4914 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4915 {
4916 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4917 }
4918 errorCode=U_ZERO_ERROR;
4919 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4920 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4921 {
4922 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4923 }
4924 errorCode=U_ZERO_ERROR;
4925
4926 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4927 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4928 {
4929 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4930 }
4931 errorCode=U_ZERO_ERROR;
4932
4933 /* 0 byte source request - no error, no pointer movement */
4934 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4935 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4936 if(U_FAILURE(errorCode)) {
4937 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4938 }
4939 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
4940 {
4941 log_err("Unexpected pointer move in 0 byte source request \n");
4942 }
4943 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4944 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
4945 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4946 {
4947 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
4948 }
4949 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4950 {
4951 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4952 }
4953 errorCode = U_ZERO_ERROR;
4954
4955 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
4956
4957 pUIn = pszUnicode;
4958 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
4959 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
4960 {
4961 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4962 }
4963
4964 errorCode = U_ZERO_ERROR;
4965
4966 pLIn = pszLMBCS;
4967 ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
4968 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
4969 {
4970 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4971 }
4972
4973 /* unpaired or chopped LMBCS surrogates */
4974
4975 /* OK high surrogate, Low surrogate is chopped */
4976 LIn [0] = 0x14;
4977 LIn [1] = 0xD8;
4978 LIn [2] = 0x01;
4979 LIn [3] = 0x14;
4980 LIn [4] = 0xDC;
4981 pLIn = LIn;
4982 errorCode = U_ZERO_ERROR;
4983 pUOut = UOut;
4984
4985 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4986 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4987 {
4988 log_err("Unexpected results on chopped low surrogate\n");
4989 }
4990
4991 /* chopped at surrogate boundary */
4992 LIn [0] = 0x14;
4993 LIn [1] = 0xD8;
4994 LIn [2] = 0x01;
4995 pLIn = LIn;
4996 errorCode = U_ZERO_ERROR;
4997 pUOut = UOut;
4998
4999 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5000 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5001 {
5002 log_err("Unexpected results on chopped at surrogate boundary \n");
5003 }
5004
5005 /* unpaired surrogate plus valid Unichar */
5006 LIn [0] = 0x14;
5007 LIn [1] = 0xD8;
5008 LIn [2] = 0x01;
5009 LIn [3] = 0x14;
5010 LIn [4] = 0xC9;
5011 LIn [5] = 0xD0;
5012 pLIn = LIn;
5013 errorCode = U_ZERO_ERROR;
5014 pUOut = UOut;
5015
5016 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5017 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5018 {
5019 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5020 }
5021
5022 /* unpaired surrogate plus chopped Unichar */
5023 LIn [0] = 0x14;
5024 LIn [1] = 0xD8;
5025 LIn [2] = 0x01;
5026 LIn [3] = 0x14;
5027 LIn [4] = 0xC9;
5028
5029 pLIn = LIn;
5030 errorCode = U_ZERO_ERROR;
5031 pUOut = UOut;
5032
5033 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5034 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5035 {
5036 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5037 }
5038
5039 /* unpaired surrogate plus valid non-Unichar */
5040 LIn [0] = 0x14;
5041 LIn [1] = 0xD8;
5042 LIn [2] = 0x01;
5043 LIn [3] = 0x0F;
5044 LIn [4] = 0x3B;
5045
5046 pLIn = LIn;
5047 errorCode = U_ZERO_ERROR;
5048 pUOut = UOut;
5049
5050 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5051 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5052 {
5053 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5054 }
5055
5056 /* unpaired surrogate plus chopped non-Unichar */
5057 LIn [0] = 0x14;
5058 LIn [1] = 0xD8;
5059 LIn [2] = 0x01;
5060 LIn [3] = 0x0F;
5061
5062 pLIn = LIn;
5063 errorCode = U_ZERO_ERROR;
5064 pUOut = UOut;
5065
5066 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5067
5068 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5069 {
5070 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5071 }
5072 }
5073 }
5074 ucnv_close(cnv); /* final cleanup */
5075 }
5076
5077
5078 static void TestJitterbug255()
5079 {
5080 const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5081 const uint8_t *testBuffer = testBytes;
5082 const uint8_t *testEnd = testBytes + sizeof(testBytes);
5083 UErrorCode status = U_ZERO_ERROR;
5084 UChar32 result;
5085 UConverter *cnv = 0;
5086
5087 cnv = ucnv_open("shift-jis", &status);
5088 if (U_FAILURE(status) || cnv == 0) {
5089 log_data_err("Failed to open the converter for SJIS.\n");
5090 return;
5091 }
5092 while (testBuffer != testEnd)
5093 {
5094 result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status);
5095 if (U_FAILURE(status))
5096 {
5097 log_err("Failed to convert the next UChar for SJIS.\n");
5098 break;
5099 }
5100 }
5101 ucnv_close(cnv);
5102 }
5103
5104 static void TestEBCDICUS4XML()
5105 {
5106 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5107 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5108 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5109 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5110 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5111 UChar *unicodes = unicodes_x;
5112 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5113 char *target = target_x;
5114 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5115 UErrorCode status = U_ZERO_ERROR;
5116 UConverter *cnv = 0;
5117
5118 cnv = ucnv_open("ebcdic-xml-us", &status);
5119 if (U_FAILURE(status) || cnv == 0) {
5120 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5121 return;
5122 }
5123 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5124 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5125 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5126 u_errorName(status));
5127 printUSeqErr(unicodes_x, 3);
5128 printUSeqErr(toUnicodeMaps, 3);
5129 }
5130 status = U_ZERO_ERROR;
5131 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5132 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5133 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5134 u_errorName(status));
5135 printSeqErr((const unsigned char*)target_x, 3);
5136 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5137 }
5138 ucnv_close(cnv);
5139 }
5140
5141 #if !UCONFIG_NO_COLLATION
5142
5143 static void TestJitterbug981(){
5144 const UChar* rules;
5145 int32_t rules_length, target_cap, bytes_needed;
5146 UErrorCode status = U_ZERO_ERROR;
5147 UConverter *utf8cnv;
5148 UCollator* myCollator;
5149 char buff[50000];
5150 int numNeeded=0;
5151 utf8cnv = ucnv_open ("utf8", &status);
5152 if(U_FAILURE(status)){
5153 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5154 return;
5155 }
5156 myCollator = ucol_open("zh", &status);
5157 if(U_FAILURE(status)){
5158 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5159 return;
5160 }
5161
5162 rules = ucol_getRules(myCollator, &rules_length);
5163
5164 target_cap = 0;
5165 do {
5166 ucnv_reset(utf8cnv);
5167 status = U_ZERO_ERROR;
5168 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5169 rules, rules_length, &status);
5170 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5171 if(numNeeded!=0 && numNeeded!= bytes_needed){
5172 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5173 }
5174 numNeeded = bytes_needed;
5175 } while (status == U_BUFFER_OVERFLOW_ERROR);
5176 ucol_close(myCollator);
5177 ucnv_close(utf8cnv);
5178 }
5179
5180 #endif
5181
5182 static void TestJitterbug1293(){
5183 UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5184 char target[256];
5185 UErrorCode status = U_ZERO_ERROR;
5186 UConverter* conv=NULL;
5187 int32_t target_cap, bytes_needed, numNeeded = 0;
5188 conv = ucnv_open("shift-jis",&status);
5189 if(U_FAILURE(status)){
5190 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5191 return;
5192 }
5193
5194 do{
5195 target_cap =0;
5196 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5197 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5198 if(numNeeded!=0 && numNeeded!= bytes_needed){
5199 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5200 }
5201 numNeeded = bytes_needed;
5202 } while (status == U_BUFFER_OVERFLOW_ERROR);
5203 if(U_FAILURE(status)){
5204 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5205 return;
5206 }
5207 ucnv_close(conv);
5208 }
5209
5210 #endif