]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nucnvtst.c
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CCONVTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 *********************************************************************************
14 */
15 #include <stdio.h>
16 #include "cstring.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "cintltst.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ucol.h"
24 #include "cmemory.h"
25
26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
28 #if !UCONFIG_NO_COLLATION
29 static void TestJitterbug981(void);
30 #endif
31 static void TestJitterbug1293(void);
32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
33 static void TestConverterTypesAndStarters(void);
34 static void TestAmbiguous(void);
35 static void TestSignatureDetection(void);
36 static void TestUTF7(void);
37 static void TestIMAP(void);
38 static void TestUTF8(void);
39 static void TestCESU8(void);
40 static void TestUTF16(void);
41 static void TestUTF16BE(void);
42 static void TestUTF16LE(void);
43 static void TestUTF32(void);
44 static void TestUTF32BE(void);
45 static void TestUTF32LE(void);
46 static void TestLATIN1(void);
47 static void TestSBCS(void);
48 static void TestDBCS(void);
49 static void TestMBCS(void);
50 #ifdef U_ENABLE_GENERIC_ISO_2022
51 static void TestISO_2022(void);
52 #endif
53 static void TestISO_2022_JP(void);
54 static void TestISO_2022_JP_1(void);
55 static void TestISO_2022_JP_2(void);
56 static void TestISO_2022_KR(void);
57 static void TestISO_2022_KR_1(void);
58 static void TestISO_2022_CN(void);
59 static void TestISO_2022_CN_EXT(void);
60 static void TestJIS(void);
61 static void TestHZ(void);
62 static void TestSCSU(void);
63 static void TestEBCDIC_STATEFUL(void);
64 static void TestGB18030(void);
65 static void TestLMBCS(void);
66 static void TestJitterbug255(void);
67 static void TestEBCDICUS4XML(void);
68 static void TestJitterbug915(void);
69 static void TestISCII(void);
70 static void TestConv(const uint16_t in[],
71 int len,
72 const char* conv,
73 const char* lang,
74 char byteArr[],
75 int byteArrLen);
76 static void TestRoundTrippingAllUTF(void);
77 static void TestCoverageMBCS(void);
78 static void TestJitterbug2346(void);
79 static void TestJitterbug2411(void);
80 void addTestNewConvert(TestNode** root);
81
82 /* open a converter, using test data if it begins with '@' */
83 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
84
85
/* Element count of the scratch output/offset buffers used by
   testConvertFromU() and testConvertToU(); also the default chunk size. */
#define NEW_MAX_BUFFER 999

/* Input and output chunk sizes currently in effect; assigned by
   TestNewConvertWithBufferSizes() before it runs its conversions. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label of the sub-test in progress, written by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Yields the smaller of x and y.
   Every use of an argument is parenthesized so that operands containing
   low-precedence operators (e.g. `a & b`) compare and expand correctly.
   Each argument may be evaluated twice, so pass side-effect-free expressions. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
93
94 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
95 {
96 if(cnv && cnv[0] == '@') {
97 return ucnv_openPackage(loadTestData(err), cnv+1, err);
98 } else {
99 return ucnv_open(cnv, err);
100 }
101 }
102
/* Writes the first len bytes of a to the verbose log as "{0xNN 0xNN }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
111
112 static void printUSeq(const UChar* a, int len)
113 {
114 int i=0;
115 log_verbose("{U+");
116 while (i<len) log_verbose("0x%04x ", a[i++]);
117 log_verbose("}\n");
118 }
119
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
128
129 static void printUSeqErr(const UChar* a, int len)
130 {
131 int i=0;
132 fprintf(stderr, "{U+");
133 while (i<len)
134 fprintf(stderr, "0x%04x ", a[i++]);
135 fprintf(stderr,"}\n");
136 }
137
138 static void
139 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
140 {
141 const char* s0;
142 const char* s=(char*)source;
143 const int32_t *r=results;
144 UErrorCode errorCode=U_ZERO_ERROR;
145 UChar32 c;
146
147 while(s<limit) {
148 s0=s;
149 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
150 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
151 break; /* no more significant input */
152 } else if(U_FAILURE(errorCode)) {
153 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
154 break;
155 } else if(
156 /* test the expected number of input bytes only if >=0 */
157 (*r>=0 && (int32_t)(s-s0)!=*r) ||
158 c!=*(r+1)
159 ) {
160 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
161 message, c, (s-s0), *(r+1), *r);
162 break;
163 }
164 r+=2;
165 }
166 }
167
168 static void
169 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
170 {
171 const char* s=(char*)source;
172 UErrorCode errorCode=U_ZERO_ERROR;
173 uint32_t c;
174 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
175 if(errorCode != expected){
176 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
177 }
178 if(c != 0xFFFD && c != 0xffff){
179 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
180 }
181
182 }
183
184 static void TestInBufSizes(void)
185 {
186 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
187 #if 1
188 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
189 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
190 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
191 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
192 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
193 TestNewConvertWithBufferSizes(1,1);
194 TestNewConvertWithBufferSizes(2,3);
195 TestNewConvertWithBufferSizes(3,2);
196 #endif
197 }
198
199 static void TestOutBufSizes(void)
200 {
201 #if 1
202 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
203 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
204 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
205 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
206 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
207 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
208
209 #endif
210 }
211
212
213 void addTestNewConvert(TestNode** root)
214 {
215 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
216 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
217 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
218 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
219 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
220 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
221 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
222 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
223
224 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
225 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
226 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
227 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
228 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
229 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
230 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
231 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
232 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
233
234 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
235 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
236 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
237 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
238 #ifdef U_ENABLE_GENERIC_ISO_2022
239 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
240 #endif
241 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
242 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
243 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
244 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
245 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
246 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
247 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
248 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
249 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
250 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
251 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
252 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
253 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
254 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
255 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
256 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
257 #if !UCONFIG_NO_COLLATION
258 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
259 #endif
260 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
261 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
262 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
263 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
264 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
265
266 }
267
268
269 /* Note that this test already makes use of statics, so it's not really
270 multithread safe.
271 This convenience function lets us make the error messages actually useful.
272 */
273
274 static void setNuConvTestName(const char *codepage, const char *direction)
275 {
276 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
277 codepage,
278 direction,
279 (int)gInBufferSize,
280 (int)gOutBufferSize);
281 }
282
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* 0: test was OK */
    TC_OK,
    /* 1: match failed - an error was printed */
    TC_MISMATCH,
    /* 2: test failed; callers should not print another error because one
          was already printed */
    TC_FAIL
} ETestConvertResult;
289
290 /* Note: This function uses global variables and it will not do offset
291 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
292 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
293 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
294 {
295 UErrorCode status = U_ZERO_ERROR;
296 UConverter *conv = 0;
297 uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */
298 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
299 uint8_t *p;
300 const UChar *src;
301 uint8_t *end;
302 uint8_t *targ;
303 int32_t *offs;
304 int i;
305 int32_t realBufferSize;
306 uint8_t *realBufferEnd;
307 const UChar *realSourceEnd;
308 const UChar *sourceLimit;
309 UBool checkOffsets = TRUE;
310 UBool doFlush;
311
312 for(i=0;i<NEW_MAX_BUFFER;i++)
313 junkout[i] = 0xF0;
314 for(i=0;i<NEW_MAX_BUFFER;i++)
315 junokout[i] = 0xFF;
316
317 setNuConvTestName(codepage, "FROM");
318
319 log_verbose("\n========= %s\n", gNuConvTestName);
320
321 conv = my_ucnv_open(codepage, &status);
322
323 if(U_FAILURE(status))
324 {
325 log_data_err("Couldn't open converter %s\n",codepage);
326 return TC_FAIL;
327 }
328 if(useFallback){
329 ucnv_setFallback(conv,useFallback);
330 }
331
332 log_verbose("Converter opened..\n");
333
334 src = source;
335 targ = junkout;
336 offs = junokout;
337
338 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
339 realBufferEnd = junkout + realBufferSize;
340 realSourceEnd = source + sourceLen;
341
342 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
343 checkOffsets = FALSE;
344
345 do
346 {
347 end = nct_min(targ + gOutBufferSize, realBufferEnd);
348 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
349
350 doFlush = (UBool)(sourceLimit == realSourceEnd);
351
352 if(targ == realBufferEnd) {
353 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
354 return TC_FAIL;
355 }
356 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
357
358
359 status = U_ZERO_ERROR;
360
361 ucnv_fromUnicode (conv,
362 (char **)&targ,
363 (const char*)end,
364 &src,
365 sourceLimit,
366 checkOffsets ? offs : NULL,
367 doFlush, /* flush if we're at the end of the input data */
368 &status);
369 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
370
371 if(U_FAILURE(status)) {
372 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
373 return TC_FAIL;
374 }
375
376 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
377 sourceLen, targ-junkout);
378
379 if(VERBOSITY)
380 {
381 char junk[9999];
382 char offset_str[9999];
383 uint8_t *ptr;
384
385 junk[0] = 0;
386 offset_str[0] = 0;
387 for(ptr = junkout;ptr<targ;ptr++) {
388 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
389 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
390 }
391
392 log_verbose(junk);
393 printSeq((const uint8_t *)expect, expectLen);
394 if ( checkOffsets ) {
395 log_verbose("\nOffsets:");
396 log_verbose(offset_str);
397 }
398 log_verbose("\n");
399 }
400 ucnv_close(conv);
401
402 if(expectLen != targ-junkout) {
403 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
404 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
405 printf("\nGot:");
406 printSeqErr((const unsigned char*)junkout, targ-junkout);
407 printf("\nExpected:");
408 printSeqErr((const unsigned char*)expect, expectLen);
409 return TC_MISMATCH;
410 }
411
412 if (checkOffsets && (expectOffsets != 0) ) {
413 log_verbose("comparing %d offsets..\n", targ-junkout);
414 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
415 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
416 printSeqErr((const unsigned char*)junkout, targ-junkout);
417 log_err("\n");
418 log_err("Got : ");
419 for(p=junkout;p<targ;p++) {
420 log_err("%d,", junokout[p-junkout]);
421 }
422 log_err("\n");
423 log_err("Expected: ");
424 for(i=0; i<(targ-junkout); i++) {
425 log_err("%d,", expectOffsets[i]);
426 }
427 log_err("\n");
428 }
429 }
430
431 log_verbose("comparing..\n");
432 if(!memcmp(junkout, expect, expectLen)) {
433 log_verbose("Matches!\n");
434 return TC_OK;
435 } else {
436 log_err("String does not match u->%s\n", gNuConvTestName);
437 printUSeqErr(source, sourceLen);
438 printf("\nGot:");
439 printSeqErr((const unsigned char *)junkout, expectLen);
440 printf("\nExpected:");
441 printSeqErr((const unsigned char *)expect, expectLen);
442
443 return TC_MISMATCH;
444 }
445 }
446
447 /* Note: This function uses global variables and it will not do offset
448 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
449 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
450 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
451 {
452 UErrorCode status = U_ZERO_ERROR;
453 UConverter *conv = 0;
454 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
455 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
456 const uint8_t *src;
457 const uint8_t *realSourceEnd;
458 const uint8_t *srcLimit;
459 UChar *p;
460 UChar *targ;
461 UChar *end;
462 int32_t *offs;
463 int i;
464 UBool checkOffsets = TRUE;
465
466 int32_t realBufferSize;
467 UChar *realBufferEnd;
468
469
470 for(i=0;i<NEW_MAX_BUFFER;i++)
471 junkout[i] = 0xFFFE;
472
473 for(i=0;i<NEW_MAX_BUFFER;i++)
474 junokout[i] = -1;
475
476 setNuConvTestName(codepage, "TO");
477
478 log_verbose("\n========= %s\n", gNuConvTestName);
479
480 conv = my_ucnv_open(codepage, &status);
481
482 if(U_FAILURE(status))
483 {
484 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
485 return TC_FAIL;
486 }
487 if(useFallback){
488 ucnv_setFallback(conv,useFallback);
489 }
490 log_verbose("Converter opened..\n");
491
492 src = source;
493 targ = junkout;
494 offs = junokout;
495
496 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
497 realBufferEnd = junkout + realBufferSize;
498 realSourceEnd = src + sourcelen;
499
500 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
501 checkOffsets = FALSE;
502
503 do
504 {
505 end = nct_min( targ + gOutBufferSize, realBufferEnd);
506 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
507
508 if(targ == realBufferEnd)
509 {
510 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
511 return TC_FAIL;
512 }
513 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
514
515 /* oldTarg = targ; */
516
517 status = U_ZERO_ERROR;
518
519 ucnv_toUnicode (conv,
520 &targ,
521 end,
522 (const char **)&src,
523 (const char *)srcLimit,
524 checkOffsets ? offs : NULL,
525 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
526 &status);
527
528 /* offs += (targ-oldTarg); */
529
530 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
531
532 if(U_FAILURE(status))
533 {
534 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
535 return TC_FAIL;
536 }
537
538 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
539 sourcelen, targ-junkout);
540 if(VERBOSITY)
541 {
542 char junk[9999];
543 char offset_str[9999];
544 UChar *ptr;
545
546 junk[0] = 0;
547 offset_str[0] = 0;
548
549 for(ptr = junkout;ptr<targ;ptr++)
550 {
551 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
552 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
553 }
554
555 log_verbose(junk);
556 printUSeq(expect, expectlen);
557 if ( checkOffsets )
558 {
559 log_verbose("\nOffsets:");
560 log_verbose(offset_str);
561 }
562 log_verbose("\n");
563 }
564 ucnv_close(conv);
565
566 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
567
568 if (checkOffsets && (expectOffsets != 0))
569 {
570 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
571 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
572 log_err("Got: ");
573 for(p=junkout;p<targ;p++) {
574 log_err("%d,", junokout[p-junkout]);
575 }
576 log_err("\n");
577 log_err("Expected: ");
578 for(i=0; i<(targ-junkout); i++) {
579 log_err("%d,", expectOffsets[i]);
580 }
581 log_err("\n");
582 log_err("output: ");
583 for(i=0; i<(targ-junkout); i++) {
584 log_err("%X,", junkout[i]);
585 }
586 log_err("\n");
587 log_err("input: ");
588 for(i=0; i<(src-source); i++) {
589 log_err("%X,", (unsigned char)source[i]);
590 }
591 log_err("\n");
592 }
593 }
594
595 if(!memcmp(junkout, expect, expectlen*2))
596 {
597 log_verbose("Matches!\n");
598 return TC_OK;
599 }
600 else
601 {
602 log_err("String does not match. %s\n", gNuConvTestName);
603 log_verbose("String does not match. %s\n", gNuConvTestName);
604 printf("\nGot:");
605 printUSeqErr(junkout, expectlen);
606 printf("\nExpected:");
607 printUSeqErr(expect, expectlen);
608 return TC_MISMATCH;
609 }
610 }
611
612
613 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
614 {
615 /** test chars #1 */
616 /* 1 2 3 1Han 2Han 3Han . */
617 UChar sampleText[] =
618 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
619
620
621 const uint8_t expectedUTF8[] =
622 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
623 int32_t toUTF8Offs[] =
624 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
625 int32_t fmUTF8Offs[] =
626 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
627
628 #ifdef U_ENABLE_GENERIC_ISO_2022
629 /* Same as UTF8, but with ^[%B preceeding */
630 const uint8_t expectedISO2022[] =
631 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
632 int32_t toISO2022Offs[] =
633 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
634 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
635 int32_t fmISO2022Offs[] =
636 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
637 #endif
638
639 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
640 const uint8_t expectedIBM930[] =
641 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
642 int32_t toIBM930Offs[] =
643 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
644 int32_t fmIBM930Offs[] =
645 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
646
647 /* 1 2 3 0 h1 h2 h3 . MBCS*/
648 const uint8_t expectedIBM943[] =
649 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
650 int32_t toIBM943Offs [] =
651 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
652 int32_t fmIBM943Offs[] =
653 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
654
655 /* 1 2 3 0 h1 h2 h3 . DBCS*/
656 const uint8_t expectedIBM9027[] =
657 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
658 int32_t toIBM9027Offs [] =
659 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
660
661 /* 1 2 3 0 <?> <?> <?> . SBCS*/
662 const uint8_t expectedIBM920[] =
663 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
664 int32_t toIBM920Offs [] =
665 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
666
667 /* 1 2 3 0 <?> <?> <?> . SBCS*/
668 const uint8_t expectedISO88593[] =
669 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
670 int32_t toISO88593Offs[] =
671 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
672
673 /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
674 const uint8_t expectedLATIN1[] =
675 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
676 int32_t toLATIN1Offs[] =
677 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
678
679
680 /* etc */
681 const uint8_t expectedUTF16BE[] =
682 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
683 int32_t toUTF16BEOffs[]=
684 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
685 int32_t fmUTF16BEOffs[] =
686 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
687
688 const uint8_t expectedUTF16LE[] =
689 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
690 int32_t toUTF16LEOffs[]=
691 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
692 int32_t fmUTF16LEOffs[] =
693 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
694
695 const uint8_t expectedUTF32BE[] =
696 { 0x00, 0x00, 0x00, 0x31,
697 0x00, 0x00, 0x00, 0x32,
698 0x00, 0x00, 0x00, 0x33,
699 0x00, 0x00, 0x00, 0x00,
700 0x00, 0x00, 0x4e, 0x00,
701 0x00, 0x00, 0x4e, 0x8c,
702 0x00, 0x00, 0x4e, 0x09,
703 0x00, 0x00, 0x00, 0x2e };
704 int32_t toUTF32BEOffs[]=
705 { 0x00, 0x00, 0x00, 0x00,
706 0x01, 0x01, 0x01, 0x01,
707 0x02, 0x02, 0x02, 0x02,
708 0x03, 0x03, 0x03, 0x03,
709 0x04, 0x04, 0x04, 0x04,
710 0x05, 0x05, 0x05, 0x05,
711 0x06, 0x06, 0x06, 0x06,
712 0x07, 0x07, 0x07, 0x07,
713 0x08, 0x08, 0x08, 0x08 };
714 int32_t fmUTF32BEOffs[] =
715 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
716
717 const uint8_t expectedUTF32LE[] =
718 { 0x31, 0x00, 0x00, 0x00,
719 0x32, 0x00, 0x00, 0x00,
720 0x33, 0x00, 0x00, 0x00,
721 0x00, 0x00, 0x00, 0x00,
722 0x00, 0x4e, 0x00, 0x00,
723 0x8c, 0x4e, 0x00, 0x00,
724 0x09, 0x4e, 0x00, 0x00,
725 0x2e, 0x00, 0x00, 0x00 };
726 int32_t toUTF32LEOffs[]=
727 { 0x00, 0x00, 0x00, 0x00,
728 0x01, 0x01, 0x01, 0x01,
729 0x02, 0x02, 0x02, 0x02,
730 0x03, 0x03, 0x03, 0x03,
731 0x04, 0x04, 0x04, 0x04,
732 0x05, 0x05, 0x05, 0x05,
733 0x06, 0x06, 0x06, 0x06,
734 0x07, 0x07, 0x07, 0x07,
735 0x08, 0x08, 0x08, 0x08 };
736 int32_t fmUTF32LEOffs[] =
737 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
738
739
740
741
742 /** Test chars #2 **/
743
744 /* Sahha [health], slashed h's */
745 const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
746 const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
747
748 /* LMBCS */
749 const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
750 const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
751 int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
752 int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
753 /*********************************** START OF CODE finally *************/
754
755 gInBufferSize = insize;
756 gOutBufferSize = outsize;
757
758 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
759
760
761 #if 1
762 /*UTF-8*/
763 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
764 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
765
766 log_verbose("Test surrogate behaviour for UTF8\n");
767 {
768 const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
769 const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
770 0xf0, 0x90, 0x90, 0x81,
771 0xef, 0xbf, 0xbd
772 };
773 int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
774 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
775 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
776
777
778 }
779 #ifdef U_ENABLE_GENERIC_ISO_2022
780 /*ISO-2022*/
781 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
782 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
783 #endif
784 /*UTF16 LE*/
785 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
786 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
787 /*UTF16 BE*/
788 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
789 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
790 /*UTF32 LE*/
791 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
792 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
793 /*UTF32 BE*/
794 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
795 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
796 /*LATIN_1*/
797 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
798 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
799 /*EBCDIC_STATEFUL*/
800 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
801 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
802
803 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
804 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
805
806 /*MBCS*/
807
808 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
809 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
810 /*DBCS*/
811 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
812 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
813 /*SBCS*/
814 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
815 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
816 /*SBCS*/
817 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
818 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
819
820
821 /****/
822 #endif
823
824 #if 1
825 /*UTF-8*/
826 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
827 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
828 #ifdef U_ENABLE_GENERIC_ISO_2022
829 /*ISO-2022*/
830 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
831 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
832 #endif
833 /*UTF16 LE*/
834 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
835 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
836 /*UTF16 BE*/
837 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
838 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
839 /*UTF32 LE*/
840 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
841 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
842 /*UTF32 BE*/
843 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
844 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
845 /*EBCDIC_STATEFUL*/
846 testConvertToU(expectedIBM930, sizeof(expectedIBM930),
847 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
848 /*MBCS*/
849 testConvertToU(expectedIBM943, sizeof(expectedIBM943),
850 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
851
852 /* Try it again to make sure it still works */
853 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
854 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
855
856 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
857 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
858
859 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
860 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
861
862 /*LMBCS*/
863 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
864 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
865 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
866 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
867
868 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
869 {
870 /* encode directly set D and set O */
871 static const uint8_t utf7[] = {
872 /*
873 Hi Mom -+Jjo--!
874 A+ImIDkQ.
875 +-
876 +ZeVnLIqe
877 */
878 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
879 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
880 0x2b, 0x2d,
881 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
882 };
883 static const UChar unicode[] = {
884 /*
885 Hi Mom -<WHITE SMILING FACE>-!
886 A<NOT IDENTICAL TO><ALPHA>.
887 +
888 [Japanese word "nihongo"]
889 */
890 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
891 0x41, 0x2262, 0x0391, 0x2e,
892 0x2b,
893 0x65e5, 0x672c, 0x8a9e
894 };
895 static const int32_t toUnicodeOffsets[] = {
896 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
897 15, 17, 19, 23,
898 24,
899 27, 29, 32
900 };
901 static const int32_t fromUnicodeOffsets[] = {
902 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
903 11, 12, 12, 12, 13, 13, 13, 13, 14,
904 15, 15,
905 16, 16, 16, 17, 17, 17, 18, 18, 18
906 };
907
908 /* same but escaping set O (the exclamation mark) */
909 static const uint8_t utf7Restricted[] = {
910 /*
911 Hi Mom -+Jjo--+ACE-
912 A+ImIDkQ.
913 +-
914 +ZeVnLIqe
915 */
916 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
917 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
918 0x2b, 0x2d,
919 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
920 };
921 static const int32_t toUnicodeOffsetsR[] = {
922 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
923 19, 21, 23, 27,
924 28,
925 31, 33, 36
926 };
927 static const int32_t fromUnicodeOffsetsR[] = {
928 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
929 11, 12, 12, 12, 13, 13, 13, 13, 14,
930 15, 15,
931 16, 16, 16, 17, 17, 17, 18, 18, 18
932 };
933
934 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
935
936 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
937
938 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
939
940 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
941 }
942
943 /*
944 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
945 * modified according to RFC 2060,
946 * and supplemented with the one example in RFC 2060 itself.
947 */
948 {
949 static const uint8_t imap[] = {
950 /* Hi Mom -&Jjo--!
951 A&ImIDkQ-.
952 &-
953 &ZeVnLIqe-
954 \
955 ~peter
956 /mail
957 /&ZeVnLIqe-
958 /&U,BTFw-
959 */
960 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
961 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
962 0x26, 0x2d,
963 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
964 0x5c,
965 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
966 0x2f, 0x6d, 0x61, 0x69, 0x6c,
967 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
968 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
969 };
970 static const UChar unicode[] = {
971 /* Hi Mom -<WHITE SMILING FACE>-!
972 A<NOT IDENTICAL TO><ALPHA>.
973 &
974 [Japanese word "nihongo"]
975 \
976 ~peter
977 /mail
978 /<65e5, 672c, 8a9e>
979 /<53f0, 5317>
980 */
981 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
982 0x41, 0x2262, 0x0391, 0x2e,
983 0x26,
984 0x65e5, 0x672c, 0x8a9e,
985 0x5c,
986 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
987 0x2f, 0x6d, 0x61, 0x69, 0x6c,
988 0x2f, 0x65e5, 0x672c, 0x8a9e,
989 0x2f, 0x53f0, 0x5317
990 };
991 static const int32_t toUnicodeOffsets[] = {
992 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
993 15, 17, 19, 24,
994 25,
995 28, 30, 33,
996 37,
997 38, 39, 40, 41, 42, 43,
998 44, 45, 46, 47, 48,
999 49, 51, 53, 56,
1000 60, 62, 64
1001 };
1002 static const int32_t fromUnicodeOffsets[] = {
1003 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1004 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1005 15, 15,
1006 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1007 19,
1008 20, 21, 22, 23, 24, 25,
1009 26, 27, 28, 29, 30,
1010 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1011 35, 36, 36, 36, 37, 37, 37, 37, 37
1012 };
1013
1014 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1015
1016 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1017 }
1018
1019 /* Test UTF-8 bad data handling*/
1020 {
1021 static const uint8_t utf8[]={
1022 0x61,
1023 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1024 0x00,
1025 0x62,
1026 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1027 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1028 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1029 0xdf, 0xbf, /* 7ff */
1030 0xbf, /* truncated tail */
1031 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1032 0x02
1033 };
1034
1035 static const uint16_t utf8Expected[]={
1036 0x0061,
1037 0xfffd,
1038 0x0000,
1039 0x0062,
1040 0xfffd,
1041 0xfffd,
1042 0xdbff, 0xdfff,
1043 0x07ff,
1044 0xfffd,
1045 0xfffd,
1046 0x0002
1047 };
1048
1049 static const int32_t utf8Offsets[]={
1050 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1051 };
1052 testConvertToU(utf8, sizeof(utf8),
1053 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1054
1055 }
1056
1057 /* Test UTF-32BE bad data handling*/
1058 {
1059 static const uint8_t utf32[]={
1060 0x00, 0x00, 0x00, 0x61,
1061 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1062 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1063 0x00, 0x00, 0x00, 0x62,
1064 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1065 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1066 0x00, 0x00, 0x01, 0x62,
1067 0x00, 0x00, 0x02, 0x62
1068 };
1069
1070 static const uint16_t utf32Expected[]={
1071 0x0061,
1072 0xfffd, /* 0x110000 out of range */
1073 0xDBFF, /* 0x10FFFF in range */
1074 0xDFFF,
1075 0x0062,
1076 0xfffd, /* 0xffffffff out of range */
1077 0xfffd, /* 0x7fffffff out of range */
1078 0x0162,
1079 0x0262
1080 };
1081
1082 static const int32_t utf32Offsets[]={
1083 0, 4, 8, 8, 12, 16, 20, 24, 28
1084 };
1085 testConvertToU(utf32, sizeof(utf32),
1086 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1087
1088 }
1089
1090 /* Test UTF-32LE bad data handling*/
1091 {
1092 static const uint8_t utf32[]={
1093 0x61, 0x00, 0x00, 0x00,
1094 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1095 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1096 0x62, 0x00, 0x00, 0x00,
1097 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1098 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1099 0x62, 0x01, 0x00, 0x00,
1100 0x62, 0x02, 0x00, 0x00,
1101 };
1102
1103 static const uint16_t utf32Expected[]={
1104 0x0061,
1105 0xfffd, /* 0x110000 out of range */
1106 0xDBFF, /* 0x10FFFF in range */
1107 0xDFFF,
1108 0x0062,
1109 0xfffd, /* 0xffffffff out of range */
1110 0xfffd, /* 0x7fffffff out of range */
1111 0x0162,
1112 0x0262
1113 };
1114
1115 static const int32_t utf32Offsets[]={
1116 0, 4, 8, 8, 12, 16, 20, 24, 28
1117 };
1118 testConvertToU(utf32, sizeof(utf32),
1119 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1120
1121 }
1122 }
1123
1124 static void TestCoverageMBCS(){
1125 #if 0
1126 UErrorCode status = U_ZERO_ERROR;
1127 const char *directory = loadTestData(&status);
1128 char* tdpath = NULL;
1129 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1130 int len = strlen(directory);
1131 char* index=NULL;
1132
1133 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1134 uprv_strcpy(saveDirectory,u_getDataDirectory());
1135 log_verbose("Retrieved data directory %s \n",saveDirectory);
1136 uprv_strcpy(tdpath,directory);
1137 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1138
1139 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1140 *(index+1)=0;
1141 }
1142 u_setDataDirectory(tdpath);
1143 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1144 #endif
1145
1146 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1147 which is test file for MBCS conversion with single-byte codepage data.*/
1148 {
1149
1150 /* MBCS with single byte codepage data test1.ucm*/
1151 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1152 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1153 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1154
1155 /*from Unicode*/
1156 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1157 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1158 }
1159
1160 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1161 which is test file for MBCS conversion with three-byte codepage data.*/
1162 {
1163
1164 /* MBCS with three byte codepage data test3.ucm*/
1165 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1166 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1167 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1168
1169 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1170 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1171 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1172
1173 /*from Unicode*/
1174 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1175 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1176
1177 /*to Unicode*/
1178 testConvertToU(test3input, sizeof(test3input),
1179 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1180
1181 }
1182
1183 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1184 which is test file for MBCS conversion with four-byte codepage data.*/
1185 {
1186
1187 /* MBCS with three byte codepage data test4.ucm*/
1188 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1189 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1190 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1191
1192 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1193 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1194 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1195
1196 /*from Unicode*/
1197 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1198 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1199
1200 /*to Unicode*/
1201 testConvertToU(test4input, sizeof(test4input),
1202 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1203
1204 }
1205 #if 0
1206 free(tdpath);
1207 /* restore the original data directory */
1208 log_verbose("Setting the data directory to %s \n", saveDirectory);
1209 u_setDataDirectory(saveDirectory);
1210 free(saveDirectory);
1211 #endif
1212
1213 }
1214
1215 static void TestConverterType(const char *convName, UConverterType convType) {
1216 UConverter* myConverter;
1217 UErrorCode err = U_ZERO_ERROR;
1218
1219 myConverter = my_ucnv_open(convName, &err);
1220
1221 if (U_FAILURE(err)) {
1222 log_data_err("Failed to create an %s converter\n", convName);
1223 return;
1224 }
1225 else
1226 {
1227 if (ucnv_getType(myConverter)!=convType) {
1228 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1229 convName, convType);
1230 }
1231 else {
1232 log_verbose("ucnv_getType %s ok\n", convName);
1233 }
1234 }
1235 ucnv_close(myConverter);
1236 }
1237
1238 static void TestConverterTypesAndStarters()
1239 {
1240 UConverter* myConverter;
1241 UErrorCode err = U_ZERO_ERROR;
1242 UBool mystarters[256];
1243
1244 /* const UBool expectedKSCstarters[256] = {
1245 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1252 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1253 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1254 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1255 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1256 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1257 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1258 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1259 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1263 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1264 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1265 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1266 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1267 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1268 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1269 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1270 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1271
1272
1273 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1274
1275 myConverter = ucnv_open("ksc", &err);
1276 if (U_FAILURE(err)) {
1277 log_data_err("Failed to create an ibm-ksc converter\n");
1278 return;
1279 }
1280 else
1281 {
1282 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1283 log_err("ucnv_getType Failed for ibm-949\n");
1284 else
1285 log_verbose("ucnv_getType ibm-949 ok\n");
1286
1287 if(myConverter!=NULL)
1288 ucnv_getStarters(myConverter, mystarters, &err);
1289
1290 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1291 log_err("Failed ucnv_getStarters for ksc\n");
1292 else
1293 log_verbose("ucnv_getStarters ok\n");*/
1294
1295 }
1296 ucnv_close(myConverter);
1297
1298 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1299 TestConverterType("ibm-878", UCNV_SBCS);
1300 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1301 TestConverterType("ibm-1208", UCNV_UTF8);
1302 TestConverterType("utf-8", UCNV_UTF8);
1303 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1304 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1305 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1306 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1307 #ifdef U_ENABLE_GENERIC_ISO_2022
1308 TestConverterType("iso-2022", UCNV_ISO_2022);
1309 #endif
1310 TestConverterType("hz", UCNV_HZ);
1311 TestConverterType("scsu", UCNV_SCSU);
1312 TestConverterType("x-iscii-de", UCNV_ISCII);
1313 TestConverterType("ascii", UCNV_US_ASCII);
1314 TestConverterType("utf-7", UCNV_UTF7);
1315 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1316 TestConverterType("bocu-1", UCNV_BOCU1);
1317 }
1318
1319 static void
1320 TestAmbiguousConverter(UConverter *cnv) {
1321 static const char inBytes[2]={ 0x61, 0x5c };
1322 UChar outUnicode[20]={ 0, 0, 0, 0 };
1323
1324 const char *s;
1325 UChar *u;
1326 UErrorCode errorCode;
1327 UBool isAmbiguous;
1328
1329 /* try to convert an 'a' and a US-ASCII backslash */
1330 errorCode=U_ZERO_ERROR;
1331 s=inBytes;
1332 u=outUnicode;
1333 ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1334 if(U_FAILURE(errorCode)) {
1335 /* we do not care about general failures in this test; the input may just not be mappable */
1336 return;
1337 }
1338
1339 if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1340 /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1341 return;
1342 }
1343
1344 isAmbiguous=ucnv_isAmbiguous(cnv);
1345
1346 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1347 if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1348 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1349 ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1350 return;
1351 }
1352
1353 if(outUnicode[1]!=0x5c) {
1354 /* needs fixup, fix it */
1355 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1356 if(outUnicode[1]!=0x5c) {
1357 /* the fix failed */
1358 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1359 return;
1360 }
1361 }
1362 }
1363
1364 static void TestAmbiguous()
1365 {
1366 UErrorCode status = U_ZERO_ERROR;
1367 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1368 const char target[] = {
1369 /* "\\usr\\local\\share\\data\\icutest.txt" */
1370 0x5c, 0x75, 0x73, 0x72,
1371 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1372 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1373 0x5c, 0x64, 0x61, 0x74, 0x61,
1374 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1375 0
1376 };
1377 UChar asciiResult[200], sjisResult[200];
1378 int32_t asciiLength = 0, sjisLength = 0, i;
1379 const char *name;
1380
1381 /* enumerate all converters */
1382 status=U_ZERO_ERROR;
1383 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1384 cnv=ucnv_open(name, &status);
1385 if(U_SUCCESS(status)) {
1386 TestAmbiguousConverter(cnv);
1387 ucnv_close(cnv);
1388 } else {
1389 log_err("error: unable to open available converter \"%s\"\n", name);
1390 status=U_ZERO_ERROR;
1391 }
1392 }
1393
1394 sjis_cnv = ucnv_open("ibm-943", &status);
1395 if (U_FAILURE(status))
1396 {
1397 log_data_err("Failed to create a SJIS converter\n");
1398 return;
1399 }
1400 ascii_cnv = ucnv_open("LATIN-1", &status);
1401 if (U_FAILURE(status))
1402 {
1403 log_data_err("Failed to create a LATIN-1 converter\n");
1404 ucnv_close(sjis_cnv);
1405 return;
1406 }
1407 /* convert target from SJIS to Unicode */
1408 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1409 if (U_FAILURE(status))
1410 {
1411 log_err("Failed to convert the SJIS string.\n");
1412 ucnv_close(sjis_cnv);
1413 ucnv_close(ascii_cnv);
1414 return;
1415 }
1416 /* convert target from Latin-1 to Unicode */
1417 asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1418 if (U_FAILURE(status))
1419 {
1420 log_err("Failed to convert the Latin-1 string.\n");
1421 free(sjisResult);
1422 ucnv_close(sjis_cnv);
1423 ucnv_close(ascii_cnv);
1424 return;
1425 }
1426 if (!ucnv_isAmbiguous(sjis_cnv))
1427 {
1428 log_err("SJIS converter should contain ambiguous character mappings.\n");
1429 free(sjisResult);
1430 free(asciiResult);
1431 ucnv_close(sjis_cnv);
1432 ucnv_close(ascii_cnv);
1433 return;
1434 }
1435 if (u_strcmp(sjisResult, asciiResult) == 0)
1436 {
1437 log_err("File separators for SJIS don't need to be fixed.\n");
1438 }
1439 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1440 if (u_strcmp(sjisResult, asciiResult) != 0)
1441 {
1442 log_err("Fixing file separator for SJIS failed.\n");
1443 }
1444 ucnv_close(sjis_cnv);
1445 ucnv_close(ascii_cnv);
1446 }
1447
1448 static void
1449 TestSignatureDetection(){
1450 /* with null terminated strings */
1451 {
1452 static const char* data[] = {
1453 "\xFE\xFF\x00\x00", /* UTF-16BE */
1454 "\xFF\xFE\x00\x00", /* UTF-16LE */
1455 "\xEF\xBB\xBF\x00", /* UTF-8 */
1456 "\x0E\xFE\xFF\x00", /* SCSU */
1457
1458 "\xFE\xFF", /* UTF-16BE */
1459 "\xFF\xFE", /* UTF-16LE */
1460 "\xEF\xBB\xBF", /* UTF-8 */
1461 "\x0E\xFE\xFF", /* SCSU */
1462
1463 "\xFE\xFF\x41\x42", /* UTF-16BE */
1464 "\xFF\xFE\x41\x41", /* UTF-16LE */
1465 "\xEF\xBB\xBF\x41", /* UTF-8 */
1466 "\x0E\xFE\xFF\x41", /* SCSU */
1467
1468 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1469 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1470 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1471 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1472 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1473
1474 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1475 };
1476 static const char* expected[] = {
1477 "UTF-16BE",
1478 "UTF-16LE",
1479 "UTF-8",
1480 "SCSU",
1481
1482 "UTF-16BE",
1483 "UTF-16LE",
1484 "UTF-8",
1485 "SCSU",
1486
1487 "UTF-16BE",
1488 "UTF-16LE",
1489 "UTF-8",
1490 "SCSU",
1491
1492 "UTF-7",
1493 "UTF-7",
1494 "UTF-7",
1495 "UTF-7",
1496 "UTF-7",
1497 "UTF-EBCDIC"
1498 };
1499 static const int32_t expectedLength[] ={
1500 2,
1501 2,
1502 3,
1503 3,
1504
1505 2,
1506 2,
1507 3,
1508 3,
1509
1510 2,
1511 2,
1512 3,
1513 3,
1514
1515 5,
1516 4,
1517 4,
1518 4,
1519 4,
1520 4
1521 };
1522 int i=0;
1523 UErrorCode err;
1524 int32_t signatureLength = -1;
1525 const char* source = NULL;
1526 const char* enc = NULL;
1527 for( ; i<sizeof(data)/sizeof(char*); i++){
1528 err = U_ZERO_ERROR;
1529 source = data[i];
1530 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1531 if(U_FAILURE(err)){
1532 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1533 continue;
1534 }
1535 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1536 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1537 continue;
1538 }
1539 if(signatureLength != expectedLength[i]){
1540 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1541 }
1542 }
1543 }
1544 {
1545 static const char* data[] = {
1546 "\xFE\xFF\x00", /* UTF-16BE */
1547 "\xFF\xFE\x00", /* UTF-16LE */
1548 "\xEF\xBB\xBF\x00", /* UTF-8 */
1549 "\x0E\xFE\xFF\x00", /* SCSU */
1550 "\x00\x00\xFE\xFF", /* UTF-32BE */
1551 "\xFF\xFE\x00\x00", /* UTF-32LE */
1552 "\xFE\xFF", /* UTF-16BE */
1553 "\xFF\xFE", /* UTF-16LE */
1554 "\xEF\xBB\xBF", /* UTF-8 */
1555 "\x0E\xFE\xFF", /* SCSU */
1556 "\x00\x00\xFE\xFF", /* UTF-32BE */
1557 "\xFF\xFE\x00\x00", /* UTF-32LE */
1558 "\xFE\xFF\x41\x42", /* UTF-16BE */
1559 "\xFF\xFE\x41\x41", /* UTF-16LE */
1560 "\xEF\xBB\xBF\x41", /* UTF-8 */
1561 "\x0E\xFE\xFF\x41", /* SCSU */
1562 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1563 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1564 "\xFB\xEE\x28", /* BOCU-1 */
1565 "\xFF\x41\x42" /* NULL */
1566 };
1567 static const int len[] = {
1568 3,
1569 3,
1570 4,
1571 4,
1572 4,
1573 4,
1574 2,
1575 2,
1576 3,
1577 3,
1578 4,
1579 4,
1580 4,
1581 4,
1582 4,
1583 4,
1584 5,
1585 5,
1586 3,
1587 3
1588 };
1589
1590 static const char* expected[] = {
1591 "UTF-16BE",
1592 "UTF-16LE",
1593 "UTF-8",
1594 "SCSU",
1595 "UTF-32BE",
1596 "UTF-32LE",
1597 "UTF-16BE",
1598 "UTF-16LE",
1599 "UTF-8",
1600 "SCSU",
1601 "UTF-32BE",
1602 "UTF-32LE",
1603 "UTF-16BE",
1604 "UTF-16LE",
1605 "UTF-8",
1606 "SCSU",
1607 "UTF-32BE",
1608 "UTF-32LE",
1609 "BOCU-1",
1610 NULL
1611 };
1612 static const int32_t expectedLength[] ={
1613 2,
1614 2,
1615 3,
1616 3,
1617 4,
1618 4,
1619 2,
1620 2,
1621 3,
1622 3,
1623 4,
1624 4,
1625 2,
1626 2,
1627 3,
1628 3,
1629 4,
1630 4,
1631 3,
1632 0
1633 };
1634 int i=0;
1635 UErrorCode err;
1636 int32_t signatureLength = -1;
1637 int32_t sourceLength=-1;
1638 const char* source = NULL;
1639 const char* enc = NULL;
1640 for( ; i<sizeof(data)/sizeof(char*); i++){
1641 err = U_ZERO_ERROR;
1642 source = data[i];
1643 sourceLength = len[i];
1644 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1645 if(U_FAILURE(err)){
1646 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1647 continue;
1648 }
1649 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1650 if(expected[i] !=NULL){
1651 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1652 continue;
1653 }
1654 }
1655 if(signatureLength != expectedLength[i]){
1656 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1657 }
1658 }
1659 }
1660 }
1661
1662 void
1663 static TestUTF7() {
1664 /* test input */
1665 static const uint8_t in[]={
1666 /* H - +Jjo- - ! +- +2AHcAQ */
1667 0x48,
1668 0x2d,
1669 0x2b, 0x4a, 0x6a, 0x6f,
1670 0x2d, 0x2d,
1671 0x21,
1672 0x2b, 0x2d,
1673 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1674 };
1675
1676 /* expected test results */
1677 static const int32_t results[]={
1678 /* number of bytes read, code point */
1679 1, 0x48,
1680 1, 0x2d,
1681 4, 0x263a, /* <WHITE SMILING FACE> */
1682 2, 0x2d,
1683 1, 0x21,
1684 2, 0x2b,
1685 7, 0x10401
1686 };
1687
1688 const char *cnvName;
1689 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1690 UErrorCode errorCode=U_ZERO_ERROR;
1691 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1692 if(U_FAILURE(errorCode)) {
1693 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1694 return;
1695 }
1696 TestNextUChar(cnv, source, limit, results, "UTF-7");
1697 /* Test the condition when source >= sourceLimit */
1698 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1699 cnvName = ucnv_getName(cnv, &errorCode);
1700 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1701 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1702 }
1703 ucnv_close(cnv);
1704 }
1705
1706 void
1707 static TestIMAP() {
1708 /* test input */
1709 static const uint8_t in[]={
1710 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1711 0x48,
1712 0x2d,
1713 0x26, 0x4a, 0x6a, 0x6f,
1714 0x2d, 0x2d,
1715 0x21,
1716 0x26, 0x2d,
1717 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1718 };
1719
1720 /* expected test results */
1721 static const int32_t results[]={
1722 /* number of bytes read, code point */
1723 1, 0x48,
1724 1, 0x2d,
1725 4, 0x263a, /* <WHITE SMILING FACE> */
1726 2, 0x2d,
1727 1, 0x21,
1728 2, 0x26,
1729 7, 0x10401
1730 };
1731
1732 const char *cnvName;
1733 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1734 UErrorCode errorCode=U_ZERO_ERROR;
1735 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1736 if(U_FAILURE(errorCode)) {
1737 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1738 return;
1739 }
1740 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1741 /* Test the condition when source >= sourceLimit */
1742 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1743 cnvName = ucnv_getName(cnv, &errorCode);
1744 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1745 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1746 }
1747 ucnv_close(cnv);
1748 }
1749
1750 void
1751 static TestUTF8() {
1752 /* test input */
1753 static const uint8_t in[]={
1754 0x61,
1755 0xc2, 0x80,
1756 0xe0, 0xa0, 0x80,
1757 0xf0, 0x90, 0x80, 0x80,
1758 0xf4, 0x84, 0x8c, 0xa1,
1759 0xf0, 0x90, 0x90, 0x81
1760 };
1761
1762 /* expected test results */
1763 static const int32_t results[]={
1764 /* number of bytes read, code point */
1765 1, 0x61,
1766 2, 0x80,
1767 3, 0x800,
1768 4, 0x10000,
1769 4, 0x104321,
1770 4, 0x10401
1771 };
1772
1773 /* error test input */
1774 static const uint8_t in2[]={
1775 0x61,
1776 0xc0, 0x80, /* illegal non-shortest form */
1777 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1778 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1779 0xc0, 0xc0, /* illegal trail byte */
1780 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1781 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1782 0xfe, /* illegal byte altogether */
1783 0x62
1784 };
1785
1786 /* expected error test results */
1787 static const int32_t results2[]={
1788 /* number of bytes read, code point */
1789 1, 0x61,
1790 22, 0x62
1791 };
1792
1793 UConverterToUCallback cb;
1794 const void *p;
1795
1796 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1797 UErrorCode errorCode=U_ZERO_ERROR;
1798 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1799 if(U_FAILURE(errorCode)) {
1800 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1801 return;
1802 }
1803 TestNextUChar(cnv, source, limit, results, "UTF-8");
1804 /* Test the condition when source >= sourceLimit */
1805 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1806
1807 /* test error behavior with a skip callback */
1808 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1809 source=(const char *)in2;
1810 limit=(const char *)(in2+sizeof(in2));
1811 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1812
1813 ucnv_close(cnv);
1814 }
1815
1816 void
1817 static TestCESU8() {
1818 /* test input */
1819 static const uint8_t in[]={
1820 0x61,
1821 0xc2, 0x80,
1822 0xe0, 0xa0, 0x80,
1823 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1824 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1825 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1826 0xef, 0xbf, 0xbc
1827 };
1828
1829 /* expected test results */
1830 static const int32_t results[]={
1831 /* number of bytes read, code point */
1832 1, 0x61,
1833 2, 0x80,
1834 3, 0x800,
1835 6, 0x10000,
1836 3, 0xdc01,
1837 -1,0xd802, /* may read 3 or 6 bytes */
1838 -1,0x10ffff,/* may read 0 or 3 bytes */
1839 3, 0xfffc
1840 };
1841
1842 /* error test input */
1843 static const uint8_t in2[]={
1844 0x61,
1845 0xc0, 0x80, /* illegal non-shortest form */
1846 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1847 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1848 0xc0, 0xc0, /* illegal trail byte */
1849 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1850 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1851 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1852 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1853 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1854 0xfe, /* illegal byte altogether */
1855 0x62
1856 };
1857
1858 /* expected error test results */
1859 static const int32_t results2[]={
1860 /* number of bytes read, code point */
1861 1, 0x61,
1862 34, 0x62
1863 };
1864
1865 UConverterToUCallback cb;
1866 const void *p;
1867
1868 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1869 UErrorCode errorCode=U_ZERO_ERROR;
1870 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1871 if(U_FAILURE(errorCode)) {
1872 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1873 return;
1874 }
1875 TestNextUChar(cnv, source, limit, results, "CESU-8");
1876 /* Test the condition when source >= sourceLimit */
1877 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1878
1879 /* test error behavior with a skip callback */
1880 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1881 source=(const char *)in2;
1882 limit=(const char *)(in2+sizeof(in2));
1883 TestNextUChar(cnv, source, limit, results2, "CESU-8");
1884
1885 ucnv_close(cnv);
1886 }
1887
1888 void
1889 static TestUTF16() {
1890 /* test input */
1891 static const uint8_t in1[]={
1892 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1893 };
1894 static const uint8_t in2[]={
1895 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1896 };
1897 static const uint8_t in3[]={
1898 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1899 };
1900
1901 /* expected test results */
1902 static const int32_t results1[]={
1903 /* number of bytes read, code point */
1904 4, 0x4e00,
1905 2, 0xfeff
1906 };
1907 static const int32_t results2[]={
1908 /* number of bytes read, code point */
1909 4, 0x004e,
1910 2, 0xfffe
1911 };
1912 static const int32_t results3[]={
1913 /* number of bytes read, code point */
1914 2, 0xfefe,
1915 2, 0x4e00,
1916 2, 0xfeff,
1917 4, 0x20001
1918 };
1919
1920 const char *source, *limit;
1921
1922 UErrorCode errorCode=U_ZERO_ERROR;
1923 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
1924 if(U_FAILURE(errorCode)) {
1925 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
1926 return;
1927 }
1928
1929 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
1930 TestNextUChar(cnv, source, limit, results1, "UTF-16");
1931
1932 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
1933 ucnv_resetToUnicode(cnv);
1934 TestNextUChar(cnv, source, limit, results2, "UTF-16");
1935
1936 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
1937 ucnv_resetToUnicode(cnv);
1938 TestNextUChar(cnv, source, limit, results3, "UTF-16");
1939
1940 /* Test the condition when source >= sourceLimit */
1941 ucnv_resetToUnicode(cnv);
1942 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1943
1944 ucnv_close(cnv);
1945 }
1946
1947 void
1948 static TestUTF16BE() {
1949 /* test input */
1950 static const uint8_t in[]={
1951 0x00, 0x61,
1952 0x00, 0xc0,
1953 0x00, 0x31,
1954 0x00, 0xf4,
1955 0xce, 0xfe,
1956 0xd8, 0x01, 0xdc, 0x01
1957 };
1958
1959 /* expected test results */
1960 static const int32_t results[]={
1961 /* number of bytes read, code point */
1962 2, 0x61,
1963 2, 0xc0,
1964 2, 0x31,
1965 2, 0xf4,
1966 2, 0xcefe,
1967 4, 0x10401
1968 };
1969
1970 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1971 UErrorCode errorCode=U_ZERO_ERROR;
1972 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
1973 if(U_FAILURE(errorCode)) {
1974 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
1975 return;
1976 }
1977 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
1978 /* Test the condition when source >= sourceLimit */
1979 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1980 /*Test for the condition where there is an invalid character*/
1981 {
1982 static const uint8_t source2[]={0x61};
1983 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1984 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
1985 }
1986 #if 0
1987 /*
1988 * Test disabled because currently the UTF-16BE/LE converters are supposed
1989 * to not set errors for unpaired surrogates.
1990 * This may change with
1991 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
1992 */
1993
1994 /*Test for the condition where there is a surrogate pair*/
1995 {
1996 const uint8_t source2[]={0xd8, 0x01};
1997 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
1998 }
1999 #endif
2000 ucnv_close(cnv);
2001 }
2002
2003 static void
2004 TestUTF16LE() {
2005 /* test input */
2006 static const uint8_t in[]={
2007 0x61, 0x00,
2008 0x31, 0x00,
2009 0x4e, 0x2e,
2010 0x4e, 0x00,
2011 0x01, 0xd8, 0x01, 0xdc
2012 };
2013
2014 /* expected test results */
2015 static const int32_t results[]={
2016 /* number of bytes read, code point */
2017 2, 0x61,
2018 2, 0x31,
2019 2, 0x2e4e,
2020 2, 0x4e,
2021 4, 0x10401
2022 };
2023
2024 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2025 UErrorCode errorCode=U_ZERO_ERROR;
2026 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2027 if(U_FAILURE(errorCode)) {
2028 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2029 return;
2030 }
2031 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2032 /* Test the condition when source >= sourceLimit */
2033 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2034 /*Test for the condition where there is an invalid character*/
2035 {
2036 static const uint8_t source2[]={0x61};
2037 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2038 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2039 }
2040 #if 0
2041 /*
2042 * Test disabled because currently the UTF-16BE/LE converters are supposed
2043 * to not set errors for unpaired surrogates.
2044 * This may change with
2045 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2046 */
2047
2048 /*Test for the condition where there is a surrogate character*/
2049 {
2050 static const uint8_t source2[]={0x01, 0xd8};
2051 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2052 }
2053 #endif
2054
2055 ucnv_close(cnv);
2056 }
2057
2058 void
2059 static TestUTF32() {
2060 /* test input */
2061 static const uint8_t in1[]={
2062 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2063 };
2064 static const uint8_t in2[]={
2065 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2066 };
2067 static const uint8_t in3[]={
2068 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2069 };
2070
2071 /* expected test results */
2072 static const int32_t results1[]={
2073 /* number of bytes read, code point */
2074 8, 0x100f00,
2075 4, 0xfeff
2076 };
2077 static const int32_t results2[]={
2078 /* number of bytes read, code point */
2079 8, 0x0f1000,
2080 4, 0xfffe
2081 };
2082 static const int32_t results3[]={
2083 /* number of bytes read, code point */
2084 4, 0xfefe,
2085 4, 0x100f00,
2086 4, 0xfffd, /* unmatched surrogate */
2087 4, 0xfffd /* unmatched surrogate */
2088 };
2089
2090 const char *source, *limit;
2091
2092 UErrorCode errorCode=U_ZERO_ERROR;
2093 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2094 if(U_FAILURE(errorCode)) {
2095 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2096 return;
2097 }
2098
2099 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2100 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2101
2102 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2103 ucnv_resetToUnicode(cnv);
2104 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2105
2106 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2107 ucnv_resetToUnicode(cnv);
2108 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2109
2110 /* Test the condition when source >= sourceLimit */
2111 ucnv_resetToUnicode(cnv);
2112 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2113
2114 ucnv_close(cnv);
2115 }
2116
2117 static void
2118 TestUTF32BE() {
2119 /* test input */
2120 static const uint8_t in[]={
2121 0x00, 0x00, 0x00, 0x61,
2122 0x00, 0x00, 0x30, 0x61,
2123 0x00, 0x00, 0xdc, 0x00,
2124 0x00, 0x00, 0xd8, 0x00,
2125 0x00, 0x00, 0xdf, 0xff,
2126 0x00, 0x00, 0xff, 0xfe,
2127 0x00, 0x10, 0xab, 0xcd,
2128 0x00, 0x10, 0xff, 0xff
2129 };
2130
2131 /* expected test results */
2132 static const int32_t results[]={
2133 /* number of bytes read, code point */
2134 4, 0x61,
2135 4, 0x3061,
2136 4, 0xfffd,
2137 4, 0xfffd,
2138 4, 0xfffd,
2139 4, 0xfffe,
2140 4, 0x10abcd,
2141 4, 0x10ffff
2142 };
2143
2144 /* error test input */
2145 static const uint8_t in2[]={
2146 0x00, 0x00, 0x00, 0x61,
2147 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2148 0x00, 0x00, 0x00, 0x62,
2149 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2150 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2151 0x00, 0x00, 0x01, 0x62,
2152 0x00, 0x00, 0x02, 0x62
2153 };
2154
2155 /* expected error test results */
2156 static const int32_t results2[]={
2157 /* number of bytes read, code point */
2158 4, 0x61,
2159 8, 0x62,
2160 12, 0x162,
2161 4, 0x262
2162 };
2163
2164 UConverterToUCallback cb;
2165 const void *p;
2166
2167 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2168 UErrorCode errorCode=U_ZERO_ERROR;
2169 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2170 if(U_FAILURE(errorCode)) {
2171 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2172 return;
2173 }
2174 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2175
2176 /* Test the condition when source >= sourceLimit */
2177 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2178
2179 /* test error behavior with a skip callback */
2180 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2181 source=(const char *)in2;
2182 limit=(const char *)(in2+sizeof(in2));
2183 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2184
2185 ucnv_close(cnv);
2186 }
2187
2188 static void
2189 TestUTF32LE() {
2190 /* test input */
2191 static const uint8_t in[]={
2192 0x61, 0x00, 0x00, 0x00,
2193 0x61, 0x30, 0x00, 0x00,
2194 0x00, 0xdc, 0x00, 0x00,
2195 0x00, 0xd8, 0x00, 0x00,
2196 0xff, 0xdf, 0x00, 0x00,
2197 0xfe, 0xff, 0x00, 0x00,
2198 0xcd, 0xab, 0x10, 0x00,
2199 0xff, 0xff, 0x10, 0x00
2200 };
2201
2202 /* expected test results */
2203 static const int32_t results[]={
2204 /* number of bytes read, code point */
2205 4, 0x61,
2206 4, 0x3061,
2207 4, 0xfffd,
2208 4, 0xfffd,
2209 4, 0xfffd,
2210 4, 0xfffe,
2211 4, 0x10abcd,
2212 4, 0x10ffff
2213 };
2214
2215 /* error test input */
2216 static const uint8_t in2[]={
2217 0x61, 0x00, 0x00, 0x00,
2218 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2219 0x62, 0x00, 0x00, 0x00,
2220 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2221 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2222 0x62, 0x01, 0x00, 0x00,
2223 0x62, 0x02, 0x00, 0x00,
2224 };
2225
2226 /* expected error test results */
2227 static const int32_t results2[]={
2228 /* number of bytes read, code point */
2229 4, 0x61,
2230 8, 0x62,
2231 12, 0x162,
2232 4, 0x262,
2233 };
2234
2235 UConverterToUCallback cb;
2236 const void *p;
2237
2238 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2239 UErrorCode errorCode=U_ZERO_ERROR;
2240 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2241 if(U_FAILURE(errorCode)) {
2242 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2243 return;
2244 }
2245 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2246
2247 /* Test the condition when source >= sourceLimit */
2248 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2249
2250 /* test error behavior with a skip callback */
2251 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2252 source=(const char *)in2;
2253 limit=(const char *)(in2+sizeof(in2));
2254 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2255
2256 ucnv_close(cnv);
2257 }
2258
2259 static void
2260 TestLATIN1() {
2261 /* test input */
2262 static const uint8_t in[]={
2263 0x61,
2264 0x31,
2265 0x32,
2266 0xc0,
2267 0xf0,
2268 0xf4,
2269 };
2270
2271 /* expected test results */
2272 static const int32_t results[]={
2273 /* number of bytes read, code point */
2274 1, 0x61,
2275 1, 0x31,
2276 1, 0x32,
2277 1, 0xc0,
2278 1, 0xf0,
2279 1, 0xf4,
2280 };
2281 static const uint16_t in1[] = {
2282 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2283 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2284 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2285 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2286 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2287 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2288 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2289 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2290 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2291 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2292 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2293 0xcb, 0x82
2294 };
2295 static const uint8_t out1[] = {
2296 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2297 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2298 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2299 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2300 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2301 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2302 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2303 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2304 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2305 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2306 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2307 0xcb, 0x82
2308 };
2309 static const uint16_t in2[]={
2310 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2311 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2312 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2313 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2314 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2315 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2316 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2317 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2318 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2319 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2320 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2321 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2322 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2323 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2324 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2325 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2326 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2327 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2328 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2329 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2330 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2331 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2332 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2333 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2334 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2335 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2336 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2337 0x37, 0x20, 0x2A, 0x2F,
2338 };
2339 static const unsigned char out2[]={
2340 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2341 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2342 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2343 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2344 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2345 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2346 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2347 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2348 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2349 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2350 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2351 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2352 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2353 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2354 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2355 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2356 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2357 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2358 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2359 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2360 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2361 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2362 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2363 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2364 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2365 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2366 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2367 0x37, 0x20, 0x2A, 0x2F,
2368 };
2369 const char *source=(const char *)in;
2370 const char *limit=(const char *)in+sizeof(in);
2371
2372 UErrorCode errorCode=U_ZERO_ERROR;
2373 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2374 if(U_FAILURE(errorCode)) {
2375 log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2376 return;
2377 }
2378 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2379 /* Test the condition when source >= sourceLimit */
2380 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2381 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2382 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2383
2384 ucnv_close(cnv);
2385 }
2386
2387 static void
2388 TestSBCS() {
2389 /* test input */
2390 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2391 /* expected test results */
2392 static const int32_t results[]={
2393 /* number of bytes read, code point */
2394 1, 0x61,
2395 1, 0xbf,
2396 1, 0xc4,
2397 1, 0x2021,
2398 1, 0xf8ff,
2399 1, 0x00d9
2400 };
2401
2402 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2403 UErrorCode errorCode=U_ZERO_ERROR;
2404 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2405 if(U_FAILURE(errorCode)) {
2406 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2407 return;
2408 }
2409 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2410 /* Test the condition when source >= sourceLimit */
2411 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2412 /*Test for Illegal character */ /*
2413 {
2414 static const uint8_t input1[]={ 0xA1 };
2415 const char* illegalsource=(const char*)input1;
2416 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2417 }
2418 */
2419 ucnv_close(cnv);
2420 }
2421
2422 static void
2423 TestDBCS() {
2424 /* test input */
2425 static const uint8_t in[]={
2426 0x44, 0x6a,
2427 0xc4, 0x9c,
2428 0x7a, 0x74,
2429 0x46, 0xab,
2430 0x42, 0x5b,
2431
2432 };
2433
2434 /* expected test results */
2435 static const int32_t results[]={
2436 /* number of bytes read, code point */
2437 2, 0x00a7,
2438 2, 0xe1d2,
2439 2, 0x6962,
2440 2, 0xf842,
2441 2, 0xffe5,
2442 };
2443
2444 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2445 UErrorCode errorCode=U_ZERO_ERROR;
2446
2447 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2448 if(U_FAILURE(errorCode)) {
2449 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2450 return;
2451 }
2452 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2453 /* Test the condition when source >= sourceLimit */
2454 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2455 /*Test for the condition where there is an invalid character*/
2456 {
2457 static const uint8_t source2[]={0x1a, 0x1b};
2458 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2459 }
2460 /*Test for the condition where we have a truncated char*/
2461 {
2462 static const uint8_t source1[]={0xc4};
2463 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2464 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2465 }
2466 ucnv_close(cnv);
2467 }
2468
2469 static void
2470 TestMBCS() {
2471 /* test input */
2472 static const uint8_t in[]={
2473 0x01,
2474 0xa6, 0xa3,
2475 0x00,
2476 0xa6, 0xa1,
2477 0x08,
2478 0xc2, 0x76,
2479 0xc2, 0x78,
2480
2481 };
2482
2483 /* expected test results */
2484 static const int32_t results[]={
2485 /* number of bytes read, code point */
2486 1, 0x0001,
2487 2, 0x250c,
2488 1, 0x0000,
2489 2, 0x2500,
2490 1, 0x0008,
2491 2, 0xd60c,
2492 2, 0xd60e,
2493 };
2494
2495 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2496 UErrorCode errorCode=U_ZERO_ERROR;
2497
2498 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2499 if(U_FAILURE(errorCode)) {
2500 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2501 return;
2502 }
2503 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2504 /* Test the condition when source >= sourceLimit */
2505 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2506 /*Test for the condition where there is an invalid character*/
2507 {
2508 static const uint8_t source2[]={0xa1, 0x01};
2509 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2510 }
2511 /*Test for the condition where we have a truncated char*/
2512 {
2513 static const uint8_t source1[]={0xc4};
2514 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2515 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2516 }
2517 ucnv_close(cnv);
2518
2519 }
2520
2521 #ifdef U_ENABLE_GENERIC_ISO_2022
2522
2523 static void
2524 TestISO_2022() {
2525 /* test input */
2526 static const uint8_t in[]={
2527 0x1b, 0x25, 0x42,
2528 0x31,
2529 0x32,
2530 0x61,
2531 0xc2, 0x80,
2532 0xe0, 0xa0, 0x80,
2533 0xf0, 0x90, 0x80, 0x80
2534 };
2535
2536
2537
2538 /* expected test results */
2539 static const int32_t results[]={
2540 /* number of bytes read, code point */
2541 4, 0x0031, /* 4 bytes including the escape sequence */
2542 1, 0x0032,
2543 1, 0x61,
2544 2, 0x80,
2545 3, 0x800,
2546 4, 0x10000
2547 };
2548
2549 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2550 UErrorCode errorCode=U_ZERO_ERROR;
2551 UConverter *cnv;
2552
2553 cnv=ucnv_open("ISO_2022", &errorCode);
2554 if(U_FAILURE(errorCode)) {
2555 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2556 return;
2557 }
2558 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2559
2560 /* Test the condition when source >= sourceLimit */
2561 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2562 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2563 /*Test for the condition where we have a truncated char*/
2564 {
2565 static const uint8_t source1[]={0xc4};
2566 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2567 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2568 }
2569 /*Test for the condition where there is an invalid character*/
2570 {
2571 static const uint8_t source2[]={0xa1, 0x01};
2572 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2573 }
2574 ucnv_close(cnv);
2575 }
2576
2577 #endif
2578
2579 static void
2580 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2581 const UChar* uSource;
2582 const UChar* uSourceLimit;
2583 const char* cSource;
2584 const char* cSourceLimit;
2585 UChar *uTargetLimit =NULL;
2586 UChar *uTarget;
2587 char *cTarget;
2588 const char *cTargetLimit;
2589 char *cBuf;
2590 UChar *uBuf,*test;
2591 int32_t uBufSize = 120;
2592 int len=0;
2593 int i=2;
2594 UErrorCode errorCode=U_ZERO_ERROR;
2595 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2596 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2597 ucnv_reset(cnv);
2598 for(;--i>0; ){
2599 uSource = (UChar*) source;
2600 uSourceLimit=(const UChar*)sourceLimit;
2601 cTarget = cBuf;
2602 uTarget = uBuf;
2603 cSource = cBuf;
2604 cTargetLimit = cBuf;
2605 uTargetLimit = uBuf;
2606
2607 do{
2608
2609 cTargetLimit = cTargetLimit+ i;
2610 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2611 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2612 errorCode=U_ZERO_ERROR;
2613 continue;
2614 }
2615
2616 if(U_FAILURE(errorCode)){
2617 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2618 return;
2619 }
2620
2621 }while (uSource<uSourceLimit);
2622
2623 cSourceLimit =cTarget;
2624 do{
2625 uTargetLimit=uTargetLimit+i;
2626 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2627 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2628 errorCode=U_ZERO_ERROR;
2629 continue;
2630 }
2631 if(U_FAILURE(errorCode)){
2632 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2633 return;
2634 }
2635 }while(cSource<cSourceLimit);
2636
2637 uSource = source;
2638 test =uBuf;
2639 for(len=0;len<(int)(source - sourceLimit);len++){
2640 if(uBuf[len]!=uSource[len]){
2641 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2642 }
2643 }
2644 }
2645 free(uBuf);
2646 free(cBuf);
2647 }
2648 /* Test for Jitterbug 778 */
2649 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2650 const UChar* uSource;
2651 const UChar* uSourceLimit;
2652 const char* cSource;
2653 UChar *uTargetLimit =NULL;
2654 UChar *uTarget;
2655 char *cTarget;
2656 const char *cTargetLimit;
2657 char *cBuf;
2658 UChar *uBuf,*test;
2659 int32_t uBufSize = 120;
2660 int numCharsInTarget=0;
2661 UErrorCode errorCode=U_ZERO_ERROR;
2662 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2663 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2664 uSource = source;
2665 uSourceLimit=sourceLimit;
2666 cTarget = cBuf;
2667 cTargetLimit = cBuf +uBufSize*5;
2668 uTarget = uBuf;
2669 uTargetLimit = uBuf+ uBufSize*5;
2670 ucnv_reset(cnv);
2671 numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (cTargetLimit-cTarget),uSource,(uSourceLimit-uSource), &errorCode);
2672 if(U_FAILURE(errorCode)){
2673 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2674 return;
2675 }
2676 cSource = cBuf;
2677 test =uBuf;
2678 ucnv_toUChars(cnv,uTarget,(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2679 if(U_FAILURE(errorCode)){
2680 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2681 return;
2682 }
2683 uSource = source;
2684 while(uSource<uSourceLimit){
2685 if(*test!=*uSource){
2686
2687 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2688 }
2689 uSource++;
2690 test++;
2691 }
2692 free(uBuf);
2693 free(cBuf);
2694 }
2695
2696 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2697 const UChar* uSource;
2698 const UChar* uSourceLimit;
2699 const char* cSource;
2700 const char* cSourceLimit;
2701 UChar *uTargetLimit =NULL;
2702 UChar *uTarget;
2703 char *cTarget;
2704 const char *cTargetLimit;
2705 char *cBuf;
2706 UChar *uBuf,*test;
2707 int32_t uBufSize = 120;
2708 int len=0;
2709 int i=2;
2710 const UChar *temp = sourceLimit;
2711 UErrorCode errorCode=U_ZERO_ERROR;
2712 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2713 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2714
2715 ucnv_reset(cnv);
2716 for(;--i>0;){
2717 uSource = (UChar*) source;
2718 cTarget = cBuf;
2719 uTarget = uBuf;
2720 cSource = cBuf;
2721 cTargetLimit = cBuf;
2722 uTargetLimit = uBuf+uBufSize*5;
2723 cTargetLimit = cTargetLimit+uBufSize*10;
2724 uSourceLimit=uSource;
2725 do{
2726
2727 if (uSourceLimit < sourceLimit) {
2728 uSourceLimit = uSourceLimit+1;
2729 }
2730 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2731 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2732 errorCode=U_ZERO_ERROR;
2733 continue;
2734 }
2735
2736 if(U_FAILURE(errorCode)){
2737 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2738 return;
2739 }
2740
2741 }while (uSource<temp);
2742
2743 cSourceLimit =cBuf;
2744 do{
2745 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2746 cSourceLimit = cSourceLimit+1;
2747 }
2748 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2749 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2750 errorCode=U_ZERO_ERROR;
2751 continue;
2752 }
2753 if(U_FAILURE(errorCode)){
2754 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2755 return;
2756 }
2757 }while(cSource<cTarget);
2758
2759 uSource = source;
2760 test =uBuf;
2761 for(;len<(int)(source - sourceLimit);len++){
2762 if(uBuf[len]!=uSource[len]){
2763 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2764 }
2765 }
2766 }
2767 free(uBuf);
2768 free(cBuf);
2769 }
2770 static void
2771 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2772 const uint16_t results[], const char* message){
2773 const char* s0;
2774 const char* s=(char*)source;
2775 const uint16_t *r=results;
2776 UErrorCode errorCode=U_ZERO_ERROR;
2777 uint32_t c,exC;
2778 ucnv_reset(cnv);
2779 while(s<limit) {
2780 s0=s;
2781 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2782 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2783 break; /* no more significant input */
2784 } else if(U_FAILURE(errorCode)) {
2785 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2786 break;
2787 } else {
2788 if(UTF_IS_FIRST_SURROGATE(*r)){
2789 int i =0, len = 2;
2790 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2791 r++;
2792 }else{
2793 exC = *r;
2794 }
2795 if(c!=(uint32_t)(exC))
2796 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2797 }
2798 r++;
2799 }
2800 }
2801
2802 static int TestJitterbug930(const char* enc){
2803 UErrorCode err = U_ZERO_ERROR;
2804 UConverter*converter;
2805 char out[80];
2806 char*target = out;
2807 UChar in[4];
2808 const UChar*source = in;
2809 int32_t off[80];
2810 int32_t* offsets = off;
2811 int numOffWritten=0;
2812 UBool flush = 0;
2813 converter = my_ucnv_open(enc, &err);
2814
2815 in[0] = 0x41; /* 0x4E00;*/
2816 in[1] = 0x4E01;
2817 in[2] = 0x4E02;
2818 in[3] = 0x4E03;
2819
2820 memset(off, '*', sizeof(off));
2821
2822 ucnv_fromUnicode (converter,
2823 &target,
2824 target+2,
2825 &source,
2826 source+3,
2827 offsets,
2828 flush,
2829 &err);
2830
2831 /* writes three bytes into the output buffer: 41 1B 24
2832 * but offsets contains 0 1 1
2833 */
2834 while(*offsets< off[10]){
2835 numOffWritten++;
2836 offsets++;
2837 }
2838 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2839 if(numOffWritten!= (int)(target-out)){
2840 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2841 }
2842
2843 err = U_ZERO_ERROR;
2844
2845 memset(off,'*' , sizeof(off));
2846
2847 flush = 1;
2848 offsets=off;
2849 ucnv_fromUnicode (converter,
2850 &target,
2851 target+4,
2852 &source,
2853 source,
2854 offsets,
2855 flush,
2856 &err);
2857 numOffWritten=0;
2858 while(*offsets< off[10]){
2859 numOffWritten++;
2860 if(*offsets!= -1){
2861 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2862 }
2863 offsets++;
2864 }
2865
2866 /* writes 42 43 7A into output buffer,
2867 * offsets contains -1 -1 -1
2868 */
2869 ucnv_close(converter);
2870 return 0;
2871 }
2872
2873 static void
2874 TestHZ() {
2875 /* test input */
2876 static const uint16_t in[]={
2877 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2878 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2879 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2880 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2881 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2882 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2883 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2884 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2885 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2886 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2887 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2888 0x005A, 0x005B, 0x005C, 0x000A
2889 };
2890 const UChar* uSource;
2891 const UChar* uSourceLimit;
2892 const char* cSource;
2893 const char* cSourceLimit;
2894 UChar *uTargetLimit =NULL;
2895 UChar *uTarget;
2896 char *cTarget;
2897 const char *cTargetLimit;
2898 char *cBuf;
2899 UChar *uBuf,*test;
2900 int32_t uBufSize = 120;
2901 UErrorCode errorCode=U_ZERO_ERROR;
2902 UConverter *cnv;
2903 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
2904 int32_t* myOff= offsets;
2905 cnv=ucnv_open("HZ", &errorCode);
2906 if(U_FAILURE(errorCode)) {
2907 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
2908 return;
2909 }
2910
2911 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2912 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2913 uSource = (const UChar*)&in[0];
2914 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
2915 cTarget = cBuf;
2916 cTargetLimit = cBuf +uBufSize*5;
2917 uTarget = uBuf;
2918 uTargetLimit = uBuf+ uBufSize*5;
2919 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
2920 if(U_FAILURE(errorCode)){
2921 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2922 return;
2923 }
2924 cSource = cBuf;
2925 cSourceLimit =cTarget;
2926 test =uBuf;
2927 myOff=offsets;
2928 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
2929 if(U_FAILURE(errorCode)){
2930 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2931 return;
2932 }
2933 uSource = (const UChar*)&in[0];
2934 while(uSource<uSourceLimit){
2935 if(*test!=*uSource){
2936
2937 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2938 }
2939 uSource++;
2940 test++;
2941 }
2942 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
2943 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2944 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2945 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2946 TestJitterbug930("csISO2022JP");
2947 ucnv_close(cnv);
2948 free(offsets);
2949 free(uBuf);
2950 free(cBuf);
2951 }
2952
2953 static void
2954 TestISCII(){
2955 /* test input */
2956 static const uint16_t in[]={
2957 /* test full range of Devanagari */
2958 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
2959 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
2960 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
2961 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
2962 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
2963 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2964 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2965 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2966 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2967 0x096D,0x096E,0x096F,
2968 /* test Soft halant*/
2969 0x0915,0x094d, 0x200D,
2970 /* test explicit halant */
2971 0x0915,0x094d, 0x200c,
2972 /* test double danda */
2973 0x965,
2974 /* test ASCII */
2975 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2976 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2977 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2978 /* tests from Lotus */
2979 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
2980 0x0930,0x094D,0x200D,
2981 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
2982 0x0915,0x0921,0x002B,0x095F,
2983 /* tamil range */
2984 0x0B86, 0xB87, 0xB88,
2985 /* telugu range */
2986 0x0C05, 0x0C02, 0x0C03,0x0c31,
2987 /* kannada range */
2988 0x0C85, 0xC82, 0x0C83,
2989 /* test Abbr sign and Anudatta */
2990 0x0970, 0x952,
2991 /* 0x0958,
2992 0x0959,
2993 0x095A,
2994 0x095B,
2995 0x095C,
2996 0x095D,
2997 0x095E,
2998 0x095F,*/
2999 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3000 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3001 0x090C ,
3002 0x0962,
3003 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3004 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3005 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3006 0x093D /* Avagraha 0xEA, 0xE9*/,
3007 0x0958,
3008 0x0959,
3009 0x095A,
3010 0x095B,
3011 0x095C,
3012 0x095D,
3013 0x095E,
3014 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3015 };
3016 static const unsigned char byteArr[]={
3017
3018 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3019 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3020 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3021 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3022 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3023 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3024 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3025 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3026 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3027 0xf8,0xf9,0xfa,
3028 /* test soft halant */
3029 0xb3, 0xE8, 0xE9,
3030 /* test explicit halant */
3031 0xb3, 0xE8, 0xE8,
3032 /* test double danda */
3033 0xea, 0xea,
3034 /* test ASCII */
3035 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3036 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3037 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3038 /* test ATR code */
3039
3040 /* tests from Lotus */
3041 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3042 0xEF,0x42,0xCF,0xE8,0xD9,
3043 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3044 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3045 /* tamil range */
3046 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3047 /* telugu range */
3048 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3049 /* kannada range */
3050 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3051 /* anudatta and abbreviation sign */
3052 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3053
3054
3055 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3056
3057 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3058
3059 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3060
3061 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3062
3063 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3064
3065 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3066
3067 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3068
3069 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3070
3071 0xB3, 0xE9, /* Ka + NUKTA */
3072
3073 0xB4, 0xE9, /* Kha + NUKTA */
3074
3075 0xB5, 0xE9, /* Ga + NUKTA */
3076
3077 0xBA, 0xE9,
3078
3079 0xBF, 0xE9,
3080
3081 0xC0, 0xE9,
3082
3083 0xC9, 0xE9,
3084 /* INV halant RA */
3085 0xD9, 0xE8, 0xCF,
3086 0x00, 0x00A0,
3087 /* just consume unhandled codepoints */
3088 0xEF, 0x30,
3089
3090 };
3091 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3092 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3093
3094 }
3095
3096 static void
3097 TestISO_2022_JP() {
3098 /* test input */
3099 static const uint16_t in[]={
3100 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3101 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3102 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3103 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3104 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3105 0x201D, 0x3014, 0x000D, 0x000A,
3106 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3107 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3108 };
3109 const UChar* uSource;
3110 const UChar* uSourceLimit;
3111 const char* cSource;
3112 const char* cSourceLimit;
3113 UChar *uTargetLimit =NULL;
3114 UChar *uTarget;
3115 char *cTarget;
3116 const char *cTargetLimit;
3117 char *cBuf;
3118 UChar *uBuf,*test;
3119 int32_t uBufSize = 120;
3120 UErrorCode errorCode=U_ZERO_ERROR;
3121 UConverter *cnv;
3122 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3123 int32_t* myOff= offsets;
3124 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3125 if(U_FAILURE(errorCode)) {
3126 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3127 return;
3128 }
3129
3130 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3131 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3132 uSource = (const UChar*)&in[0];
3133 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3134 cTarget = cBuf;
3135 cTargetLimit = cBuf +uBufSize*5;
3136 uTarget = uBuf;
3137 uTargetLimit = uBuf+ uBufSize*5;
3138 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3139 if(U_FAILURE(errorCode)){
3140 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3141 return;
3142 }
3143 cSource = cBuf;
3144 cSourceLimit =cTarget;
3145 test =uBuf;
3146 myOff=offsets;
3147 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3148 if(U_FAILURE(errorCode)){
3149 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3150 return;
3151 }
3152
3153 uSource = (const UChar*)&in[0];
3154 while(uSource<uSourceLimit){
3155 if(*test!=*uSource){
3156
3157 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3158 }
3159 uSource++;
3160 test++;
3161 }
3162
3163 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3164 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3165 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3166 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3167 TestJitterbug930("csISO2022JP");
3168 ucnv_close(cnv);
3169 free(uBuf);
3170 free(cBuf);
3171 free(offsets);
3172 }
3173
3174 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3175 const UChar* uSource;
3176 const UChar* uSourceLimit;
3177 const char* cSource;
3178 const char* cSourceLimit;
3179 UChar *uTargetLimit =NULL;
3180 UChar *uTarget;
3181 char *cTarget;
3182 const char *cTargetLimit;
3183 char *cBuf;
3184 UChar *uBuf,*test;
3185 int32_t uBufSize = 120*10;
3186 UErrorCode errorCode=U_ZERO_ERROR;
3187 UConverter *cnv;
3188 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3189 int32_t* myOff= offsets;
3190 cnv=my_ucnv_open(conv, &errorCode);
3191 if(U_FAILURE(errorCode)) {
3192 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3193 return;
3194 }
3195
3196 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3197 cBuf =(char*)malloc(uBufSize * sizeof(char));
3198 uSource = (const UChar*)&in[0];
3199 uSourceLimit=uSource+len;
3200 cTarget = cBuf;
3201 cTargetLimit = cBuf +uBufSize;
3202 uTarget = uBuf;
3203 uTargetLimit = uBuf+ uBufSize;
3204 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3205 if(U_FAILURE(errorCode)){
3206 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3207 return;
3208 }
3209 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3210 cSource = cBuf;
3211 cSourceLimit =cTarget;
3212 test =uBuf;
3213 myOff=offsets;
3214 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3215 if(U_FAILURE(errorCode)){
3216 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3217 return;
3218 }
3219
3220 uSource = (const UChar*)&in[0];
3221 while(uSource<uSourceLimit){
3222 if(*test!=*uSource){
3223 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3224 }
3225 uSource++;
3226 test++;
3227 }
3228 TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
3229 TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
3230 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3231 if(byteArr && byteArrLen!=0){
3232 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3233 TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
3234 {
3235 cSource = byteArr;
3236 cSourceLimit = cSource+byteArrLen;
3237 test=uBuf;
3238 myOff = offsets;
3239 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3240 if(U_FAILURE(errorCode)){
3241 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3242 return;
3243 }
3244
3245 uSource = (const UChar*)&in[0];
3246 while(uSource<uSourceLimit){
3247 if(*test!=*uSource){
3248 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3249 }
3250 uSource++;
3251 test++;
3252 }
3253 }
3254 }
3255
3256 ucnv_close(cnv);
3257 free(uBuf);
3258 free(cBuf);
3259 free(offsets);
3260 }
3261 static UChar U_CALLCONV
3262 _charAt(int32_t offset, void *context) {
3263 return ((char*)context)[offset];
3264 }
3265
3266 static int32_t
3267 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3268 int32_t srcIndex=0;
3269 int32_t dstIndex=0;
3270 if(U_FAILURE(*status)){
3271 return 0;
3272 }
3273 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3274 *status = U_ILLEGAL_ARGUMENT_ERROR;
3275 return 0;
3276 }
3277 if(srcLen==-1){
3278 srcLen = uprv_strlen(src);
3279 }
3280
3281 for (; srcIndex<srcLen; ) {
3282 UChar32 c = src[srcIndex++];
3283 if (c == 0x005C /*'\\'*/) {
3284 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3285 if (c == (UChar32)0xFFFFFFFF) {
3286 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3287 break; /* invalid escape sequence */
3288 }
3289 }
3290 if(dstIndex < dstLen){
3291 if(c>0xFFFF){
3292 dst[dstIndex++] = UTF16_LEAD(c);
3293 if(dstIndex<dstLen){
3294 dst[dstIndex]=UTF16_TRAIL(c);
3295 }else{
3296 *status=U_BUFFER_OVERFLOW_ERROR;
3297 }
3298 }else{
3299 dst[dstIndex]=(UChar)c;
3300 }
3301
3302 }else{
3303 *status = U_BUFFER_OVERFLOW_ERROR;
3304 }
3305 dstIndex++; /* for preflighting */
3306 }
3307 return dstIndex;
3308 }
3309
3310 static void
3311 TestFullRoundtrip(const char* cp){
3312 UChar usource[10] ={0};
3313 UChar nsrc[10] = {0};
3314 uint32_t i=1;
3315 int len=0, ulen;
3316 nsrc[0]=0x0061;
3317 /* Test codepoint 0 */
3318 TestConv(usource,1,cp,"",NULL,0);
3319 TestConv(usource,2,cp,"",NULL,0);
3320 nsrc[2]=0x5555;
3321 TestConv(nsrc,3,cp,"",NULL,0);
3322
3323 for(;i<=0x10FFFF;i++){
3324 if(i==0xD800){
3325 i=0xDFFF;
3326 continue;
3327 }
3328 if(i<=0xFFFF){
3329 usource[0] =(UChar) i;
3330 len=1;
3331 }else{
3332 usource[0]=UTF16_LEAD(i);
3333 usource[1]=UTF16_TRAIL(i);
3334 len=2;
3335 }
3336 ulen=len;
3337 if(i==0x80) {
3338 usource[2]=0;
3339 }
3340 /* Test only single code points */
3341 TestConv(usource,ulen,cp,"",NULL,0);
3342 /* Test codepoint repeated twice */
3343 usource[ulen]=usource[0];
3344 usource[ulen+1]=usource[1];
3345 ulen+=len;
3346 TestConv(usource,ulen,cp,"",NULL,0);
3347 /* Test codepoint repeated 3 times */
3348 usource[ulen]=usource[0];
3349 usource[ulen+1]=usource[1];
3350 ulen+=len;
3351 TestConv(usource,ulen,cp,"",NULL,0);
3352 /* Test codepoint in between 2 codepoints */
3353 nsrc[1]=usource[0];
3354 nsrc[2]=usource[1];
3355 nsrc[len+1]=0x5555;
3356 TestConv(nsrc,len+2,cp,"",NULL,0);
3357 uprv_memset(usource,0,sizeof(UChar)*10);
3358 }
3359 }
3360
3361 static void
3362 TestRoundTrippingAllUTF(void){
3363 if(!QUICK){
3364 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3365 TestFullRoundtrip("BOCU-1");
3366 log_verbose("Running exhaustive round trip test for SCSU\n");
3367 TestFullRoundtrip("SCSU");
3368 log_verbose("Running exhaustive round trip test for UTF-8\n");
3369 TestFullRoundtrip("UTF-8");
3370 log_verbose("Running exhaustive round trip test for CESU-8\n");
3371 TestFullRoundtrip("CESU-8");
3372 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3373 TestFullRoundtrip("UTF-16BE");
3374 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3375 TestFullRoundtrip("UTF-16LE");
3376 log_verbose("Running exhaustive round trip test for UTF-16\n");
3377 TestFullRoundtrip("UTF-16");
3378 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3379 TestFullRoundtrip("UTF-32BE");
3380 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3381 TestFullRoundtrip("UTF-32LE");
3382 log_verbose("Running exhaustive round trip test for UTF-32\n");
3383 TestFullRoundtrip("UTF-32");
3384 log_verbose("Running exhaustive round trip test for UTF-7\n");
3385 TestFullRoundtrip("UTF-7");
3386 log_verbose("Running exhaustive round trip test for UTF-7\n");
3387 TestFullRoundtrip("UTF-7,version=1");
3388 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3389 TestFullRoundtrip("IMAP-mailbox-name");
3390 log_verbose("Running exhaustive round trip test for GB18030\n");
3391 TestFullRoundtrip("GB18030");
3392 }
3393 }
3394
3395 static void
3396 TestSCSU() {
3397
3398 static const uint16_t germanUTF16[]={
3399 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3400 };
3401
3402 static const uint8_t germanSCSU[]={
3403 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3404 };
3405
3406 static const uint16_t russianUTF16[]={
3407 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3408 };
3409
3410 static const uint8_t russianSCSU[]={
3411 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3412 };
3413
3414 static const uint16_t japaneseUTF16[]={
3415 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3416 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3417 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3418 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3419 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3420 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3421 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3422 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3423 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3424 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3425 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3426 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3427 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3428 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3429 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3430 };
3431
3432 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3433 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3434 static const uint8_t japaneseSCSU[]={
3435 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3436 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3437 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3438 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3439 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3440 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3441 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3442 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3443 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3444 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3445 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3446 0xcb, 0x82
3447 };
3448
3449 static const uint16_t allFeaturesUTF16[]={
3450 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3451 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3452 0x01df, 0xf000, 0xdbff, 0xdfff
3453 };
3454
3455 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3456 * result here (34B vs. 35B)
3457 */
3458 static const uint8_t allFeaturesSCSU[]={
3459 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3460 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3461 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3462 0xdf, 0x14, 0x80, 0x15, 0xff
3463 };
3464 static const uint16_t monkeyIn[]={
3465 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3466 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3467 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3468 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3469 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3470 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3471 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3472 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3473 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3474 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3475 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3476 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3477 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3478 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3479 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3480 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3481 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3482 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3483 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3484 /* test non-BMP code points */
3485 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3486 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3487 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3488 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3489 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3490 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3491 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3492 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3493 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3494 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3495 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3496
3497
3498 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3499 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3500 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3501 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3502 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3503 };
3504 static const char *fTestCases [] = {
3505 "\\ud800\\udc00", /* smallest surrogate*/
3506 "\\ud8ff\\udcff",
3507 "\\udBff\\udFff", /* largest surrogate pair*/
3508 "\\ud834\\udc00",
3509 "\\U0010FFFF",
3510 "Hello \\u9292 \\u9192 World!",
3511 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3512 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3513
3514 "\\u0648\\u06c8", /* catch missing reset*/
3515 "\\u0648\\u06c8",
3516
3517 "\\u4444\\uE001", /* lowest quotable*/
3518 "\\u4444\\uf2FF", /* highest quotable*/
3519 "\\u4444\\uf188\\u4444",
3520 "\\u4444\\uf188\\uf288",
3521 "\\u4444\\uf188abc\\u0429\\uf288",
3522 "\\u9292\\u2222",
3523 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3524 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3525 "Hello World!123456",
3526 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3527
3528 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3529 "abc\\u4411d", /* uses SQU*/
3530 "abc\\u4411\\u4412d",/* uses SCU*/
3531 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3532 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3533 "\\u9292\\u2222",
3534 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3535 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3536 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3537
3538 "", /* empty input*/
3539 "\\u0000", /* smallest BMP character*/
3540 "\\uFFFF", /* largest BMP character*/
3541
3542 /* regression tests*/
3543 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3544 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3545 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3546 "\\u0041\\u00df\\u0401\\u015f",
3547 "\\u9066\\u2123abc",
3548 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3549 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3550 };
3551 int i=0;
3552 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3553 const char* cSrc = fTestCases[i];
3554 UErrorCode status = U_ZERO_ERROR;
3555 int32_t cSrcLen,srcLen;
3556 UChar* src;
3557 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3558 cSrcLen= srcLen = uprv_strlen(fTestCases[i]);
3559 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3560 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3561 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3562 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3563 free(src);
3564 }
3565 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3566 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3567 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3568 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3569 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3570 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3571 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3572 }
3573 static void TestJitterbug2346(){
3574 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3575 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3576 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3577
3578 UChar uTarget[500]={'\0'};
3579 UChar* utarget=uTarget;
3580 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3581
3582 char cTarget[500]={'\0'};
3583 char* ctarget=cTarget;
3584 char* ctargetLimit=cTarget+sizeof(cTarget);
3585 const char* csource=source;
3586 UChar* temp = expected;
3587 UErrorCode err=U_ZERO_ERROR;
3588
3589 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3590 if(U_FAILURE(err)) {
3591 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3592 return;
3593 }
3594 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3595 if(U_FAILURE(err)) {
3596 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3597 return;
3598 }
3599 utargetLimit=utarget;
3600 utarget = uTarget;
3601 while(utarget<utargetLimit){
3602 if(*temp!=*utarget){
3603
3604 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3605 }
3606 utarget++;
3607 temp++;
3608 }
3609 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3610 if(U_FAILURE(err)) {
3611 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3612 return;
3613 }
3614 ctargetLimit=ctarget;
3615 ctarget =cTarget;
3616 ucnv_close(conv);
3617
3618
3619 }
3620 static void
3621 TestISO_2022_JP_1() {
3622 /* test input */
3623 static const uint16_t in[]={
3624 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3625 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3626 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3627 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3628 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3629 0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3630 0x201D, 0x000D, 0x000A,
3631 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3632 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3633 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3634 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3635 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3636 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3637 };
3638 const UChar* uSource;
3639 const UChar* uSourceLimit;
3640 const char* cSource;
3641 const char* cSourceLimit;
3642 UChar *uTargetLimit =NULL;
3643 UChar *uTarget;
3644 char *cTarget;
3645 const char *cTargetLimit;
3646 char *cBuf;
3647 UChar *uBuf,*test;
3648 int32_t uBufSize = 120;
3649 UErrorCode errorCode=U_ZERO_ERROR;
3650 UConverter *cnv;
3651
3652 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3653 if(U_FAILURE(errorCode)) {
3654 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3655 return;
3656 }
3657
3658 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3659 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3660 uSource = (const UChar*)&in[0];
3661 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3662 cTarget = cBuf;
3663 cTargetLimit = cBuf +uBufSize*5;
3664 uTarget = uBuf;
3665 uTargetLimit = uBuf+ uBufSize*5;
3666 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3667 if(U_FAILURE(errorCode)){
3668 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3669 return;
3670 }
3671 cSource = cBuf;
3672 cSourceLimit =cTarget;
3673 test =uBuf;
3674 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3675 if(U_FAILURE(errorCode)){
3676 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3677 return;
3678 }
3679 uSource = (const UChar*)&in[0];
3680 while(uSource<uSourceLimit){
3681 if(*test!=*uSource){
3682
3683 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3684 }
3685 uSource++;
3686 test++;
3687 }
3688 /*ucnv_close(cnv);
3689 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3690 /*Test for the condition where there is an invalid character*/
3691 ucnv_reset(cnv);
3692 {
3693 static const uint8_t source2[]={0x0e,0x24,0x053};
3694 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3695 }
3696 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3697 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3698 ucnv_close(cnv);
3699 free(uBuf);
3700 free(cBuf);
3701 }
3702
3703 static void
3704 TestISO_2022_JP_2() {
3705 /* test input */
3706 static const uint16_t in[]={
3707 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3708 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3709 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3710 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3711 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3712 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3713 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3714 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3715 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3716 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3717 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3718 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3719 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3720 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3721 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3722 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3723 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3724 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3725 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3726 };
3727 const UChar* uSource;
3728 const UChar* uSourceLimit;
3729 const char* cSource;
3730 const char* cSourceLimit;
3731 UChar *uTargetLimit =NULL;
3732 UChar *uTarget;
3733 char *cTarget;
3734 const char *cTargetLimit;
3735 char *cBuf;
3736 UChar *uBuf,*test;
3737 int32_t uBufSize = 120;
3738 UErrorCode errorCode=U_ZERO_ERROR;
3739 UConverter *cnv;
3740 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3741 int32_t* myOff= offsets;
3742 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3743 if(U_FAILURE(errorCode)) {
3744 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3745 return;
3746 }
3747
3748 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3749 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3750 uSource = (const UChar*)&in[0];
3751 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3752 cTarget = cBuf;
3753 cTargetLimit = cBuf +uBufSize*5;
3754 uTarget = uBuf;
3755 uTargetLimit = uBuf+ uBufSize*5;
3756 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3757 if(U_FAILURE(errorCode)){
3758 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3759 return;
3760 }
3761 cSource = cBuf;
3762 cSourceLimit =cTarget;
3763 test =uBuf;
3764 myOff=offsets;
3765 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3766 if(U_FAILURE(errorCode)){
3767 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3768 return;
3769 }
3770 uSource = (const UChar*)&in[0];
3771 while(uSource<uSourceLimit){
3772 if(*test!=*uSource){
3773
3774 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3775 }
3776 uSource++;
3777 test++;
3778 }
3779 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3780 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3781 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3782 /*Test for the condition where there is an invalid character*/
3783 ucnv_reset(cnv);
3784 {
3785 static const uint8_t source2[]={0x0e,0x24,0x053};
3786 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3787 }
3788 ucnv_close(cnv);
3789 free(uBuf);
3790 free(cBuf);
3791 free(offsets);
3792 }
3793
3794 static void
3795 TestISO_2022_KR() {
3796 /* test input */
3797 static const uint16_t in[]={
3798 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3799 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3800 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3801 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3802 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3803 ,0x53E3,0x53E4,0x000A,0x000D};
3804 const UChar* uSource;
3805 const UChar* uSourceLimit;
3806 const char* cSource;
3807 const char* cSourceLimit;
3808 UChar *uTargetLimit =NULL;
3809 UChar *uTarget;
3810 char *cTarget;
3811 const char *cTargetLimit;
3812 char *cBuf;
3813 UChar *uBuf,*test;
3814 int32_t uBufSize = 120;
3815 UErrorCode errorCode=U_ZERO_ERROR;
3816 UConverter *cnv;
3817 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3818 int32_t* myOff= offsets;
3819 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3820 if(U_FAILURE(errorCode)) {
3821 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3822 return;
3823 }
3824
3825 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3826 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3827 uSource = (const UChar*)&in[0];
3828 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3829 cTarget = cBuf;
3830 cTargetLimit = cBuf +uBufSize*5;
3831 uTarget = uBuf;
3832 uTargetLimit = uBuf+ uBufSize*5;
3833 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3834 if(U_FAILURE(errorCode)){
3835 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3836 return;
3837 }
3838 cSource = cBuf;
3839 cSourceLimit =cTarget;
3840 test =uBuf;
3841 myOff=offsets;
3842 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3843 if(U_FAILURE(errorCode)){
3844 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3845 return;
3846 }
3847 uSource = (const UChar*)&in[0];
3848 while(uSource<uSourceLimit){
3849 if(*test!=*uSource){
3850 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3851 }
3852 uSource++;
3853 test++;
3854 }
3855 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3856 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3857 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3858 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3859 TestJitterbug930("csISO2022KR");
3860 /*Test for the condition where there is an invalid character*/
3861 ucnv_reset(cnv);
3862 {
3863 static const uint8_t source2[]={0x1b,0x24,0x053};
3864 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3865 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3866 }
3867 ucnv_close(cnv);
3868 free(uBuf);
3869 free(cBuf);
3870 free(offsets);
3871 }
3872
3873 static void
3874 TestISO_2022_KR_1() {
3875 /* test input */
3876 static const uint16_t in[]={
3877 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3878 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3879 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3880 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3881 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3882 ,0x53E3,0x53E4,0x000A,0x000D};
3883 const UChar* uSource;
3884 const UChar* uSourceLimit;
3885 const char* cSource;
3886 const char* cSourceLimit;
3887 UChar *uTargetLimit =NULL;
3888 UChar *uTarget;
3889 char *cTarget;
3890 const char *cTargetLimit;
3891 char *cBuf;
3892 UChar *uBuf,*test;
3893 int32_t uBufSize = 120;
3894 UErrorCode errorCode=U_ZERO_ERROR;
3895 UConverter *cnv;
3896 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3897 int32_t* myOff= offsets;
3898 cnv=ucnv_open("ibm-25546", &errorCode);
3899 if(U_FAILURE(errorCode)) {
3900 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3901 return;
3902 }
3903
3904 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3905 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3906 uSource = (const UChar*)&in[0];
3907 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3908 cTarget = cBuf;
3909 cTargetLimit = cBuf +uBufSize*5;
3910 uTarget = uBuf;
3911 uTargetLimit = uBuf+ uBufSize*5;
3912 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3913 if(U_FAILURE(errorCode)){
3914 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3915 return;
3916 }
3917 cSource = cBuf;
3918 cSourceLimit =cTarget;
3919 test =uBuf;
3920 myOff=offsets;
3921 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3922 if(U_FAILURE(errorCode)){
3923 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3924 return;
3925 }
3926 uSource = (const UChar*)&in[0];
3927 while(uSource<uSourceLimit){
3928 if(*test!=*uSource){
3929 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3930 }
3931 uSource++;
3932 test++;
3933 }
3934 ucnv_reset(cnv);
3935 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3936 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3937 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3938 ucnv_reset(cnv);
3939 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3940 /*Test for the condition where there is an invalid character*/
3941 ucnv_reset(cnv);
3942 {
3943 static const uint8_t source2[]={0x1b,0x24,0x053};
3944 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3945 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3946 }
3947 ucnv_close(cnv);
3948 free(uBuf);
3949 free(cBuf);
3950 free(offsets);
3951 }
3952
3953 static void TestJitterbug2411(){
3954 const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3955 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3956 UConverter* kr=NULL, *kr1=NULL;
3957 UErrorCode errorCode = U_ZERO_ERROR;
3958 UChar tgt[100]={'\0'};
3959 UChar* target = tgt;
3960 UChar* targetLimit = target+100;
3961 kr=ucnv_open("iso-2022-kr", &errorCode);
3962 if(U_FAILURE(errorCode)) {
3963 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
3964 return;
3965 }
3966 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3967 if(U_FAILURE(errorCode)) {
3968 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3969 return;
3970 }
3971 kr1 = ucnv_open("ibm-25546", &errorCode);
3972 if(U_FAILURE(errorCode)) {
3973 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
3974 return;
3975 }
3976 target = tgt;
3977 targetLimit = target+100;
3978 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3979
3980 if(U_FAILURE(errorCode)) {
3981 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3982 return;
3983 }
3984
3985 ucnv_close(kr);
3986 ucnv_close(kr1);
3987
3988 }
3989
3990 static void
3991 TestJIS(){
3992 /* From Unicode moved to testdata/conversion.txt */
3993 /*To Unicode*/
3994 {
3995 const uint8_t sampleTextJIS[] = {
3996 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
3997 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
3998 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
3999 };
4000 const uint16_t expectedISO2022JIS[] = {
4001 0x0041, 0x0042,
4002 0xFF81, 0xFF82,
4003 0x3000
4004 };
4005 int32_t toISO2022JISOffs[]={
4006 3,4,
4007 8,9,
4008 16
4009 };
4010
4011 const uint8_t sampleTextJIS7[] = {
4012 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4013 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4014 0x1b,0x24,0x42,0x21,0x21,
4015 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4016 0x21,0x22,
4017 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4018 };
4019 const uint16_t expectedISO2022JIS7[] = {
4020 0x0041, 0x0042,
4021 0xFF81, 0xFF82,
4022 0x3000,
4023 0xFF81, 0xFF82,
4024 0x3001,
4025 0x3000
4026 };
4027 int32_t toISO2022JIS7Offs[]={
4028 3,4,
4029 8,9,
4030 13,16,
4031 17,
4032 19,27
4033 };
4034 const uint8_t sampleTextJIS8[] = {
4035 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4036 0xa1,0xc8,0xd9,/*Katakana Set*/
4037 0x1b,0x28,0x42,
4038 0x41,0x42,
4039 0xb1,0xc3, /*Katakana Set*/
4040 0x1b,0x24,0x42,0x21,0x21
4041 };
4042 const uint16_t expectedISO2022JIS8[] = {
4043 0x0041, 0x0042,
4044 0xff61, 0xff88, 0xff99,
4045 0x0041, 0x0042,
4046 0xff71, 0xff83,
4047 0x3000
4048 };
4049 int32_t toISO2022JIS8Offs[]={
4050 3, 4, 5, 6,
4051 7, 11, 12, 13,
4052 14, 18,
4053 };
4054
4055 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4056 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4057 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4058 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4059 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4060 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4061 }
4062
4063 }
4064
4065 static void TestJitterbug915(){
4066 /* tests for roundtripping of the below sequence
4067 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4068 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4069 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4070 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4071 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4072 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4073 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4074 */
4075 static char cSource[]={
4076 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4077 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4078 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4079 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4080 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4081 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4082 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4083 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4084 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4085 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4086 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4087 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4088 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4089 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4090 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4091 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4092 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4093 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4094 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4095 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4096 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4097 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4098 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4099 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4100 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4101 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4102 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4103 0x37, 0x20, 0x2A, 0x2F
4104 };
4105 UChar uTarget[500]={'\0'};
4106 UChar* utarget=uTarget;
4107 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4108
4109 char cTarget[500]={'\0'};
4110 char* ctarget=cTarget;
4111 char* ctargetLimit=cTarget+sizeof(cTarget);
4112 const char* csource=cSource;
4113 char* tempSrc = cSource;
4114 UErrorCode err=U_ZERO_ERROR;
4115
4116 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4117 if(U_FAILURE(err)) {
4118 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4119 return;
4120 }
4121 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4122 if(U_FAILURE(err)) {
4123 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4124 return;
4125 }
4126 utargetLimit=utarget;
4127 utarget = uTarget;
4128 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4129 if(U_FAILURE(err)) {
4130 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4131 return;
4132 }
4133 ctargetLimit=ctarget;
4134 ctarget =cTarget;
4135 while(ctarget<ctargetLimit){
4136 if(*ctarget != *tempSrc){
4137 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4138 }
4139 ++ctarget;
4140 ++tempSrc;
4141 }
4142
4143 ucnv_close(conv);
4144 }
4145
4146 static void
4147 TestISO_2022_CN_EXT() {
4148 /* test input */
4149 static const uint16_t in[]={
4150 /* test Non-BMP code points */
4151 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4152 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4153 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4154 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4155 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4156 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4157 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4158 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4159 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4160 0xD869, 0xDED5,
4161
4162 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4163 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4164 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4165 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4166 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4167 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4168 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4169 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4170 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4171 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4172 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4173 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4174 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4175 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4176 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4177 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4178 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4179 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4180
4181 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4182
4183 };
4184
4185 const UChar* uSource;
4186 const UChar* uSourceLimit;
4187 const char* cSource;
4188 const char* cSourceLimit;
4189 UChar *uTargetLimit =NULL;
4190 UChar *uTarget;
4191 char *cTarget;
4192 const char *cTargetLimit;
4193 char *cBuf;
4194 UChar *uBuf,*test;
4195 int32_t uBufSize = 180;
4196 UErrorCode errorCode=U_ZERO_ERROR;
4197 UConverter *cnv;
4198 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4199 int32_t* myOff= offsets;
4200 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4201 if(U_FAILURE(errorCode)) {
4202 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4203 return;
4204 }
4205
4206 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4207 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4208 uSource = (const UChar*)&in[0];
4209 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4210 cTarget = cBuf;
4211 cTargetLimit = cBuf +uBufSize*5;
4212 uTarget = uBuf;
4213 uTargetLimit = uBuf+ uBufSize*5;
4214 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4215 if(U_FAILURE(errorCode)){
4216 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4217 return;
4218 }
4219 cSource = cBuf;
4220 cSourceLimit =cTarget;
4221 test =uBuf;
4222 myOff=offsets;
4223 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4224 if(U_FAILURE(errorCode)){
4225 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4226 return;
4227 }
4228 uSource = (const UChar*)&in[0];
4229 while(uSource<uSourceLimit){
4230 if(*test!=*uSource){
4231 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4232 }
4233 else{
4234 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4235 }
4236 uSource++;
4237 test++;
4238 }
4239 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4240 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4241 /*Test for the condition where there is an invalid character*/
4242 ucnv_reset(cnv);
4243 {
4244 static const uint8_t source2[]={0x0e,0x24,0x053};
4245 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4246 }
4247 ucnv_close(cnv);
4248 free(uBuf);
4249 free(cBuf);
4250 free(offsets);
4251 }
4252
4253 static void
4254 TestISO_2022_CN() {
4255 /* test input */
4256 static const uint16_t in[]={
4257 /* jitterbug 951 */
4258 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4259 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4260 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4261 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4262 0x0020, 0x0045, 0x004e, 0x0044,
4263 /**/
4264 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4265 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4266 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4267 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4272 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4273 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4274 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4275 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4276 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4277 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4278 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4279 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4280 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4281
4282 };
4283 const UChar* uSource;
4284 const UChar* uSourceLimit;
4285 const char* cSource;
4286 const char* cSourceLimit;
4287 UChar *uTargetLimit =NULL;
4288 UChar *uTarget;
4289 char *cTarget;
4290 const char *cTargetLimit;
4291 char *cBuf;
4292 UChar *uBuf,*test;
4293 int32_t uBufSize = 180;
4294 UErrorCode errorCode=U_ZERO_ERROR;
4295 UConverter *cnv;
4296 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4297 int32_t* myOff= offsets;
4298 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4299 if(U_FAILURE(errorCode)) {
4300 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4301 return;
4302 }
4303
4304 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4305 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4306 uSource = (const UChar*)&in[0];
4307 uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4308 cTarget = cBuf;
4309 cTargetLimit = cBuf +uBufSize*5;
4310 uTarget = uBuf;
4311 uTargetLimit = uBuf+ uBufSize*5;
4312 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4313 if(U_FAILURE(errorCode)){
4314 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4315 return;
4316 }
4317 cSource = cBuf;
4318 cSourceLimit =cTarget;
4319 test =uBuf;
4320 myOff=offsets;
4321 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4322 if(U_FAILURE(errorCode)){
4323 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4324 return;
4325 }
4326 uSource = (const UChar*)&in[0];
4327 while(uSource<uSourceLimit){
4328 if(*test!=*uSource){
4329 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4330 }
4331 else{
4332 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4333 }
4334 uSource++;
4335 test++;
4336 }
4337 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4338 TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4339 TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4340 TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4341 TestJitterbug930("csISO2022CN");
4342 /*Test for the condition where there is an invalid character*/
4343 ucnv_reset(cnv);
4344 {
4345 static const uint8_t source2[]={0x0e,0x24,0x053};
4346 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4347 }
4348
4349 ucnv_close(cnv);
4350 free(uBuf);
4351 free(cBuf);
4352 free(offsets);
4353 }
4354
4355 static void
4356 TestEBCDIC_STATEFUL() {
4357 /* test input */
4358 static const uint8_t in[]={
4359 0x61,
4360 0x1a,
4361 0x0f, 0x4b,
4362 0x42,
4363 0x40,
4364 0x36,
4365 };
4366
4367 /* expected test results */
4368 static const int32_t results[]={
4369 /* number of bytes read, code point */
4370 1, 0x002f,
4371 1, 0x0092,
4372 2, 0x002e,
4373 1, 0xff62,
4374 1, 0x0020,
4375 1, 0x0096,
4376
4377 };
4378 static const uint8_t in2[]={
4379 0x0f,
4380 0xa1,
4381 0x01
4382 };
4383
4384 /* expected test results */
4385 static const int32_t results2[]={
4386 /* number of bytes read, code point */
4387 2, 0x203E,
4388 1, 0x0001,
4389 };
4390
4391 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4392 UErrorCode errorCode=U_ZERO_ERROR;
4393 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4394 if(U_FAILURE(errorCode)) {
4395 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4396 return;
4397 }
4398 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4399 ucnv_reset(cnv);
4400 /* Test the condition when source >= sourceLimit */
4401 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4402 ucnv_reset(cnv);
4403 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4404 {
4405 static const uint8_t source1[]={0x0f};
4406 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4407 }
4408 /*Test for the condition where there is an invalid character*/
4409 ucnv_reset(cnv);
4410 {
4411 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4412 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4413 }
4414 ucnv_reset(cnv);
4415 source=(const char*)in2;
4416 limit=(const char*)in2+sizeof(in2);
4417 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4418 ucnv_close(cnv);
4419
4420 }
4421
4422 static void
4423 TestGB18030() {
4424 /* test input */
4425 static const uint8_t in[]={
4426 0x24,
4427 0x7f,
4428 0x81, 0x30, 0x81, 0x30,
4429 0xa8, 0xbf,
4430 0xa2, 0xe3,
4431 0xd2, 0xbb,
4432 0x82, 0x35, 0x8f, 0x33,
4433 0x84, 0x31, 0xa4, 0x39,
4434 0x90, 0x30, 0x81, 0x30,
4435 0xe3, 0x32, 0x9a, 0x35
4436 #if 0
4437 /*
4438 * Feature removed markus 2000-oct-26
4439 * Only some codepages must match surrogate pairs into supplementary code points -
4440 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4441 * GB 18030 provides direct encodings for supplementary code points, therefore
4442 * it must not combine two single-encoded surrogates into one code point.
4443 */
4444 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4445 #endif
4446 };
4447
4448 /* expected test results */
4449 static const int32_t results[]={
4450 /* number of bytes read, code point */
4451 1, 0x24,
4452 1, 0x7f,
4453 4, 0x80,
4454 2, 0x1f9,
4455 2, 0x20ac,
4456 2, 0x4e00,
4457 4, 0x9fa6,
4458 4, 0xffff,
4459 4, 0x10000,
4460 4, 0x10ffff
4461 #if 0
4462 /* Feature removed. See comment above. */
4463 8, 0x10000
4464 #endif
4465 };
4466
4467 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4468 UErrorCode errorCode=U_ZERO_ERROR;
4469 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4470 if(U_FAILURE(errorCode)) {
4471 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4472 return;
4473 }
4474 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4475 ucnv_close(cnv);
4476 }
4477
4478 static void
4479 TestLMBCS() {
4480 /* LMBCS-1 string */
4481 static const uint8_t pszLMBCS[]={
4482 0x61,
4483 0x01, 0x29,
4484 0x81,
4485 0xA0,
4486 0x0F, 0x27,
4487 0x0F, 0x91,
4488 0x14, 0x0a, 0x74,
4489 0x14, 0xF6, 0x02,
4490 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4491 0x10, 0x88, 0xA0,
4492 };
4493
4494 /* Unicode UChar32 equivalents */
4495 static const UChar32 pszUnicode32[]={
4496 /* code point */
4497 0x00000061,
4498 0x00002013,
4499 0x000000FC,
4500 0x000000E1,
4501 0x00000007,
4502 0x00000091,
4503 0x00000a74,
4504 0x00000200,
4505 0x00023456, /* code point for surrogate pair */
4506 0x00005516
4507 };
4508
4509 /* Unicode UChar equivalents */
4510 static const UChar pszUnicode[]={
4511 /* code point */
4512 0x0061,
4513 0x2013,
4514 0x00FC,
4515 0x00E1,
4516 0x0007,
4517 0x0091,
4518 0x0a74,
4519 0x0200,
4520 0xD84D, /* low surrogate */
4521 0xDC56, /* high surrogate */
4522 0x5516
4523 };
4524
4525 /* expected test results */
4526 static const int offsets32[]={
4527 /* number of bytes read, code point */
4528 0,
4529 1,
4530 3,
4531 4,
4532 5,
4533 7,
4534 9,
4535 12,
4536 15,
4537 21,
4538 24
4539 };
4540
4541 /* expected test results */
4542 static const int offsets[]={
4543 /* number of bytes read, code point */
4544 0,
4545 1,
4546 3,
4547 4,
4548 5,
4549 7,
4550 9,
4551 12,
4552 15,
4553 18,
4554 21,
4555 24
4556 };
4557
4558
4559 UConverter *cnv;
4560
4561 #define NAME_LMBCS_1 "LMBCS-1"
4562 #define NAME_LMBCS_2 "LMBCS-2"
4563
4564
4565 /* Some basic open/close/property tests on some LMBCS converters */
4566 {
4567
4568 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4569 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4570 char get_subchars [1];
4571 const char * get_name;
4572 UConverter *cnv1;
4573 UConverter *cnv2;
4574
4575 int8_t len = sizeof(get_subchars);
4576
4577 UErrorCode errorCode=U_ZERO_ERROR;
4578
4579 /* Open */
4580 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4581 if(U_FAILURE(errorCode)) {
4582 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4583 return;
4584 }
4585 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4586 if(U_FAILURE(errorCode)) {
4587 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4588 return;
4589 }
4590
4591 /* Name */
4592 get_name = ucnv_getName (cnv1, &errorCode);
4593 if (strcmp(NAME_LMBCS_1,get_name)){
4594 log_err("Unexpected converter name: %s\n", get_name);
4595 }
4596 get_name = ucnv_getName (cnv2, &errorCode);
4597 if (strcmp(NAME_LMBCS_2,get_name)){
4598 log_err("Unexpected converter name: %s\n", get_name);
4599 }
4600
4601 /* substitution chars */
4602 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4603 if(U_FAILURE(errorCode)) {
4604 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4605 }
4606 if (len!=1){
4607 log_err("Unexpected length of sub chars\n");
4608 }
4609 if (get_subchars[0] != expected_subchars[0]){
4610 log_err("Unexpected value of sub chars\n");
4611 }
4612 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4613 if(U_FAILURE(errorCode)) {
4614 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4615 }
4616 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4617 if(U_FAILURE(errorCode)) {
4618 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4619 }
4620 if (len!=1){
4621 log_err("Unexpected length of sub chars\n");
4622 }
4623 if (get_subchars[0] != new_subchars[0]){
4624 log_err("Unexpected value of sub chars\n");
4625 }
4626 ucnv_close(cnv1);
4627 ucnv_close(cnv2);
4628
4629 }
4630
4631 /* LMBCS to Unicode - offsets */
4632 {
4633 UErrorCode errorCode=U_ZERO_ERROR;
4634
4635 const uint8_t * pSource = pszLMBCS;
4636 const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4637
4638 UChar Out [sizeof(pszUnicode) + 1];
4639 UChar * pOut = Out;
4640 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4641
4642 int32_t off [sizeof(offsets)];
4643
4644 /* last 'offset' in expected results is just the final size.
4645 (Makes other tests easier). Compensate here: */
4646
4647 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4648
4649
4650
4651 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4652 if(U_FAILURE(errorCode)) {
4653 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4654 return;
4655 }
4656
4657
4658
4659 ucnv_toUnicode (cnv,
4660 &pOut,
4661 OutLimit,
4662 (const char **)&pSource,
4663 (const char *)sourceLimit,
4664 off,
4665 TRUE,
4666 &errorCode);
4667
4668
4669 if (memcmp(off,offsets,sizeof(offsets)))
4670 {
4671 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4672 }
4673 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4674 {
4675 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4676 }
4677 ucnv_close(cnv);
4678 }
4679 {
4680 /* LMBCS to Unicode - getNextUChar */
4681 const char * sourceStart;
4682 const char *source=(const char *)pszLMBCS;
4683 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4684 const UChar32 *results= pszUnicode32;
4685 const int *off = offsets32;
4686
4687 UErrorCode errorCode=U_ZERO_ERROR;
4688 UChar32 uniChar;
4689
4690 cnv=ucnv_open("LMBCS-1", &errorCode);
4691 if(U_FAILURE(errorCode)) {
4692 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4693 return;
4694 }
4695 else
4696 {
4697
4698 while(source<limit) {
4699 sourceStart=source;
4700 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4701 if(U_FAILURE(errorCode)) {
4702 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4703 break;
4704 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4705 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4706 uniChar, (source-sourceStart), *results, *off);
4707 break;
4708 }
4709 results++;
4710 off++;
4711 }
4712 }
4713 ucnv_close(cnv);
4714 }
4715 { /* test locale & optimization group operations: Unicode to LMBCS */
4716
4717 UErrorCode errorCode=U_ZERO_ERROR;
4718 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4719 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4720 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4721 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4722 const UChar * pUniOut = uniString;
4723 UChar * pUniIn = uniString;
4724 uint8_t lmbcsString [4];
4725 const uint8_t * pLMBCSOut = lmbcsString;
4726 uint8_t * pLMBCSIn = lmbcsString;
4727
4728 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4729 ucnv_fromUnicode (cnv16he,
4730 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4731 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4732 NULL, 1, &errorCode);
4733
4734 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4735 {
4736 log_err("LMBCS-16,locale=he gives unexpected translation\n");
4737 }
4738
4739 pLMBCSIn=lmbcsString;
4740 pUniOut = uniString;
4741 ucnv_fromUnicode (cnv01us,
4742 (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4743 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4744 NULL, 1, &errorCode);
4745
4746 if (lmbcsString[0] != 0x9F)
4747 {
4748 log_err("LMBCS-1,locale=US gives unexpected translation\n");
4749 }
4750
4751 /* single byte char from mbcs char set */
4752 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
4753 pLMBCSOut = lmbcsString;
4754 pUniIn = uniString;
4755 ucnv_toUnicode (cnv16jp,
4756 &pUniIn, pUniIn + 1,
4757 (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
4758 NULL, 1, &errorCode);
4759 if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4760 {
4761 log_err("Unexpected results from LMBCS-16 single byte char\n");
4762 }
4763 /* convert to group 1: should be 3 bytes */
4764 pLMBCSIn = lmbcsString;
4765 pUniOut = uniString;
4766 ucnv_fromUnicode (cnv01us,
4767 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
4768 &pUniOut, pUniOut + 1,
4769 NULL, 1, &errorCode);
4770 if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
4771 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4772 {
4773 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4774 }
4775 pLMBCSOut = lmbcsString;
4776 pUniIn = uniString;
4777 ucnv_toUnicode (cnv01us,
4778 &pUniIn, pUniIn + 1,
4779 (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
4780 NULL, 1, &errorCode);
4781 if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4782 {
4783 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4784 }
4785 pLMBCSIn = lmbcsString;
4786 pUniOut = uniString;
4787 ucnv_fromUnicode (cnv16jp,
4788 (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
4789 &pUniOut, pUniOut + 1,
4790 NULL, 1, &errorCode);
4791 if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4792 {
4793 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4794 }
4795 ucnv_close(cnv16he);
4796 ucnv_close(cnv16jp);
4797 ucnv_close(cnv01us);
4798 }
4799 {
4800 /* Small source buffer testing, LMBCS -> Unicode */
4801
4802 UErrorCode errorCode=U_ZERO_ERROR;
4803
4804 const uint8_t * pSource = pszLMBCS;
4805 const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4806 int codepointCount = 0;
4807
4808 UChar Out [sizeof(pszUnicode) + 1];
4809 UChar * pOut = Out;
4810 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4811
4812
4813 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4814 if(U_FAILURE(errorCode)) {
4815 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4816 return;
4817 }
4818
4819
4820 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4821 {
4822 ucnv_toUnicode (cnv,
4823 &pOut,
4824 OutLimit,
4825 (const char **)&pSource,
4826 (const char *)(pSource+1), /* claim that this is a 1- byte buffer */
4827 NULL,
4828 FALSE, /* FALSE means there might be more chars in the next buffer */
4829 &errorCode);
4830
4831 if (U_SUCCESS (errorCode))
4832 {
4833 if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
4834 {
4835 /* we are on to the next code point: check value */
4836
4837 if (Out[0] != pszUnicode[codepointCount]){
4838 log_err("LMBCS->Uni result %lx should have been %lx \n",
4839 Out[0], pszUnicode[codepointCount]);
4840 }
4841
4842 pOut = Out; /* reset for accumulating next code point */
4843 codepointCount++;
4844 }
4845 }
4846 else
4847 {
4848 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4849 }
4850 }
4851 {
4852 /* limits & surrogate error testing */
4853 uint8_t LIn [sizeof(pszLMBCS)];
4854 const uint8_t * pLIn = LIn;
4855
4856 char LOut [sizeof(pszLMBCS)];
4857 char * pLOut = LOut;
4858
4859 UChar UOut [sizeof(pszUnicode)];
4860 UChar * pUOut = UOut;
4861
4862 UChar UIn [sizeof(pszUnicode)];
4863 const UChar * pUIn = UIn;
4864
4865 int32_t off [sizeof(offsets)];
4866 UChar32 uniChar;
4867
4868 errorCode=U_ZERO_ERROR;
4869
4870 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4871 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4872 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4873 {
4874 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4875 }
4876 errorCode=U_ZERO_ERROR;
4877 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4878 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4879 {
4880 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4881 }
4882 errorCode=U_ZERO_ERROR;
4883
4884 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4885 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4886 {
4887 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4888 }
4889 errorCode=U_ZERO_ERROR;
4890
4891 /* 0 byte source request - no error, no pointer movement */
4892 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4893 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4894 if(U_FAILURE(errorCode)) {
4895 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4896 }
4897 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
4898 {
4899 log_err("Unexpected pointer move in 0 byte source request \n");
4900 }
4901 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4902 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
4903 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
4904 {
4905 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
4906 }
4907 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4908 {
4909 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4910 }
4911 errorCode = U_ZERO_ERROR;
4912
4913 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
4914
4915 pUIn = pszUnicode;
4916 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
4917 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
4918 {
4919 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4920 }
4921
4922 errorCode = U_ZERO_ERROR;
4923
4924 pLIn = pszLMBCS;
4925 ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
4926 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
4927 {
4928 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4929 }
4930
4931 /* unpaired or chopped LMBCS surrogates */
4932
4933 /* OK high surrogate, Low surrogate is chopped */
4934 LIn [0] = 0x14;
4935 LIn [1] = 0xD8;
4936 LIn [2] = 0x01;
4937 LIn [3] = 0x14;
4938 LIn [4] = 0xDC;
4939 pLIn = LIn;
4940 errorCode = U_ZERO_ERROR;
4941 pUOut = UOut;
4942
4943 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4944 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4945 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4946 {
4947 log_err("Unexpected results on chopped low surrogate\n");
4948 }
4949
4950 /* chopped at surrogate boundary */
4951 LIn [0] = 0x14;
4952 LIn [1] = 0xD8;
4953 LIn [2] = 0x01;
4954 pLIn = LIn;
4955 errorCode = U_ZERO_ERROR;
4956 pUOut = UOut;
4957
4958 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
4959 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
4960 {
4961 log_err("Unexpected results on chopped at surrogate boundary \n");
4962 }
4963
4964 /* unpaired surrogate plus valid Unichar */
4965 LIn [0] = 0x14;
4966 LIn [1] = 0xD8;
4967 LIn [2] = 0x01;
4968 LIn [3] = 0x14;
4969 LIn [4] = 0xC9;
4970 LIn [5] = 0xD0;
4971 pLIn = LIn;
4972 errorCode = U_ZERO_ERROR;
4973 pUOut = UOut;
4974
4975 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
4976 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
4977 {
4978 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
4979 }
4980
4981 /* unpaired surrogate plus chopped Unichar */
4982 LIn [0] = 0x14;
4983 LIn [1] = 0xD8;
4984 LIn [2] = 0x01;
4985 LIn [3] = 0x14;
4986 LIn [4] = 0xC9;
4987
4988 pLIn = LIn;
4989 errorCode = U_ZERO_ERROR;
4990 pUOut = UOut;
4991
4992 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4993 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4994 {
4995 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
4996 }
4997
4998 /* unpaired surrogate plus valid non-Unichar */
4999 LIn [0] = 0x14;
5000 LIn [1] = 0xD8;
5001 LIn [2] = 0x01;
5002 LIn [3] = 0x0F;
5003 LIn [4] = 0x3B;
5004
5005 pLIn = LIn;
5006 errorCode = U_ZERO_ERROR;
5007 pUOut = UOut;
5008
5009 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5010 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5011 {
5012 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5013 }
5014
5015 /* unpaired surrogate plus chopped non-Unichar */
5016 LIn [0] = 0x14;
5017 LIn [1] = 0xD8;
5018 LIn [2] = 0x01;
5019 LIn [3] = 0x0F;
5020
5021 pLIn = LIn;
5022 errorCode = U_ZERO_ERROR;
5023 pUOut = UOut;
5024
5025 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5026
5027 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5028 {
5029 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5030 }
5031 }
5032 }
5033 ucnv_close(cnv); /* final cleanup */
5034 }
5035
5036
5037 static void TestJitterbug255()
5038 {
5039 const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5040 const uint8_t *testBuffer = testBytes;
5041 const uint8_t *testEnd = testBytes + sizeof(testBytes);
5042 UErrorCode status = U_ZERO_ERROR;
5043 UChar32 result;
5044 UConverter *cnv = 0;
5045
5046 cnv = ucnv_open("shift-jis", &status);
5047 if (U_FAILURE(status) || cnv == 0) {
5048 log_data_err("Failed to open the converter for SJIS.\n");
5049 return;
5050 }
5051 while (testBuffer != testEnd)
5052 {
5053 result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status);
5054 if (U_FAILURE(status))
5055 {
5056 log_err("Failed to convert the next UChar for SJIS.\n");
5057 break;
5058 }
5059 }
5060 ucnv_close(cnv);
5061 }
5062
5063 static void TestEBCDICUS4XML()
5064 {
5065 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5066 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5067 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5068 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5069 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5070 UChar *unicodes = unicodes_x;
5071 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5072 char *target = target_x;
5073 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5074 UErrorCode status = U_ZERO_ERROR;
5075 UConverter *cnv = 0;
5076
5077 cnv = ucnv_open("ebcdic-xml-us", &status);
5078 if (U_FAILURE(status) || cnv == 0) {
5079 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5080 return;
5081 }
5082 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5083 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5084 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5085 u_errorName(status));
5086 printUSeqErr(unicodes_x, 3);
5087 printUSeqErr(toUnicodeMaps, 3);
5088 }
5089 status = U_ZERO_ERROR;
5090 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5091 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5092 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5093 u_errorName(status));
5094 printSeqErr((const unsigned char*)target_x, 3);
5095 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5096 }
5097 ucnv_close(cnv);
5098 }
5099
5100 #if !UCONFIG_NO_COLLATION
5101
5102 static void TestJitterbug981(){
5103 const UChar* rules;
5104 int32_t rules_length, target_cap, bytes_needed, buff_size;
5105 UErrorCode status = U_ZERO_ERROR;
5106 UConverter *utf8cnv;
5107 UCollator* myCollator;
5108 char *buff;
5109 int numNeeded=0;
5110 utf8cnv = ucnv_open ("utf8", &status);
5111 if(U_FAILURE(status)){
5112 log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5113 return;
5114 }
5115 myCollator = ucol_open("zh", &status);
5116 if(U_FAILURE(status)){
5117 log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5118 return;
5119 }
5120
5121 rules = ucol_getRules(myCollator, &rules_length);
5122 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5123 buff = malloc(buff_size);
5124
5125 target_cap = 0;
5126 do {
5127 ucnv_reset(utf8cnv);
5128 status = U_ZERO_ERROR;
5129 if(target_cap >= buff_size) {
5130 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5131 return;
5132 }
5133 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5134 rules, rules_length, &status);
5135 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5136 if(numNeeded!=0 && numNeeded!= bytes_needed){
5137 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5138 }
5139 numNeeded = bytes_needed;
5140 } while (status == U_BUFFER_OVERFLOW_ERROR);
5141 ucol_close(myCollator);
5142 ucnv_close(utf8cnv);
5143 free(buff);
5144 }
5145
5146 #endif
5147
5148 static void TestJitterbug1293(){
5149 UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5150 char target[256];
5151 UErrorCode status = U_ZERO_ERROR;
5152 UConverter* conv=NULL;
5153 int32_t target_cap, bytes_needed, numNeeded = 0;
5154 conv = ucnv_open("shift-jis",&status);
5155 if(U_FAILURE(status)){
5156 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5157 return;
5158 }
5159
5160 do{
5161 target_cap =0;
5162 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5163 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5164 if(numNeeded!=0 && numNeeded!= bytes_needed){
5165 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5166 }
5167 numNeeded = bytes_needed;
5168 } while (status == U_BUFFER_OVERFLOW_ERROR);
5169 if(U_FAILURE(status)){
5170 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5171 return;
5172 }
5173 ucnv_close(conv);
5174 }
5175
5176 #endif