]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nucnvtst.c
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 * Name Description
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "nucnvtst.h"
30
31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
35 #endif
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
38 #endif
39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
54
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
61 #endif
62
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
65 #endif
66
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
73 #if 0
74 /*
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76 */
77 static void TestISO_2022_CN_EXT(void);
78 #endif
79 static void TestJIS(void);
80 static void TestHZ(void);
81 #endif
82
83 static void TestSCSU(void);
84
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
91 #if 0
92 /*
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94 */
95 static void TestJitterbug915(void);
96 #endif
97 static void TestISCII(void);
98
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
105
106 static void TestIsFixedWidth(void);
107 #endif
108
109 static void TestInBufSizes(void);
110
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in[],
113 int len,
114 const char* conv,
115 const char* lang,
116 char byteArr[],
117 int byteArrLen);
118
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/*
 * Capacity (in elements) of the scratch output/offset arrays used by the
 * conversion helpers in this file, and the default chunk size for both
 * directions.
 */
#define NEW_MAX_BUFFER 999

/* Chunk sizes used when feeding input to / draining output from a converter. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Human-readable description of the conversion currently under test. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every use of an argument is parenthesized so that operators with lower
 * precedence than '<' (e.g. '&', '|', '?:') in the arguments still group
 * as written by the caller.
 * Note: the selected argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
130
131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132 {
133 if(cnv && cnv[0] == '@') {
134 return ucnv_openPackage(loadTestData(err), cnv+1, err);
135 } else {
136 return ucnv_open(cnv, err);
137 }
138 }
139
/* Writes len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx=0; idx<len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149 static void printUSeq(const UChar* a, int len)
150 {
151 int i=0;
152 log_verbose("{U+");
153 while (i<len) log_verbose("0x%04x ", a[i++]);
154 log_verbose("}\n");
155 }
156
/* Writes len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx=0; idx<len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166 static void printUSeqErr(const UChar* a, int len)
167 {
168 int i=0;
169 fprintf(stderr, "{U+");
170 while (i<len)
171 fprintf(stderr, "0x%04x ", a[i++]);
172 fprintf(stderr,"}\n");
173 }
174
175 static void
176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177 {
178 const char* s0;
179 const char* s=(char*)source;
180 const int32_t *r=results;
181 UErrorCode errorCode=U_ZERO_ERROR;
182 UChar32 c;
183
184 while(s<limit) {
185 s0=s;
186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191 break;
192 } else if(
193 /* test the expected number of input bytes only if >=0 */
194 (*r>=0 && (int32_t)(s-s0)!=*r) ||
195 c!=*(r+1)
196 ) {
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message, c, (s-s0), *(r+1), *r);
199 break;
200 }
201 r+=2;
202 }
203 }
204
205 static void
206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207 {
208 const char* s=(char*)source;
209 UErrorCode errorCode=U_ZERO_ERROR;
210 uint32_t c;
211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212 if(errorCode != expected){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214 }
215 if(c != 0xFFFD && c != 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217 }
218
219 }
220
221 static void TestInBufSizes(void)
222 {
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224 #if 1
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
233 #endif
234 }
235
236 static void TestOutBufSizes(void)
237 {
238 #if 1
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246 #endif
247 }
248
249
250 void addTestNewConvert(TestNode** root)
251 {
252 #if !UCONFIG_NO_FILE_IO
253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255 #endif
256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272 #if !UCONFIG_NO_LEGACY_CONVERSION
273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274 #endif
275
276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278 #if !UCONFIG_NO_LEGACY_CONVERSION
279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280 #if !UCONFIG_NO_FILE_IO
281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283 #endif
284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286 #ifdef U_ENABLE_GENERIC_ISO_2022
287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288 #endif
289
290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
297 /*
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
301 */
302 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
303 #endif
304
305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
306
307 #if !UCONFIG_NO_LEGACY_CONVERSION
308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
315 #if !UCONFIG_NO_COLLATION
316 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317 #endif
318
319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
320 #endif
321
322
323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
325 #endif
326
327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
328
329 #if !UCONFIG_NO_LEGACY_CONVERSION
330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
332 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
333
334 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
335 #endif
336 }
337
338
339 /* Note that this test already makes use of statics, so it's not really
340 multithread safe.
341 This convenience function lets us make the error messages actually useful.
342 */
343
344 static void setNuConvTestName(const char *codepage, const char *direction)
345 {
346 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347 codepage,
348 direction,
349 (int)gInBufferSize,
350 (int)gOutBufferSize);
351 }
352
/*
 * Outcome of one testConvertFromU()/testConvertToU() run.
 * The enumerators take the values 0, 1 and 2 in declaration order.
 */
typedef enum
{
    TC_OK,          /* 0: test was OK */
    TC_MISMATCH,    /* 1: Match failed - err was printed */
    TC_FAIL         /* 2: Test failed, don't print an err because it was already printed. */
} ETestConvertResult;
359
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
363 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364 {
365 UErrorCode status = U_ZERO_ERROR;
366 UConverter *conv = 0;
367 char junkout[NEW_MAX_BUFFER]; /* FIX */
368 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
369 char *p;
370 const UChar *src;
371 char *end;
372 char *targ;
373 int32_t *offs;
374 int i;
375 int32_t realBufferSize;
376 char *realBufferEnd;
377 const UChar *realSourceEnd;
378 const UChar *sourceLimit;
379 UBool checkOffsets = TRUE;
380 UBool doFlush;
381
382 for(i=0;i<NEW_MAX_BUFFER;i++)
383 junkout[i] = (char)0xF0;
384 for(i=0;i<NEW_MAX_BUFFER;i++)
385 junokout[i] = 0xFF;
386
387 setNuConvTestName(codepage, "FROM");
388
389 log_verbose("\n========= %s\n", gNuConvTestName);
390
391 conv = my_ucnv_open(codepage, &status);
392
393 if(U_FAILURE(status))
394 {
395 log_data_err("Couldn't open converter %s\n",codepage);
396 return TC_FAIL;
397 }
398 if(useFallback){
399 ucnv_setFallback(conv,useFallback);
400 }
401
402 log_verbose("Converter opened..\n");
403
404 src = source;
405 targ = junkout;
406 offs = junokout;
407
408 realBufferSize = UPRV_LENGTHOF(junkout);
409 realBufferEnd = junkout + realBufferSize;
410 realSourceEnd = source + sourceLen;
411
412 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413 checkOffsets = FALSE;
414
415 do
416 {
417 end = nct_min(targ + gOutBufferSize, realBufferEnd);
418 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419
420 doFlush = (UBool)(sourceLimit == realSourceEnd);
421
422 if(targ == realBufferEnd) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424 return TC_FAIL;
425 }
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
427
428
429 status = U_ZERO_ERROR;
430
431 ucnv_fromUnicode (conv,
432 &targ,
433 end,
434 &src,
435 sourceLimit,
436 checkOffsets ? offs : NULL,
437 doFlush, /* flush if we're at the end of the input data */
438 &status);
439 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440
441 if(U_FAILURE(status)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443 return TC_FAIL;
444 }
445
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen, targ-junkout);
448
449 if(getTestOption(VERBOSITY_OPTION))
450 {
451 char junk[9999];
452 char offset_str[9999];
453 char *ptr;
454
455 junk[0] = 0;
456 offset_str[0] = 0;
457 for(ptr = junkout;ptr<targ;ptr++) {
458 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460 }
461
462 log_verbose(junk);
463 printSeq((const uint8_t *)expect, expectLen);
464 if ( checkOffsets ) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str);
467 }
468 log_verbose("\n");
469 }
470 ucnv_close(conv);
471
472 if(expectLen != targ-junkout) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475 fprintf(stderr, "Got:\n");
476 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477 fprintf(stderr, "Expected:\n");
478 printSeqErr((const unsigned char*)expect, expectLen);
479 return TC_MISMATCH;
480 }
481
482 if (checkOffsets && (expectOffsets != 0) ) {
483 log_verbose("comparing %d offsets..\n", targ-junkout);
484 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487 log_err("\n");
488 log_err("Got : ");
489 for(p=junkout;p<targ;p++) {
490 log_err("%d,", junokout[p-junkout]);
491 }
492 log_err("\n");
493 log_err("Expected: ");
494 for(i=0; i<(targ-junkout); i++) {
495 log_err("%d,", expectOffsets[i]);
496 }
497 log_err("\n");
498 }
499 }
500
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout, expect, expectLen)) {
503 log_verbose("Matches!\n");
504 return TC_OK;
505 } else {
506 log_err("String does not match u->%s\n", gNuConvTestName);
507 printUSeqErr(source, sourceLen);
508 fprintf(stderr, "Got:\n");
509 printSeqErr((const unsigned char *)junkout, expectLen);
510 fprintf(stderr, "Expected:\n");
511 printSeqErr((const unsigned char *)expect, expectLen);
512
513 return TC_MISMATCH;
514 }
515 }
516
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521 {
522 UErrorCode status = U_ZERO_ERROR;
523 UConverter *conv = 0;
524 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
525 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
526 const char *src;
527 const char *realSourceEnd;
528 const char *srcLimit;
529 UChar *p;
530 UChar *targ;
531 UChar *end;
532 int32_t *offs;
533 int i;
534 UBool checkOffsets = TRUE;
535
536 int32_t realBufferSize;
537 UChar *realBufferEnd;
538
539
540 for(i=0;i<NEW_MAX_BUFFER;i++)
541 junkout[i] = 0xFFFE;
542
543 for(i=0;i<NEW_MAX_BUFFER;i++)
544 junokout[i] = -1;
545
546 setNuConvTestName(codepage, "TO");
547
548 log_verbose("\n========= %s\n", gNuConvTestName);
549
550 conv = my_ucnv_open(codepage, &status);
551
552 if(U_FAILURE(status))
553 {
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555 return TC_FAIL;
556 }
557 if(useFallback){
558 ucnv_setFallback(conv,useFallback);
559 }
560 log_verbose("Converter opened..\n");
561
562 src = (const char *)source;
563 targ = junkout;
564 offs = junokout;
565
566 realBufferSize = UPRV_LENGTHOF(junkout);
567 realBufferEnd = junkout + realBufferSize;
568 realSourceEnd = src + sourcelen;
569
570 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
571 checkOffsets = FALSE;
572
573 do
574 {
575 end = nct_min( targ + gOutBufferSize, realBufferEnd);
576 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577
578 if(targ == realBufferEnd)
579 {
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581 return TC_FAIL;
582 }
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584
585 /* oldTarg = targ; */
586
587 status = U_ZERO_ERROR;
588
589 ucnv_toUnicode (conv,
590 &targ,
591 end,
592 &src,
593 srcLimit,
594 checkOffsets ? offs : NULL,
595 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
596 &status);
597
598 /* offs += (targ-oldTarg); */
599
600 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601
602 if(U_FAILURE(status))
603 {
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605 return TC_FAIL;
606 }
607
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen, targ-junkout);
610 if(getTestOption(VERBOSITY_OPTION))
611 {
612 char junk[9999];
613 char offset_str[9999];
614 UChar *ptr;
615
616 junk[0] = 0;
617 offset_str[0] = 0;
618
619 for(ptr = junkout;ptr<targ;ptr++)
620 {
621 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623 }
624
625 log_verbose(junk);
626 printUSeq(expect, expectlen);
627 if ( checkOffsets )
628 {
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str);
631 }
632 log_verbose("\n");
633 }
634 ucnv_close(conv);
635
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637
638 if (checkOffsets && (expectOffsets != 0))
639 {
640 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642 log_err("Got: ");
643 for(p=junkout;p<targ;p++) {
644 log_err("%d,", junokout[p-junkout]);
645 }
646 log_err("\n");
647 log_err("Expected: ");
648 for(i=0; i<(targ-junkout); i++) {
649 log_err("%d,", expectOffsets[i]);
650 }
651 log_err("\n");
652 log_err("output: ");
653 for(i=0; i<(targ-junkout); i++) {
654 log_err("%X,", junkout[i]);
655 }
656 log_err("\n");
657 log_err("input: ");
658 for(i=0; i<(src-(const char *)source); i++) {
659 log_err("%X,", (unsigned char)source[i]);
660 }
661 log_err("\n");
662 }
663 }
664
665 if(!memcmp(junkout, expect, expectlen*2))
666 {
667 log_verbose("Matches!\n");
668 return TC_OK;
669 }
670 else
671 {
672 log_err("String does not match. %s\n", gNuConvTestName);
673 log_verbose("String does not match. %s\n", gNuConvTestName);
674 printf("\nGot:");
675 printUSeqErr(junkout, expectlen);
676 printf("\nExpected:");
677 printUSeqErr(expect, expectlen);
678 return TC_MISMATCH;
679 }
680 }
681
682
683 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684 {
685 /** test chars #1 */
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691
692
693 static const uint8_t expectedUTF8[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceeding */
702 static const const uint8_t expectedISO2022[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709 #endif
710
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs [] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs [] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs [] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750
751
752 /* etc */
753 static const uint8_t expectedUTF16BE[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
759
760 static const uint8_t expectedUTF16LE[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766
767 static const uint8_t expectedUTF32BE[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
790
791 static const uint8_t expectedUTF32LE[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814
815
816
817
818 /** Test chars #2 **/
819
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823
824 /* LMBCS */
825 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
830
831 gInBufferSize = insize;
832 gOutBufferSize = outsize;
833
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835
836
837 /*UTF-8*/
838 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
839 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
840
841 log_verbose("Test surrogate behaviour for UTF8\n");
842 {
843 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
846 0xef, 0xbf, 0xbd
847 };
848 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
850 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
851
852
853 }
854
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856 /*ISO-2022*/
857 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
858 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
859 #endif
860
861 /*UTF16 LE*/
862 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
863 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
864 /*UTF16 BE*/
865 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
866 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
867 /*UTF32 LE*/
868 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
869 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
870 /*UTF32 BE*/
871 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
872 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
873
874 /*LATIN_1*/
875 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
876 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
877
878 #if !UCONFIG_NO_LEGACY_CONVERSION
879 /*EBCDIC_STATEFUL*/
880 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
881 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
882
883 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
884 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
885
886 /*MBCS*/
887
888 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
889 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
890 /*DBCS*/
891 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
892 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
893 /*SBCS*/
894 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
895 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
896 /*SBCS*/
897 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
898 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
899 #endif
900
901
902 /****/
903
904 /*UTF-8*/
905 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908 /*ISO-2022*/
909 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
911 #endif
912
913 /*UTF16 LE*/
914 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
916 /*UTF16 BE*/
917 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
919 /*UTF32 LE*/
920 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
922 /*UTF32 BE*/
923 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
925
926 #if !UCONFIG_NO_LEGACY_CONVERSION
927 /*EBCDIC_STATEFUL*/
928 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
930 /*MBCS*/
931 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
933 #endif
934
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
938
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
942
943 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
944 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
945
946 /*LMBCS*/
947 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
948 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
949 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
951 #endif
952
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954 {
955 /* encode directly set D and set O */
956 static const uint8_t utf7[] = {
957 /*
958 Hi Mom -+Jjo--!
959 A+ImIDkQ.
960 +-
961 +ZeVnLIqe-
962 */
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965 0x2b, 0x2d,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967 };
968 static const UChar unicode[] = {
969 /*
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
972 +
973 [Japanese word "nihongo"]
974 */
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
977 0x2b,
978 0x65e5, 0x672c, 0x8a9e
979 };
980 static const int32_t toUnicodeOffsets[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982 15, 17, 19, 23,
983 24,
984 27, 29, 32
985 };
986 static const int32_t fromUnicodeOffsets[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
989 15, 15,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991 };
992
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted[] = {
995 /*
996 Hi Mom -+Jjo--+ACE-
997 A+ImIDkQ.
998 +-
999 +ZeVnLIqe-
1000 */
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003 0x2b, 0x2d,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005 };
1006 static const int32_t toUnicodeOffsetsR[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008 19, 21, 23, 27,
1009 28,
1010 31, 33, 36
1011 };
1012 static const int32_t fromUnicodeOffsetsR[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1015 15, 15,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017 };
1018
1019 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1020
1021 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1022
1023 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1024
1025 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1026 }
1027
1028 /*
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1032 */
1033 {
1034 static const uint8_t imap[] = {
1035 /* Hi Mom -&Jjo--!
1036 A&ImIDkQ-.
1037 &-
1038 &ZeVnLIqe-
1039 \
1040 ~peter
1041 /mail
1042 /&ZeVnLIqe-
1043 /&U,BTFw-
1044 */
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047 0x26, 0x2d,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049 0x5c,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054 };
1055 static const UChar unicode[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1058 &
1059 [Japanese word "nihongo"]
1060 \
1061 ~peter
1062 /mail
1063 /<65e5, 672c, 8a9e>
1064 /<53f0, 5317>
1065 */
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1068 0x26,
1069 0x65e5, 0x672c, 0x8a9e,
1070 0x5c,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1075 };
1076 static const int32_t toUnicodeOffsets[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078 15, 17, 19, 24,
1079 25,
1080 28, 30, 33,
1081 37,
1082 38, 39, 40, 41, 42, 43,
1083 44, 45, 46, 47, 48,
1084 49, 51, 53, 56,
1085 60, 62, 64
1086 };
1087 static const int32_t fromUnicodeOffsets[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090 15, 15,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092 19,
1093 20, 21, 22, 23, 24, 25,
1094 26, 27, 28, 29, 30,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1097 };
1098
1099 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1100
1101 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1102 }
1103
1104 /* Test UTF-8 bad data handling*/
1105 {
1106 static const uint8_t utf8[]={
1107 0x61,
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1109 0x00,
1110 0x62,
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1117 0x02
1118 };
1119
1120 static const uint16_t utf8Expected[]={
1121 0x0061,
1122 0xfffd,
1123 0x0000,
1124 0x0062,
1125 0xfffd,
1126 0xfffd,
1127 0xdbff, 0xdfff,
1128 0x07ff,
1129 0xfffd,
1130 0xfffd,
1131 0x0002
1132 };
1133
1134 static const int32_t utf8Offsets[]={
1135 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1136 };
1137 testConvertToU(utf8, sizeof(utf8),
1138 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1139
1140 }
1141
1142 /* Test UTF-32BE bad data handling*/
1143 {
1144 static const uint8_t utf32[]={
1145 0x00, 0x00, 0x00, 0x61,
1146 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1147 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1148 0x00, 0x00, 0x00, 0x62,
1149 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1150 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1151 0x00, 0x00, 0x01, 0x62,
1152 0x00, 0x00, 0x02, 0x62
1153 };
1154 static const uint16_t utf32Expected[]={
1155 0x0061,
1156 0xfffd, /* 0x110000 out of range */
1157 0xDBFF, /* 0x10FFFF in range */
1158 0xDFFF,
1159 0x0062,
1160 0xfffd, /* 0xffffffff out of range */
1161 0xfffd, /* 0x7fffffff out of range */
1162 0x0162,
1163 0x0262
1164 };
1165 static const int32_t utf32Offsets[]={
1166 0, 4, 8, 8, 12, 16, 20, 24, 28
1167 };
1168 static const uint8_t utf32ExpectedBack[]={
1169 0x00, 0x00, 0x00, 0x61,
1170 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1171 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1172 0x00, 0x00, 0x00, 0x62,
1173 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1174 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1175 0x00, 0x00, 0x01, 0x62,
1176 0x00, 0x00, 0x02, 0x62
1177 };
1178 static const int32_t utf32OffsetsBack[]={
1179 0,0,0,0,
1180 1,1,1,1,
1181 2,2,2,2,
1182 4,4,4,4,
1183 5,5,5,5,
1184 6,6,6,6,
1185 7,7,7,7,
1186 8,8,8,8
1187 };
1188
1189 testConvertToU(utf32, sizeof(utf32),
1190 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1191 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1192 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1193 }
1194
1195 /* Test UTF-32LE bad data handling*/
1196 {
1197 static const uint8_t utf32[]={
1198 0x61, 0x00, 0x00, 0x00,
1199 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1200 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1201 0x62, 0x00, 0x00, 0x00,
1202 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1203 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1204 0x62, 0x01, 0x00, 0x00,
1205 0x62, 0x02, 0x00, 0x00,
1206 };
1207
1208 static const uint16_t utf32Expected[]={
1209 0x0061,
1210 0xfffd, /* 0x110000 out of range */
1211 0xDBFF, /* 0x10FFFF in range */
1212 0xDFFF,
1213 0x0062,
1214 0xfffd, /* 0xffffffff out of range */
1215 0xfffd, /* 0x7fffffff out of range */
1216 0x0162,
1217 0x0262
1218 };
1219 static const int32_t utf32Offsets[]={
1220 0, 4, 8, 8, 12, 16, 20, 24, 28
1221 };
1222 static const uint8_t utf32ExpectedBack[]={
1223 0x61, 0x00, 0x00, 0x00,
1224 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1225 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1226 0x62, 0x00, 0x00, 0x00,
1227 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1228 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1229 0x62, 0x01, 0x00, 0x00,
1230 0x62, 0x02, 0x00, 0x00
1231 };
1232 static const int32_t utf32OffsetsBack[]={
1233 0,0,0,0,
1234 1,1,1,1,
1235 2,2,2,2,
1236 4,4,4,4,
1237 5,5,5,5,
1238 6,6,6,6,
1239 7,7,7,7,
1240 8,8,8,8
1241 };
1242 testConvertToU(utf32, sizeof(utf32),
1243 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1244 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1245 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1246 }
1247 }
1248
1249 static void TestCoverageMBCS(){
1250 #if 0
1251 UErrorCode status = U_ZERO_ERROR;
1252 const char *directory = loadTestData(&status);
1253 char* tdpath = NULL;
1254 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1255 int len = strlen(directory);
1256 char* index=NULL;
1257
1258 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1259 uprv_strcpy(saveDirectory,u_getDataDirectory());
1260 log_verbose("Retrieved data directory %s \n",saveDirectory);
1261 uprv_strcpy(tdpath,directory);
1262 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1263
1264 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1265 *(index+1)=0;
1266 }
1267 u_setDataDirectory(tdpath);
1268 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1269 #endif
1270
1271 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1272 which is test file for MBCS conversion with single-byte codepage data.*/
1273 {
1274
1275 /* MBCS with single byte codepage data test1.ucm*/
1276 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1277 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1278 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1279
1280 /*from Unicode*/
1281 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1282 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1283 }
1284
1285 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1286 which is test file for MBCS conversion with three-byte codepage data.*/
1287 {
1288
1289 /* MBCS with three byte codepage data test3.ucm*/
1290 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1291 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1292 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1293
1294 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1295 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1296 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1297
1298 /*from Unicode*/
1299 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1300 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1301
1302 /*to Unicode*/
1303 testConvertToU(test3input, sizeof(test3input),
1304 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1305
1306 }
1307
1308 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1309 which is test file for MBCS conversion with four-byte codepage data.*/
1310 {
1311
1312 /* MBCS with three byte codepage data test4.ucm*/
1313 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1314 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1315 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1316
1317 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1318 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1319 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1320
1321 /*from Unicode*/
1322 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1323 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1324
1325 /*to Unicode*/
1326 testConvertToU(test4input, sizeof(test4input),
1327 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1328
1329 }
1330 #if 0
1331 free(tdpath);
1332 /* restore the original data directory */
1333 log_verbose("Setting the data directory to %s \n", saveDirectory);
1334 u_setDataDirectory(saveDirectory);
1335 free(saveDirectory);
1336 #endif
1337
1338 }
1339
1340 static void TestConverterType(const char *convName, UConverterType convType) {
1341 UConverter* myConverter;
1342 UErrorCode err = U_ZERO_ERROR;
1343
1344 myConverter = my_ucnv_open(convName, &err);
1345
1346 if (U_FAILURE(err)) {
1347 log_data_err("Failed to create an %s converter\n", convName);
1348 return;
1349 }
1350 else
1351 {
1352 if (ucnv_getType(myConverter)!=convType) {
1353 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1354 convName, convType);
1355 }
1356 else {
1357 log_verbose("ucnv_getType %s ok\n", convName);
1358 }
1359 }
1360 ucnv_close(myConverter);
1361 }
1362
1363 static void TestConverterTypesAndStarters()
1364 {
1365 #if !UCONFIG_NO_LEGACY_CONVERSION
1366 UConverter* myConverter;
1367 UErrorCode err = U_ZERO_ERROR;
1368 UBool mystarters[256];
1369
1370 /* const UBool expectedKSCstarters[256] = {
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1397
1398
1399 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1400
1401 myConverter = ucnv_open("ksc", &err);
1402 if (U_FAILURE(err)) {
1403 log_data_err("Failed to create an ibm-ksc converter\n");
1404 return;
1405 }
1406 else
1407 {
1408 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1409 log_err("ucnv_getType Failed for ibm-949\n");
1410 else
1411 log_verbose("ucnv_getType ibm-949 ok\n");
1412
1413 if(myConverter!=NULL)
1414 ucnv_getStarters(myConverter, mystarters, &err);
1415
1416 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1417 log_err("Failed ucnv_getStarters for ksc\n");
1418 else
1419 log_verbose("ucnv_getStarters ok\n");*/
1420
1421 }
1422 ucnv_close(myConverter);
1423
1424 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1425 TestConverterType("ibm-878", UCNV_SBCS);
1426 #endif
1427
1428 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1429
1430 TestConverterType("ibm-1208", UCNV_UTF8);
1431
1432 TestConverterType("utf-8", UCNV_UTF8);
1433 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1434 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1435 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1436 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1437
1438 #if !UCONFIG_NO_LEGACY_CONVERSION
1439
1440 #if defined(U_ENABLE_GENERIC_ISO_2022)
1441 TestConverterType("iso-2022", UCNV_ISO_2022);
1442 #endif
1443
1444 TestConverterType("hz", UCNV_HZ);
1445 #endif
1446
1447 TestConverterType("scsu", UCNV_SCSU);
1448
1449 #if !UCONFIG_NO_LEGACY_CONVERSION
1450 TestConverterType("x-iscii-de", UCNV_ISCII);
1451 #endif
1452
1453 TestConverterType("ascii", UCNV_US_ASCII);
1454 TestConverterType("utf-7", UCNV_UTF7);
1455 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1456 TestConverterType("bocu-1", UCNV_BOCU1);
1457 }
1458
1459 static void
1460 TestAmbiguousConverter(UConverter *cnv) {
1461 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1462 UChar outUnicode[20]={ 0, 0, 0, 0 };
1463
1464 const char *s;
1465 UChar *u;
1466 UErrorCode errorCode;
1467 UBool isAmbiguous;
1468
1469 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1470 errorCode=U_ZERO_ERROR;
1471 s=inBytes;
1472 u=outUnicode;
1473 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1474 if(U_FAILURE(errorCode)) {
1475 /* we do not care about general failures in this test; the input may just not be mappable */
1476 return;
1477 }
1478
1479 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1480 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1481 /* There are some encodings that are partially ASCII based,
1482 like the ISO-7 and GSM series of codepages, which we ignore. */
1483 return;
1484 }
1485
1486 isAmbiguous=ucnv_isAmbiguous(cnv);
1487
1488 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1489 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1490 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1491 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1492 return;
1493 }
1494
1495 if(outUnicode[2]!=0x5c) {
1496 /* needs fixup, fix it */
1497 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1498 if(outUnicode[2]!=0x5c) {
1499 /* the fix failed */
1500 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1501 return;
1502 }
1503 }
1504 }
1505
1506 static void TestAmbiguous()
1507 {
1508 UErrorCode status = U_ZERO_ERROR;
1509 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1510 static const char target[] = {
1511 /* "\\usr\\local\\share\\data\\icutest.txt" */
1512 0x5c, 0x75, 0x73, 0x72,
1513 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1514 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1515 0x5c, 0x64, 0x61, 0x74, 0x61,
1516 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1517 0
1518 };
1519 UChar asciiResult[200], sjisResult[200];
1520 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1521 const char *name;
1522
1523 /* enumerate all converters */
1524 status=U_ZERO_ERROR;
1525 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1526 cnv=ucnv_open(name, &status);
1527 if(U_SUCCESS(status)) {
1528 TestAmbiguousConverter(cnv);
1529 ucnv_close(cnv);
1530 } else {
1531 log_err("error: unable to open available converter \"%s\"\n", name);
1532 status=U_ZERO_ERROR;
1533 }
1534 }
1535
1536 #if !UCONFIG_NO_LEGACY_CONVERSION
1537 sjis_cnv = ucnv_open("ibm-943", &status);
1538 if (U_FAILURE(status))
1539 {
1540 log_data_err("Failed to create a SJIS converter\n");
1541 return;
1542 }
1543 ascii_cnv = ucnv_open("LATIN-1", &status);
1544 if (U_FAILURE(status))
1545 {
1546 log_data_err("Failed to create a LATIN-1 converter\n");
1547 ucnv_close(sjis_cnv);
1548 return;
1549 }
1550 /* convert target from SJIS to Unicode */
1551 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1552 if (U_FAILURE(status))
1553 {
1554 log_err("Failed to convert the SJIS string.\n");
1555 ucnv_close(sjis_cnv);
1556 ucnv_close(ascii_cnv);
1557 return;
1558 }
1559 /* convert target from Latin-1 to Unicode */
1560 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1561 if (U_FAILURE(status))
1562 {
1563 log_err("Failed to convert the Latin-1 string.\n");
1564 ucnv_close(sjis_cnv);
1565 ucnv_close(ascii_cnv);
1566 return;
1567 }
1568 if (!ucnv_isAmbiguous(sjis_cnv))
1569 {
1570 log_err("SJIS converter should contain ambiguous character mappings.\n");
1571 ucnv_close(sjis_cnv);
1572 ucnv_close(ascii_cnv);
1573 return;
1574 }
1575 if (u_strcmp(sjisResult, asciiResult) == 0)
1576 {
1577 log_err("File separators for SJIS don't need to be fixed.\n");
1578 }
1579 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1580 if (u_strcmp(sjisResult, asciiResult) != 0)
1581 {
1582 log_err("Fixing file separator for SJIS failed.\n");
1583 }
1584 ucnv_close(sjis_cnv);
1585 ucnv_close(ascii_cnv);
1586 #endif
1587 }
1588
1589 static void
1590 TestSignatureDetection(){
1591 /* with null terminated strings */
1592 {
1593 static const char* data[] = {
1594 "\xFE\xFF\x00\x00", /* UTF-16BE */
1595 "\xFF\xFE\x00\x00", /* UTF-16LE */
1596 "\xEF\xBB\xBF\x00", /* UTF-8 */
1597 "\x0E\xFE\xFF\x00", /* SCSU */
1598
1599 "\xFE\xFF", /* UTF-16BE */
1600 "\xFF\xFE", /* UTF-16LE */
1601 "\xEF\xBB\xBF", /* UTF-8 */
1602 "\x0E\xFE\xFF", /* SCSU */
1603
1604 "\xFE\xFF\x41\x42", /* UTF-16BE */
1605 "\xFF\xFE\x41\x41", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x41", /* UTF-8 */
1607 "\x0E\xFE\xFF\x41", /* SCSU */
1608
1609 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1610 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1611 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1612 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1613 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1614
1615 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1616 };
1617 static const char* expected[] = {
1618 "UTF-16BE",
1619 "UTF-16LE",
1620 "UTF-8",
1621 "SCSU",
1622
1623 "UTF-16BE",
1624 "UTF-16LE",
1625 "UTF-8",
1626 "SCSU",
1627
1628 "UTF-16BE",
1629 "UTF-16LE",
1630 "UTF-8",
1631 "SCSU",
1632
1633 "UTF-7",
1634 "UTF-7",
1635 "UTF-7",
1636 "UTF-7",
1637 "UTF-7",
1638 "UTF-EBCDIC"
1639 };
1640 static const int32_t expectedLength[] ={
1641 2,
1642 2,
1643 3,
1644 3,
1645
1646 2,
1647 2,
1648 3,
1649 3,
1650
1651 2,
1652 2,
1653 3,
1654 3,
1655
1656 5,
1657 4,
1658 4,
1659 4,
1660 4,
1661 4
1662 };
1663 int i=0;
1664 UErrorCode err;
1665 int32_t signatureLength = -1;
1666 const char* source = NULL;
1667 const char* enc = NULL;
1668 for( ; i<UPRV_LENGTHOF(data); i++){
1669 err = U_ZERO_ERROR;
1670 source = data[i];
1671 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1672 if(U_FAILURE(err)){
1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1674 continue;
1675 }
1676 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1677 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1678 continue;
1679 }
1680 if(signatureLength != expectedLength[i]){
1681 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1682 }
1683 }
1684 }
1685 {
1686 static const char* data[] = {
1687 "\xFE\xFF\x00", /* UTF-16BE */
1688 "\xFF\xFE\x00", /* UTF-16LE */
1689 "\xEF\xBB\xBF\x00", /* UTF-8 */
1690 "\x0E\xFE\xFF\x00", /* SCSU */
1691 "\x00\x00\xFE\xFF", /* UTF-32BE */
1692 "\xFF\xFE\x00\x00", /* UTF-32LE */
1693 "\xFE\xFF", /* UTF-16BE */
1694 "\xFF\xFE", /* UTF-16LE */
1695 "\xEF\xBB\xBF", /* UTF-8 */
1696 "\x0E\xFE\xFF", /* SCSU */
1697 "\x00\x00\xFE\xFF", /* UTF-32BE */
1698 "\xFF\xFE\x00\x00", /* UTF-32LE */
1699 "\xFE\xFF\x41\x42", /* UTF-16BE */
1700 "\xFF\xFE\x41\x41", /* UTF-16LE */
1701 "\xEF\xBB\xBF\x41", /* UTF-8 */
1702 "\x0E\xFE\xFF\x41", /* SCSU */
1703 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1704 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1705 "\xFB\xEE\x28", /* BOCU-1 */
1706 "\xFF\x41\x42" /* NULL */
1707 };
1708 static const int len[] = {
1709 3,
1710 3,
1711 4,
1712 4,
1713 4,
1714 4,
1715 2,
1716 2,
1717 3,
1718 3,
1719 4,
1720 4,
1721 4,
1722 4,
1723 4,
1724 4,
1725 5,
1726 5,
1727 3,
1728 3
1729 };
1730
1731 static const char* expected[] = {
1732 "UTF-16BE",
1733 "UTF-16LE",
1734 "UTF-8",
1735 "SCSU",
1736 "UTF-32BE",
1737 "UTF-32LE",
1738 "UTF-16BE",
1739 "UTF-16LE",
1740 "UTF-8",
1741 "SCSU",
1742 "UTF-32BE",
1743 "UTF-32LE",
1744 "UTF-16BE",
1745 "UTF-16LE",
1746 "UTF-8",
1747 "SCSU",
1748 "UTF-32BE",
1749 "UTF-32LE",
1750 "BOCU-1",
1751 NULL
1752 };
1753 static const int32_t expectedLength[] ={
1754 2,
1755 2,
1756 3,
1757 3,
1758 4,
1759 4,
1760 2,
1761 2,
1762 3,
1763 3,
1764 4,
1765 4,
1766 2,
1767 2,
1768 3,
1769 3,
1770 4,
1771 4,
1772 3,
1773 0
1774 };
1775 int i=0;
1776 UErrorCode err;
1777 int32_t signatureLength = -1;
1778 int32_t sourceLength=-1;
1779 const char* source = NULL;
1780 const char* enc = NULL;
1781 for( ; i<UPRV_LENGTHOF(data); i++){
1782 err = U_ZERO_ERROR;
1783 source = data[i];
1784 sourceLength = len[i];
1785 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1786 if(U_FAILURE(err)){
1787 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1788 continue;
1789 }
1790 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1791 if(expected[i] !=NULL){
1792 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1793 continue;
1794 }
1795 }
1796 if(signatureLength != expectedLength[i]){
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1798 }
1799 }
1800 }
1801 }
1802
1803 static void TestUTF7() {
1804 /* test input */
1805 static const uint8_t in[]={
1806 /* H - +Jjo- - ! +- +2AHcAQ */
1807 0x48,
1808 0x2d,
1809 0x2b, 0x4a, 0x6a, 0x6f,
1810 0x2d, 0x2d,
1811 0x21,
1812 0x2b, 0x2d,
1813 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1814 };
1815
1816 /* expected test results */
1817 static const int32_t results[]={
1818 /* number of bytes read, code point */
1819 1, 0x48,
1820 1, 0x2d,
1821 4, 0x263a, /* <WHITE SMILING FACE> */
1822 2, 0x2d,
1823 1, 0x21,
1824 2, 0x2b,
1825 7, 0x10401
1826 };
1827
1828 const char *cnvName;
1829 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1830 UErrorCode errorCode=U_ZERO_ERROR;
1831 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1832 if(U_FAILURE(errorCode)) {
1833 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1834 return;
1835 }
1836 TestNextUChar(cnv, source, limit, results, "UTF-7");
1837 /* Test the condition when source >= sourceLimit */
1838 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1839 cnvName = ucnv_getName(cnv, &errorCode);
1840 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1841 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1842 }
1843 ucnv_close(cnv);
1844 }
1845
1846 static void TestIMAP() {
1847 /* test input */
1848 static const uint8_t in[]={
1849 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1850 0x48,
1851 0x2d,
1852 0x26, 0x4a, 0x6a, 0x6f,
1853 0x2d, 0x2d,
1854 0x21,
1855 0x26, 0x2d,
1856 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1857 };
1858
1859 /* expected test results */
1860 static const int32_t results[]={
1861 /* number of bytes read, code point */
1862 1, 0x48,
1863 1, 0x2d,
1864 4, 0x263a, /* <WHITE SMILING FACE> */
1865 2, 0x2d,
1866 1, 0x21,
1867 2, 0x26,
1868 7, 0x10401
1869 };
1870
1871 const char *cnvName;
1872 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1873 UErrorCode errorCode=U_ZERO_ERROR;
1874 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1875 if(U_FAILURE(errorCode)) {
1876 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1877 return;
1878 }
1879 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1880 /* Test the condition when source >= sourceLimit */
1881 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1882 cnvName = ucnv_getName(cnv, &errorCode);
1883 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1884 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1885 }
1886 ucnv_close(cnv);
1887 }
1888
1889 static void TestUTF8() {
1890 /* test input */
1891 static const uint8_t in[]={
1892 0x61,
1893 0xc2, 0x80,
1894 0xe0, 0xa0, 0x80,
1895 0xf0, 0x90, 0x80, 0x80,
1896 0xf4, 0x84, 0x8c, 0xa1,
1897 0xf0, 0x90, 0x90, 0x81
1898 };
1899
1900 /* expected test results */
1901 static const int32_t results[]={
1902 /* number of bytes read, code point */
1903 1, 0x61,
1904 2, 0x80,
1905 3, 0x800,
1906 4, 0x10000,
1907 4, 0x104321,
1908 4, 0x10401
1909 };
1910
1911 /* error test input */
1912 static const uint8_t in2[]={
1913 0x61,
1914 0xc0, 0x80, /* illegal non-shortest form */
1915 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1916 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1917 0xc0, 0xc0, /* illegal trail byte */
1918 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1919 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1920 0xfe, /* illegal byte altogether */
1921 0x62
1922 };
1923
1924 /* expected error test results */
1925 static const int32_t results2[]={
1926 /* number of bytes read, code point */
1927 1, 0x61,
1928 22, 0x62
1929 };
1930
1931 UConverterToUCallback cb;
1932 const void *p;
1933
1934 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1935 UErrorCode errorCode=U_ZERO_ERROR;
1936 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1937 if(U_FAILURE(errorCode)) {
1938 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1939 return;
1940 }
1941 TestNextUChar(cnv, source, limit, results, "UTF-8");
1942 /* Test the condition when source >= sourceLimit */
1943 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1944
1945 /* test error behavior with a skip callback */
1946 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1947 source=(const char *)in2;
1948 limit=(const char *)(in2+sizeof(in2));
1949 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1950
1951 ucnv_close(cnv);
1952 }
1953
1954 static void TestCESU8() {
1955 /* test input */
1956 static const uint8_t in[]={
1957 0x61,
1958 0xc2, 0x80,
1959 0xe0, 0xa0, 0x80,
1960 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1961 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1962 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1963 0xef, 0xbf, 0xbc
1964 };
1965
1966 /* expected test results */
1967 static const int32_t results[]={
1968 /* number of bytes read, code point */
1969 1, 0x61,
1970 2, 0x80,
1971 3, 0x800,
1972 6, 0x10000,
1973 3, 0xdc01,
1974 -1,0xd802, /* may read 3 or 6 bytes */
1975 -1,0x10ffff,/* may read 0 or 3 bytes */
1976 3, 0xfffc
1977 };
1978
1979 /* error test input */
1980 static const uint8_t in2[]={
1981 0x61,
1982 0xc0, 0x80, /* illegal non-shortest form */
1983 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1984 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1985 0xc0, 0xc0, /* illegal trail byte */
1986 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1987 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1988 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1989 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1990 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1991 0xfe, /* illegal byte altogether */
1992 0x62
1993 };
1994
1995 /* expected error test results */
1996 static const int32_t results2[]={
1997 /* number of bytes read, code point */
1998 1, 0x61,
1999 34, 0x62
2000 };
2001
2002 UConverterToUCallback cb;
2003 const void *p;
2004
2005 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2006 UErrorCode errorCode=U_ZERO_ERROR;
2007 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2008 if(U_FAILURE(errorCode)) {
2009 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2010 return;
2011 }
2012 TestNextUChar(cnv, source, limit, results, "CESU-8");
2013 /* Test the condition when source >= sourceLimit */
2014 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2015
2016 /* test error behavior with a skip callback */
2017 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2018 source=(const char *)in2;
2019 limit=(const char *)(in2+sizeof(in2));
2020 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2021
2022 ucnv_close(cnv);
2023 }
2024
2025 static void TestUTF16() {
2026 /* test input */
2027 static const uint8_t in1[]={
2028 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2029 };
2030 static const uint8_t in2[]={
2031 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2032 };
2033 static const uint8_t in3[]={
2034 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2035 };
2036
2037 /* expected test results */
2038 static const int32_t results1[]={
2039 /* number of bytes read, code point */
2040 4, 0x4e00,
2041 2, 0xfeff
2042 };
2043 static const int32_t results2[]={
2044 /* number of bytes read, code point */
2045 4, 0x004e,
2046 2, 0xfffe
2047 };
2048 static const int32_t results3[]={
2049 /* number of bytes read, code point */
2050 2, 0xfefe,
2051 2, 0x4e00,
2052 2, 0xfeff,
2053 4, 0x20001
2054 };
2055
2056 const char *source, *limit;
2057
2058 UErrorCode errorCode=U_ZERO_ERROR;
2059 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2060 if(U_FAILURE(errorCode)) {
2061 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2062 return;
2063 }
2064
2065 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2066 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2067
2068 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2069 ucnv_resetToUnicode(cnv);
2070 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2071
2072 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2073 ucnv_resetToUnicode(cnv);
2074 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2075
2076 /* Test the condition when source >= sourceLimit */
2077 ucnv_resetToUnicode(cnv);
2078 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079
2080 ucnv_close(cnv);
2081 }
2082
2083 static void TestUTF16BE() {
2084 /* test input */
2085 static const uint8_t in[]={
2086 0x00, 0x61,
2087 0x00, 0xc0,
2088 0x00, 0x31,
2089 0x00, 0xf4,
2090 0xce, 0xfe,
2091 0xd8, 0x01, 0xdc, 0x01
2092 };
2093
2094 /* expected test results */
2095 static const int32_t results[]={
2096 /* number of bytes read, code point */
2097 2, 0x61,
2098 2, 0xc0,
2099 2, 0x31,
2100 2, 0xf4,
2101 2, 0xcefe,
2102 4, 0x10401
2103 };
2104
2105 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2106 UErrorCode errorCode=U_ZERO_ERROR;
2107 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2108 if(U_FAILURE(errorCode)) {
2109 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2110 return;
2111 }
2112 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2113 /* Test the condition when source >= sourceLimit */
2114 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2115 /*Test for the condition where there is an invalid character*/
2116 {
2117 static const uint8_t source2[]={0x61};
2118 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2119 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2120 }
2121 #if 0
2122 /*
2123 * Test disabled because currently the UTF-16BE/LE converters are supposed
2124 * to not set errors for unpaired surrogates.
2125 * This may change with
2126 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2127 */
2128
2129 /*Test for the condition where there is a surrogate pair*/
2130 {
2131 const uint8_t source2[]={0xd8, 0x01};
2132 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2133 }
2134 #endif
2135 ucnv_close(cnv);
2136 }
2137
2138 static void
2139 TestUTF16LE() {
2140 /* test input */
2141 static const uint8_t in[]={
2142 0x61, 0x00,
2143 0x31, 0x00,
2144 0x4e, 0x2e,
2145 0x4e, 0x00,
2146 0x01, 0xd8, 0x01, 0xdc
2147 };
2148
2149 /* expected test results */
2150 static const int32_t results[]={
2151 /* number of bytes read, code point */
2152 2, 0x61,
2153 2, 0x31,
2154 2, 0x2e4e,
2155 2, 0x4e,
2156 4, 0x10401
2157 };
2158
2159 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2160 UErrorCode errorCode=U_ZERO_ERROR;
2161 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2162 if(U_FAILURE(errorCode)) {
2163 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2164 return;
2165 }
2166 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2167 /* Test the condition when source >= sourceLimit */
2168 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2169 /*Test for the condition where there is an invalid character*/
2170 {
2171 static const uint8_t source2[]={0x61};
2172 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2173 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2174 }
2175 #if 0
2176 /*
2177 * Test disabled because currently the UTF-16BE/LE converters are supposed
2178 * to not set errors for unpaired surrogates.
2179 * This may change with
2180 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2181 */
2182
2183 /*Test for the condition where there is a surrogate character*/
2184 {
2185 static const uint8_t source2[]={0x01, 0xd8};
2186 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2187 }
2188 #endif
2189
2190 ucnv_close(cnv);
2191 }
2192
2193 static void TestUTF32() {
2194 /* test input */
2195 static const uint8_t in1[]={
2196 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2197 };
2198 static const uint8_t in2[]={
2199 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2200 };
2201 static const uint8_t in3[]={
2202 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2203 };
2204
2205 /* expected test results */
2206 static const int32_t results1[]={
2207 /* number of bytes read, code point */
2208 8, 0x100f00,
2209 4, 0xfeff
2210 };
2211 static const int32_t results2[]={
2212 /* number of bytes read, code point */
2213 8, 0x0f1000,
2214 4, 0xfffe
2215 };
2216 static const int32_t results3[]={
2217 /* number of bytes read, code point */
2218 4, 0xfefe,
2219 4, 0x100f00,
2220 4, 0xfffd, /* unmatched surrogate */
2221 4, 0xfffd /* unmatched surrogate */
2222 };
2223
2224 const char *source, *limit;
2225
2226 UErrorCode errorCode=U_ZERO_ERROR;
2227 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2228 if(U_FAILURE(errorCode)) {
2229 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2230 return;
2231 }
2232
2233 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2234 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2235
2236 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2237 ucnv_resetToUnicode(cnv);
2238 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2239
2240 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2241 ucnv_resetToUnicode(cnv);
2242 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2243
2244 /* Test the condition when source >= sourceLimit */
2245 ucnv_resetToUnicode(cnv);
2246 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2247
2248 ucnv_close(cnv);
2249 }
2250
2251 static void
2252 TestUTF32BE() {
2253 /* test input */
2254 static const uint8_t in[]={
2255 0x00, 0x00, 0x00, 0x61,
2256 0x00, 0x00, 0x30, 0x61,
2257 0x00, 0x00, 0xdc, 0x00,
2258 0x00, 0x00, 0xd8, 0x00,
2259 0x00, 0x00, 0xdf, 0xff,
2260 0x00, 0x00, 0xff, 0xfe,
2261 0x00, 0x10, 0xab, 0xcd,
2262 0x00, 0x10, 0xff, 0xff
2263 };
2264
2265 /* expected test results */
2266 static const int32_t results[]={
2267 /* number of bytes read, code point */
2268 4, 0x61,
2269 4, 0x3061,
2270 4, 0xfffd,
2271 4, 0xfffd,
2272 4, 0xfffd,
2273 4, 0xfffe,
2274 4, 0x10abcd,
2275 4, 0x10ffff
2276 };
2277
2278 /* error test input */
2279 static const uint8_t in2[]={
2280 0x00, 0x00, 0x00, 0x61,
2281 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2282 0x00, 0x00, 0x00, 0x62,
2283 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2284 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2285 0x00, 0x00, 0x01, 0x62,
2286 0x00, 0x00, 0x02, 0x62
2287 };
2288
2289 /* expected error test results */
2290 static const int32_t results2[]={
2291 /* number of bytes read, code point */
2292 4, 0x61,
2293 8, 0x62,
2294 12, 0x162,
2295 4, 0x262
2296 };
2297
2298 UConverterToUCallback cb;
2299 const void *p;
2300
2301 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2302 UErrorCode errorCode=U_ZERO_ERROR;
2303 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2304 if(U_FAILURE(errorCode)) {
2305 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2306 return;
2307 }
2308 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2309
2310 /* Test the condition when source >= sourceLimit */
2311 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2312
2313 /* test error behavior with a skip callback */
2314 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2315 source=(const char *)in2;
2316 limit=(const char *)(in2+sizeof(in2));
2317 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2318
2319 ucnv_close(cnv);
2320 }
2321
2322 static void
2323 TestUTF32LE() {
2324 /* test input */
2325 static const uint8_t in[]={
2326 0x61, 0x00, 0x00, 0x00,
2327 0x61, 0x30, 0x00, 0x00,
2328 0x00, 0xdc, 0x00, 0x00,
2329 0x00, 0xd8, 0x00, 0x00,
2330 0xff, 0xdf, 0x00, 0x00,
2331 0xfe, 0xff, 0x00, 0x00,
2332 0xcd, 0xab, 0x10, 0x00,
2333 0xff, 0xff, 0x10, 0x00
2334 };
2335
2336 /* expected test results */
2337 static const int32_t results[]={
2338 /* number of bytes read, code point */
2339 4, 0x61,
2340 4, 0x3061,
2341 4, 0xfffd,
2342 4, 0xfffd,
2343 4, 0xfffd,
2344 4, 0xfffe,
2345 4, 0x10abcd,
2346 4, 0x10ffff
2347 };
2348
2349 /* error test input */
2350 static const uint8_t in2[]={
2351 0x61, 0x00, 0x00, 0x00,
2352 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2353 0x62, 0x00, 0x00, 0x00,
2354 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2355 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2356 0x62, 0x01, 0x00, 0x00,
2357 0x62, 0x02, 0x00, 0x00,
2358 };
2359
2360 /* expected error test results */
2361 static const int32_t results2[]={
2362 /* number of bytes read, code point */
2363 4, 0x61,
2364 8, 0x62,
2365 12, 0x162,
2366 4, 0x262,
2367 };
2368
2369 UConverterToUCallback cb;
2370 const void *p;
2371
2372 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2373 UErrorCode errorCode=U_ZERO_ERROR;
2374 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2375 if(U_FAILURE(errorCode)) {
2376 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2377 return;
2378 }
2379 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2380
2381 /* Test the condition when source >= sourceLimit */
2382 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2383
2384 /* test error behavior with a skip callback */
2385 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2386 source=(const char *)in2;
2387 limit=(const char *)(in2+sizeof(in2));
2388 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2389
2390 ucnv_close(cnv);
2391 }
2392
2393 static void
2394 TestLATIN1() {
2395 /* test input */
2396 static const uint8_t in[]={
2397 0x61,
2398 0x31,
2399 0x32,
2400 0xc0,
2401 0xf0,
2402 0xf4,
2403 };
2404
2405 /* expected test results */
2406 static const int32_t results[]={
2407 /* number of bytes read, code point */
2408 1, 0x61,
2409 1, 0x31,
2410 1, 0x32,
2411 1, 0xc0,
2412 1, 0xf0,
2413 1, 0xf4,
2414 };
2415 static const uint16_t in1[] = {
2416 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2417 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2418 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2419 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2420 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2421 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2422 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2423 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2424 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2425 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2426 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2427 0xcb, 0x82
2428 };
2429 static const uint8_t out1[] = {
2430 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2431 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2432 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2433 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2434 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2435 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2436 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2437 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2438 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2439 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2440 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2441 0xcb, 0x82
2442 };
2443 static const uint16_t in2[]={
2444 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2445 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2446 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2447 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2448 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2449 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2450 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2451 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2452 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2453 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2454 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2455 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2456 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2457 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2458 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2459 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2460 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2461 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2462 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2463 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2464 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2465 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2466 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2467 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2468 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2469 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2470 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2471 0x37, 0x20, 0x2A, 0x2F,
2472 };
2473 static const unsigned char out2[]={
2474 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2475 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2476 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2477 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2478 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2479 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2480 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2481 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2482 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2483 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2484 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2485 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2486 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2487 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2488 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2489 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2490 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2491 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2492 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2493 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2494 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2495 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2496 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2497 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2498 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2499 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2500 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2501 0x37, 0x20, 0x2A, 0x2F,
2502 };
2503 const char *source=(const char *)in;
2504 const char *limit=(const char *)in+sizeof(in);
2505
2506 UErrorCode errorCode=U_ZERO_ERROR;
2507 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2508 if(U_FAILURE(errorCode)) {
2509 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2510 return;
2511 }
2512 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2513 /* Test the condition when source >= sourceLimit */
2514 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2515 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2516 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2517
2518 ucnv_close(cnv);
2519 }
2520
2521 static void
2522 TestSBCS() {
2523 /* test input */
2524 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2525 /* expected test results */
2526 static const int32_t results[]={
2527 /* number of bytes read, code point */
2528 1, 0x61,
2529 1, 0xbf,
2530 1, 0xc4,
2531 1, 0x2021,
2532 1, 0xf8ff,
2533 1, 0x00d9
2534 };
2535
2536 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2537 UErrorCode errorCode=U_ZERO_ERROR;
2538 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2539 if(U_FAILURE(errorCode)) {
2540 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2541 return;
2542 }
2543 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2544 /* Test the condition when source >= sourceLimit */
2545 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2546 /*Test for Illegal character */ /*
2547 {
2548 static const uint8_t input1[]={ 0xA1 };
2549 const char* illegalsource=(const char*)input1;
2550 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2551 }
2552 */
2553 ucnv_close(cnv);
2554 }
2555
2556 static void
2557 TestDBCS() {
2558 /* test input */
2559 static const uint8_t in[]={
2560 0x44, 0x6a,
2561 0xc4, 0x9c,
2562 0x7a, 0x74,
2563 0x46, 0xab,
2564 0x42, 0x5b,
2565
2566 };
2567
2568 /* expected test results */
2569 static const int32_t results[]={
2570 /* number of bytes read, code point */
2571 2, 0x00a7,
2572 2, 0xe1d2,
2573 2, 0x6962,
2574 2, 0xf842,
2575 2, 0xffe5,
2576 };
2577
2578 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2579 UErrorCode errorCode=U_ZERO_ERROR;
2580
2581 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2582 if(U_FAILURE(errorCode)) {
2583 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2584 return;
2585 }
2586 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2587 /* Test the condition when source >= sourceLimit */
2588 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2589 /*Test for the condition where there is an invalid character*/
2590 {
2591 static const uint8_t source2[]={0x1a, 0x1b};
2592 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2593 }
2594 /*Test for the condition where we have a truncated char*/
2595 {
2596 static const uint8_t source1[]={0xc4};
2597 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2598 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2599 }
2600 ucnv_close(cnv);
2601 }
2602
2603 static void
2604 TestMBCS() {
2605 /* test input */
2606 static const uint8_t in[]={
2607 0x01,
2608 0xa6, 0xa3,
2609 0x00,
2610 0xa6, 0xa1,
2611 0x08,
2612 0xc2, 0x76,
2613 0xc2, 0x78,
2614
2615 };
2616
2617 /* expected test results */
2618 static const int32_t results[]={
2619 /* number of bytes read, code point */
2620 1, 0x0001,
2621 2, 0x250c,
2622 1, 0x0000,
2623 2, 0x2500,
2624 1, 0x0008,
2625 2, 0xd60c,
2626 2, 0xd60e,
2627 };
2628
2629 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2630 UErrorCode errorCode=U_ZERO_ERROR;
2631
2632 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2633 if(U_FAILURE(errorCode)) {
2634 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2635 return;
2636 }
2637 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2638 /* Test the condition when source >= sourceLimit */
2639 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2640 /*Test for the condition where there is an invalid character*/
2641 {
2642 static const uint8_t source2[]={0xa1, 0x80};
2643 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2644 }
2645 /*Test for the condition where we have a truncated char*/
2646 {
2647 static const uint8_t source1[]={0xc4};
2648 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2649 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2650 }
2651 ucnv_close(cnv);
2652
2653 }
2654
2655 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2656 static void
2657 TestICCRunout() {
2658 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2659
2660 const char *cnvName = "ibm-1363";
2661 UErrorCode status = U_ZERO_ERROR;
2662 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2663 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2664 const char *source = sourceData;
2665 const char *sourceLim = sourceData+sizeof(sourceData);
2666 UChar c1, c2, c3;
2667 UConverter *cnv=ucnv_open(cnvName, &status);
2668 if(U_FAILURE(status)) {
2669 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2670 return;
2671 }
2672
2673 #if 0
2674 {
2675 UChar targetBuf[256];
2676 UChar *target = targetBuf;
2677 UChar *targetLim = target+256;
2678 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2679
2680 log_info("After convert: target@%d, source@%d, status%s\n",
2681 target-targetBuf, source-sourceData, u_errorName(status));
2682
2683 if(U_FAILURE(status)) {
2684 log_err("Failed to convert: %s\n", u_errorName(status));
2685 } else {
2686
2687 }
2688 }
2689 #endif
2690
2691 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2692 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2693
2694 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2695 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2696
2697 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2698 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2699
2700 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2701 log_verbose("OK\n");
2702 } else {
2703 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2704 }
2705
2706 ucnv_close(cnv);
2707
2708 }
2709 #endif
2710
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs ucnv_getNextUChar() on the generic "ISO_2022" converter: input that
 * begins with the escape sequence 1b 25 42, the sourceLimit < source and
 * sourceLimit == source argument errors, a truncated character and an
 * invalid character (both with the STOP callback installed).
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.  The pair (source+1,
     * source) is used instead of (source, source-1): source points at the
     * first element of in[], and forming a pointer before the start of an
     * array is undefined behavior.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source >= sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2768
2769 static void
2770 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2771 const UChar* uSource;
2772 const UChar* uSourceLimit;
2773 const char* cSource;
2774 const char* cSourceLimit;
2775 UChar *uTargetLimit =NULL;
2776 UChar *uTarget;
2777 char *cTarget;
2778 const char *cTargetLimit;
2779 char *cBuf;
2780 UChar *uBuf; /*,*test;*/
2781 int32_t uBufSize = 120;
2782 int len=0;
2783 int i=2;
2784 UErrorCode errorCode=U_ZERO_ERROR;
2785 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2786 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2787 ucnv_reset(cnv);
2788 for(;--i>0; ){
2789 uSource = (UChar*) source;
2790 uSourceLimit=(const UChar*)sourceLimit;
2791 cTarget = cBuf;
2792 uTarget = uBuf;
2793 cSource = cBuf;
2794 cTargetLimit = cBuf;
2795 uTargetLimit = uBuf;
2796
2797 do{
2798
2799 cTargetLimit = cTargetLimit+ i;
2800 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2801 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2802 errorCode=U_ZERO_ERROR;
2803 continue;
2804 }
2805
2806 if(U_FAILURE(errorCode)){
2807 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2808 return;
2809 }
2810
2811 }while (uSource<uSourceLimit);
2812
2813 cSourceLimit =cTarget;
2814 do{
2815 uTargetLimit=uTargetLimit+i;
2816 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2817 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2818 errorCode=U_ZERO_ERROR;
2819 continue;
2820 }
2821 if(U_FAILURE(errorCode)){
2822 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823 return;
2824 }
2825 }while(cSource<cSourceLimit);
2826
2827 uSource = source;
2828 /*test =uBuf;*/
2829 for(len=0;len<(int)(source - sourceLimit);len++){
2830 if(uBuf[len]!=uSource[len]){
2831 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2832 }
2833 }
2834 }
2835 free(uBuf);
2836 free(cBuf);
2837 }
2838 /* Test for Jitterbug 778 */
2839 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2840 const UChar* uSource;
2841 const UChar* uSourceLimit;
2842 const char* cSource;
2843 UChar *uTargetLimit =NULL;
2844 UChar *uTarget;
2845 char *cTarget;
2846 const char *cTargetLimit;
2847 char *cBuf;
2848 UChar *uBuf,*test;
2849 int32_t uBufSize = 120;
2850 int numCharsInTarget=0;
2851 UErrorCode errorCode=U_ZERO_ERROR;
2852 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2853 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2854 uSource = source;
2855 uSourceLimit=sourceLimit;
2856 cTarget = cBuf;
2857 cTargetLimit = cBuf +uBufSize*5;
2858 uTarget = uBuf;
2859 uTargetLimit = uBuf+ uBufSize*5;
2860 ucnv_reset(cnv);
2861 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2862 if(U_FAILURE(errorCode)){
2863 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2864 return;
2865 }
2866 cSource = cBuf;
2867 test =uBuf;
2868 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2869 if(U_FAILURE(errorCode)){
2870 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2871 return;
2872 }
2873 uSource = source;
2874 while(uSource<uSourceLimit){
2875 if(*test!=*uSource){
2876
2877 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2878 }
2879 uSource++;
2880 test++;
2881 }
2882 free(uBuf);
2883 free(cBuf);
2884 }
2885
2886 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2887 const UChar* uSource;
2888 const UChar* uSourceLimit;
2889 const char* cSource;
2890 const char* cSourceLimit;
2891 UChar *uTargetLimit =NULL;
2892 UChar *uTarget;
2893 char *cTarget;
2894 const char *cTargetLimit;
2895 char *cBuf;
2896 UChar *uBuf; /*,*test;*/
2897 int32_t uBufSize = 120;
2898 int len=0;
2899 int i=2;
2900 const UChar *temp = sourceLimit;
2901 UErrorCode errorCode=U_ZERO_ERROR;
2902 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2903 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2904
2905 ucnv_reset(cnv);
2906 for(;--i>0;){
2907 uSource = (UChar*) source;
2908 cTarget = cBuf;
2909 uTarget = uBuf;
2910 cSource = cBuf;
2911 cTargetLimit = cBuf;
2912 uTargetLimit = uBuf+uBufSize*5;
2913 cTargetLimit = cTargetLimit+uBufSize*10;
2914 uSourceLimit=uSource;
2915 do{
2916
2917 if (uSourceLimit < sourceLimit) {
2918 uSourceLimit = uSourceLimit+1;
2919 }
2920 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2921 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2922 errorCode=U_ZERO_ERROR;
2923 continue;
2924 }
2925
2926 if(U_FAILURE(errorCode)){
2927 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2928 return;
2929 }
2930
2931 }while (uSource<temp);
2932
2933 cSourceLimit =cBuf;
2934 do{
2935 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2936 cSourceLimit = cSourceLimit+1;
2937 }
2938 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2939 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2940 errorCode=U_ZERO_ERROR;
2941 continue;
2942 }
2943 if(U_FAILURE(errorCode)){
2944 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2945 return;
2946 }
2947 }while(cSource<cTarget);
2948
2949 uSource = source;
2950 /*test =uBuf;*/
2951 for(;len<(int)(source - sourceLimit);len++){
2952 if(uBuf[len]!=uSource[len]){
2953 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2954 }
2955 }
2956 }
2957 free(uBuf);
2958 free(cBuf);
2959 }
2960 static void
2961 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2962 const uint16_t results[], const char* message){
2963 /* const char* s0; */
2964 const char* s=(char*)source;
2965 const uint16_t *r=results;
2966 UErrorCode errorCode=U_ZERO_ERROR;
2967 uint32_t c,exC;
2968 ucnv_reset(cnv);
2969 while(s<limit) {
2970 /* s0=s; */
2971 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2972 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2973 break; /* no more significant input */
2974 } else if(U_FAILURE(errorCode)) {
2975 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2976 break;
2977 } else {
2978 if(U16_IS_LEAD(*r)){
2979 int i =0, len = 2;
2980 U16_NEXT(r, i, len, exC);
2981 r++;
2982 }else{
2983 exC = *r;
2984 }
2985 if(c!=(uint32_t)(exC))
2986 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2987 }
2988 r++;
2989 }
2990 }
2991
2992 static int TestJitterbug930(const char* enc){
2993 UErrorCode err = U_ZERO_ERROR;
2994 UConverter*converter;
2995 char out[80];
2996 char*target = out;
2997 UChar in[4];
2998 const UChar*source = in;
2999 int32_t off[80];
3000 int32_t* offsets = off;
3001 int numOffWritten=0;
3002 UBool flush = 0;
3003 converter = my_ucnv_open(enc, &err);
3004
3005 in[0] = 0x41; /* 0x4E00;*/
3006 in[1] = 0x4E01;
3007 in[2] = 0x4E02;
3008 in[3] = 0x4E03;
3009
3010 memset(off, '*', sizeof(off));
3011
3012 ucnv_fromUnicode (converter,
3013 &target,
3014 target+2,
3015 &source,
3016 source+3,
3017 offsets,
3018 flush,
3019 &err);
3020
3021 /* writes three bytes into the output buffer: 41 1B 24
3022 * but offsets contains 0 1 1
3023 */
3024 while(*offsets< off[10]){
3025 numOffWritten++;
3026 offsets++;
3027 }
3028 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3029 if(numOffWritten!= (int)(target-out)){
3030 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3031 }
3032
3033 err = U_ZERO_ERROR;
3034
3035 memset(off,'*' , sizeof(off));
3036
3037 flush = 1;
3038 offsets=off;
3039 ucnv_fromUnicode (converter,
3040 &target,
3041 target+4,
3042 &source,
3043 source,
3044 offsets,
3045 flush,
3046 &err);
3047 numOffWritten=0;
3048 while(*offsets< off[10]){
3049 numOffWritten++;
3050 if(*offsets!= -1){
3051 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3052 }
3053 offsets++;
3054 }
3055
3056 /* writes 42 43 7A into output buffer,
3057 * offsets contains -1 -1 -1
3058 */
3059 ucnv_close(converter);
3060 return 0;
3061 }
3062
3063 static void
3064 TestHZ() {
3065 /* test input */
3066 static const uint16_t in[]={
3067 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3068 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3069 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3070 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3071 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3072 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3073 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3074 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3075 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3076 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3077 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3078 0x005A, 0x005B, 0x005C, 0x000A
3079 };
3080 const UChar* uSource;
3081 const UChar* uSourceLimit;
3082 const char* cSource;
3083 const char* cSourceLimit;
3084 UChar *uTargetLimit =NULL;
3085 UChar *uTarget;
3086 char *cTarget;
3087 const char *cTargetLimit;
3088 char *cBuf;
3089 UChar *uBuf,*test;
3090 int32_t uBufSize = 120;
3091 UErrorCode errorCode=U_ZERO_ERROR;
3092 UConverter *cnv;
3093 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3094 int32_t* myOff= offsets;
3095 cnv=ucnv_open("HZ", &errorCode);
3096 if(U_FAILURE(errorCode)) {
3097 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3098 return;
3099 }
3100
3101 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3102 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3103 uSource = (const UChar*)in;
3104 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3105 cTarget = cBuf;
3106 cTargetLimit = cBuf +uBufSize*5;
3107 uTarget = uBuf;
3108 uTargetLimit = uBuf+ uBufSize*5;
3109 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3110 if(U_FAILURE(errorCode)){
3111 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3112 return;
3113 }
3114 cSource = cBuf;
3115 cSourceLimit =cTarget;
3116 test =uBuf;
3117 myOff=offsets;
3118 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3119 if(U_FAILURE(errorCode)){
3120 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3121 return;
3122 }
3123 uSource = (const UChar*)in;
3124 while(uSource<uSourceLimit){
3125 if(*test!=*uSource){
3126
3127 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3128 }
3129 uSource++;
3130 test++;
3131 }
3132 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3133 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3134 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3135 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3136 TestJitterbug930("csISO2022JP");
3137 ucnv_close(cnv);
3138 free(offsets);
3139 free(uBuf);
3140 free(cBuf);
3141 }
3142
3143 static void
3144 TestISCII(){
3145 /* test input */
3146 static const uint16_t in[]={
3147 /* test full range of Devanagari */
3148 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3149 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3150 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3151 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3152 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3153 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3154 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3155 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3156 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3157 0x096D,0x096E,0x096F,
3158 /* test Soft halant*/
3159 0x0915,0x094d, 0x200D,
3160 /* test explicit halant */
3161 0x0915,0x094d, 0x200c,
3162 /* test double danda */
3163 0x965,
3164 /* test ASCII */
3165 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3166 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3167 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3168 /* tests from Lotus */
3169 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3170 0x0930,0x094D,0x200D,
3171 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3172 0x0915,0x0921,0x002B,0x095F,
3173 /* tamil range */
3174 0x0B86, 0xB87, 0xB88,
3175 /* telugu range */
3176 0x0C05, 0x0C02, 0x0C03,0x0c31,
3177 /* kannada range */
3178 0x0C85, 0xC82, 0x0C83,
3179 /* test Abbr sign and Anudatta */
3180 0x0970, 0x952,
3181 /* 0x0958,
3182 0x0959,
3183 0x095A,
3184 0x095B,
3185 0x095C,
3186 0x095D,
3187 0x095E,
3188 0x095F,*/
3189 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3190 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3191 0x090C ,
3192 0x0962,
3193 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3194 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3195 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3196 0x093D /* Avagraha 0xEA, 0xE9*/,
3197 0x0958,
3198 0x0959,
3199 0x095A,
3200 0x095B,
3201 0x095C,
3202 0x095D,
3203 0x095E,
3204 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3205 };
3206 static const unsigned char byteArr[]={
3207
3208 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3209 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3210 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3211 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3212 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3213 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3214 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3215 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3216 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3217 0xf8,0xf9,0xfa,
3218 /* test soft halant */
3219 0xb3, 0xE8, 0xE9,
3220 /* test explicit halant */
3221 0xb3, 0xE8, 0xE8,
3222 /* test double danda */
3223 0xea, 0xea,
3224 /* test ASCII */
3225 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3226 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3227 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3228 /* test ATR code */
3229
3230 /* tests from Lotus */
3231 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3232 0xEF,0x42,0xCF,0xE8,0xD9,
3233 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3234 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3235 /* tamil range */
3236 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3237 /* telugu range */
3238 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3239 /* kannada range */
3240 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3241 /* anudatta and abbreviation sign */
3242 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3243
3244
3245 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3246
3247 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3248
3249 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3250
3251 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3252
3253 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3254
3255 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3256
3257 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3258
3259 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3260
3261 0xB3, 0xE9, /* Ka + NUKTA */
3262
3263 0xB4, 0xE9, /* Kha + NUKTA */
3264
3265 0xB5, 0xE9, /* Ga + NUKTA */
3266
3267 0xBA, 0xE9,
3268
3269 0xBF, 0xE9,
3270
3271 0xC0, 0xE9,
3272
3273 0xC9, 0xE9,
3274 /* INV halant RA */
3275 0xD9, 0xE8, 0xCF,
3276 0x00, 0x00A0,
3277 /* just consume unhandled codepoints */
3278 0xEF, 0x30,
3279
3280 };
3281 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3282 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3283
3284 }
3285
3286 static void
3287 TestISO_2022_JP() {
3288 /* test input */
3289 static const uint16_t in[]={
3290 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3291 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3292 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3293 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3294 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3295 0x201D, 0x3014, 0x000D, 0x000A,
3296 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3297 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3298 };
3299 const UChar* uSource;
3300 const UChar* uSourceLimit;
3301 const char* cSource;
3302 const char* cSourceLimit;
3303 UChar *uTargetLimit =NULL;
3304 UChar *uTarget;
3305 char *cTarget;
3306 const char *cTargetLimit;
3307 char *cBuf;
3308 UChar *uBuf,*test;
3309 int32_t uBufSize = 120;
3310 UErrorCode errorCode=U_ZERO_ERROR;
3311 UConverter *cnv;
3312 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3313 int32_t* myOff= offsets;
3314 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3315 if(U_FAILURE(errorCode)) {
3316 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3317 return;
3318 }
3319
3320 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3321 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3322 uSource = (const UChar*)in;
3323 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3324 cTarget = cBuf;
3325 cTargetLimit = cBuf +uBufSize*5;
3326 uTarget = uBuf;
3327 uTargetLimit = uBuf+ uBufSize*5;
3328 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3329 if(U_FAILURE(errorCode)){
3330 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3331 return;
3332 }
3333 cSource = cBuf;
3334 cSourceLimit =cTarget;
3335 test =uBuf;
3336 myOff=offsets;
3337 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3338 if(U_FAILURE(errorCode)){
3339 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3340 return;
3341 }
3342
3343 uSource = (const UChar*)in;
3344 while(uSource<uSourceLimit){
3345 if(*test!=*uSource){
3346
3347 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3348 }
3349 uSource++;
3350 test++;
3351 }
3352
3353 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3354 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3355 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3356 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3357 TestJitterbug930("csISO2022JP");
3358 ucnv_close(cnv);
3359 free(uBuf);
3360 free(cBuf);
3361 free(offsets);
3362 }
3363
3364 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3365 const UChar* uSource;
3366 const UChar* uSourceLimit;
3367 const char* cSource;
3368 const char* cSourceLimit;
3369 UChar *uTargetLimit =NULL;
3370 UChar *uTarget;
3371 char *cTarget;
3372 const char *cTargetLimit;
3373 char *cBuf;
3374 UChar *uBuf,*test;
3375 int32_t uBufSize = 120*10;
3376 UErrorCode errorCode=U_ZERO_ERROR;
3377 UConverter *cnv;
3378 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3379 int32_t* myOff= offsets;
3380 cnv=my_ucnv_open(conv, &errorCode);
3381 if(U_FAILURE(errorCode)) {
3382 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3383 return;
3384 }
3385
3386 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3387 cBuf =(char*)malloc(uBufSize * sizeof(char));
3388 uSource = (const UChar*)in;
3389 uSourceLimit=uSource+len;
3390 cTarget = cBuf;
3391 cTargetLimit = cBuf +uBufSize;
3392 uTarget = uBuf;
3393 uTargetLimit = uBuf+ uBufSize;
3394 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3395 if(U_FAILURE(errorCode)){
3396 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3397 return;
3398 }
3399 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3400 cSource = cBuf;
3401 cSourceLimit =cTarget;
3402 test =uBuf;
3403 myOff=offsets;
3404 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3405 if(U_FAILURE(errorCode)){
3406 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3407 return;
3408 }
3409
3410 uSource = (const UChar*)in;
3411 while(uSource<uSourceLimit){
3412 if(*test!=*uSource){
3413 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3414 }
3415 uSource++;
3416 test++;
3417 }
3418 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3419 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3420 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3421 if(byteArr && byteArrLen!=0){
3422 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3423 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3424 {
3425 cSource = byteArr;
3426 cSourceLimit = cSource+byteArrLen;
3427 test=uBuf;
3428 myOff = offsets;
3429 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3430 if(U_FAILURE(errorCode)){
3431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3432 return;
3433 }
3434
3435 uSource = (const UChar*)in;
3436 while(uSource<uSourceLimit){
3437 if(*test!=*uSource){
3438 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3439 }
3440 uSource++;
3441 test++;
3442 }
3443 }
3444 }
3445
3446 ucnv_close(cnv);
3447 free(uBuf);
3448 free(cBuf);
3449 free(offsets);
3450 }
3451 static UChar U_CALLCONV
3452 _charAt(int32_t offset, void *context) {
3453 return ((char*)context)[offset];
3454 }
3455
3456 static int32_t
3457 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3458 int32_t srcIndex=0;
3459 int32_t dstIndex=0;
3460 if(U_FAILURE(*status)){
3461 return 0;
3462 }
3463 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3464 *status = U_ILLEGAL_ARGUMENT_ERROR;
3465 return 0;
3466 }
3467 if(srcLen==-1){
3468 srcLen = (int32_t)uprv_strlen(src);
3469 }
3470
3471 for (; srcIndex<srcLen; ) {
3472 UChar32 c = src[srcIndex++];
3473 if (c == 0x005C /*'\\'*/) {
3474 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3475 if (c == (UChar32)0xFFFFFFFF) {
3476 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3477 break; /* invalid escape sequence */
3478 }
3479 }
3480 if(dstIndex < dstLen){
3481 if(c>0xFFFF){
3482 dst[dstIndex++] = U16_LEAD(c);
3483 if(dstIndex<dstLen){
3484 dst[dstIndex]=U16_TRAIL(c);
3485 }else{
3486 *status=U_BUFFER_OVERFLOW_ERROR;
3487 }
3488 }else{
3489 dst[dstIndex]=(UChar)c;
3490 }
3491
3492 }else{
3493 *status = U_BUFFER_OVERFLOW_ERROR;
3494 }
3495 dstIndex++; /* for preflighting */
3496 }
3497 return dstIndex;
3498 }
3499
3500 static void
3501 TestFullRoundtrip(const char* cp){
3502 UChar usource[10] ={0};
3503 UChar nsrc[10] = {0};
3504 uint32_t i=1;
3505 int len=0, ulen;
3506 nsrc[0]=0x0061;
3507 /* Test codepoint 0 */
3508 TestConv(usource,1,cp,"",NULL,0);
3509 TestConv(usource,2,cp,"",NULL,0);
3510 nsrc[2]=0x5555;
3511 TestConv(nsrc,3,cp,"",NULL,0);
3512
3513 for(;i<=0x10FFFF;i++){
3514 if(i==0xD800){
3515 i=0xDFFF;
3516 continue;
3517 }
3518 if(i<=0xFFFF){
3519 usource[0] =(UChar) i;
3520 len=1;
3521 }else{
3522 usource[0]=U16_LEAD(i);
3523 usource[1]=U16_TRAIL(i);
3524 len=2;
3525 }
3526 ulen=len;
3527 if(i==0x80) {
3528 usource[2]=0;
3529 }
3530 /* Test only single code points */
3531 TestConv(usource,ulen,cp,"",NULL,0);
3532 /* Test codepoint repeated twice */
3533 usource[ulen]=usource[0];
3534 usource[ulen+1]=usource[1];
3535 ulen+=len;
3536 TestConv(usource,ulen,cp,"",NULL,0);
3537 /* Test codepoint repeated 3 times */
3538 usource[ulen]=usource[0];
3539 usource[ulen+1]=usource[1];
3540 ulen+=len;
3541 TestConv(usource,ulen,cp,"",NULL,0);
3542 /* Test codepoint in between 2 codepoints */
3543 nsrc[1]=usource[0];
3544 nsrc[2]=usource[1];
3545 nsrc[len+1]=0x5555;
3546 TestConv(nsrc,len+2,cp,"",NULL,0);
3547 uprv_memset(usource,0,sizeof(UChar)*10);
3548 }
3549 }
3550
3551 static void
3552 TestRoundTrippingAllUTF(void){
3553 if(!getTestOption(QUICK_OPTION)){
3554 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3555 TestFullRoundtrip("BOCU-1");
3556 log_verbose("Running exhaustive round trip test for SCSU\n");
3557 TestFullRoundtrip("SCSU");
3558 log_verbose("Running exhaustive round trip test for UTF-8\n");
3559 TestFullRoundtrip("UTF-8");
3560 log_verbose("Running exhaustive round trip test for CESU-8\n");
3561 TestFullRoundtrip("CESU-8");
3562 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3563 TestFullRoundtrip("UTF-16BE");
3564 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3565 TestFullRoundtrip("UTF-16LE");
3566 log_verbose("Running exhaustive round trip test for UTF-16\n");
3567 TestFullRoundtrip("UTF-16");
3568 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3569 TestFullRoundtrip("UTF-32BE");
3570 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3571 TestFullRoundtrip("UTF-32LE");
3572 log_verbose("Running exhaustive round trip test for UTF-32\n");
3573 TestFullRoundtrip("UTF-32");
3574 log_verbose("Running exhaustive round trip test for UTF-7\n");
3575 TestFullRoundtrip("UTF-7");
3576 log_verbose("Running exhaustive round trip test for UTF-7\n");
3577 TestFullRoundtrip("UTF-7,version=1");
3578 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3579 TestFullRoundtrip("IMAP-mailbox-name");
3580 /*
3581 *
3582 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3583 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3584 * The old mappings remain as fallbacks.
3585 * This test may be reintroduced at a later time.
3586 *
3587 * 110118 - mow
3588 */
3589 /*
3590 log_verbose("Running exhaustive round trip test for GB18030\n");
3591 TestFullRoundtrip("GB18030");
3592 */
3593 }
3594 }
3595
3596 static void
3597 TestSCSU() {
3598
3599 static const uint16_t germanUTF16[]={
3600 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3601 };
3602
3603 static const uint8_t germanSCSU[]={
3604 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3605 };
3606
3607 static const uint16_t russianUTF16[]={
3608 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3609 };
3610
3611 static const uint8_t russianSCSU[]={
3612 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3613 };
3614
3615 static const uint16_t japaneseUTF16[]={
3616 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3617 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3618 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3619 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3620 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3621 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3622 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3623 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3624 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3625 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3626 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3627 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3628 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3629 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3630 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3631 };
3632
3633 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3634 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3635 static const uint8_t japaneseSCSU[]={
3636 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3637 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3638 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3639 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3640 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3641 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3642 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3643 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3644 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3645 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3646 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3647 0xcb, 0x82
3648 };
3649
3650 static const uint16_t allFeaturesUTF16[]={
3651 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3652 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3653 0x01df, 0xf000, 0xdbff, 0xdfff
3654 };
3655
3656 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3657 * result here (34B vs. 35B)
3658 */
3659 static const uint8_t allFeaturesSCSU[]={
3660 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3661 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3662 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3663 0xdf, 0x14, 0x80, 0x15, 0xff
3664 };
3665 static const uint16_t monkeyIn[]={
3666 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3667 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3668 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3669 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3670 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3671 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3672 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3673 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3674 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3675 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3676 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3677 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3678 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3679 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3680 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3681 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3682 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3683 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3684 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3685 /* test non-BMP code points */
3686 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3687 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3688 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3689 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3690 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3691 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3692 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3693 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3694 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3695 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3696 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3697
3698
3699 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3700 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3701 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3702 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3703 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3704 };
3705 static const char *fTestCases [] = {
3706 "\\ud800\\udc00", /* smallest surrogate*/
3707 "\\ud8ff\\udcff",
3708 "\\udBff\\udFff", /* largest surrogate pair*/
3709 "\\ud834\\udc00",
3710 "\\U0010FFFF",
3711 "Hello \\u9292 \\u9192 World!",
3712 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3713 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3714
3715 "\\u0648\\u06c8", /* catch missing reset*/
3716 "\\u0648\\u06c8",
3717
3718 "\\u4444\\uE001", /* lowest quotable*/
3719 "\\u4444\\uf2FF", /* highest quotable*/
3720 "\\u4444\\uf188\\u4444",
3721 "\\u4444\\uf188\\uf288",
3722 "\\u4444\\uf188abc\\u0429\\uf288",
3723 "\\u9292\\u2222",
3724 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3725 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3726 "Hello World!123456",
3727 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3728
3729 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3730 "abc\\u4411d", /* uses SQU*/
3731 "abc\\u4411\\u4412d",/* uses SCU*/
3732 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3734 "\\u9292\\u2222",
3735 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3736 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3737 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3738
3739 "", /* empty input*/
3740 "\\u0000", /* smallest BMP character*/
3741 "\\uFFFF", /* largest BMP character*/
3742
3743 /* regression tests*/
3744 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3745 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3746 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3747 "\\u0041\\u00df\\u0401\\u015f",
3748 "\\u9066\\u2123abc",
3749 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3750 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3751 };
3752 int i=0;
3753 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3754 const char* cSrc = fTestCases[i];
3755 UErrorCode status = U_ZERO_ERROR;
3756 int32_t cSrcLen,srcLen;
3757 UChar* src;
3758 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3759 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3760 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3761 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3762 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3763 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3764 free(src);
3765 }
3766 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3767 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3768 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3769 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3770 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3771 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3772 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3773 }
3774
3775 #if !UCONFIG_NO_LEGACY_CONVERSION
3776 static void TestJitterbug2346(){
3777 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3778 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3779 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3780
3781 UChar uTarget[500]={'\0'};
3782 UChar* utarget=uTarget;
3783 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3784
3785 char cTarget[500]={'\0'};
3786 char* ctarget=cTarget;
3787 char* ctargetLimit=cTarget+sizeof(cTarget);
3788 const char* csource=source;
3789 UChar* temp = expected;
3790 UErrorCode err=U_ZERO_ERROR;
3791
3792 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3793 if(U_FAILURE(err)) {
3794 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3795 return;
3796 }
3797 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3798 if(U_FAILURE(err)) {
3799 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3800 return;
3801 }
3802 utargetLimit=utarget;
3803 utarget = uTarget;
3804 while(utarget<utargetLimit){
3805 if(*temp!=*utarget){
3806
3807 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3808 }
3809 utarget++;
3810 temp++;
3811 }
3812 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3813 if(U_FAILURE(err)) {
3814 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3815 return;
3816 }
3817 ctargetLimit=ctarget;
3818 ctarget =cTarget;
3819 ucnv_close(conv);
3820
3821
3822 }
3823
3824 static void
3825 TestISO_2022_JP_1() {
3826 /* test input */
3827 static const uint16_t in[]={
3828 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3829 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3830 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3831 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3832 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3833 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3834 0x201D, 0x000D, 0x000A,
3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3836 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3837 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3838 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3839 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3840 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3841 };
3842 const UChar* uSource;
3843 const UChar* uSourceLimit;
3844 const char* cSource;
3845 const char* cSourceLimit;
3846 UChar *uTargetLimit =NULL;
3847 UChar *uTarget;
3848 char *cTarget;
3849 const char *cTargetLimit;
3850 char *cBuf;
3851 UChar *uBuf,*test;
3852 int32_t uBufSize = 120;
3853 UErrorCode errorCode=U_ZERO_ERROR;
3854 UConverter *cnv;
3855
3856 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3857 if(U_FAILURE(errorCode)) {
3858 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3859 return;
3860 }
3861
3862 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3863 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3864 uSource = (const UChar*)in;
3865 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3866 cTarget = cBuf;
3867 cTargetLimit = cBuf +uBufSize*5;
3868 uTarget = uBuf;
3869 uTargetLimit = uBuf+ uBufSize*5;
3870 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3871 if(U_FAILURE(errorCode)){
3872 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3873 return;
3874 }
3875 cSource = cBuf;
3876 cSourceLimit =cTarget;
3877 test =uBuf;
3878 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3879 if(U_FAILURE(errorCode)){
3880 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3881 return;
3882 }
3883 uSource = (const UChar*)in;
3884 while(uSource<uSourceLimit){
3885 if(*test!=*uSource){
3886
3887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3888 }
3889 uSource++;
3890 test++;
3891 }
3892 /*ucnv_close(cnv);
3893 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3894 /*Test for the condition where there is an invalid character*/
3895 ucnv_reset(cnv);
3896 {
3897 static const uint8_t source2[]={0x0e,0x24,0x053};
3898 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3899 }
3900 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3901 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3902 ucnv_close(cnv);
3903 free(uBuf);
3904 free(cBuf);
3905 }
3906
3907 static void
3908 TestISO_2022_JP_2() {
3909 /* test input */
3910 static const uint16_t in[]={
3911 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3912 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3913 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3914 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3915 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3916 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3917 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3918 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3919 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3920 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3921 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3922 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3923 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3924 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3925 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3926 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3927 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3928 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3929 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3930 };
3931 const UChar* uSource;
3932 const UChar* uSourceLimit;
3933 const char* cSource;
3934 const char* cSourceLimit;
3935 UChar *uTargetLimit =NULL;
3936 UChar *uTarget;
3937 char *cTarget;
3938 const char *cTargetLimit;
3939 char *cBuf;
3940 UChar *uBuf,*test;
3941 int32_t uBufSize = 120;
3942 UErrorCode errorCode=U_ZERO_ERROR;
3943 UConverter *cnv;
3944 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3945 int32_t* myOff= offsets;
3946 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3947 if(U_FAILURE(errorCode)) {
3948 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3949 return;
3950 }
3951
3952 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3953 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3954 uSource = (const UChar*)in;
3955 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3956 cTarget = cBuf;
3957 cTargetLimit = cBuf +uBufSize*5;
3958 uTarget = uBuf;
3959 uTargetLimit = uBuf+ uBufSize*5;
3960 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3961 if(U_FAILURE(errorCode)){
3962 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3963 return;
3964 }
3965 cSource = cBuf;
3966 cSourceLimit =cTarget;
3967 test =uBuf;
3968 myOff=offsets;
3969 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3970 if(U_FAILURE(errorCode)){
3971 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3972 return;
3973 }
3974 uSource = (const UChar*)in;
3975 while(uSource<uSourceLimit){
3976 if(*test!=*uSource){
3977
3978 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3979 }
3980 uSource++;
3981 test++;
3982 }
3983 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3984 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3985 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3986 /*Test for the condition where there is an invalid character*/
3987 ucnv_reset(cnv);
3988 {
3989 static const uint8_t source2[]={0x0e,0x24,0x053};
3990 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3991 }
3992 ucnv_close(cnv);
3993 free(uBuf);
3994 free(cBuf);
3995 free(offsets);
3996 }
3997
3998 static void
3999 TestISO_2022_KR() {
4000 /* test input */
4001 static const uint16_t in[]={
4002 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4003 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4004 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4005 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4006 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4007 ,0x53E3,0x53E4,0x000A,0x000D};
4008 const UChar* uSource;
4009 const UChar* uSourceLimit;
4010 const char* cSource;
4011 const char* cSourceLimit;
4012 UChar *uTargetLimit =NULL;
4013 UChar *uTarget;
4014 char *cTarget;
4015 const char *cTargetLimit;
4016 char *cBuf;
4017 UChar *uBuf,*test;
4018 int32_t uBufSize = 120;
4019 UErrorCode errorCode=U_ZERO_ERROR;
4020 UConverter *cnv;
4021 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4022 int32_t* myOff= offsets;
4023 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4024 if(U_FAILURE(errorCode)) {
4025 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4026 return;
4027 }
4028
4029 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4030 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4031 uSource = (const UChar*)in;
4032 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4033 cTarget = cBuf;
4034 cTargetLimit = cBuf +uBufSize*5;
4035 uTarget = uBuf;
4036 uTargetLimit = uBuf+ uBufSize*5;
4037 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4038 if(U_FAILURE(errorCode)){
4039 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4040 return;
4041 }
4042 cSource = cBuf;
4043 cSourceLimit =cTarget;
4044 test =uBuf;
4045 myOff=offsets;
4046 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4047 if(U_FAILURE(errorCode)){
4048 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4049 return;
4050 }
4051 uSource = (const UChar*)in;
4052 while(uSource<uSourceLimit){
4053 if(*test!=*uSource){
4054 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4055 }
4056 uSource++;
4057 test++;
4058 }
4059 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4060 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4061 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4062 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4063 TestJitterbug930("csISO2022KR");
4064 /*Test for the condition where there is an invalid character*/
4065 ucnv_reset(cnv);
4066 {
4067 static const uint8_t source2[]={0x1b,0x24,0x053};
4068 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4069 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4070 }
4071 ucnv_close(cnv);
4072 free(uBuf);
4073 free(cBuf);
4074 free(offsets);
4075 }
4076
4077 static void
4078 TestISO_2022_KR_1() {
4079 /* test input */
4080 static const uint16_t in[]={
4081 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4082 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4083 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4084 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4085 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4086 ,0x53E3,0x53E4,0x000A,0x000D};
4087 const UChar* uSource;
4088 const UChar* uSourceLimit;
4089 const char* cSource;
4090 const char* cSourceLimit;
4091 UChar *uTargetLimit =NULL;
4092 UChar *uTarget;
4093 char *cTarget;
4094 const char *cTargetLimit;
4095 char *cBuf;
4096 UChar *uBuf,*test;
4097 int32_t uBufSize = 120;
4098 UErrorCode errorCode=U_ZERO_ERROR;
4099 UConverter *cnv;
4100 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4101 int32_t* myOff= offsets;
4102 cnv=ucnv_open("ibm-25546", &errorCode);
4103 if(U_FAILURE(errorCode)) {
4104 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4105 return;
4106 }
4107
4108 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4109 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4110 uSource = (const UChar*)in;
4111 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4112 cTarget = cBuf;
4113 cTargetLimit = cBuf +uBufSize*5;
4114 uTarget = uBuf;
4115 uTargetLimit = uBuf+ uBufSize*5;
4116 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4117 if(U_FAILURE(errorCode)){
4118 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4119 return;
4120 }
4121 cSource = cBuf;
4122 cSourceLimit =cTarget;
4123 test =uBuf;
4124 myOff=offsets;
4125 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4126 if(U_FAILURE(errorCode)){
4127 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4128 return;
4129 }
4130 uSource = (const UChar*)in;
4131 while(uSource<uSourceLimit){
4132 if(*test!=*uSource){
4133 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4134 }
4135 uSource++;
4136 test++;
4137 }
4138 ucnv_reset(cnv);
4139 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4140 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4141 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4142 ucnv_reset(cnv);
4143 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4144 /*Test for the condition where there is an invalid character*/
4145 ucnv_reset(cnv);
4146 {
4147 static const uint8_t source2[]={0x1b,0x24,0x053};
4148 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4149 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4150 }
4151 ucnv_close(cnv);
4152 free(uBuf);
4153 free(cBuf);
4154 free(offsets);
4155 }
4156
4157 static void TestJitterbug2411(){
4158 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4159 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4160 UConverter* kr=NULL, *kr1=NULL;
4161 UErrorCode errorCode = U_ZERO_ERROR;
4162 UChar tgt[100]={'\0'};
4163 UChar* target = tgt;
4164 UChar* targetLimit = target+100;
4165 kr=ucnv_open("iso-2022-kr", &errorCode);
4166 if(U_FAILURE(errorCode)) {
4167 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4168 return;
4169 }
4170 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4171 if(U_FAILURE(errorCode)) {
4172 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4173 return;
4174 }
4175 kr1 = ucnv_open("ibm-25546", &errorCode);
4176 if(U_FAILURE(errorCode)) {
4177 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4178 return;
4179 }
4180 target = tgt;
4181 targetLimit = target+100;
4182 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4183
4184 if(U_FAILURE(errorCode)) {
4185 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4186 return;
4187 }
4188
4189 ucnv_close(kr);
4190 ucnv_close(kr1);
4191
4192 }
4193
4194 static void
4195 TestJIS(){
4196 /* From Unicode moved to testdata/conversion.txt */
4197 /*To Unicode*/
4198 {
4199 static const uint8_t sampleTextJIS[] = {
4200 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4201 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4203 };
4204 static const uint16_t expectedISO2022JIS[] = {
4205 0x0041, 0x0042,
4206 0xFF81, 0xFF82,
4207 0x3000
4208 };
4209 static const int32_t toISO2022JISOffs[]={
4210 3,4,
4211 8,9,
4212 16
4213 };
4214
4215 static const uint8_t sampleTextJIS7[] = {
4216 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4217 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4218 0x1b,0x24,0x42,0x21,0x21,
4219 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4220 0x21,0x22,
4221 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4222 };
4223 static const uint16_t expectedISO2022JIS7[] = {
4224 0x0041, 0x0042,
4225 0xFF81, 0xFF82,
4226 0x3000,
4227 0xFF81, 0xFF82,
4228 0x3001,
4229 0x3000
4230 };
4231 static const int32_t toISO2022JIS7Offs[]={
4232 3,4,
4233 8,9,
4234 13,16,
4235 17,
4236 19,27
4237 };
4238 static const uint8_t sampleTextJIS8[] = {
4239 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4240 0xa1,0xc8,0xd9,/*Katakana Set*/
4241 0x1b,0x28,0x42,
4242 0x41,0x42,
4243 0xb1,0xc3, /*Katakana Set*/
4244 0x1b,0x24,0x42,0x21,0x21
4245 };
4246 static const uint16_t expectedISO2022JIS8[] = {
4247 0x0041, 0x0042,
4248 0xff61, 0xff88, 0xff99,
4249 0x0041, 0x0042,
4250 0xff71, 0xff83,
4251 0x3000
4252 };
4253 static const int32_t toISO2022JIS8Offs[]={
4254 3, 4, 5, 6,
4255 7, 11, 12, 13,
4256 14, 18,
4257 };
4258
4259 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4260 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4261 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4262 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4263 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4264 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4265 }
4266
4267 }
4268
4269
4270 #if 0
4271 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4272
4273 static void TestJitterbug915(){
4274 /* tests for roundtripping of the below sequence
4275 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4276 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4277 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4278 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4279 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4280 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4281 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4282 */
4283 static const char cSource[]={
4284 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4285 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4286 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4287 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4288 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4289 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4290 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4291 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4292 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4293 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4294 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4295 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4296 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4297 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4298 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4299 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4300 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4301 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4302 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4303 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4304 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4305 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4306 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4307 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4308 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4309 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4310 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4311 0x37, 0x20, 0x2A, 0x2F
4312 };
4313 UChar uTarget[500]={'\0'};
4314 UChar* utarget=uTarget;
4315 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4316
4317 char cTarget[500]={'\0'};
4318 char* ctarget=cTarget;
4319 char* ctargetLimit=cTarget+sizeof(cTarget);
4320 const char* csource=cSource;
4321 const char* tempSrc = cSource;
4322 UErrorCode err=U_ZERO_ERROR;
4323
4324 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4325 if(U_FAILURE(err)) {
4326 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4327 return;
4328 }
4329 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4330 if(U_FAILURE(err)) {
4331 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4332 return;
4333 }
4334 utargetLimit=utarget;
4335 utarget = uTarget;
4336 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4337 if(U_FAILURE(err)) {
4338 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4339 return;
4340 }
4341 ctargetLimit=ctarget;
4342 ctarget =cTarget;
4343 while(ctarget<ctargetLimit){
4344 if(*ctarget != *tempSrc){
4345 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4346 }
4347 ++ctarget;
4348 ++tempSrc;
4349 }
4350
4351 ucnv_close(conv);
4352 }
4353
4354 static void
4355 TestISO_2022_CN_EXT() {
4356 /* test input */
4357 static const uint16_t in[]={
4358 /* test Non-BMP code points */
4359 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4360 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4361 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4362 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4363 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4364 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4365 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4366 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4367 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4368 0xD869, 0xDED5,
4369
4370 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4371 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4372 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4373 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4374 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4375 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4376 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4377 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4378 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4379 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4380 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4381 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4382 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4383 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4384 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4385 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4386 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4387 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4388
4389 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4390
4391 };
4392
4393 const UChar* uSource;
4394 const UChar* uSourceLimit;
4395 const char* cSource;
4396 const char* cSourceLimit;
4397 UChar *uTargetLimit =NULL;
4398 UChar *uTarget;
4399 char *cTarget;
4400 const char *cTargetLimit;
4401 char *cBuf;
4402 UChar *uBuf,*test;
4403 int32_t uBufSize = 180;
4404 UErrorCode errorCode=U_ZERO_ERROR;
4405 UConverter *cnv;
4406 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4407 int32_t* myOff= offsets;
4408 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4409 if(U_FAILURE(errorCode)) {
4410 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4411 return;
4412 }
4413
4414 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4415 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4416 uSource = (const UChar*)in;
4417 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4418 cTarget = cBuf;
4419 cTargetLimit = cBuf +uBufSize*5;
4420 uTarget = uBuf;
4421 uTargetLimit = uBuf+ uBufSize*5;
4422 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4423 if(U_FAILURE(errorCode)){
4424 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4425 return;
4426 }
4427 cSource = cBuf;
4428 cSourceLimit =cTarget;
4429 test =uBuf;
4430 myOff=offsets;
4431 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4432 if(U_FAILURE(errorCode)){
4433 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4434 return;
4435 }
4436 uSource = (const UChar*)in;
4437 while(uSource<uSourceLimit){
4438 if(*test!=*uSource){
4439 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4440 }
4441 else{
4442 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4443 }
4444 uSource++;
4445 test++;
4446 }
4447 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4448 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4449 /*Test for the condition where there is an invalid character*/
4450 ucnv_reset(cnv);
4451 {
4452 static const uint8_t source2[]={0x0e,0x24,0x053};
4453 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4454 }
4455 ucnv_close(cnv);
4456 free(uBuf);
4457 free(cBuf);
4458 free(offsets);
4459 }
4460 #endif
4461
4462 static void
4463 TestISO_2022_CN() {
4464 /* test input */
4465 static const uint16_t in[]={
4466 /* jitterbug 951 */
4467 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4468 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4469 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4470 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4471 0x0020, 0x0045, 0x004e, 0x0044,
4472 /**/
4473 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4474 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4475 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4476 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4477 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4478 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4479 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4480 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4481 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4482 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4483 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4484 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4485 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4486 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4487 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4488 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4489 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4490
4491 };
4492 const UChar* uSource;
4493 const UChar* uSourceLimit;
4494 const char* cSource;
4495 const char* cSourceLimit;
4496 UChar *uTargetLimit =NULL;
4497 UChar *uTarget;
4498 char *cTarget;
4499 const char *cTargetLimit;
4500 char *cBuf;
4501 UChar *uBuf,*test;
4502 int32_t uBufSize = 180;
4503 UErrorCode errorCode=U_ZERO_ERROR;
4504 UConverter *cnv;
4505 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4506 int32_t* myOff= offsets;
4507 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4508 if(U_FAILURE(errorCode)) {
4509 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4510 return;
4511 }
4512
4513 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4514 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4515 uSource = (const UChar*)in;
4516 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4517 cTarget = cBuf;
4518 cTargetLimit = cBuf +uBufSize*5;
4519 uTarget = uBuf;
4520 uTargetLimit = uBuf+ uBufSize*5;
4521 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4522 if(U_FAILURE(errorCode)){
4523 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4524 return;
4525 }
4526 cSource = cBuf;
4527 cSourceLimit =cTarget;
4528 test =uBuf;
4529 myOff=offsets;
4530 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4531 if(U_FAILURE(errorCode)){
4532 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4533 return;
4534 }
4535 uSource = (const UChar*)in;
4536 while(uSource<uSourceLimit){
4537 if(*test!=*uSource){
4538 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4539 }
4540 else{
4541 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4542 }
4543 uSource++;
4544 test++;
4545 }
4546 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4547 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4548 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4549 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4550 TestJitterbug930("csISO2022CN");
4551 /*Test for the condition where there is an invalid character*/
4552 ucnv_reset(cnv);
4553 {
4554 static const uint8_t source2[]={0x0e,0x24,0x053};
4555 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4556 }
4557
4558 ucnv_close(cnv);
4559 free(uBuf);
4560 free(cBuf);
4561 free(offsets);
4562 }
4563
/*
 * Empty-segment tests for ISO-2022-JP/KR/CN and HZ: the toUnicode callback
 * must be invoked with UConverterCallbackReason == UCNV_IRREGULAR.
 * Each entry names a converter and the raw bytes to push through it.
 */
typedef struct EmptySegmentTest {
    const char *converterName;   /* converter to open; NULL marks the end of a table */
    const char *inputText;       /* input bytes, length given separately */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4570
4571 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
4572 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4573 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4574 if (reason > UCNV_IRREGULAR) {
4575 return;
4576 }
4577 if (reason != UCNV_IRREGULAR) {
4578 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4579 }
4580 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4581 *err = U_ZERO_ERROR;
4582 ucnv_cbToUWriteSub(toArgs,0,err);
4583 }
4584
4585 enum { kEmptySegmentToUCharsMax = 64 };
4586 static void TestJitterbug6175(void) {
4587 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4588 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4589 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4590 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4591 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4592 static const EmptySegmentTest emptySegmentTests[] = {
4593 /* converterName inputText inputTextLength */
4594 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4595 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4596 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4597 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4598 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4599 /* terminator: */
4600 { NULL, NULL, 0, }
4601 };
4602 const EmptySegmentTest * testPtr;
4603 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4604 UErrorCode err = U_ZERO_ERROR;
4605 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4606 if (U_FAILURE(err)) {
4607 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4608 return;
4609 }
4610 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4611 if (U_FAILURE(err)) {
4612 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4613 ucnv_close(cnv);
4614 return;
4615 }
4616 {
4617 UChar toUChars[kEmptySegmentToUCharsMax];
4618 UChar * toUCharsPtr = toUChars;
4619 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4620 const char * inCharsPtr = testPtr->inputText;
4621 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4622 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4623 }
4624 ucnv_close(cnv);
4625 }
4626 }
4627
4628 static void
4629 TestEBCDIC_STATEFUL() {
4630 /* test input */
4631 static const uint8_t in[]={
4632 0x61,
4633 0x1a,
4634 0x0f, 0x4b,
4635 0x42,
4636 0x40,
4637 0x36,
4638 };
4639
4640 /* expected test results */
4641 static const int32_t results[]={
4642 /* number of bytes read, code point */
4643 1, 0x002f,
4644 1, 0x0092,
4645 2, 0x002e,
4646 1, 0xff62,
4647 1, 0x0020,
4648 1, 0x0096,
4649
4650 };
4651 static const uint8_t in2[]={
4652 0x0f,
4653 0xa1,
4654 0x01
4655 };
4656
4657 /* expected test results */
4658 static const int32_t results2[]={
4659 /* number of bytes read, code point */
4660 2, 0x203E,
4661 1, 0x0001,
4662 };
4663
4664 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4665 UErrorCode errorCode=U_ZERO_ERROR;
4666 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4667 if(U_FAILURE(errorCode)) {
4668 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4669 return;
4670 }
4671 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4672 ucnv_reset(cnv);
4673 /* Test the condition when source >= sourceLimit */
4674 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4675 ucnv_reset(cnv);
4676 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4677 {
4678 static const uint8_t source1[]={0x0f};
4679 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4680 }
4681 /*Test for the condition where there is an invalid character*/
4682 ucnv_reset(cnv);
4683 {
4684 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4685 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4686 }
4687 ucnv_reset(cnv);
4688 source=(const char*)in2;
4689 limit=(const char*)in2+sizeof(in2);
4690 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4691 ucnv_close(cnv);
4692
4693 }
4694
4695 static void
4696 TestGB18030() {
4697 /* test input */
4698 static const uint8_t in[]={
4699 0x24,
4700 0x7f,
4701 0x81, 0x30, 0x81, 0x30,
4702 0xa8, 0xbf,
4703 0xa2, 0xe3,
4704 0xd2, 0xbb,
4705 0x82, 0x35, 0x8f, 0x33,
4706 0x84, 0x31, 0xa4, 0x39,
4707 0x90, 0x30, 0x81, 0x30,
4708 0xe3, 0x32, 0x9a, 0x35
4709 #if 0
4710 /*
4711 * Feature removed markus 2000-oct-26
4712 * Only some codepages must match surrogate pairs into supplementary code points -
4713 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4714 * GB 18030 provides direct encodings for supplementary code points, therefore
4715 * it must not combine two single-encoded surrogates into one code point.
4716 */
4717 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4718 #endif
4719 };
4720
4721 /* expected test results */
4722 static const int32_t results[]={
4723 /* number of bytes read, code point */
4724 1, 0x24,
4725 1, 0x7f,
4726 4, 0x80,
4727 2, 0x1f9,
4728 2, 0x20ac,
4729 2, 0x4e00,
4730 4, 0x9fa6,
4731 4, 0xffff,
4732 4, 0x10000,
4733 4, 0x10ffff
4734 #if 0
4735 /* Feature removed. See comment above. */
4736 8, 0x10000
4737 #endif
4738 };
4739
4740 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4741 UErrorCode errorCode=U_ZERO_ERROR;
4742 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4743 if(U_FAILURE(errorCode)) {
4744 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4745 return;
4746 }
4747 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4748 ucnv_close(cnv);
4749 }
4750
4751 static void
4752 TestLMBCS() {
4753 /* LMBCS-1 string */
4754 static const uint8_t pszLMBCS[]={
4755 0x61,
4756 0x01, 0x29,
4757 0x81,
4758 0xA0,
4759 0x0F, 0x27,
4760 0x0F, 0x91,
4761 0x14, 0x0a, 0x74,
4762 0x14, 0xF6, 0x02,
4763 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4764 0x10, 0x88, 0xA0,
4765 };
4766
4767 /* Unicode UChar32 equivalents */
4768 static const UChar32 pszUnicode32[]={
4769 /* code point */
4770 0x00000061,
4771 0x00002013,
4772 0x000000FC,
4773 0x000000E1,
4774 0x00000007,
4775 0x00000091,
4776 0x00000a74,
4777 0x00000200,
4778 0x00023456, /* code point for surrogate pair */
4779 0x00005516
4780 };
4781
4782 /* Unicode UChar equivalents */
4783 static const UChar pszUnicode[]={
4784 /* code point */
4785 0x0061,
4786 0x2013,
4787 0x00FC,
4788 0x00E1,
4789 0x0007,
4790 0x0091,
4791 0x0a74,
4792 0x0200,
4793 0xD84D, /* low surrogate */
4794 0xDC56, /* high surrogate */
4795 0x5516
4796 };
4797
4798 /* expected test results */
4799 static const int offsets32[]={
4800 /* number of bytes read, code point */
4801 0,
4802 1,
4803 3,
4804 4,
4805 5,
4806 7,
4807 9,
4808 12,
4809 15,
4810 21,
4811 24
4812 };
4813
4814 /* expected test results */
4815 static const int offsets[]={
4816 /* number of bytes read, code point */
4817 0,
4818 1,
4819 3,
4820 4,
4821 5,
4822 7,
4823 9,
4824 12,
4825 15,
4826 18,
4827 21,
4828 24
4829 };
4830
4831
4832 UConverter *cnv;
4833
4834 #define NAME_LMBCS_1 "LMBCS-1"
4835 #define NAME_LMBCS_2 "LMBCS-2"
4836
4837
4838 /* Some basic open/close/property tests on some LMBCS converters */
4839 {
4840
4841 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4842 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4843 char get_subchars [1];
4844 const char * get_name;
4845 UConverter *cnv1;
4846 UConverter *cnv2;
4847
4848 int8_t len = sizeof(get_subchars);
4849
4850 UErrorCode errorCode=U_ZERO_ERROR;
4851
4852 /* Open */
4853 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4854 if(U_FAILURE(errorCode)) {
4855 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4856 return;
4857 }
4858 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4859 if(U_FAILURE(errorCode)) {
4860 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4861 return;
4862 }
4863
4864 /* Name */
4865 get_name = ucnv_getName (cnv1, &errorCode);
4866 if (strcmp(NAME_LMBCS_1,get_name)){
4867 log_err("Unexpected converter name: %s\n", get_name);
4868 }
4869 get_name = ucnv_getName (cnv2, &errorCode);
4870 if (strcmp(NAME_LMBCS_2,get_name)){
4871 log_err("Unexpected converter name: %s\n", get_name);
4872 }
4873
4874 /* substitution chars */
4875 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4876 if(U_FAILURE(errorCode)) {
4877 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4878 }
4879 if (len!=1){
4880 log_err("Unexpected length of sub chars\n");
4881 }
4882 if (get_subchars[0] != expected_subchars[0]){
4883 log_err("Unexpected value of sub chars\n");
4884 }
4885 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4886 if(U_FAILURE(errorCode)) {
4887 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4888 }
4889 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4890 if(U_FAILURE(errorCode)) {
4891 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4892 }
4893 if (len!=1){
4894 log_err("Unexpected length of sub chars\n");
4895 }
4896 if (get_subchars[0] != new_subchars[0]){
4897 log_err("Unexpected value of sub chars\n");
4898 }
4899 ucnv_close(cnv1);
4900 ucnv_close(cnv2);
4901
4902 }
4903
4904 /* LMBCS to Unicode - offsets */
4905 {
4906 UErrorCode errorCode=U_ZERO_ERROR;
4907
4908 const char * pSource = (const char *)pszLMBCS;
4909 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4910
4911 UChar Out [sizeof(pszUnicode) + 1];
4912 UChar * pOut = Out;
4913 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4914
4915 int32_t off [sizeof(offsets)];
4916
4917 /* last 'offset' in expected results is just the final size.
4918 (Makes other tests easier). Compensate here: */
4919
4920 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4921
4922
4923
4924 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4925 if(U_FAILURE(errorCode)) {
4926 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4927 return;
4928 }
4929
4930
4931
4932 ucnv_toUnicode (cnv,
4933 &pOut,
4934 OutLimit,
4935 &pSource,
4936 sourceLimit,
4937 off,
4938 TRUE,
4939 &errorCode);
4940
4941
4942 if (memcmp(off,offsets,sizeof(offsets)))
4943 {
4944 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4945 }
4946 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4947 {
4948 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4949 }
4950 ucnv_close(cnv);
4951 }
4952 {
4953 /* LMBCS to Unicode - getNextUChar */
4954 const char * sourceStart;
4955 const char *source=(const char *)pszLMBCS;
4956 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4957 const UChar32 *results= pszUnicode32;
4958 const int *off = offsets32;
4959
4960 UErrorCode errorCode=U_ZERO_ERROR;
4961 UChar32 uniChar;
4962
4963 cnv=ucnv_open("LMBCS-1", &errorCode);
4964 if(U_FAILURE(errorCode)) {
4965 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4966 return;
4967 }
4968 else
4969 {
4970
4971 while(source<limit) {
4972 sourceStart=source;
4973 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4974 if(U_FAILURE(errorCode)) {
4975 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4976 break;
4977 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4978 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4979 uniChar, (source-sourceStart), *results, *off);
4980 break;
4981 }
4982 results++;
4983 off++;
4984 }
4985 }
4986 ucnv_close(cnv);
4987 }
4988 { /* test locale & optimization group operations: Unicode to LMBCS */
4989
4990 UErrorCode errorCode=U_ZERO_ERROR;
4991 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4992 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4993 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4994 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4995 const UChar * pUniOut = uniString;
4996 UChar * pUniIn = uniString;
4997 uint8_t lmbcsString [4];
4998 const char * pLMBCSOut = (const char *)lmbcsString;
4999 char * pLMBCSIn = (char *)lmbcsString;
5000
5001 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5002 ucnv_fromUnicode (cnv16he,
5003 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5004 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5005 NULL, 1, &errorCode);
5006
5007 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5008 {
5009 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5010 }
5011
5012 pLMBCSIn= (char *)lmbcsString;
5013 pUniOut = uniString;
5014 ucnv_fromUnicode (cnv01us,
5015 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5016 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5017 NULL, 1, &errorCode);
5018
5019 if (lmbcsString[0] != 0x9F)
5020 {
5021 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5022 }
5023
5024 /* single byte char from mbcs char set */
5025 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5026 pLMBCSOut = (const char *)lmbcsString;
5027 pUniIn = uniString;
5028 ucnv_toUnicode (cnv16jp,
5029 &pUniIn, pUniIn + 1,
5030 &pLMBCSOut, (pLMBCSOut + 1),
5031 NULL, 1, &errorCode);
5032 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5033 {
5034 log_err("Unexpected results from LMBCS-16 single byte char\n");
5035 }
5036 /* convert to group 1: should be 3 bytes */
5037 pLMBCSIn = (char *)lmbcsString;
5038 pUniOut = uniString;
5039 ucnv_fromUnicode (cnv01us,
5040 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5041 &pUniOut, pUniOut + 1,
5042 NULL, 1, &errorCode);
5043 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5044 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5045 {
5046 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5047 }
5048 pLMBCSOut = (const char *)lmbcsString;
5049 pUniIn = uniString;
5050 ucnv_toUnicode (cnv01us,
5051 &pUniIn, pUniIn + 1,
5052 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5053 NULL, 1, &errorCode);
5054 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5055 {
5056 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5057 }
5058 pLMBCSIn = (char *)lmbcsString;
5059 pUniOut = uniString;
5060 ucnv_fromUnicode (cnv16jp,
5061 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5062 &pUniOut, pUniOut + 1,
5063 NULL, 1, &errorCode);
5064 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5065 {
5066 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5067 }
5068 ucnv_close(cnv16he);
5069 ucnv_close(cnv16jp);
5070 ucnv_close(cnv01us);
5071 }
5072 {
5073 /* Small source buffer testing, LMBCS -> Unicode */
5074
5075 UErrorCode errorCode=U_ZERO_ERROR;
5076
5077 const char * pSource = (const char *)pszLMBCS;
5078 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5079 int codepointCount = 0;
5080
5081 UChar Out [sizeof(pszUnicode) + 1];
5082 UChar * pOut = Out;
5083 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5084
5085
5086 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5087 if(U_FAILURE(errorCode)) {
5088 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5089 return;
5090 }
5091
5092
5093 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5094 {
5095 ucnv_toUnicode (cnv,
5096 &pOut,
5097 OutLimit,
5098 &pSource,
5099 (pSource+1), /* claim that this is a 1- byte buffer */
5100 NULL,
5101 FALSE, /* FALSE means there might be more chars in the next buffer */
5102 &errorCode);
5103
5104 if (U_SUCCESS (errorCode))
5105 {
5106 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5107 {
5108 /* we are on to the next code point: check value */
5109
5110 if (Out[0] != pszUnicode[codepointCount]){
5111 log_err("LMBCS->Uni result %lx should have been %lx \n",
5112 Out[0], pszUnicode[codepointCount]);
5113 }
5114
5115 pOut = Out; /* reset for accumulating next code point */
5116 codepointCount++;
5117 }
5118 }
5119 else
5120 {
5121 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5122 }
5123 }
5124 {
5125 /* limits & surrogate error testing */
5126 char LIn [sizeof(pszLMBCS)];
5127 const char * pLIn = LIn;
5128
5129 char LOut [sizeof(pszLMBCS)];
5130 char * pLOut = LOut;
5131
5132 UChar UOut [sizeof(pszUnicode)];
5133 UChar * pUOut = UOut;
5134
5135 UChar UIn [sizeof(pszUnicode)];
5136 const UChar * pUIn = UIn;
5137
5138 int32_t off [sizeof(offsets)];
5139 UChar32 uniChar;
5140
5141 errorCode=U_ZERO_ERROR;
5142
5143 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5144 pUIn++;
5145 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5146 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5147 {
5148 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5149 }
5150 pUIn--;
5151
5152 errorCode=U_ZERO_ERROR;
5153 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5154 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5155 {
5156 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5157 }
5158 errorCode=U_ZERO_ERROR;
5159
5160 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5161 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5162 {
5163 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5164 }
5165 errorCode=U_ZERO_ERROR;
5166
5167 /* 0 byte source request - no error, no pointer movement */
5168 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5169 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5170 if(U_FAILURE(errorCode)) {
5171 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5172 }
5173 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5174 {
5175 log_err("Unexpected pointer move in 0 byte source request \n");
5176 }
5177 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5178 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5179 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5180 {
5181 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5182 }
5183 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5184 {
5185 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5186 }
5187 errorCode = U_ZERO_ERROR;
5188
5189 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5190
5191 pUIn = pszUnicode;
5192 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5193 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5194 {
5195 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5196 }
5197
5198 errorCode = U_ZERO_ERROR;
5199
5200 pLIn = (const char *)pszLMBCS;
5201 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5202 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5203 {
5204 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5205 }
5206
5207 /* unpaired or chopped LMBCS surrogates */
5208
5209 /* OK high surrogate, Low surrogate is chopped */
5210 LIn [0] = (char)0x14;
5211 LIn [1] = (char)0xD8;
5212 LIn [2] = (char)0x01;
5213 LIn [3] = (char)0x14;
5214 LIn [4] = (char)0xDC;
5215 pLIn = LIn;
5216 errorCode = U_ZERO_ERROR;
5217 pUOut = UOut;
5218
5219 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5220 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5221 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5222 {
5223 log_err("Unexpected results on chopped low surrogate\n");
5224 }
5225
5226 /* chopped at surrogate boundary */
5227 LIn [0] = (char)0x14;
5228 LIn [1] = (char)0xD8;
5229 LIn [2] = (char)0x01;
5230 pLIn = LIn;
5231 errorCode = U_ZERO_ERROR;
5232 pUOut = UOut;
5233
5234 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5235 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5236 {
5237 log_err("Unexpected results on chopped at surrogate boundary \n");
5238 }
5239
5240 /* unpaired surrogate plus valid Unichar */
5241 LIn [0] = (char)0x14;
5242 LIn [1] = (char)0xD8;
5243 LIn [2] = (char)0x01;
5244 LIn [3] = (char)0x14;
5245 LIn [4] = (char)0xC9;
5246 LIn [5] = (char)0xD0;
5247 pLIn = LIn;
5248 errorCode = U_ZERO_ERROR;
5249 pUOut = UOut;
5250
5251 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5252 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5253 {
5254 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5255 }
5256
5257 /* unpaired surrogate plus chopped Unichar */
5258 LIn [0] = (char)0x14;
5259 LIn [1] = (char)0xD8;
5260 LIn [2] = (char)0x01;
5261 LIn [3] = (char)0x14;
5262 LIn [4] = (char)0xC9;
5263
5264 pLIn = LIn;
5265 errorCode = U_ZERO_ERROR;
5266 pUOut = UOut;
5267
5268 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5269 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5270 {
5271 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5272 }
5273
5274 /* unpaired surrogate plus valid non-Unichar */
5275 LIn [0] = (char)0x14;
5276 LIn [1] = (char)0xD8;
5277 LIn [2] = (char)0x01;
5278 LIn [3] = (char)0x0F;
5279 LIn [4] = (char)0x3B;
5280
5281 pLIn = LIn;
5282 errorCode = U_ZERO_ERROR;
5283 pUOut = UOut;
5284
5285 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5286 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5287 {
5288 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5289 }
5290
5291 /* unpaired surrogate plus chopped non-Unichar */
5292 LIn [0] = (char)0x14;
5293 LIn [1] = (char)0xD8;
5294 LIn [2] = (char)0x01;
5295 LIn [3] = (char)0x0F;
5296
5297 pLIn = LIn;
5298 errorCode = U_ZERO_ERROR;
5299 pUOut = UOut;
5300
5301 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5302
5303 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5304 {
5305 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5306 }
5307 }
5308 }
5309 ucnv_close(cnv); /* final cleanup */
5310 }
5311
5312
5313 static void TestJitterbug255()
5314 {
5315 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5316 const char *testBuffer = (const char *)testBytes;
5317 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5318 UErrorCode status = U_ZERO_ERROR;
5319 /*UChar32 result;*/
5320 UConverter *cnv = 0;
5321
5322 cnv = ucnv_open("shift-jis", &status);
5323 if (U_FAILURE(status) || cnv == 0) {
5324 log_data_err("Failed to open the converter for SJIS.\n");
5325 return;
5326 }
5327 while (testBuffer != testEnd)
5328 {
5329 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5330 if (U_FAILURE(status))
5331 {
5332 log_err("Failed to convert the next UChar for SJIS.\n");
5333 break;
5334 }
5335 }
5336 ucnv_close(cnv);
5337 }
5338
5339 static void TestEBCDICUS4XML()
5340 {
5341 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5342 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5343 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5344 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5345 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5346 UChar *unicodes = unicodes_x;
5347 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5348 char *target = target_x;
5349 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5350 UErrorCode status = U_ZERO_ERROR;
5351 UConverter *cnv = 0;
5352
5353 cnv = ucnv_open("ebcdic-xml-us", &status);
5354 if (U_FAILURE(status) || cnv == 0) {
5355 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5356 return;
5357 }
5358 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5359 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5360 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5361 u_errorName(status));
5362 printUSeqErr(unicodes_x, 3);
5363 printUSeqErr(toUnicodeMaps, 3);
5364 }
5365 status = U_ZERO_ERROR;
5366 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5367 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5368 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5369 u_errorName(status));
5370 printSeqErr((const unsigned char*)target_x, 3);
5371 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5372 }
5373 ucnv_close(cnv);
5374 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5376
5377 #if !UCONFIG_NO_COLLATION
5378
5379 static void TestJitterbug981(){
5380 const UChar* rules;
5381 int32_t rules_length, target_cap, bytes_needed, buff_size;
5382 UErrorCode status = U_ZERO_ERROR;
5383 UConverter *utf8cnv;
5384 UCollator* myCollator;
5385 char *buff;
5386 int numNeeded=0;
5387 utf8cnv = ucnv_open ("utf8", &status);
5388 if(U_FAILURE(status)){
5389 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5390 return;
5391 }
5392 myCollator = ucol_open("zh", &status);
5393 if(U_FAILURE(status)){
5394 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5395 ucnv_close(utf8cnv);
5396 return;
5397 }
5398
5399 rules = ucol_getRules(myCollator, &rules_length);
5400 if(rules_length == 0) {
5401 log_data_err("missing zh tailoring rule string\n");
5402 ucol_close(myCollator);
5403 ucnv_close(utf8cnv);
5404 return;
5405 }
5406 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5407 buff = malloc(buff_size);
5408
5409 target_cap = 0;
5410 do {
5411 ucnv_reset(utf8cnv);
5412 status = U_ZERO_ERROR;
5413 if(target_cap >= buff_size) {
5414 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5415 break;
5416 }
5417 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5418 rules, rules_length, &status);
5419 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5420 if(numNeeded!=0 && numNeeded!= bytes_needed){
5421 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5422 break;
5423 }
5424 numNeeded = bytes_needed;
5425 } while (status == U_BUFFER_OVERFLOW_ERROR);
5426 ucol_close(myCollator);
5427 ucnv_close(utf8cnv);
5428 free(buff);
5429 }
5430
5431 #endif
5432
5433 #if !UCONFIG_NO_LEGACY_CONVERSION
5434 static void TestJitterbug1293(){
5435 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5436 char target[256];
5437 UErrorCode status = U_ZERO_ERROR;
5438 UConverter* conv=NULL;
5439 int32_t target_cap, bytes_needed, numNeeded = 0;
5440 conv = ucnv_open("shift-jis",&status);
5441 if(U_FAILURE(status)){
5442 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5443 return;
5444 }
5445
5446 do{
5447 target_cap =0;
5448 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5449 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5450 if(numNeeded!=0 && numNeeded!= bytes_needed){
5451 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5452 }
5453 numNeeded = bytes_needed;
5454 } while (status == U_BUFFER_OVERFLOW_ERROR);
5455 if(U_FAILURE(status)){
5456 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5457 return;
5458 }
5459 ucnv_close(conv);
5460 }
5461 #endif
5462
5463 static void TestJB5275_1(){
5464
5465 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5466 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5467 /* Switch script: */
5468 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5469 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5470 "\xEF\x40\x3B\xB3\x0A";
5471 static const UChar expected[] ={
5472 0x003b, 0x0a15, 0x000a, /* Easy characters */
5473 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5474 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5475 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5476 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5477 };
5478
5479 UErrorCode status = U_ZERO_ERROR;
5480 UConverter* conv = ucnv_open("iscii-gur", &status);
5481 UChar dest[100] = {'\0'};
5482 UChar* target = dest;
5483 UChar* targetLimit = dest+100;
5484 const char* source = data;
5485 const char* sourceLimit = data+strlen(data);
5486 const UChar* exp = expected;
5487
5488 if (U_FAILURE(status)) {
5489 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5490 return;
5491 }
5492
5493 log_verbose("Testing switching back to default script when new line is encountered.\n");
5494 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5495 if(U_FAILURE(status)){
5496 log_err("conversion failed: %s \n", u_errorName(status));
5497 }
5498 targetLimit = target;
5499 target = dest;
5500 printUSeq(target, targetLimit-target);
5501 while(target<targetLimit){
5502 if(*exp!=*target){
5503 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5504 }
5505 target++;
5506 exp++;
5507 }
5508 ucnv_close(conv);
5509 }
5510
5511 static void TestJB5275(){
5512 static const char* data =
5513 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5514 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5515 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5516 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5517 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5518 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5519 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5520 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5521 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5522 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5523 static const UChar expected[] ={
5524 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5525 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5526 0x0038, 0x0C95, 0x000A, /* Kannada test */
5527 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5528 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5529 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5530 };
5531
5532 UErrorCode status = U_ZERO_ERROR;
5533 UConverter* conv = ucnv_open("iscii", &status);
5534 UChar dest[100] = {'\0'};
5535 UChar* target = dest;
5536 UChar* targetLimit = dest+100;
5537 const char* source = data;
5538 const char* sourceLimit = data+strlen(data);
5539 const UChar* exp = expected;
5540 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5541 if(U_FAILURE(status)){
5542 log_data_err("conversion failed: %s \n", u_errorName(status));
5543 }
5544 targetLimit = target;
5545 target = dest;
5546
5547 printUSeq(target, targetLimit-target);
5548
5549 while(target<targetLimit){
5550 if(*exp!=*target){
5551 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5552 }
5553 target++;
5554 exp++;
5555 }
5556 ucnv_close(conv);
5557 }
5558
5559 static void
5560 TestIsFixedWidth() {
5561 UErrorCode status = U_ZERO_ERROR;
5562 UConverter *cnv = NULL;
5563 int32_t i;
5564
5565 const char *fixedWidth[] = {
5566 "US-ASCII",
5567 "UTF32",
5568 "ibm-5478_P100-1995"
5569 };
5570
5571 const char *notFixedWidth[] = {
5572 "GB18030",
5573 "UTF8",
5574 "windows-949-2000",
5575 "UTF16"
5576 };
5577
5578 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5579 cnv = ucnv_open(fixedWidth[i], &status);
5580 if (cnv == NULL || U_FAILURE(status)) {
5581 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5582 continue;
5583 }
5584
5585 if (!ucnv_isFixedWidth(cnv, &status)) {
5586 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5587 }
5588 ucnv_close(cnv);
5589 }
5590
5591 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5592 cnv = ucnv_open(notFixedWidth[i], &status);
5593 if (cnv == NULL || U_FAILURE(status)) {
5594 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5595 continue;
5596 }
5597
5598 if (ucnv_isFixedWidth(cnv, &status)) {
5599 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5600 }
5601 ucnv_close(cnv);
5602 }
5603 }