]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nucnvtst.c
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File nucnvtst.c
9 *
10 * Modification History:
11 * Name Description
12 * Steven R. Loomis 7/8/1999 Adding input buffer test
13 ********************************************************************************
14 */
15 #include <stdio.h>
16 #include "cstring.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/ucnv_err.h"
20 #include "unicode/ucnv_cb.h"
21 #include "cintltst.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ucol.h"
25 #include "unicode/utf16.h"
26 #include "cmemory.h"
27 #include "nucnvtst.h"
28
29 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
30 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
31 #if !UCONFIG_NO_COLLATION
32 static void TestJitterbug981(void);
33 #endif
34 #if !UCONFIG_NO_LEGACY_CONVERSION
35 static void TestJitterbug1293(void);
36 #endif
37 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
38 static void TestConverterTypesAndStarters(void);
39 static void TestAmbiguous(void);
40 static void TestSignatureDetection(void);
41 static void TestUTF7(void);
42 static void TestIMAP(void);
43 static void TestUTF8(void);
44 static void TestCESU8(void);
45 static void TestUTF16(void);
46 static void TestUTF16BE(void);
47 static void TestUTF16LE(void);
48 static void TestUTF32(void);
49 static void TestUTF32BE(void);
50 static void TestUTF32LE(void);
51 static void TestLATIN1(void);
52
53 #if !UCONFIG_NO_LEGACY_CONVERSION
54 static void TestSBCS(void);
55 static void TestDBCS(void);
56 static void TestMBCS(void);
57 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
58 static void TestICCRunout(void);
59 #endif
60
61 #ifdef U_ENABLE_GENERIC_ISO_2022
62 static void TestISO_2022(void);
63 #endif
64
65 static void TestISO_2022_JP(void);
66 static void TestISO_2022_JP_1(void);
67 static void TestISO_2022_JP_2(void);
68 static void TestISO_2022_KR(void);
69 static void TestISO_2022_KR_1(void);
70 static void TestISO_2022_CN(void);
71 #if 0
72 /*
73 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
74 */
75 static void TestISO_2022_CN_EXT(void);
76 #endif
77 static void TestJIS(void);
78 static void TestHZ(void);
79 #endif
80
81 static void TestSCSU(void);
82
83 #if !UCONFIG_NO_LEGACY_CONVERSION
84 static void TestEBCDIC_STATEFUL(void);
85 static void TestGB18030(void);
86 static void TestLMBCS(void);
87 static void TestJitterbug255(void);
88 static void TestEBCDICUS4XML(void);
89 #if 0
90 /*
91 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
92 */
93 static void TestJitterbug915(void);
94 #endif
95 static void TestISCII(void);
96
97 static void TestCoverageMBCS(void);
98 static void TestJitterbug2346(void);
99 static void TestJitterbug2411(void);
100 static void TestJB5275(void);
101 static void TestJB5275_1(void);
102 static void TestJitterbug6175(void);
103
104 static void TestIsFixedWidth(void);
105 #endif
106
107 static void TestInBufSizes(void);
108
109 static void TestRoundTrippingAllUTF(void);
110 static void TestConv(const uint16_t in[],
111 int len,
112 const char* conv,
113 const char* lang,
114 char byteArr[],
115 int byteArrLen);
116
117 /* open a converter, using test data if it begins with '@' */
118 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
119
120
121 #define NEW_MAX_BUFFER 999
122
123 static int32_t gInBufferSize = NEW_MAX_BUFFER;
124 static int32_t gOutBufferSize = NEW_MAX_BUFFER;
125 static char gNuConvTestName[1024];
126
/*
 * Minimum of two values. Every argument use and the whole expansion are
 * parenthesized so that operands containing lower-precedence operators
 * (e.g. `a & 1`) are compared as a unit. Note that each argument may still
 * be evaluated twice, so do not pass expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
128
129 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
130 {
131 if(cnv && cnv[0] == '@') {
132 return ucnv_openPackage(loadTestData(err), cnv+1, err);
133 } else {
134 return ucnv_open(cnv, err);
135 }
136 }
137
/* Writes the first `len` bytes of `a` to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int pos;

    log_verbose("{");
    for(pos=0; pos<len; ++pos) {
        log_verbose("0x%02x ", a[pos]);
    }
    log_verbose("}\n");
}
146
147 static void printUSeq(const UChar* a, int len)
148 {
149 int i=0;
150 log_verbose("{U+");
151 while (i<len) log_verbose("0x%04x ", a[i++]);
152 log_verbose("}\n");
153 }
154
/* Writes the first `len` bytes of `a` to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for(pos=0; pos<len; ++pos) {
        fprintf(stderr, "0x%02x ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
163
164 static void printUSeqErr(const UChar* a, int len)
165 {
166 int i=0;
167 fprintf(stderr, "{U+");
168 while (i<len)
169 fprintf(stderr, "0x%04x ", a[i++]);
170 fprintf(stderr,"}\n");
171 }
172
173 static void
174 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
175 {
176 const char* s0;
177 const char* s=(char*)source;
178 const int32_t *r=results;
179 UErrorCode errorCode=U_ZERO_ERROR;
180 UChar32 c;
181
182 while(s<limit) {
183 s0=s;
184 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
185 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
186 break; /* no more significant input */
187 } else if(U_FAILURE(errorCode)) {
188 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
189 break;
190 } else if(
191 /* test the expected number of input bytes only if >=0 */
192 (*r>=0 && (int32_t)(s-s0)!=*r) ||
193 c!=*(r+1)
194 ) {
195 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
196 message, c, (s-s0), *(r+1), *r);
197 break;
198 }
199 r+=2;
200 }
201 }
202
203 static void
204 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
205 {
206 const char* s=(char*)source;
207 UErrorCode errorCode=U_ZERO_ERROR;
208 uint32_t c;
209 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
210 if(errorCode != expected){
211 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
212 }
213 if(c != 0xFFFD && c != 0xffff){
214 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
215 }
216
217 }
218
219 static void TestInBufSizes(void)
220 {
221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
222 #if 1
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
228 TestNewConvertWithBufferSizes(1,1);
229 TestNewConvertWithBufferSizes(2,3);
230 TestNewConvertWithBufferSizes(3,2);
231 #endif
232 }
233
234 static void TestOutBufSizes(void)
235 {
236 #if 1
237 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
238 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
239 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
240 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
243
244 #endif
245 }
246
247
/*
 * Calls addTest() once per test function in this file, passing `root` and a
 * path of the form "tsconv/nucnvtst/<TestName>".
 *
 * Which calls are compiled depends on the UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 preprocessor settings, as guarded below.
 */
void addTestNewConvert(TestNode** root)
{
#if !UCONFIG_NO_FILE_IO
   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
#endif
   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");

   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");

#if !UCONFIG_NO_LEGACY_CONVERSION
   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
#endif

   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");

#if !UCONFIG_NO_LEGACY_CONVERSION
   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
#if !UCONFIG_NO_FILE_IO
   /* these two are additionally skipped when file I/O is configured out */
   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
#endif
   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");

#ifdef U_ENABLE_GENERIC_ISO_2022
   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
#endif

   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
   addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
   addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
    */
   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
#endif

   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");

#if !UCONFIG_NO_LEGACY_CONVERSION
   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
   /* registered only when both legacy conversion and collation are available */
   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif

   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
#endif

   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");

#if !UCONFIG_NO_LEGACY_CONVERSION
   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");

   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
#endif
}
335
336
337 /* Note that this test already makes use of statics, so it's not really
338 multithread safe.
339 This convenience function lets us make the error messages actually useful.
340 */
341
342 static void setNuConvTestName(const char *codepage, const char *direction)
343 {
344 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
345 codepage,
346 direction,
347 (int)gInBufferSize,
348 (int)gOutBufferSize);
349 }
350
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* 0: the test was OK */
    TC_OK,
    /* 1: the match failed; an error was printed */
    TC_MISMATCH,
    /* 2: the test failed; do not print another error, one was already printed */
    TC_FAIL
} ETestConvertResult;
357
358 /* Note: This function uses global variables and it will not do offset
359 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
360 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
361 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
362 {
363 UErrorCode status = U_ZERO_ERROR;
364 UConverter *conv = 0;
365 char junkout[NEW_MAX_BUFFER]; /* FIX */
366 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
367 char *p;
368 const UChar *src;
369 char *end;
370 char *targ;
371 int32_t *offs;
372 int i;
373 int32_t realBufferSize;
374 char *realBufferEnd;
375 const UChar *realSourceEnd;
376 const UChar *sourceLimit;
377 UBool checkOffsets = TRUE;
378 UBool doFlush;
379
380 for(i=0;i<NEW_MAX_BUFFER;i++)
381 junkout[i] = (char)0xF0;
382 for(i=0;i<NEW_MAX_BUFFER;i++)
383 junokout[i] = 0xFF;
384
385 setNuConvTestName(codepage, "FROM");
386
387 log_verbose("\n========= %s\n", gNuConvTestName);
388
389 conv = my_ucnv_open(codepage, &status);
390
391 if(U_FAILURE(status))
392 {
393 log_data_err("Couldn't open converter %s\n",codepage);
394 return TC_FAIL;
395 }
396 if(useFallback){
397 ucnv_setFallback(conv,useFallback);
398 }
399
400 log_verbose("Converter opened..\n");
401
402 src = source;
403 targ = junkout;
404 offs = junokout;
405
406 realBufferSize = UPRV_LENGTHOF(junkout);
407 realBufferEnd = junkout + realBufferSize;
408 realSourceEnd = source + sourceLen;
409
410 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
411 checkOffsets = FALSE;
412
413 do
414 {
415 end = nct_min(targ + gOutBufferSize, realBufferEnd);
416 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
417
418 doFlush = (UBool)(sourceLimit == realSourceEnd);
419
420 if(targ == realBufferEnd) {
421 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
422 return TC_FAIL;
423 }
424 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
425
426
427 status = U_ZERO_ERROR;
428
429 ucnv_fromUnicode (conv,
430 &targ,
431 end,
432 &src,
433 sourceLimit,
434 checkOffsets ? offs : NULL,
435 doFlush, /* flush if we're at the end of the input data */
436 &status);
437 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
438
439 if(U_FAILURE(status)) {
440 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
441 return TC_FAIL;
442 }
443
444 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
445 sourceLen, targ-junkout);
446
447 if(getTestOption(VERBOSITY_OPTION))
448 {
449 char junk[9999];
450 char offset_str[9999];
451 char *ptr;
452
453 junk[0] = 0;
454 offset_str[0] = 0;
455 for(ptr = junkout;ptr<targ;ptr++) {
456 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
457 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
458 }
459
460 log_verbose(junk);
461 printSeq((const uint8_t *)expect, expectLen);
462 if ( checkOffsets ) {
463 log_verbose("\nOffsets:");
464 log_verbose(offset_str);
465 }
466 log_verbose("\n");
467 }
468 ucnv_close(conv);
469
470 if(expectLen != targ-junkout) {
471 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
472 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473 fprintf(stderr, "Got:\n");
474 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
475 fprintf(stderr, "Expected:\n");
476 printSeqErr((const unsigned char*)expect, expectLen);
477 return TC_MISMATCH;
478 }
479
480 if (checkOffsets && (expectOffsets != 0) ) {
481 log_verbose("comparing %d offsets..\n", targ-junkout);
482 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
483 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
484 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
485 log_err("\n");
486 log_err("Got : ");
487 for(p=junkout;p<targ;p++) {
488 log_err("%d,", junokout[p-junkout]);
489 }
490 log_err("\n");
491 log_err("Expected: ");
492 for(i=0; i<(targ-junkout); i++) {
493 log_err("%d,", expectOffsets[i]);
494 }
495 log_err("\n");
496 }
497 }
498
499 log_verbose("comparing..\n");
500 if(!memcmp(junkout, expect, expectLen)) {
501 log_verbose("Matches!\n");
502 return TC_OK;
503 } else {
504 log_err("String does not match u->%s\n", gNuConvTestName);
505 printUSeqErr(source, sourceLen);
506 fprintf(stderr, "Got:\n");
507 printSeqErr((const unsigned char *)junkout, expectLen);
508 fprintf(stderr, "Expected:\n");
509 printSeqErr((const unsigned char *)expect, expectLen);
510
511 return TC_MISMATCH;
512 }
513 }
514
515 /* Note: This function uses global variables and it will not do offset
516 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
517 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
518 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
519 {
520 UErrorCode status = U_ZERO_ERROR;
521 UConverter *conv = 0;
522 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
523 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
524 const char *src;
525 const char *realSourceEnd;
526 const char *srcLimit;
527 UChar *p;
528 UChar *targ;
529 UChar *end;
530 int32_t *offs;
531 int i;
532 UBool checkOffsets = TRUE;
533
534 int32_t realBufferSize;
535 UChar *realBufferEnd;
536
537
538 for(i=0;i<NEW_MAX_BUFFER;i++)
539 junkout[i] = 0xFFFE;
540
541 for(i=0;i<NEW_MAX_BUFFER;i++)
542 junokout[i] = -1;
543
544 setNuConvTestName(codepage, "TO");
545
546 log_verbose("\n========= %s\n", gNuConvTestName);
547
548 conv = my_ucnv_open(codepage, &status);
549
550 if(U_FAILURE(status))
551 {
552 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
553 return TC_FAIL;
554 }
555 if(useFallback){
556 ucnv_setFallback(conv,useFallback);
557 }
558 log_verbose("Converter opened..\n");
559
560 src = (const char *)source;
561 targ = junkout;
562 offs = junokout;
563
564 realBufferSize = UPRV_LENGTHOF(junkout);
565 realBufferEnd = junkout + realBufferSize;
566 realSourceEnd = src + sourcelen;
567
568 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
569 checkOffsets = FALSE;
570
571 do
572 {
573 end = nct_min( targ + gOutBufferSize, realBufferEnd);
574 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
575
576 if(targ == realBufferEnd)
577 {
578 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
579 return TC_FAIL;
580 }
581 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
582
583 /* oldTarg = targ; */
584
585 status = U_ZERO_ERROR;
586
587 ucnv_toUnicode (conv,
588 &targ,
589 end,
590 &src,
591 srcLimit,
592 checkOffsets ? offs : NULL,
593 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
594 &status);
595
596 /* offs += (targ-oldTarg); */
597
598 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
599
600 if(U_FAILURE(status))
601 {
602 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
603 return TC_FAIL;
604 }
605
606 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
607 sourcelen, targ-junkout);
608 if(getTestOption(VERBOSITY_OPTION))
609 {
610 char junk[9999];
611 char offset_str[9999];
612 UChar *ptr;
613
614 junk[0] = 0;
615 offset_str[0] = 0;
616
617 for(ptr = junkout;ptr<targ;ptr++)
618 {
619 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
620 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
621 }
622
623 log_verbose(junk);
624 printUSeq(expect, expectlen);
625 if ( checkOffsets )
626 {
627 log_verbose("\nOffsets:");
628 log_verbose(offset_str);
629 }
630 log_verbose("\n");
631 }
632 ucnv_close(conv);
633
634 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
635
636 if (checkOffsets && (expectOffsets != 0))
637 {
638 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
639 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
640 log_err("Got: ");
641 for(p=junkout;p<targ;p++) {
642 log_err("%d,", junokout[p-junkout]);
643 }
644 log_err("\n");
645 log_err("Expected: ");
646 for(i=0; i<(targ-junkout); i++) {
647 log_err("%d,", expectOffsets[i]);
648 }
649 log_err("\n");
650 log_err("output: ");
651 for(i=0; i<(targ-junkout); i++) {
652 log_err("%X,", junkout[i]);
653 }
654 log_err("\n");
655 log_err("input: ");
656 for(i=0; i<(src-(const char *)source); i++) {
657 log_err("%X,", (unsigned char)source[i]);
658 }
659 log_err("\n");
660 }
661 }
662
663 if(!memcmp(junkout, expect, expectlen*2))
664 {
665 log_verbose("Matches!\n");
666 return TC_OK;
667 }
668 else
669 {
670 log_err("String does not match. %s\n", gNuConvTestName);
671 log_verbose("String does not match. %s\n", gNuConvTestName);
672 printf("\nGot:");
673 printUSeqErr(junkout, expectlen);
674 printf("\nExpected:");
675 printUSeqErr(expect, expectlen);
676 return TC_MISMATCH;
677 }
678 }
679
680
681 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
682 {
683 /** test chars #1 */
684 /* 1 2 3 1Han 2Han 3Han . */
685 static const UChar sampleText[] =
686 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
687 static const UChar sampleTextRoundTripUnmappable[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
689
690
691 static const uint8_t expectedUTF8[] =
692 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
693 static const int32_t toUTF8Offs[] =
694 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
695 static const int32_t fmUTF8Offs[] =
696 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
697
698 #ifdef U_ENABLE_GENERIC_ISO_2022
699 /* Same as UTF8, but with ^[%B preceeding */
700 static const const uint8_t expectedISO2022[] =
701 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
702 static const int32_t toISO2022Offs[] =
703 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
704 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
705 static const int32_t fmISO2022Offs[] =
706 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
707 #endif
708
709 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
710 static const uint8_t expectedIBM930[] =
711 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
712 static const int32_t toIBM930Offs[] =
713 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
714 static const int32_t fmIBM930Offs[] =
715 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
716
717 /* 1 2 3 0 h1 h2 h3 . MBCS*/
718 static const uint8_t expectedIBM943[] =
719 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
720 static const int32_t toIBM943Offs [] =
721 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
722 static const int32_t fmIBM943Offs[] =
723 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
724
725 /* 1 2 3 0 h1 h2 h3 . DBCS*/
726 static const uint8_t expectedIBM9027[] =
727 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
728 static const int32_t toIBM9027Offs [] =
729 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
730
731 /* 1 2 3 0 <?> <?> <?> . SBCS*/
732 static const uint8_t expectedIBM920[] =
733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
734 static const int32_t toIBM920Offs [] =
735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
736
737 /* 1 2 3 0 <?> <?> <?> . SBCS*/
738 static const uint8_t expectedISO88593[] =
739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
740 static const int32_t toISO88593Offs[] =
741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
742
743 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
744 static const uint8_t expectedLATIN1[] =
745 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
746 static const int32_t toLATIN1Offs[] =
747 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
748
749
750 /* etc */
751 static const uint8_t expectedUTF16BE[] =
752 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
753 static const int32_t toUTF16BEOffs[]=
754 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
755 static const int32_t fmUTF16BEOffs[] =
756 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
757
758 static const uint8_t expectedUTF16LE[] =
759 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
760 static const int32_t toUTF16LEOffs[]=
761 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
762 static const int32_t fmUTF16LEOffs[] =
763 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
764
765 static const uint8_t expectedUTF32BE[] =
766 { 0x00, 0x00, 0x00, 0x31,
767 0x00, 0x00, 0x00, 0x32,
768 0x00, 0x00, 0x00, 0x33,
769 0x00, 0x00, 0x00, 0x00,
770 0x00, 0x00, 0x4e, 0x00,
771 0x00, 0x00, 0x4e, 0x8c,
772 0x00, 0x00, 0x4e, 0x09,
773 0x00, 0x00, 0x00, 0x2e,
774 0x00, 0x02, 0x00, 0x21 };
775 static const int32_t toUTF32BEOffs[]=
776 { 0x00, 0x00, 0x00, 0x00,
777 0x01, 0x01, 0x01, 0x01,
778 0x02, 0x02, 0x02, 0x02,
779 0x03, 0x03, 0x03, 0x03,
780 0x04, 0x04, 0x04, 0x04,
781 0x05, 0x05, 0x05, 0x05,
782 0x06, 0x06, 0x06, 0x06,
783 0x07, 0x07, 0x07, 0x07,
784 0x08, 0x08, 0x08, 0x08,
785 0x08, 0x08, 0x08, 0x08 };
786 static const int32_t fmUTF32BEOffs[] =
787 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
788
789 static const uint8_t expectedUTF32LE[] =
790 { 0x31, 0x00, 0x00, 0x00,
791 0x32, 0x00, 0x00, 0x00,
792 0x33, 0x00, 0x00, 0x00,
793 0x00, 0x00, 0x00, 0x00,
794 0x00, 0x4e, 0x00, 0x00,
795 0x8c, 0x4e, 0x00, 0x00,
796 0x09, 0x4e, 0x00, 0x00,
797 0x2e, 0x00, 0x00, 0x00,
798 0x21, 0x00, 0x02, 0x00 };
799 static const int32_t toUTF32LEOffs[]=
800 { 0x00, 0x00, 0x00, 0x00,
801 0x01, 0x01, 0x01, 0x01,
802 0x02, 0x02, 0x02, 0x02,
803 0x03, 0x03, 0x03, 0x03,
804 0x04, 0x04, 0x04, 0x04,
805 0x05, 0x05, 0x05, 0x05,
806 0x06, 0x06, 0x06, 0x06,
807 0x07, 0x07, 0x07, 0x07,
808 0x08, 0x08, 0x08, 0x08,
809 0x08, 0x08, 0x08, 0x08 };
810 static const int32_t fmUTF32LEOffs[] =
811 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
812
813
814
815
816 /** Test chars #2 **/
817
818 /* Sahha [health], slashed h's */
819 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
820 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
821
822 /* LMBCS */
823 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
824 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
825 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
826 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
827 /*********************************** START OF CODE finally *************/
828
829 gInBufferSize = insize;
830 gOutBufferSize = outsize;
831
832 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
833
834
835 /*UTF-8*/
836 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
837 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
838
839 log_verbose("Test surrogate behaviour for UTF8\n");
840 {
841 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
842 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
843 0xf0, 0x90, 0x90, 0x81,
844 0xef, 0xbf, 0xbd
845 };
846 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
847 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
848 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
849
850
851 }
852
853 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
854 /*ISO-2022*/
855 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
856 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
857 #endif
858
859 /*UTF16 LE*/
860 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
861 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
862 /*UTF16 BE*/
863 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
864 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
865 /*UTF32 LE*/
866 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
867 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
868 /*UTF32 BE*/
869 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
870 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
871
872 /*LATIN_1*/
873 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
874 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
875
876 #if !UCONFIG_NO_LEGACY_CONVERSION
877 /*EBCDIC_STATEFUL*/
878 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
879 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
880
881 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
882 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
883
884 /*MBCS*/
885
886 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
887 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
888 /*DBCS*/
889 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
890 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
891 /*SBCS*/
892 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
893 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
894 /*SBCS*/
895 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
896 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
897 #endif
898
899
900 /****/
901
902 /*UTF-8*/
903 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
904 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
905 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
906 /*ISO-2022*/
907 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
908 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
909 #endif
910
911 /*UTF16 LE*/
912 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
913 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
914 /*UTF16 BE*/
915 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
916 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
917 /*UTF32 LE*/
918 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
919 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
920 /*UTF32 BE*/
921 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
922 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
923
924 #if !UCONFIG_NO_LEGACY_CONVERSION
925 /*EBCDIC_STATEFUL*/
926 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
927 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
928 /*MBCS*/
929 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
930 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
931 #endif
932
933 /* Try it again to make sure it still works */
934 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
935 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
936
937 #if !UCONFIG_NO_LEGACY_CONVERSION
938 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
939 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
940
941 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
942 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
943
944 /*LMBCS*/
945 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
946 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
947 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
948 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
949 #endif
950
951 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
952 {
953 /* encode directly set D and set O */
954 static const uint8_t utf7[] = {
955 /*
956 Hi Mom -+Jjo--!
957 A+ImIDkQ.
958 +-
959 +ZeVnLIqe-
960 */
961 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
962 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
963 0x2b, 0x2d,
964 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
965 };
966 static const UChar unicode[] = {
967 /*
968 Hi Mom -<WHITE SMILING FACE>-!
969 A<NOT IDENTICAL TO><ALPHA>.
970 +
971 [Japanese word "nihongo"]
972 */
973 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
974 0x41, 0x2262, 0x0391, 0x2e,
975 0x2b,
976 0x65e5, 0x672c, 0x8a9e
977 };
978 static const int32_t toUnicodeOffsets[] = {
979 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
980 15, 17, 19, 23,
981 24,
982 27, 29, 32
983 };
984 static const int32_t fromUnicodeOffsets[] = {
985 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
986 11, 12, 12, 12, 13, 13, 13, 13, 14,
987 15, 15,
988 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
989 };
990
991 /* same but escaping set O (the exclamation mark) */
992 static const uint8_t utf7Restricted[] = {
993 /*
994 Hi Mom -+Jjo--+ACE-
995 A+ImIDkQ.
996 +-
997 +ZeVnLIqe-
998 */
999 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1000 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1001 0x2b, 0x2d,
1002 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1003 };
1004 static const int32_t toUnicodeOffsetsR[] = {
1005 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1006 19, 21, 23, 27,
1007 28,
1008 31, 33, 36
1009 };
1010 static const int32_t fromUnicodeOffsetsR[] = {
1011 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1012 11, 12, 12, 12, 13, 13, 13, 13, 14,
1013 15, 15,
1014 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1015 };
1016
1017 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1018
1019 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1020
1021 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1022
1023 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1024 }
1025
1026 /*
1027 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1028 * modified according to RFC 2060,
1029 * and supplemented with the one example in RFC 2060 itself.
1030 */
1031 {
1032 static const uint8_t imap[] = {
1033 /* Hi Mom -&Jjo--!
1034 A&ImIDkQ-.
1035 &-
1036 &ZeVnLIqe-
1037 \
1038 ~peter
1039 /mail
1040 /&ZeVnLIqe-
1041 /&U,BTFw-
1042 */
1043 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1044 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1045 0x26, 0x2d,
1046 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1047 0x5c,
1048 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1049 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1050 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1051 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1052 };
1053 static const UChar unicode[] = {
1054 /* Hi Mom -<WHITE SMILING FACE>-!
1055 A<NOT IDENTICAL TO><ALPHA>.
1056 &
1057 [Japanese word "nihongo"]
1058 \
1059 ~peter
1060 /mail
1061 /<65e5, 672c, 8a9e>
1062 /<53f0, 5317>
1063 */
1064 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1065 0x41, 0x2262, 0x0391, 0x2e,
1066 0x26,
1067 0x65e5, 0x672c, 0x8a9e,
1068 0x5c,
1069 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1070 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1071 0x2f, 0x65e5, 0x672c, 0x8a9e,
1072 0x2f, 0x53f0, 0x5317
1073 };
1074 static const int32_t toUnicodeOffsets[] = {
1075 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1076 15, 17, 19, 24,
1077 25,
1078 28, 30, 33,
1079 37,
1080 38, 39, 40, 41, 42, 43,
1081 44, 45, 46, 47, 48,
1082 49, 51, 53, 56,
1083 60, 62, 64
1084 };
1085 static const int32_t fromUnicodeOffsets[] = {
1086 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1087 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1088 15, 15,
1089 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1090 19,
1091 20, 21, 22, 23, 24, 25,
1092 26, 27, 28, 29, 30,
1093 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1094 35, 36, 36, 36, 37, 37, 37, 37, 37
1095 };
1096
1097 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1098
1099 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1100 }
1101
1102 /* Test UTF-8 bad data handling*/
1103 {
1104 static const uint8_t utf8[]={
1105 0x61,
1106 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1107 0x00,
1108 0x62,
1109 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1112 0xdf, 0xbf, /* 7ff */
1113 0xbf, /* truncated tail */
1114 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */
1115 0x02
1116 };
1117
1118 static const uint16_t utf8Expected[]={
1119 0x0061,
1120 0xfffd,
1121 0x0000,
1122 0x0062,
1123 0xfffd,
1124 0xfffd,
1125 0xdbff, 0xdfff,
1126 0x07ff,
1127 0xfffd,
1128 0xfffd,
1129 0x0002
1130 };
1131
1132 static const int32_t utf8Offsets[]={
1133 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1134 };
1135 testConvertToU(utf8, sizeof(utf8),
1136 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1137
1138 }
1139
1140 /* Test UTF-32BE bad data handling*/
1141 {
1142 static const uint8_t utf32[]={
1143 0x00, 0x00, 0x00, 0x61,
1144 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1145 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1146 0x00, 0x00, 0x00, 0x62,
1147 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1148 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1149 0x00, 0x00, 0x01, 0x62,
1150 0x00, 0x00, 0x02, 0x62
1151 };
1152 static const uint16_t utf32Expected[]={
1153 0x0061,
1154 0xfffd, /* 0x110000 out of range */
1155 0xDBFF, /* 0x10FFFF in range */
1156 0xDFFF,
1157 0x0062,
1158 0xfffd, /* 0xffffffff out of range */
1159 0xfffd, /* 0x7fffffff out of range */
1160 0x0162,
1161 0x0262
1162 };
1163 static const int32_t utf32Offsets[]={
1164 0, 4, 8, 8, 12, 16, 20, 24, 28
1165 };
1166 static const uint8_t utf32ExpectedBack[]={
1167 0x00, 0x00, 0x00, 0x61,
1168 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1169 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1170 0x00, 0x00, 0x00, 0x62,
1171 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1172 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1173 0x00, 0x00, 0x01, 0x62,
1174 0x00, 0x00, 0x02, 0x62
1175 };
1176 static const int32_t utf32OffsetsBack[]={
1177 0,0,0,0,
1178 1,1,1,1,
1179 2,2,2,2,
1180 4,4,4,4,
1181 5,5,5,5,
1182 6,6,6,6,
1183 7,7,7,7,
1184 8,8,8,8
1185 };
1186
1187 testConvertToU(utf32, sizeof(utf32),
1188 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1189 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1190 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1191 }
1192
1193 /* Test UTF-32LE bad data handling*/
1194 {
1195 static const uint8_t utf32[]={
1196 0x61, 0x00, 0x00, 0x00,
1197 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1198 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1199 0x62, 0x00, 0x00, 0x00,
1200 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1201 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1202 0x62, 0x01, 0x00, 0x00,
1203 0x62, 0x02, 0x00, 0x00,
1204 };
1205
1206 static const uint16_t utf32Expected[]={
1207 0x0061,
1208 0xfffd, /* 0x110000 out of range */
1209 0xDBFF, /* 0x10FFFF in range */
1210 0xDFFF,
1211 0x0062,
1212 0xfffd, /* 0xffffffff out of range */
1213 0xfffd, /* 0x7fffffff out of range */
1214 0x0162,
1215 0x0262
1216 };
1217 static const int32_t utf32Offsets[]={
1218 0, 4, 8, 8, 12, 16, 20, 24, 28
1219 };
1220 static const uint8_t utf32ExpectedBack[]={
1221 0x61, 0x00, 0x00, 0x00,
1222 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1223 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1224 0x62, 0x00, 0x00, 0x00,
1225 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1226 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1227 0x62, 0x01, 0x00, 0x00,
1228 0x62, 0x02, 0x00, 0x00
1229 };
1230 static const int32_t utf32OffsetsBack[]={
1231 0,0,0,0,
1232 1,1,1,1,
1233 2,2,2,2,
1234 4,4,4,4,
1235 5,5,5,5,
1236 6,6,6,6,
1237 7,7,7,7,
1238 8,8,8,8
1239 };
1240 testConvertToU(utf32, sizeof(utf32),
1241 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1242 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1243 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1244 }
1245 }
1246
1247 static void TestCoverageMBCS(){
1248 #if 0
1249 UErrorCode status = U_ZERO_ERROR;
1250 const char *directory = loadTestData(&status);
1251 char* tdpath = NULL;
1252 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1253 int len = strlen(directory);
1254 char* index=NULL;
1255
1256 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1257 uprv_strcpy(saveDirectory,u_getDataDirectory());
1258 log_verbose("Retrieved data directory %s \n",saveDirectory);
1259 uprv_strcpy(tdpath,directory);
1260 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1261
1262 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1263 *(index+1)=0;
1264 }
1265 u_setDataDirectory(tdpath);
1266 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1267 #endif
1268
1269 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1270 which is test file for MBCS conversion with single-byte codepage data.*/
1271 {
1272
1273 /* MBCS with single byte codepage data test1.ucm*/
1274 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1275 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1276 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1277
1278 /*from Unicode*/
1279 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1280 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1281 }
1282
1283 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1284 which is test file for MBCS conversion with three-byte codepage data.*/
1285 {
1286
1287 /* MBCS with three byte codepage data test3.ucm*/
1288 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1289 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1290 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1291
1292 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1293 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1294 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1295
1296 /*from Unicode*/
1297 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1298 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1299
1300 /*to Unicode*/
1301 testConvertToU(test3input, sizeof(test3input),
1302 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1303
1304 }
1305
1306 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1307 which is test file for MBCS conversion with four-byte codepage data.*/
1308 {
1309
1310 /* MBCS with three byte codepage data test4.ucm*/
1311 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1312 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1313 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1314
1315 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1316 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1317 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1318
1319 /*from Unicode*/
1320 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1321 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1322
1323 /*to Unicode*/
1324 testConvertToU(test4input, sizeof(test4input),
1325 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1326
1327 }
1328 #if 0
1329 free(tdpath);
1330 /* restore the original data directory */
1331 log_verbose("Setting the data directory to %s \n", saveDirectory);
1332 u_setDataDirectory(saveDirectory);
1333 free(saveDirectory);
1334 #endif
1335
1336 }
1337
1338 static void TestConverterType(const char *convName, UConverterType convType) {
1339 UConverter* myConverter;
1340 UErrorCode err = U_ZERO_ERROR;
1341
1342 myConverter = my_ucnv_open(convName, &err);
1343
1344 if (U_FAILURE(err)) {
1345 log_data_err("Failed to create an %s converter\n", convName);
1346 return;
1347 }
1348 else
1349 {
1350 if (ucnv_getType(myConverter)!=convType) {
1351 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1352 convName, convType);
1353 }
1354 else {
1355 log_verbose("ucnv_getType %s ok\n", convName);
1356 }
1357 }
1358 ucnv_close(myConverter);
1359 }
1360
1361 static void TestConverterTypesAndStarters()
1362 {
1363 #if !UCONFIG_NO_LEGACY_CONVERSION
1364 UConverter* myConverter;
1365 UErrorCode err = U_ZERO_ERROR;
1366 UBool mystarters[256];
1367
1368 /* const UBool expectedKSCstarters[256] = {
1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1395
1396
1397 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1398
1399 myConverter = ucnv_open("ksc", &err);
1400 if (U_FAILURE(err)) {
1401 log_data_err("Failed to create an ibm-ksc converter\n");
1402 return;
1403 }
1404 else
1405 {
1406 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1407 log_err("ucnv_getType Failed for ibm-949\n");
1408 else
1409 log_verbose("ucnv_getType ibm-949 ok\n");
1410
1411 if(myConverter!=NULL)
1412 ucnv_getStarters(myConverter, mystarters, &err);
1413
1414 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1415 log_err("Failed ucnv_getStarters for ksc\n");
1416 else
1417 log_verbose("ucnv_getStarters ok\n");*/
1418
1419 }
1420 ucnv_close(myConverter);
1421
1422 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1423 TestConverterType("ibm-878", UCNV_SBCS);
1424 #endif
1425
1426 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1427
1428 TestConverterType("ibm-1208", UCNV_UTF8);
1429
1430 TestConverterType("utf-8", UCNV_UTF8);
1431 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1432 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1433 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1434 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1435
1436 #if !UCONFIG_NO_LEGACY_CONVERSION
1437
1438 #if defined(U_ENABLE_GENERIC_ISO_2022)
1439 TestConverterType("iso-2022", UCNV_ISO_2022);
1440 #endif
1441
1442 TestConverterType("hz", UCNV_HZ);
1443 #endif
1444
1445 TestConverterType("scsu", UCNV_SCSU);
1446
1447 #if !UCONFIG_NO_LEGACY_CONVERSION
1448 TestConverterType("x-iscii-de", UCNV_ISCII);
1449 #endif
1450
1451 TestConverterType("ascii", UCNV_US_ASCII);
1452 TestConverterType("utf-7", UCNV_UTF7);
1453 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1454 TestConverterType("bocu-1", UCNV_BOCU1);
1455 }
1456
1457 static void
1458 TestAmbiguousConverter(UConverter *cnv) {
1459 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1460 UChar outUnicode[20]={ 0, 0, 0, 0 };
1461
1462 const char *s;
1463 UChar *u;
1464 UErrorCode errorCode;
1465 UBool isAmbiguous;
1466
1467 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1468 errorCode=U_ZERO_ERROR;
1469 s=inBytes;
1470 u=outUnicode;
1471 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1472 if(U_FAILURE(errorCode)) {
1473 /* we do not care about general failures in this test; the input may just not be mappable */
1474 return;
1475 }
1476
1477 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1478 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1479 /* There are some encodings that are partially ASCII based,
1480 like the ISO-7 and GSM series of codepages, which we ignore. */
1481 return;
1482 }
1483
1484 isAmbiguous=ucnv_isAmbiguous(cnv);
1485
1486 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1487 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1488 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1489 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1490 return;
1491 }
1492
1493 if(outUnicode[2]!=0x5c) {
1494 /* needs fixup, fix it */
1495 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1496 if(outUnicode[2]!=0x5c) {
1497 /* the fix failed */
1498 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1499 return;
1500 }
1501 }
1502 }
1503
1504 static void TestAmbiguous()
1505 {
1506 UErrorCode status = U_ZERO_ERROR;
1507 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1508 static const char target[] = {
1509 /* "\\usr\\local\\share\\data\\icutest.txt" */
1510 0x5c, 0x75, 0x73, 0x72,
1511 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1512 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1513 0x5c, 0x64, 0x61, 0x74, 0x61,
1514 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1515 0
1516 };
1517 UChar asciiResult[200], sjisResult[200];
1518 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1519 const char *name;
1520
1521 /* enumerate all converters */
1522 status=U_ZERO_ERROR;
1523 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1524 cnv=ucnv_open(name, &status);
1525 if(U_SUCCESS(status)) {
1526 TestAmbiguousConverter(cnv);
1527 ucnv_close(cnv);
1528 } else {
1529 log_err("error: unable to open available converter \"%s\"\n", name);
1530 status=U_ZERO_ERROR;
1531 }
1532 }
1533
1534 #if !UCONFIG_NO_LEGACY_CONVERSION
1535 sjis_cnv = ucnv_open("ibm-943", &status);
1536 if (U_FAILURE(status))
1537 {
1538 log_data_err("Failed to create a SJIS converter\n");
1539 return;
1540 }
1541 ascii_cnv = ucnv_open("LATIN-1", &status);
1542 if (U_FAILURE(status))
1543 {
1544 log_data_err("Failed to create a LATIN-1 converter\n");
1545 ucnv_close(sjis_cnv);
1546 return;
1547 }
1548 /* convert target from SJIS to Unicode */
1549 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1550 if (U_FAILURE(status))
1551 {
1552 log_err("Failed to convert the SJIS string.\n");
1553 ucnv_close(sjis_cnv);
1554 ucnv_close(ascii_cnv);
1555 return;
1556 }
1557 /* convert target from Latin-1 to Unicode */
1558 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1559 if (U_FAILURE(status))
1560 {
1561 log_err("Failed to convert the Latin-1 string.\n");
1562 ucnv_close(sjis_cnv);
1563 ucnv_close(ascii_cnv);
1564 return;
1565 }
1566 if (!ucnv_isAmbiguous(sjis_cnv))
1567 {
1568 log_err("SJIS converter should contain ambiguous character mappings.\n");
1569 ucnv_close(sjis_cnv);
1570 ucnv_close(ascii_cnv);
1571 return;
1572 }
1573 if (u_strcmp(sjisResult, asciiResult) == 0)
1574 {
1575 log_err("File separators for SJIS don't need to be fixed.\n");
1576 }
1577 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1578 if (u_strcmp(sjisResult, asciiResult) != 0)
1579 {
1580 log_err("Fixing file separator for SJIS failed.\n");
1581 }
1582 ucnv_close(sjis_cnv);
1583 ucnv_close(ascii_cnv);
1584 #endif
1585 }
1586
1587 static void
1588 TestSignatureDetection(){
1589 /* with null terminated strings */
1590 {
1591 static const char* data[] = {
1592 "\xFE\xFF\x00\x00", /* UTF-16BE */
1593 "\xFF\xFE\x00\x00", /* UTF-16LE */
1594 "\xEF\xBB\xBF\x00", /* UTF-8 */
1595 "\x0E\xFE\xFF\x00", /* SCSU */
1596
1597 "\xFE\xFF", /* UTF-16BE */
1598 "\xFF\xFE", /* UTF-16LE */
1599 "\xEF\xBB\xBF", /* UTF-8 */
1600 "\x0E\xFE\xFF", /* SCSU */
1601
1602 "\xFE\xFF\x41\x42", /* UTF-16BE */
1603 "\xFF\xFE\x41\x41", /* UTF-16LE */
1604 "\xEF\xBB\xBF\x41", /* UTF-8 */
1605 "\x0E\xFE\xFF\x41", /* SCSU */
1606
1607 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1608 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1609 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1610 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1611 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1612
1613 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1614 };
1615 static const char* expected[] = {
1616 "UTF-16BE",
1617 "UTF-16LE",
1618 "UTF-8",
1619 "SCSU",
1620
1621 "UTF-16BE",
1622 "UTF-16LE",
1623 "UTF-8",
1624 "SCSU",
1625
1626 "UTF-16BE",
1627 "UTF-16LE",
1628 "UTF-8",
1629 "SCSU",
1630
1631 "UTF-7",
1632 "UTF-7",
1633 "UTF-7",
1634 "UTF-7",
1635 "UTF-7",
1636 "UTF-EBCDIC"
1637 };
1638 static const int32_t expectedLength[] ={
1639 2,
1640 2,
1641 3,
1642 3,
1643
1644 2,
1645 2,
1646 3,
1647 3,
1648
1649 2,
1650 2,
1651 3,
1652 3,
1653
1654 5,
1655 4,
1656 4,
1657 4,
1658 4,
1659 4
1660 };
1661 int i=0;
1662 UErrorCode err;
1663 int32_t signatureLength = -1;
1664 const char* source = NULL;
1665 const char* enc = NULL;
1666 for( ; i<UPRV_LENGTHOF(data); i++){
1667 err = U_ZERO_ERROR;
1668 source = data[i];
1669 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1670 if(U_FAILURE(err)){
1671 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1672 continue;
1673 }
1674 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1675 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1676 continue;
1677 }
1678 if(signatureLength != expectedLength[i]){
1679 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1680 }
1681 }
1682 }
1683 {
1684 static const char* data[] = {
1685 "\xFE\xFF\x00", /* UTF-16BE */
1686 "\xFF\xFE\x00", /* UTF-16LE */
1687 "\xEF\xBB\xBF\x00", /* UTF-8 */
1688 "\x0E\xFE\xFF\x00", /* SCSU */
1689 "\x00\x00\xFE\xFF", /* UTF-32BE */
1690 "\xFF\xFE\x00\x00", /* UTF-32LE */
1691 "\xFE\xFF", /* UTF-16BE */
1692 "\xFF\xFE", /* UTF-16LE */
1693 "\xEF\xBB\xBF", /* UTF-8 */
1694 "\x0E\xFE\xFF", /* SCSU */
1695 "\x00\x00\xFE\xFF", /* UTF-32BE */
1696 "\xFF\xFE\x00\x00", /* UTF-32LE */
1697 "\xFE\xFF\x41\x42", /* UTF-16BE */
1698 "\xFF\xFE\x41\x41", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x41", /* UTF-8 */
1700 "\x0E\xFE\xFF\x41", /* SCSU */
1701 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1703 "\xFB\xEE\x28", /* BOCU-1 */
1704 "\xFF\x41\x42" /* NULL */
1705 };
1706 static const int len[] = {
1707 3,
1708 3,
1709 4,
1710 4,
1711 4,
1712 4,
1713 2,
1714 2,
1715 3,
1716 3,
1717 4,
1718 4,
1719 4,
1720 4,
1721 4,
1722 4,
1723 5,
1724 5,
1725 3,
1726 3
1727 };
1728
1729 static const char* expected[] = {
1730 "UTF-16BE",
1731 "UTF-16LE",
1732 "UTF-8",
1733 "SCSU",
1734 "UTF-32BE",
1735 "UTF-32LE",
1736 "UTF-16BE",
1737 "UTF-16LE",
1738 "UTF-8",
1739 "SCSU",
1740 "UTF-32BE",
1741 "UTF-32LE",
1742 "UTF-16BE",
1743 "UTF-16LE",
1744 "UTF-8",
1745 "SCSU",
1746 "UTF-32BE",
1747 "UTF-32LE",
1748 "BOCU-1",
1749 NULL
1750 };
1751 static const int32_t expectedLength[] ={
1752 2,
1753 2,
1754 3,
1755 3,
1756 4,
1757 4,
1758 2,
1759 2,
1760 3,
1761 3,
1762 4,
1763 4,
1764 2,
1765 2,
1766 3,
1767 3,
1768 4,
1769 4,
1770 3,
1771 0
1772 };
1773 int i=0;
1774 UErrorCode err;
1775 int32_t signatureLength = -1;
1776 int32_t sourceLength=-1;
1777 const char* source = NULL;
1778 const char* enc = NULL;
1779 for( ; i<UPRV_LENGTHOF(data); i++){
1780 err = U_ZERO_ERROR;
1781 source = data[i];
1782 sourceLength = len[i];
1783 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1784 if(U_FAILURE(err)){
1785 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1786 continue;
1787 }
1788 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1789 if(expected[i] !=NULL){
1790 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1791 continue;
1792 }
1793 }
1794 if(signatureLength != expectedLength[i]){
1795 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1796 }
1797 }
1798 }
1799 }
1800
1801 static void TestUTF7() {
1802 /* test input */
1803 static const uint8_t in[]={
1804 /* H - +Jjo- - ! +- +2AHcAQ */
1805 0x48,
1806 0x2d,
1807 0x2b, 0x4a, 0x6a, 0x6f,
1808 0x2d, 0x2d,
1809 0x21,
1810 0x2b, 0x2d,
1811 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1812 };
1813
1814 /* expected test results */
1815 static const int32_t results[]={
1816 /* number of bytes read, code point */
1817 1, 0x48,
1818 1, 0x2d,
1819 4, 0x263a, /* <WHITE SMILING FACE> */
1820 2, 0x2d,
1821 1, 0x21,
1822 2, 0x2b,
1823 7, 0x10401
1824 };
1825
1826 const char *cnvName;
1827 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1828 UErrorCode errorCode=U_ZERO_ERROR;
1829 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1830 if(U_FAILURE(errorCode)) {
1831 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1832 return;
1833 }
1834 TestNextUChar(cnv, source, limit, results, "UTF-7");
1835 /* Test the condition when source >= sourceLimit */
1836 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1837 cnvName = ucnv_getName(cnv, &errorCode);
1838 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1839 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1840 }
1841 ucnv_close(cnv);
1842 }
1843
1844 static void TestIMAP() {
1845 /* test input */
1846 static const uint8_t in[]={
1847 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1848 0x48,
1849 0x2d,
1850 0x26, 0x4a, 0x6a, 0x6f,
1851 0x2d, 0x2d,
1852 0x21,
1853 0x26, 0x2d,
1854 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1855 };
1856
1857 /* expected test results */
1858 static const int32_t results[]={
1859 /* number of bytes read, code point */
1860 1, 0x48,
1861 1, 0x2d,
1862 4, 0x263a, /* <WHITE SMILING FACE> */
1863 2, 0x2d,
1864 1, 0x21,
1865 2, 0x26,
1866 7, 0x10401
1867 };
1868
1869 const char *cnvName;
1870 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1871 UErrorCode errorCode=U_ZERO_ERROR;
1872 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1873 if(U_FAILURE(errorCode)) {
1874 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1875 return;
1876 }
1877 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1878 /* Test the condition when source >= sourceLimit */
1879 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1880 cnvName = ucnv_getName(cnv, &errorCode);
1881 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1882 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1883 }
1884 ucnv_close(cnv);
1885 }
1886
1887 static void TestUTF8() {
1888 /* test input */
1889 static const uint8_t in[]={
1890 0x61,
1891 0xc2, 0x80,
1892 0xe0, 0xa0, 0x80,
1893 0xf0, 0x90, 0x80, 0x80,
1894 0xf4, 0x84, 0x8c, 0xa1,
1895 0xf0, 0x90, 0x90, 0x81
1896 };
1897
1898 /* expected test results */
1899 static const int32_t results[]={
1900 /* number of bytes read, code point */
1901 1, 0x61,
1902 2, 0x80,
1903 3, 0x800,
1904 4, 0x10000,
1905 4, 0x104321,
1906 4, 0x10401
1907 };
1908
1909 /* error test input */
1910 static const uint8_t in2[]={
1911 0x61,
1912 0xc0, 0x80, /* illegal non-shortest form */
1913 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1914 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1915 0xc0, 0xc0, /* illegal trail byte */
1916 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1917 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1918 0xfe, /* illegal byte altogether */
1919 0x62
1920 };
1921
1922 /* expected error test results */
1923 static const int32_t results2[]={
1924 /* number of bytes read, code point */
1925 1, 0x61,
1926 22, 0x62
1927 };
1928
1929 UConverterToUCallback cb;
1930 const void *p;
1931
1932 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1933 UErrorCode errorCode=U_ZERO_ERROR;
1934 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1935 if(U_FAILURE(errorCode)) {
1936 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1937 return;
1938 }
1939 TestNextUChar(cnv, source, limit, results, "UTF-8");
1940 /* Test the condition when source >= sourceLimit */
1941 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1942
1943 /* test error behavior with a skip callback */
1944 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1945 source=(const char *)in2;
1946 limit=(const char *)(in2+sizeof(in2));
1947 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1948
1949 ucnv_close(cnv);
1950 }
1951
1952 static void TestCESU8() {
1953 /* test input */
1954 static const uint8_t in[]={
1955 0x61,
1956 0xc2, 0x80,
1957 0xe0, 0xa0, 0x80,
1958 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1959 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1960 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1961 0xef, 0xbf, 0xbc
1962 };
1963
1964 /* expected test results */
1965 static const int32_t results[]={
1966 /* number of bytes read, code point */
1967 1, 0x61,
1968 2, 0x80,
1969 3, 0x800,
1970 6, 0x10000,
1971 3, 0xdc01,
1972 -1,0xd802, /* may read 3 or 6 bytes */
1973 -1,0x10ffff,/* may read 0 or 3 bytes */
1974 3, 0xfffc
1975 };
1976
1977 /* error test input */
1978 static const uint8_t in2[]={
1979 0x61,
1980 0xc0, 0x80, /* illegal non-shortest form */
1981 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1982 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1983 0xc0, 0xc0, /* illegal trail byte */
1984 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1985 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1986 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1987 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1988 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1989 0xfe, /* illegal byte altogether */
1990 0x62
1991 };
1992
1993 /* expected error test results */
1994 static const int32_t results2[]={
1995 /* number of bytes read, code point */
1996 1, 0x61,
1997 34, 0x62
1998 };
1999
2000 UConverterToUCallback cb;
2001 const void *p;
2002
2003 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2004 UErrorCode errorCode=U_ZERO_ERROR;
2005 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2006 if(U_FAILURE(errorCode)) {
2007 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2008 return;
2009 }
2010 TestNextUChar(cnv, source, limit, results, "CESU-8");
2011 /* Test the condition when source >= sourceLimit */
2012 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2013
2014 /* test error behavior with a skip callback */
2015 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2016 source=(const char *)in2;
2017 limit=(const char *)(in2+sizeof(in2));
2018 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2019
2020 ucnv_close(cnv);
2021 }
2022
2023 static void TestUTF16() {
2024 /* test input */
2025 static const uint8_t in1[]={
2026 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2027 };
2028 static const uint8_t in2[]={
2029 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2030 };
2031 static const uint8_t in3[]={
2032 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2033 };
2034
2035 /* expected test results */
2036 static const int32_t results1[]={
2037 /* number of bytes read, code point */
2038 4, 0x4e00,
2039 2, 0xfeff
2040 };
2041 static const int32_t results2[]={
2042 /* number of bytes read, code point */
2043 4, 0x004e,
2044 2, 0xfffe
2045 };
2046 static const int32_t results3[]={
2047 /* number of bytes read, code point */
2048 2, 0xfefe,
2049 2, 0x4e00,
2050 2, 0xfeff,
2051 4, 0x20001
2052 };
2053
2054 const char *source, *limit;
2055
2056 UErrorCode errorCode=U_ZERO_ERROR;
2057 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2058 if(U_FAILURE(errorCode)) {
2059 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2060 return;
2061 }
2062
2063 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2064 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2065
2066 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2067 ucnv_resetToUnicode(cnv);
2068 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2069
2070 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2071 ucnv_resetToUnicode(cnv);
2072 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2073
2074 /* Test the condition when source >= sourceLimit */
2075 ucnv_resetToUnicode(cnv);
2076 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2077
2078 ucnv_close(cnv);
2079 }
2080
2081 static void TestUTF16BE() {
2082 /* test input */
2083 static const uint8_t in[]={
2084 0x00, 0x61,
2085 0x00, 0xc0,
2086 0x00, 0x31,
2087 0x00, 0xf4,
2088 0xce, 0xfe,
2089 0xd8, 0x01, 0xdc, 0x01
2090 };
2091
2092 /* expected test results */
2093 static const int32_t results[]={
2094 /* number of bytes read, code point */
2095 2, 0x61,
2096 2, 0xc0,
2097 2, 0x31,
2098 2, 0xf4,
2099 2, 0xcefe,
2100 4, 0x10401
2101 };
2102
2103 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2104 UErrorCode errorCode=U_ZERO_ERROR;
2105 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2106 if(U_FAILURE(errorCode)) {
2107 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2108 return;
2109 }
2110 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2111 /* Test the condition when source >= sourceLimit */
2112 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2113 /*Test for the condition where there is an invalid character*/
2114 {
2115 static const uint8_t source2[]={0x61};
2116 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2117 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2118 }
2119 #if 0
2120 /*
2121 * Test disabled because currently the UTF-16BE/LE converters are supposed
2122 * to not set errors for unpaired surrogates.
2123 * This may change with
2124 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2125 */
2126
2127 /*Test for the condition where there is a surrogate pair*/
2128 {
2129 const uint8_t source2[]={0xd8, 0x01};
2130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2131 }
2132 #endif
2133 ucnv_close(cnv);
2134 }
2135
2136 static void
2137 TestUTF16LE() {
2138 /* test input */
2139 static const uint8_t in[]={
2140 0x61, 0x00,
2141 0x31, 0x00,
2142 0x4e, 0x2e,
2143 0x4e, 0x00,
2144 0x01, 0xd8, 0x01, 0xdc
2145 };
2146
2147 /* expected test results */
2148 static const int32_t results[]={
2149 /* number of bytes read, code point */
2150 2, 0x61,
2151 2, 0x31,
2152 2, 0x2e4e,
2153 2, 0x4e,
2154 4, 0x10401
2155 };
2156
2157 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2158 UErrorCode errorCode=U_ZERO_ERROR;
2159 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2160 if(U_FAILURE(errorCode)) {
2161 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2162 return;
2163 }
2164 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2165 /* Test the condition when source >= sourceLimit */
2166 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2167 /*Test for the condition where there is an invalid character*/
2168 {
2169 static const uint8_t source2[]={0x61};
2170 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2171 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2172 }
2173 #if 0
2174 /*
2175 * Test disabled because currently the UTF-16BE/LE converters are supposed
2176 * to not set errors for unpaired surrogates.
2177 * This may change with
2178 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2179 */
2180
2181 /*Test for the condition where there is a surrogate character*/
2182 {
2183 static const uint8_t source2[]={0x01, 0xd8};
2184 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2185 }
2186 #endif
2187
2188 ucnv_close(cnv);
2189 }
2190
2191 static void TestUTF32() {
2192 /* test input */
2193 static const uint8_t in1[]={
2194 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2195 };
2196 static const uint8_t in2[]={
2197 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2198 };
2199 static const uint8_t in3[]={
2200 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2201 };
2202
2203 /* expected test results */
2204 static const int32_t results1[]={
2205 /* number of bytes read, code point */
2206 8, 0x100f00,
2207 4, 0xfeff
2208 };
2209 static const int32_t results2[]={
2210 /* number of bytes read, code point */
2211 8, 0x0f1000,
2212 4, 0xfffe
2213 };
2214 static const int32_t results3[]={
2215 /* number of bytes read, code point */
2216 4, 0xfefe,
2217 4, 0x100f00,
2218 4, 0xfffd, /* unmatched surrogate */
2219 4, 0xfffd /* unmatched surrogate */
2220 };
2221
2222 const char *source, *limit;
2223
2224 UErrorCode errorCode=U_ZERO_ERROR;
2225 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2226 if(U_FAILURE(errorCode)) {
2227 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2228 return;
2229 }
2230
2231 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2232 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2233
2234 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2235 ucnv_resetToUnicode(cnv);
2236 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2237
2238 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2239 ucnv_resetToUnicode(cnv);
2240 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2241
2242 /* Test the condition when source >= sourceLimit */
2243 ucnv_resetToUnicode(cnv);
2244 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2245
2246 ucnv_close(cnv);
2247 }
2248
2249 static void
2250 TestUTF32BE() {
2251 /* test input */
2252 static const uint8_t in[]={
2253 0x00, 0x00, 0x00, 0x61,
2254 0x00, 0x00, 0x30, 0x61,
2255 0x00, 0x00, 0xdc, 0x00,
2256 0x00, 0x00, 0xd8, 0x00,
2257 0x00, 0x00, 0xdf, 0xff,
2258 0x00, 0x00, 0xff, 0xfe,
2259 0x00, 0x10, 0xab, 0xcd,
2260 0x00, 0x10, 0xff, 0xff
2261 };
2262
2263 /* expected test results */
2264 static const int32_t results[]={
2265 /* number of bytes read, code point */
2266 4, 0x61,
2267 4, 0x3061,
2268 4, 0xfffd,
2269 4, 0xfffd,
2270 4, 0xfffd,
2271 4, 0xfffe,
2272 4, 0x10abcd,
2273 4, 0x10ffff
2274 };
2275
2276 /* error test input */
2277 static const uint8_t in2[]={
2278 0x00, 0x00, 0x00, 0x61,
2279 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2280 0x00, 0x00, 0x00, 0x62,
2281 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2282 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2283 0x00, 0x00, 0x01, 0x62,
2284 0x00, 0x00, 0x02, 0x62
2285 };
2286
2287 /* expected error test results */
2288 static const int32_t results2[]={
2289 /* number of bytes read, code point */
2290 4, 0x61,
2291 8, 0x62,
2292 12, 0x162,
2293 4, 0x262
2294 };
2295
2296 UConverterToUCallback cb;
2297 const void *p;
2298
2299 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2300 UErrorCode errorCode=U_ZERO_ERROR;
2301 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2302 if(U_FAILURE(errorCode)) {
2303 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2304 return;
2305 }
2306 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2307
2308 /* Test the condition when source >= sourceLimit */
2309 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2310
2311 /* test error behavior with a skip callback */
2312 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2313 source=(const char *)in2;
2314 limit=(const char *)(in2+sizeof(in2));
2315 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2316
2317 ucnv_close(cnv);
2318 }
2319
2320 static void
2321 TestUTF32LE() {
2322 /* test input */
2323 static const uint8_t in[]={
2324 0x61, 0x00, 0x00, 0x00,
2325 0x61, 0x30, 0x00, 0x00,
2326 0x00, 0xdc, 0x00, 0x00,
2327 0x00, 0xd8, 0x00, 0x00,
2328 0xff, 0xdf, 0x00, 0x00,
2329 0xfe, 0xff, 0x00, 0x00,
2330 0xcd, 0xab, 0x10, 0x00,
2331 0xff, 0xff, 0x10, 0x00
2332 };
2333
2334 /* expected test results */
2335 static const int32_t results[]={
2336 /* number of bytes read, code point */
2337 4, 0x61,
2338 4, 0x3061,
2339 4, 0xfffd,
2340 4, 0xfffd,
2341 4, 0xfffd,
2342 4, 0xfffe,
2343 4, 0x10abcd,
2344 4, 0x10ffff
2345 };
2346
2347 /* error test input */
2348 static const uint8_t in2[]={
2349 0x61, 0x00, 0x00, 0x00,
2350 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2351 0x62, 0x00, 0x00, 0x00,
2352 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2353 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2354 0x62, 0x01, 0x00, 0x00,
2355 0x62, 0x02, 0x00, 0x00,
2356 };
2357
2358 /* expected error test results */
2359 static const int32_t results2[]={
2360 /* number of bytes read, code point */
2361 4, 0x61,
2362 8, 0x62,
2363 12, 0x162,
2364 4, 0x262,
2365 };
2366
2367 UConverterToUCallback cb;
2368 const void *p;
2369
2370 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2371 UErrorCode errorCode=U_ZERO_ERROR;
2372 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2373 if(U_FAILURE(errorCode)) {
2374 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2375 return;
2376 }
2377 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2378
2379 /* Test the condition when source >= sourceLimit */
2380 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2381
2382 /* test error behavior with a skip callback */
2383 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2384 source=(const char *)in2;
2385 limit=(const char *)(in2+sizeof(in2));
2386 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2387
2388 ucnv_close(cnv);
2389 }
2390
2391 static void
2392 TestLATIN1() {
2393 /* test input */
2394 static const uint8_t in[]={
2395 0x61,
2396 0x31,
2397 0x32,
2398 0xc0,
2399 0xf0,
2400 0xf4,
2401 };
2402
2403 /* expected test results */
2404 static const int32_t results[]={
2405 /* number of bytes read, code point */
2406 1, 0x61,
2407 1, 0x31,
2408 1, 0x32,
2409 1, 0xc0,
2410 1, 0xf0,
2411 1, 0xf4,
2412 };
2413 static const uint16_t in1[] = {
2414 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2415 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2416 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2417 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2418 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2419 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2420 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2421 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2422 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2423 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2424 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2425 0xcb, 0x82
2426 };
2427 static const uint8_t out1[] = {
2428 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2429 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2430 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2431 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2432 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2433 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2434 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2435 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2436 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2437 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2438 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2439 0xcb, 0x82
2440 };
2441 static const uint16_t in2[]={
2442 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2443 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2444 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2445 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2446 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2447 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2448 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2449 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2450 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2451 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2452 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2453 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2454 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2455 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2456 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2457 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2458 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2459 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2460 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2461 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2462 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2463 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2464 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2465 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2466 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2467 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2468 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2469 0x37, 0x20, 0x2A, 0x2F,
2470 };
2471 static const unsigned char out2[]={
2472 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2473 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2474 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2475 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2476 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2477 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2478 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2479 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2480 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2481 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2482 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2483 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2484 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2485 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2486 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2487 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2488 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2489 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2490 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2491 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2492 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2493 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2494 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2495 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2496 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2497 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2498 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2499 0x37, 0x20, 0x2A, 0x2F,
2500 };
2501 const char *source=(const char *)in;
2502 const char *limit=(const char *)in+sizeof(in);
2503
2504 UErrorCode errorCode=U_ZERO_ERROR;
2505 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2506 if(U_FAILURE(errorCode)) {
2507 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2508 return;
2509 }
2510 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2511 /* Test the condition when source >= sourceLimit */
2512 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2513 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2514 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2515
2516 ucnv_close(cnv);
2517 }
2518
2519 static void
2520 TestSBCS() {
2521 /* test input */
2522 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2523 /* expected test results */
2524 static const int32_t results[]={
2525 /* number of bytes read, code point */
2526 1, 0x61,
2527 1, 0xbf,
2528 1, 0xc4,
2529 1, 0x2021,
2530 1, 0xf8ff,
2531 1, 0x00d9
2532 };
2533
2534 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2535 UErrorCode errorCode=U_ZERO_ERROR;
2536 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2537 if(U_FAILURE(errorCode)) {
2538 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2539 return;
2540 }
2541 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2542 /* Test the condition when source >= sourceLimit */
2543 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2544 /*Test for Illegal character */ /*
2545 {
2546 static const uint8_t input1[]={ 0xA1 };
2547 const char* illegalsource=(const char*)input1;
2548 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2549 }
2550 */
2551 ucnv_close(cnv);
2552 }
2553
2554 static void
2555 TestDBCS() {
2556 /* test input */
2557 static const uint8_t in[]={
2558 0x44, 0x6a,
2559 0xc4, 0x9c,
2560 0x7a, 0x74,
2561 0x46, 0xab,
2562 0x42, 0x5b,
2563
2564 };
2565
2566 /* expected test results */
2567 static const int32_t results[]={
2568 /* number of bytes read, code point */
2569 2, 0x00a7,
2570 2, 0xe1d2,
2571 2, 0x6962,
2572 2, 0xf842,
2573 2, 0xffe5,
2574 };
2575
2576 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2577 UErrorCode errorCode=U_ZERO_ERROR;
2578
2579 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2580 if(U_FAILURE(errorCode)) {
2581 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2582 return;
2583 }
2584 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2585 /* Test the condition when source >= sourceLimit */
2586 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2587 /*Test for the condition where there is an invalid character*/
2588 {
2589 static const uint8_t source2[]={0x1a, 0x1b};
2590 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2591 }
2592 /*Test for the condition where we have a truncated char*/
2593 {
2594 static const uint8_t source1[]={0xc4};
2595 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2596 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2597 }
2598 ucnv_close(cnv);
2599 }
2600
2601 static void
2602 TestMBCS() {
2603 /* test input */
2604 static const uint8_t in[]={
2605 0x01,
2606 0xa6, 0xa3,
2607 0x00,
2608 0xa6, 0xa1,
2609 0x08,
2610 0xc2, 0x76,
2611 0xc2, 0x78,
2612
2613 };
2614
2615 /* expected test results */
2616 static const int32_t results[]={
2617 /* number of bytes read, code point */
2618 1, 0x0001,
2619 2, 0x250c,
2620 1, 0x0000,
2621 2, 0x2500,
2622 1, 0x0008,
2623 2, 0xd60c,
2624 2, 0xd60e,
2625 };
2626
2627 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2628 UErrorCode errorCode=U_ZERO_ERROR;
2629
2630 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2631 if(U_FAILURE(errorCode)) {
2632 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2633 return;
2634 }
2635 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2636 /* Test the condition when source >= sourceLimit */
2637 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2638 /*Test for the condition where there is an invalid character*/
2639 {
2640 static const uint8_t source2[]={0xa1, 0x80};
2641 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2642 }
2643 /*Test for the condition where we have a truncated char*/
2644 {
2645 static const uint8_t source1[]={0xc4};
2646 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2647 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2648 }
2649 ucnv_close(cnv);
2650
2651 }
2652
2653 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2654 static void
2655 TestICCRunout() {
2656 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2657
2658 const char *cnvName = "ibm-1363";
2659 UErrorCode status = U_ZERO_ERROR;
2660 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2661 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2662 const char *source = sourceData;
2663 const char *sourceLim = sourceData+sizeof(sourceData);
2664 UChar c1, c2, c3;
2665 UConverter *cnv=ucnv_open(cnvName, &status);
2666 if(U_FAILURE(status)) {
2667 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2668 return;
2669 }
2670
2671 #if 0
2672 {
2673 UChar targetBuf[256];
2674 UChar *target = targetBuf;
2675 UChar *targetLim = target+256;
2676 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2677
2678 log_info("After convert: target@%d, source@%d, status%s\n",
2679 target-targetBuf, source-sourceData, u_errorName(status));
2680
2681 if(U_FAILURE(status)) {
2682 log_err("Failed to convert: %s\n", u_errorName(status));
2683 } else {
2684
2685 }
2686 }
2687 #endif
2688
2689 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2690 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2691
2692 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2693 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2694
2695 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2696 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2697
2698 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2699 log_verbose("OK\n");
2700 } else {
2701 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2702 }
2703
2704 ucnv_close(cnv);
2705
2706 }
2707 #endif
2708
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs the generic ISO_2022 converter through ucnv_getNextUChar():
 * input consisting of the escape sequence 1b 25 42 followed by UTF-8
 * bytes, a source limit below the source pointer, an empty source
 * range, a truncated character and an illegal sequence.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.
     * The pair (source+1, source) is used rather than (source, source-1):
     * source points at the first element of in[], and forming a pointer
     * before the start of an array is undefined behavior in C.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source == sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2766
2767 static void
2768 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2769 const UChar* uSource;
2770 const UChar* uSourceLimit;
2771 const char* cSource;
2772 const char* cSourceLimit;
2773 UChar *uTargetLimit =NULL;
2774 UChar *uTarget;
2775 char *cTarget;
2776 const char *cTargetLimit;
2777 char *cBuf;
2778 UChar *uBuf; /*,*test;*/
2779 int32_t uBufSize = 120;
2780 int len=0;
2781 int i=2;
2782 UErrorCode errorCode=U_ZERO_ERROR;
2783 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2784 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2785 ucnv_reset(cnv);
2786 for(;--i>0; ){
2787 uSource = (UChar*) source;
2788 uSourceLimit=(const UChar*)sourceLimit;
2789 cTarget = cBuf;
2790 uTarget = uBuf;
2791 cSource = cBuf;
2792 cTargetLimit = cBuf;
2793 uTargetLimit = uBuf;
2794
2795 do{
2796
2797 cTargetLimit = cTargetLimit+ i;
2798 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2799 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2800 errorCode=U_ZERO_ERROR;
2801 continue;
2802 }
2803
2804 if(U_FAILURE(errorCode)){
2805 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2806 return;
2807 }
2808
2809 }while (uSource<uSourceLimit);
2810
2811 cSourceLimit =cTarget;
2812 do{
2813 uTargetLimit=uTargetLimit+i;
2814 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2815 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2816 errorCode=U_ZERO_ERROR;
2817 continue;
2818 }
2819 if(U_FAILURE(errorCode)){
2820 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2821 return;
2822 }
2823 }while(cSource<cSourceLimit);
2824
2825 uSource = source;
2826 /*test =uBuf;*/
2827 for(len=0;len<(int)(source - sourceLimit);len++){
2828 if(uBuf[len]!=uSource[len]){
2829 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2830 }
2831 }
2832 }
2833 free(uBuf);
2834 free(cBuf);
2835 }
2836 /* Test for Jitterbug 778 */
2837 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2838 const UChar* uSource;
2839 const UChar* uSourceLimit;
2840 const char* cSource;
2841 UChar *uTargetLimit =NULL;
2842 UChar *uTarget;
2843 char *cTarget;
2844 const char *cTargetLimit;
2845 char *cBuf;
2846 UChar *uBuf,*test;
2847 int32_t uBufSize = 120;
2848 int numCharsInTarget=0;
2849 UErrorCode errorCode=U_ZERO_ERROR;
2850 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2851 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2852 uSource = source;
2853 uSourceLimit=sourceLimit;
2854 cTarget = cBuf;
2855 cTargetLimit = cBuf +uBufSize*5;
2856 uTarget = uBuf;
2857 uTargetLimit = uBuf+ uBufSize*5;
2858 ucnv_reset(cnv);
2859 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2860 if(U_FAILURE(errorCode)){
2861 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2862 return;
2863 }
2864 cSource = cBuf;
2865 test =uBuf;
2866 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2867 if(U_FAILURE(errorCode)){
2868 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2869 return;
2870 }
2871 uSource = source;
2872 while(uSource<uSourceLimit){
2873 if(*test!=*uSource){
2874
2875 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2876 }
2877 uSource++;
2878 test++;
2879 }
2880 free(uBuf);
2881 free(cBuf);
2882 }
2883
2884 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2885 const UChar* uSource;
2886 const UChar* uSourceLimit;
2887 const char* cSource;
2888 const char* cSourceLimit;
2889 UChar *uTargetLimit =NULL;
2890 UChar *uTarget;
2891 char *cTarget;
2892 const char *cTargetLimit;
2893 char *cBuf;
2894 UChar *uBuf; /*,*test;*/
2895 int32_t uBufSize = 120;
2896 int len=0;
2897 int i=2;
2898 const UChar *temp = sourceLimit;
2899 UErrorCode errorCode=U_ZERO_ERROR;
2900 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2901 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2902
2903 ucnv_reset(cnv);
2904 for(;--i>0;){
2905 uSource = (UChar*) source;
2906 cTarget = cBuf;
2907 uTarget = uBuf;
2908 cSource = cBuf;
2909 cTargetLimit = cBuf;
2910 uTargetLimit = uBuf+uBufSize*5;
2911 cTargetLimit = cTargetLimit+uBufSize*10;
2912 uSourceLimit=uSource;
2913 do{
2914
2915 if (uSourceLimit < sourceLimit) {
2916 uSourceLimit = uSourceLimit+1;
2917 }
2918 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2919 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2920 errorCode=U_ZERO_ERROR;
2921 continue;
2922 }
2923
2924 if(U_FAILURE(errorCode)){
2925 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2926 return;
2927 }
2928
2929 }while (uSource<temp);
2930
2931 cSourceLimit =cBuf;
2932 do{
2933 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2934 cSourceLimit = cSourceLimit+1;
2935 }
2936 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2937 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2938 errorCode=U_ZERO_ERROR;
2939 continue;
2940 }
2941 if(U_FAILURE(errorCode)){
2942 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2943 return;
2944 }
2945 }while(cSource<cTarget);
2946
2947 uSource = source;
2948 /*test =uBuf;*/
2949 for(;len<(int)(source - sourceLimit);len++){
2950 if(uBuf[len]!=uSource[len]){
2951 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2952 }
2953 }
2954 }
2955 free(uBuf);
2956 free(cBuf);
2957 }
2958 static void
2959 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2960 const uint16_t results[], const char* message){
2961 /* const char* s0; */
2962 const char* s=(char*)source;
2963 const uint16_t *r=results;
2964 UErrorCode errorCode=U_ZERO_ERROR;
2965 uint32_t c,exC;
2966 ucnv_reset(cnv);
2967 while(s<limit) {
2968 /* s0=s; */
2969 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2970 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2971 break; /* no more significant input */
2972 } else if(U_FAILURE(errorCode)) {
2973 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2974 break;
2975 } else {
2976 if(U16_IS_LEAD(*r)){
2977 int i =0, len = 2;
2978 U16_NEXT(r, i, len, exC);
2979 r++;
2980 }else{
2981 exC = *r;
2982 }
2983 if(c!=(uint32_t)(exC))
2984 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2985 }
2986 r++;
2987 }
2988 }
2989
2990 static int TestJitterbug930(const char* enc){
2991 UErrorCode err = U_ZERO_ERROR;
2992 UConverter*converter;
2993 char out[80];
2994 char*target = out;
2995 UChar in[4];
2996 const UChar*source = in;
2997 int32_t off[80];
2998 int32_t* offsets = off;
2999 int numOffWritten=0;
3000 UBool flush = 0;
3001 converter = my_ucnv_open(enc, &err);
3002
3003 in[0] = 0x41; /* 0x4E00;*/
3004 in[1] = 0x4E01;
3005 in[2] = 0x4E02;
3006 in[3] = 0x4E03;
3007
3008 memset(off, '*', sizeof(off));
3009
3010 ucnv_fromUnicode (converter,
3011 &target,
3012 target+2,
3013 &source,
3014 source+3,
3015 offsets,
3016 flush,
3017 &err);
3018
3019 /* writes three bytes into the output buffer: 41 1B 24
3020 * but offsets contains 0 1 1
3021 */
3022 while(*offsets< off[10]){
3023 numOffWritten++;
3024 offsets++;
3025 }
3026 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3027 if(numOffWritten!= (int)(target-out)){
3028 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3029 }
3030
3031 err = U_ZERO_ERROR;
3032
3033 memset(off,'*' , sizeof(off));
3034
3035 flush = 1;
3036 offsets=off;
3037 ucnv_fromUnicode (converter,
3038 &target,
3039 target+4,
3040 &source,
3041 source,
3042 offsets,
3043 flush,
3044 &err);
3045 numOffWritten=0;
3046 while(*offsets< off[10]){
3047 numOffWritten++;
3048 if(*offsets!= -1){
3049 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3050 }
3051 offsets++;
3052 }
3053
3054 /* writes 42 43 7A into output buffer,
3055 * offsets contains -1 -1 -1
3056 */
3057 ucnv_close(converter);
3058 return 0;
3059 }
3060
3061 static void
3062 TestHZ() {
3063 /* test input */
3064 static const uint16_t in[]={
3065 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3066 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3067 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3068 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3069 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3070 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3071 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3072 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3073 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3074 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3075 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3076 0x005A, 0x005B, 0x005C, 0x000A
3077 };
3078 const UChar* uSource;
3079 const UChar* uSourceLimit;
3080 const char* cSource;
3081 const char* cSourceLimit;
3082 UChar *uTargetLimit =NULL;
3083 UChar *uTarget;
3084 char *cTarget;
3085 const char *cTargetLimit;
3086 char *cBuf;
3087 UChar *uBuf,*test;
3088 int32_t uBufSize = 120;
3089 UErrorCode errorCode=U_ZERO_ERROR;
3090 UConverter *cnv;
3091 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3092 int32_t* myOff= offsets;
3093 cnv=ucnv_open("HZ", &errorCode);
3094 if(U_FAILURE(errorCode)) {
3095 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3096 return;
3097 }
3098
3099 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3100 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3101 uSource = (const UChar*)in;
3102 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3103 cTarget = cBuf;
3104 cTargetLimit = cBuf +uBufSize*5;
3105 uTarget = uBuf;
3106 uTargetLimit = uBuf+ uBufSize*5;
3107 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3108 if(U_FAILURE(errorCode)){
3109 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3110 return;
3111 }
3112 cSource = cBuf;
3113 cSourceLimit =cTarget;
3114 test =uBuf;
3115 myOff=offsets;
3116 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3117 if(U_FAILURE(errorCode)){
3118 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3119 return;
3120 }
3121 uSource = (const UChar*)in;
3122 while(uSource<uSourceLimit){
3123 if(*test!=*uSource){
3124
3125 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3126 }
3127 uSource++;
3128 test++;
3129 }
3130 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3131 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3132 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3133 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3134 TestJitterbug930("csISO2022JP");
3135 ucnv_close(cnv);
3136 free(offsets);
3137 free(uBuf);
3138 free(cBuf);
3139 }
3140
3141 static void
3142 TestISCII(){
3143 /* test input */
3144 static const uint16_t in[]={
3145 /* test full range of Devanagari */
3146 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3147 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3148 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3149 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3150 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3151 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3152 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3153 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3154 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3155 0x096D,0x096E,0x096F,
3156 /* test Soft halant*/
3157 0x0915,0x094d, 0x200D,
3158 /* test explicit halant */
3159 0x0915,0x094d, 0x200c,
3160 /* test double danda */
3161 0x965,
3162 /* test ASCII */
3163 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3164 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3165 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3166 /* tests from Lotus */
3167 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3168 0x0930,0x094D,0x200D,
3169 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3170 0x0915,0x0921,0x002B,0x095F,
3171 /* tamil range */
3172 0x0B86, 0xB87, 0xB88,
3173 /* telugu range */
3174 0x0C05, 0x0C02, 0x0C03,0x0c31,
3175 /* kannada range */
3176 0x0C85, 0xC82, 0x0C83,
3177 /* test Abbr sign and Anudatta */
3178 0x0970, 0x952,
3179 /* 0x0958,
3180 0x0959,
3181 0x095A,
3182 0x095B,
3183 0x095C,
3184 0x095D,
3185 0x095E,
3186 0x095F,*/
3187 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3188 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3189 0x090C ,
3190 0x0962,
3191 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3192 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3193 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3194 0x093D /* Avagraha 0xEA, 0xE9*/,
3195 0x0958,
3196 0x0959,
3197 0x095A,
3198 0x095B,
3199 0x095C,
3200 0x095D,
3201 0x095E,
3202 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3203 };
3204 static const unsigned char byteArr[]={
3205
3206 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3207 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3208 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3209 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3210 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3211 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3212 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3213 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3214 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3215 0xf8,0xf9,0xfa,
3216 /* test soft halant */
3217 0xb3, 0xE8, 0xE9,
3218 /* test explicit halant */
3219 0xb3, 0xE8, 0xE8,
3220 /* test double danda */
3221 0xea, 0xea,
3222 /* test ASCII */
3223 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3224 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3225 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3226 /* test ATR code */
3227
3228 /* tests from Lotus */
3229 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3230 0xEF,0x42,0xCF,0xE8,0xD9,
3231 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3232 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3233 /* tamil range */
3234 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3235 /* telugu range */
3236 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3237 /* kannada range */
3238 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3239 /* anudatta and abbreviation sign */
3240 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3241
3242
3243 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3244
3245 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3246
3247 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3248
3249 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3250
3251 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3252
3253 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3254
3255 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3256
3257 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3258
3259 0xB3, 0xE9, /* Ka + NUKTA */
3260
3261 0xB4, 0xE9, /* Kha + NUKTA */
3262
3263 0xB5, 0xE9, /* Ga + NUKTA */
3264
3265 0xBA, 0xE9,
3266
3267 0xBF, 0xE9,
3268
3269 0xC0, 0xE9,
3270
3271 0xC9, 0xE9,
3272 /* INV halant RA */
3273 0xD9, 0xE8, 0xCF,
3274 0x00, 0x00A0,
3275 /* just consume unhandled codepoints */
3276 0xEF, 0x30,
3277
3278 };
3279 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3280 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3281
3282 }
3283
3284 static void
3285 TestISO_2022_JP() {
3286 /* test input */
3287 static const uint16_t in[]={
3288 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3289 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3290 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3291 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3292 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3293 0x201D, 0x3014, 0x000D, 0x000A,
3294 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3295 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3296 };
3297 const UChar* uSource;
3298 const UChar* uSourceLimit;
3299 const char* cSource;
3300 const char* cSourceLimit;
3301 UChar *uTargetLimit =NULL;
3302 UChar *uTarget;
3303 char *cTarget;
3304 const char *cTargetLimit;
3305 char *cBuf;
3306 UChar *uBuf,*test;
3307 int32_t uBufSize = 120;
3308 UErrorCode errorCode=U_ZERO_ERROR;
3309 UConverter *cnv;
3310 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3311 int32_t* myOff= offsets;
3312 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3313 if(U_FAILURE(errorCode)) {
3314 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3315 return;
3316 }
3317
3318 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3319 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3320 uSource = (const UChar*)in;
3321 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3322 cTarget = cBuf;
3323 cTargetLimit = cBuf +uBufSize*5;
3324 uTarget = uBuf;
3325 uTargetLimit = uBuf+ uBufSize*5;
3326 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3327 if(U_FAILURE(errorCode)){
3328 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3329 return;
3330 }
3331 cSource = cBuf;
3332 cSourceLimit =cTarget;
3333 test =uBuf;
3334 myOff=offsets;
3335 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3336 if(U_FAILURE(errorCode)){
3337 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3338 return;
3339 }
3340
3341 uSource = (const UChar*)in;
3342 while(uSource<uSourceLimit){
3343 if(*test!=*uSource){
3344
3345 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3346 }
3347 uSource++;
3348 test++;
3349 }
3350
3351 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3352 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3353 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3354 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3355 TestJitterbug930("csISO2022JP");
3356 ucnv_close(cnv);
3357 free(uBuf);
3358 free(cBuf);
3359 free(offsets);
3360 }
3361
3362 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3363 const UChar* uSource;
3364 const UChar* uSourceLimit;
3365 const char* cSource;
3366 const char* cSourceLimit;
3367 UChar *uTargetLimit =NULL;
3368 UChar *uTarget;
3369 char *cTarget;
3370 const char *cTargetLimit;
3371 char *cBuf;
3372 UChar *uBuf,*test;
3373 int32_t uBufSize = 120*10;
3374 UErrorCode errorCode=U_ZERO_ERROR;
3375 UConverter *cnv;
3376 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3377 int32_t* myOff= offsets;
3378 cnv=my_ucnv_open(conv, &errorCode);
3379 if(U_FAILURE(errorCode)) {
3380 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3381 return;
3382 }
3383
3384 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3385 cBuf =(char*)malloc(uBufSize * sizeof(char));
3386 uSource = (const UChar*)in;
3387 uSourceLimit=uSource+len;
3388 cTarget = cBuf;
3389 cTargetLimit = cBuf +uBufSize;
3390 uTarget = uBuf;
3391 uTargetLimit = uBuf+ uBufSize;
3392 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3393 if(U_FAILURE(errorCode)){
3394 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3395 return;
3396 }
3397 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3398 cSource = cBuf;
3399 cSourceLimit =cTarget;
3400 test =uBuf;
3401 myOff=offsets;
3402 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3403 if(U_FAILURE(errorCode)){
3404 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3405 return;
3406 }
3407
3408 uSource = (const UChar*)in;
3409 while(uSource<uSourceLimit){
3410 if(*test!=*uSource){
3411 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3412 }
3413 uSource++;
3414 test++;
3415 }
3416 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3417 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3418 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3419 if(byteArr && byteArrLen!=0){
3420 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3421 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3422 {
3423 cSource = byteArr;
3424 cSourceLimit = cSource+byteArrLen;
3425 test=uBuf;
3426 myOff = offsets;
3427 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3428 if(U_FAILURE(errorCode)){
3429 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3430 return;
3431 }
3432
3433 uSource = (const UChar*)in;
3434 while(uSource<uSourceLimit){
3435 if(*test!=*uSource){
3436 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3437 }
3438 uSource++;
3439 test++;
3440 }
3441 }
3442 }
3443
3444 ucnv_close(cnv);
3445 free(uBuf);
3446 free(cBuf);
3447 free(offsets);
3448 }
3449 static UChar U_CALLCONV
3450 _charAt(int32_t offset, void *context) {
3451 return ((char*)context)[offset];
3452 }
3453
3454 static int32_t
3455 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3456 int32_t srcIndex=0;
3457 int32_t dstIndex=0;
3458 if(U_FAILURE(*status)){
3459 return 0;
3460 }
3461 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3462 *status = U_ILLEGAL_ARGUMENT_ERROR;
3463 return 0;
3464 }
3465 if(srcLen==-1){
3466 srcLen = (int32_t)uprv_strlen(src);
3467 }
3468
3469 for (; srcIndex<srcLen; ) {
3470 UChar32 c = src[srcIndex++];
3471 if (c == 0x005C /*'\\'*/) {
3472 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3473 if (c == (UChar32)0xFFFFFFFF) {
3474 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3475 break; /* invalid escape sequence */
3476 }
3477 }
3478 if(dstIndex < dstLen){
3479 if(c>0xFFFF){
3480 dst[dstIndex++] = U16_LEAD(c);
3481 if(dstIndex<dstLen){
3482 dst[dstIndex]=U16_TRAIL(c);
3483 }else{
3484 *status=U_BUFFER_OVERFLOW_ERROR;
3485 }
3486 }else{
3487 dst[dstIndex]=(UChar)c;
3488 }
3489
3490 }else{
3491 *status = U_BUFFER_OVERFLOW_ERROR;
3492 }
3493 dstIndex++; /* for preflighting */
3494 }
3495 return dstIndex;
3496 }
3497
3498 static void
3499 TestFullRoundtrip(const char* cp){
3500 UChar usource[10] ={0};
3501 UChar nsrc[10] = {0};
3502 uint32_t i=1;
3503 int len=0, ulen;
3504 nsrc[0]=0x0061;
3505 /* Test codepoint 0 */
3506 TestConv(usource,1,cp,"",NULL,0);
3507 TestConv(usource,2,cp,"",NULL,0);
3508 nsrc[2]=0x5555;
3509 TestConv(nsrc,3,cp,"",NULL,0);
3510
3511 for(;i<=0x10FFFF;i++){
3512 if(i==0xD800){
3513 i=0xDFFF;
3514 continue;
3515 }
3516 if(i<=0xFFFF){
3517 usource[0] =(UChar) i;
3518 len=1;
3519 }else{
3520 usource[0]=U16_LEAD(i);
3521 usource[1]=U16_TRAIL(i);
3522 len=2;
3523 }
3524 ulen=len;
3525 if(i==0x80) {
3526 usource[2]=0;
3527 }
3528 /* Test only single code points */
3529 TestConv(usource,ulen,cp,"",NULL,0);
3530 /* Test codepoint repeated twice */
3531 usource[ulen]=usource[0];
3532 usource[ulen+1]=usource[1];
3533 ulen+=len;
3534 TestConv(usource,ulen,cp,"",NULL,0);
3535 /* Test codepoint repeated 3 times */
3536 usource[ulen]=usource[0];
3537 usource[ulen+1]=usource[1];
3538 ulen+=len;
3539 TestConv(usource,ulen,cp,"",NULL,0);
3540 /* Test codepoint in between 2 codepoints */
3541 nsrc[1]=usource[0];
3542 nsrc[2]=usource[1];
3543 nsrc[len+1]=0x5555;
3544 TestConv(nsrc,len+2,cp,"",NULL,0);
3545 uprv_memset(usource,0,sizeof(UChar)*10);
3546 }
3547 }
3548
3549 static void
3550 TestRoundTrippingAllUTF(void){
3551 if(!getTestOption(QUICK_OPTION)){
3552 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3553 TestFullRoundtrip("BOCU-1");
3554 log_verbose("Running exhaustive round trip test for SCSU\n");
3555 TestFullRoundtrip("SCSU");
3556 log_verbose("Running exhaustive round trip test for UTF-8\n");
3557 TestFullRoundtrip("UTF-8");
3558 log_verbose("Running exhaustive round trip test for CESU-8\n");
3559 TestFullRoundtrip("CESU-8");
3560 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3561 TestFullRoundtrip("UTF-16BE");
3562 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3563 TestFullRoundtrip("UTF-16LE");
3564 log_verbose("Running exhaustive round trip test for UTF-16\n");
3565 TestFullRoundtrip("UTF-16");
3566 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3567 TestFullRoundtrip("UTF-32BE");
3568 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3569 TestFullRoundtrip("UTF-32LE");
3570 log_verbose("Running exhaustive round trip test for UTF-32\n");
3571 TestFullRoundtrip("UTF-32");
3572 log_verbose("Running exhaustive round trip test for UTF-7\n");
3573 TestFullRoundtrip("UTF-7");
3574 log_verbose("Running exhaustive round trip test for UTF-7\n");
3575 TestFullRoundtrip("UTF-7,version=1");
3576 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3577 TestFullRoundtrip("IMAP-mailbox-name");
3578 /*
3579 *
3580 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3581 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3582 * The old mappings remain as fallbacks.
3583 * This test may be reintroduced at a later time.
3584 *
3585 * 110118 - mow
3586 */
3587 /*
3588 log_verbose("Running exhaustive round trip test for GB18030\n");
3589 TestFullRoundtrip("GB18030");
3590 */
3591 }
3592 }
3593
3594 static void
3595 TestSCSU() {
3596
3597 static const uint16_t germanUTF16[]={
3598 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3599 };
3600
3601 static const uint8_t germanSCSU[]={
3602 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3603 };
3604
3605 static const uint16_t russianUTF16[]={
3606 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3607 };
3608
3609 static const uint8_t russianSCSU[]={
3610 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3611 };
3612
3613 static const uint16_t japaneseUTF16[]={
3614 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3615 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3616 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3617 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3618 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3619 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3620 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3621 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3622 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3623 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3624 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3625 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3626 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3627 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3628 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3629 };
3630
3631 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3632 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3633 static const uint8_t japaneseSCSU[]={
3634 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3635 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3636 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3637 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3638 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3639 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3640 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3641 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3642 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3643 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3644 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3645 0xcb, 0x82
3646 };
3647
3648 static const uint16_t allFeaturesUTF16[]={
3649 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3650 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3651 0x01df, 0xf000, 0xdbff, 0xdfff
3652 };
3653
3654 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3655 * result here (34B vs. 35B)
3656 */
3657 static const uint8_t allFeaturesSCSU[]={
3658 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3659 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3660 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3661 0xdf, 0x14, 0x80, 0x15, 0xff
3662 };
3663 static const uint16_t monkeyIn[]={
3664 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3665 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3666 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3667 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3668 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3669 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3670 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3671 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3672 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3673 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3674 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3675 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3676 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3677 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3678 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3679 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3680 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3681 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3682 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3683 /* test non-BMP code points */
3684 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3685 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3686 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3687 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3688 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3689 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3690 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3691 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3692 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3693 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3694 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3695
3696
3697 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3698 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3699 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3700 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3701 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3702 };
3703 static const char *fTestCases [] = {
3704 "\\ud800\\udc00", /* smallest surrogate*/
3705 "\\ud8ff\\udcff",
3706 "\\udBff\\udFff", /* largest surrogate pair*/
3707 "\\ud834\\udc00",
3708 "\\U0010FFFF",
3709 "Hello \\u9292 \\u9192 World!",
3710 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3711 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3712
3713 "\\u0648\\u06c8", /* catch missing reset*/
3714 "\\u0648\\u06c8",
3715
3716 "\\u4444\\uE001", /* lowest quotable*/
3717 "\\u4444\\uf2FF", /* highest quotable*/
3718 "\\u4444\\uf188\\u4444",
3719 "\\u4444\\uf188\\uf288",
3720 "\\u4444\\uf188abc\\u0429\\uf288",
3721 "\\u9292\\u2222",
3722 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3724 "Hello World!123456",
3725 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3726
3727 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3728 "abc\\u4411d", /* uses SQU*/
3729 "abc\\u4411\\u4412d",/* uses SCU*/
3730 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3731 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3732 "\\u9292\\u2222",
3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3734 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3735 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3736
3737 "", /* empty input*/
3738 "\\u0000", /* smallest BMP character*/
3739 "\\uFFFF", /* largest BMP character*/
3740
3741 /* regression tests*/
3742 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3743 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3744 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3745 "\\u0041\\u00df\\u0401\\u015f",
3746 "\\u9066\\u2123abc",
3747 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3748 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3749 };
3750 int i=0;
3751 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3752 const char* cSrc = fTestCases[i];
3753 UErrorCode status = U_ZERO_ERROR;
3754 int32_t cSrcLen,srcLen;
3755 UChar* src;
3756 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3757 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3758 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3759 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3760 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3761 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3762 free(src);
3763 }
3764 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3765 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3766 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3767 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3768 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3769 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3770 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3771 }
3772
3773 #if !UCONFIG_NO_LEGACY_CONVERSION
3774 static void TestJitterbug2346(){
3775 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3776 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3777 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3778
3779 UChar uTarget[500]={'\0'};
3780 UChar* utarget=uTarget;
3781 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3782
3783 char cTarget[500]={'\0'};
3784 char* ctarget=cTarget;
3785 char* ctargetLimit=cTarget+sizeof(cTarget);
3786 const char* csource=source;
3787 UChar* temp = expected;
3788 UErrorCode err=U_ZERO_ERROR;
3789
3790 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3791 if(U_FAILURE(err)) {
3792 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3793 return;
3794 }
3795 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3796 if(U_FAILURE(err)) {
3797 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3798 return;
3799 }
3800 utargetLimit=utarget;
3801 utarget = uTarget;
3802 while(utarget<utargetLimit){
3803 if(*temp!=*utarget){
3804
3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3806 }
3807 utarget++;
3808 temp++;
3809 }
3810 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3811 if(U_FAILURE(err)) {
3812 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3813 return;
3814 }
3815 ctargetLimit=ctarget;
3816 ctarget =cTarget;
3817 ucnv_close(conv);
3818
3819
3820 }
3821
3822 static void
3823 TestISO_2022_JP_1() {
3824 /* test input */
3825 static const uint16_t in[]={
3826 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3827 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3828 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3829 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3830 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3831 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3832 0x201D, 0x000D, 0x000A,
3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3834 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3836 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3837 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3838 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3839 };
3840 const UChar* uSource;
3841 const UChar* uSourceLimit;
3842 const char* cSource;
3843 const char* cSourceLimit;
3844 UChar *uTargetLimit =NULL;
3845 UChar *uTarget;
3846 char *cTarget;
3847 const char *cTargetLimit;
3848 char *cBuf;
3849 UChar *uBuf,*test;
3850 int32_t uBufSize = 120;
3851 UErrorCode errorCode=U_ZERO_ERROR;
3852 UConverter *cnv;
3853
3854 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3855 if(U_FAILURE(errorCode)) {
3856 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3857 return;
3858 }
3859
3860 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3861 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3862 uSource = (const UChar*)in;
3863 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3864 cTarget = cBuf;
3865 cTargetLimit = cBuf +uBufSize*5;
3866 uTarget = uBuf;
3867 uTargetLimit = uBuf+ uBufSize*5;
3868 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3869 if(U_FAILURE(errorCode)){
3870 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3871 return;
3872 }
3873 cSource = cBuf;
3874 cSourceLimit =cTarget;
3875 test =uBuf;
3876 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3877 if(U_FAILURE(errorCode)){
3878 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3879 return;
3880 }
3881 uSource = (const UChar*)in;
3882 while(uSource<uSourceLimit){
3883 if(*test!=*uSource){
3884
3885 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3886 }
3887 uSource++;
3888 test++;
3889 }
3890 /*ucnv_close(cnv);
3891 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3892 /*Test for the condition where there is an invalid character*/
3893 ucnv_reset(cnv);
3894 {
3895 static const uint8_t source2[]={0x0e,0x24,0x053};
3896 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3897 }
3898 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3899 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3900 ucnv_close(cnv);
3901 free(uBuf);
3902 free(cBuf);
3903 }
3904
3905 static void
3906 TestISO_2022_JP_2() {
3907 /* test input */
3908 static const uint16_t in[]={
3909 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3910 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3911 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3912 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3913 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3914 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3915 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3916 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3917 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3918 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3919 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3920 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3921 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3922 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3923 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3924 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3925 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3926 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3927 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3928 };
3929 const UChar* uSource;
3930 const UChar* uSourceLimit;
3931 const char* cSource;
3932 const char* cSourceLimit;
3933 UChar *uTargetLimit =NULL;
3934 UChar *uTarget;
3935 char *cTarget;
3936 const char *cTargetLimit;
3937 char *cBuf;
3938 UChar *uBuf,*test;
3939 int32_t uBufSize = 120;
3940 UErrorCode errorCode=U_ZERO_ERROR;
3941 UConverter *cnv;
3942 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3943 int32_t* myOff= offsets;
3944 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3945 if(U_FAILURE(errorCode)) {
3946 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3947 return;
3948 }
3949
3950 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3951 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3952 uSource = (const UChar*)in;
3953 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3954 cTarget = cBuf;
3955 cTargetLimit = cBuf +uBufSize*5;
3956 uTarget = uBuf;
3957 uTargetLimit = uBuf+ uBufSize*5;
3958 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3959 if(U_FAILURE(errorCode)){
3960 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3961 return;
3962 }
3963 cSource = cBuf;
3964 cSourceLimit =cTarget;
3965 test =uBuf;
3966 myOff=offsets;
3967 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3968 if(U_FAILURE(errorCode)){
3969 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3970 return;
3971 }
3972 uSource = (const UChar*)in;
3973 while(uSource<uSourceLimit){
3974 if(*test!=*uSource){
3975
3976 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3977 }
3978 uSource++;
3979 test++;
3980 }
3981 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3982 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3983 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3984 /*Test for the condition where there is an invalid character*/
3985 ucnv_reset(cnv);
3986 {
3987 static const uint8_t source2[]={0x0e,0x24,0x053};
3988 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3989 }
3990 ucnv_close(cnv);
3991 free(uBuf);
3992 free(cBuf);
3993 free(offsets);
3994 }
3995
3996 static void
3997 TestISO_2022_KR() {
3998 /* test input */
3999 static const uint16_t in[]={
4000 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4001 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4002 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4003 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4004 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4005 ,0x53E3,0x53E4,0x000A,0x000D};
4006 const UChar* uSource;
4007 const UChar* uSourceLimit;
4008 const char* cSource;
4009 const char* cSourceLimit;
4010 UChar *uTargetLimit =NULL;
4011 UChar *uTarget;
4012 char *cTarget;
4013 const char *cTargetLimit;
4014 char *cBuf;
4015 UChar *uBuf,*test;
4016 int32_t uBufSize = 120;
4017 UErrorCode errorCode=U_ZERO_ERROR;
4018 UConverter *cnv;
4019 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4020 int32_t* myOff= offsets;
4021 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4022 if(U_FAILURE(errorCode)) {
4023 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4024 return;
4025 }
4026
4027 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4028 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4029 uSource = (const UChar*)in;
4030 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4031 cTarget = cBuf;
4032 cTargetLimit = cBuf +uBufSize*5;
4033 uTarget = uBuf;
4034 uTargetLimit = uBuf+ uBufSize*5;
4035 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4036 if(U_FAILURE(errorCode)){
4037 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4038 return;
4039 }
4040 cSource = cBuf;
4041 cSourceLimit =cTarget;
4042 test =uBuf;
4043 myOff=offsets;
4044 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4045 if(U_FAILURE(errorCode)){
4046 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4047 return;
4048 }
4049 uSource = (const UChar*)in;
4050 while(uSource<uSourceLimit){
4051 if(*test!=*uSource){
4052 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4053 }
4054 uSource++;
4055 test++;
4056 }
4057 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4058 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4059 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4060 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4061 TestJitterbug930("csISO2022KR");
4062 /*Test for the condition where there is an invalid character*/
4063 ucnv_reset(cnv);
4064 {
4065 static const uint8_t source2[]={0x1b,0x24,0x053};
4066 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4067 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4068 }
4069 ucnv_close(cnv);
4070 free(uBuf);
4071 free(cBuf);
4072 free(offsets);
4073 }
4074
4075 static void
4076 TestISO_2022_KR_1() {
4077 /* test input */
4078 static const uint16_t in[]={
4079 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4080 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4081 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4082 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4083 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4084 ,0x53E3,0x53E4,0x000A,0x000D};
4085 const UChar* uSource;
4086 const UChar* uSourceLimit;
4087 const char* cSource;
4088 const char* cSourceLimit;
4089 UChar *uTargetLimit =NULL;
4090 UChar *uTarget;
4091 char *cTarget;
4092 const char *cTargetLimit;
4093 char *cBuf;
4094 UChar *uBuf,*test;
4095 int32_t uBufSize = 120;
4096 UErrorCode errorCode=U_ZERO_ERROR;
4097 UConverter *cnv;
4098 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4099 int32_t* myOff= offsets;
4100 cnv=ucnv_open("ibm-25546", &errorCode);
4101 if(U_FAILURE(errorCode)) {
4102 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4103 return;
4104 }
4105
4106 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4107 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4108 uSource = (const UChar*)in;
4109 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4110 cTarget = cBuf;
4111 cTargetLimit = cBuf +uBufSize*5;
4112 uTarget = uBuf;
4113 uTargetLimit = uBuf+ uBufSize*5;
4114 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4115 if(U_FAILURE(errorCode)){
4116 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4117 return;
4118 }
4119 cSource = cBuf;
4120 cSourceLimit =cTarget;
4121 test =uBuf;
4122 myOff=offsets;
4123 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4124 if(U_FAILURE(errorCode)){
4125 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4126 return;
4127 }
4128 uSource = (const UChar*)in;
4129 while(uSource<uSourceLimit){
4130 if(*test!=*uSource){
4131 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4132 }
4133 uSource++;
4134 test++;
4135 }
4136 ucnv_reset(cnv);
4137 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4138 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4139 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4140 ucnv_reset(cnv);
4141 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4142 /*Test for the condition where there is an invalid character*/
4143 ucnv_reset(cnv);
4144 {
4145 static const uint8_t source2[]={0x1b,0x24,0x053};
4146 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4147 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4148 }
4149 ucnv_close(cnv);
4150 free(uBuf);
4151 free(cBuf);
4152 free(offsets);
4153 }
4154
4155 static void TestJitterbug2411(){
4156 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4157 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4158 UConverter* kr=NULL, *kr1=NULL;
4159 UErrorCode errorCode = U_ZERO_ERROR;
4160 UChar tgt[100]={'\0'};
4161 UChar* target = tgt;
4162 UChar* targetLimit = target+100;
4163 kr=ucnv_open("iso-2022-kr", &errorCode);
4164 if(U_FAILURE(errorCode)) {
4165 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4166 return;
4167 }
4168 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4169 if(U_FAILURE(errorCode)) {
4170 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4171 return;
4172 }
4173 kr1 = ucnv_open("ibm-25546", &errorCode);
4174 if(U_FAILURE(errorCode)) {
4175 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4176 return;
4177 }
4178 target = tgt;
4179 targetLimit = target+100;
4180 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4181
4182 if(U_FAILURE(errorCode)) {
4183 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4184 return;
4185 }
4186
4187 ucnv_close(kr);
4188 ucnv_close(kr1);
4189
4190 }
4191
4192 static void
4193 TestJIS(){
4194 /* From Unicode moved to testdata/conversion.txt */
4195 /*To Unicode*/
4196 {
4197 static const uint8_t sampleTextJIS[] = {
4198 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4199 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4200 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4201 };
4202 static const uint16_t expectedISO2022JIS[] = {
4203 0x0041, 0x0042,
4204 0xFF81, 0xFF82,
4205 0x3000
4206 };
4207 static const int32_t toISO2022JISOffs[]={
4208 3,4,
4209 8,9,
4210 16
4211 };
4212
4213 static const uint8_t sampleTextJIS7[] = {
4214 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4215 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4216 0x1b,0x24,0x42,0x21,0x21,
4217 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4218 0x21,0x22,
4219 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4220 };
4221 static const uint16_t expectedISO2022JIS7[] = {
4222 0x0041, 0x0042,
4223 0xFF81, 0xFF82,
4224 0x3000,
4225 0xFF81, 0xFF82,
4226 0x3001,
4227 0x3000
4228 };
4229 static const int32_t toISO2022JIS7Offs[]={
4230 3,4,
4231 8,9,
4232 13,16,
4233 17,
4234 19,27
4235 };
4236 static const uint8_t sampleTextJIS8[] = {
4237 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4238 0xa1,0xc8,0xd9,/*Katakana Set*/
4239 0x1b,0x28,0x42,
4240 0x41,0x42,
4241 0xb1,0xc3, /*Katakana Set*/
4242 0x1b,0x24,0x42,0x21,0x21
4243 };
4244 static const uint16_t expectedISO2022JIS8[] = {
4245 0x0041, 0x0042,
4246 0xff61, 0xff88, 0xff99,
4247 0x0041, 0x0042,
4248 0xff71, 0xff83,
4249 0x3000
4250 };
4251 static const int32_t toISO2022JIS8Offs[]={
4252 3, 4, 5, 6,
4253 7, 11, 12, 13,
4254 14, 18,
4255 };
4256
4257 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4258 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4259 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4260 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4261 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4262 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4263 }
4264
4265 }
4266
4267
4268 #if 0
4269 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4270
4271 static void TestJitterbug915(){
4272 /* tests for roundtripping of the below sequence
4273 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4274 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4275 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4276 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4277 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4278 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4279 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4280 */
4281 static const char cSource[]={
4282 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4283 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4284 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4285 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4286 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4287 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4288 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4289 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4290 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4291 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4292 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4293 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4294 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4295 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4296 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4297 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4298 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4299 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4300 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4301 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4302 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4303 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4304 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4305 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4306 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4307 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4308 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4309 0x37, 0x20, 0x2A, 0x2F
4310 };
4311 UChar uTarget[500]={'\0'};
4312 UChar* utarget=uTarget;
4313 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4314
4315 char cTarget[500]={'\0'};
4316 char* ctarget=cTarget;
4317 char* ctargetLimit=cTarget+sizeof(cTarget);
4318 const char* csource=cSource;
4319 const char* tempSrc = cSource;
4320 UErrorCode err=U_ZERO_ERROR;
4321
4322 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4323 if(U_FAILURE(err)) {
4324 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4325 return;
4326 }
4327 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4328 if(U_FAILURE(err)) {
4329 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4330 return;
4331 }
4332 utargetLimit=utarget;
4333 utarget = uTarget;
4334 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4335 if(U_FAILURE(err)) {
4336 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4337 return;
4338 }
4339 ctargetLimit=ctarget;
4340 ctarget =cTarget;
4341 while(ctarget<ctargetLimit){
4342 if(*ctarget != *tempSrc){
4343 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4344 }
4345 ++ctarget;
4346 ++tempSrc;
4347 }
4348
4349 ucnv_close(conv);
4350 }
4351
4352 static void
4353 TestISO_2022_CN_EXT() {
4354 /* test input */
4355 static const uint16_t in[]={
4356 /* test Non-BMP code points */
4357 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4358 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4359 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4360 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4361 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4362 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4363 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4364 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4365 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4366 0xD869, 0xDED5,
4367
4368 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4369 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4370 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4371 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4372 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4373 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4374 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4375 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4376 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4377 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4378 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4379 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4380 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4381 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4382 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4383 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4384 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4385 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4386
4387 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4388
4389 };
4390
4391 const UChar* uSource;
4392 const UChar* uSourceLimit;
4393 const char* cSource;
4394 const char* cSourceLimit;
4395 UChar *uTargetLimit =NULL;
4396 UChar *uTarget;
4397 char *cTarget;
4398 const char *cTargetLimit;
4399 char *cBuf;
4400 UChar *uBuf,*test;
4401 int32_t uBufSize = 180;
4402 UErrorCode errorCode=U_ZERO_ERROR;
4403 UConverter *cnv;
4404 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4405 int32_t* myOff= offsets;
4406 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4407 if(U_FAILURE(errorCode)) {
4408 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4409 return;
4410 }
4411
4412 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4413 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4414 uSource = (const UChar*)in;
4415 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4416 cTarget = cBuf;
4417 cTargetLimit = cBuf +uBufSize*5;
4418 uTarget = uBuf;
4419 uTargetLimit = uBuf+ uBufSize*5;
4420 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4421 if(U_FAILURE(errorCode)){
4422 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4423 return;
4424 }
4425 cSource = cBuf;
4426 cSourceLimit =cTarget;
4427 test =uBuf;
4428 myOff=offsets;
4429 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4430 if(U_FAILURE(errorCode)){
4431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4432 return;
4433 }
4434 uSource = (const UChar*)in;
4435 while(uSource<uSourceLimit){
4436 if(*test!=*uSource){
4437 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4438 }
4439 else{
4440 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4441 }
4442 uSource++;
4443 test++;
4444 }
4445 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4446 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4447 /*Test for the condition where there is an invalid character*/
4448 ucnv_reset(cnv);
4449 {
4450 static const uint8_t source2[]={0x0e,0x24,0x053};
4451 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4452 }
4453 ucnv_close(cnv);
4454 free(uBuf);
4455 free(cBuf);
4456 free(offsets);
4457 }
4458 #endif
4459
4460 static void
4461 TestISO_2022_CN() {
4462 /* test input */
4463 static const uint16_t in[]={
4464 /* jitterbug 951 */
4465 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4466 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4467 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4468 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4469 0x0020, 0x0045, 0x004e, 0x0044,
4470 /**/
4471 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4472 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4473 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4474 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4475 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4476 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4477 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4478 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4479 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4480 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4481 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4482 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4483 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4484 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4485 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4486 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4487 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4488
4489 };
4490 const UChar* uSource;
4491 const UChar* uSourceLimit;
4492 const char* cSource;
4493 const char* cSourceLimit;
4494 UChar *uTargetLimit =NULL;
4495 UChar *uTarget;
4496 char *cTarget;
4497 const char *cTargetLimit;
4498 char *cBuf;
4499 UChar *uBuf,*test;
4500 int32_t uBufSize = 180;
4501 UErrorCode errorCode=U_ZERO_ERROR;
4502 UConverter *cnv;
4503 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4504 int32_t* myOff= offsets;
4505 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4506 if(U_FAILURE(errorCode)) {
4507 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4508 return;
4509 }
4510
4511 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4512 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4513 uSource = (const UChar*)in;
4514 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4515 cTarget = cBuf;
4516 cTargetLimit = cBuf +uBufSize*5;
4517 uTarget = uBuf;
4518 uTargetLimit = uBuf+ uBufSize*5;
4519 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4520 if(U_FAILURE(errorCode)){
4521 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4522 return;
4523 }
4524 cSource = cBuf;
4525 cSourceLimit =cTarget;
4526 test =uBuf;
4527 myOff=offsets;
4528 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4529 if(U_FAILURE(errorCode)){
4530 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4531 return;
4532 }
4533 uSource = (const UChar*)in;
4534 while(uSource<uSourceLimit){
4535 if(*test!=*uSource){
4536 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4537 }
4538 else{
4539 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4540 }
4541 uSource++;
4542 test++;
4543 }
4544 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4545 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4546 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4547 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4548 TestJitterbug930("csISO2022CN");
4549 /*Test for the condition where there is an invalid character*/
4550 ucnv_reset(cnv);
4551 {
4552 static const uint8_t source2[]={0x0e,0x24,0x053};
4553 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4554 }
4555
4556 ucnv_close(cnv);
4557 free(uBuf);
4558 free(cBuf);
4559 free(offsets);
4560 }
4561
4562 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One table entry for the empty-segment tests: a converter and the bytes to feed it. */
typedef struct EmptySegmentTest {
    const char * converterName;   /* name handed to ucnv_open(); NULL marks the table terminator */
    const char * inputText;       /* raw input bytes (not NUL-terminated text) */
    int          inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4568
4569 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
4570 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4571 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4572 if (reason > UCNV_IRREGULAR) {
4573 return;
4574 }
4575 if (reason != UCNV_IRREGULAR) {
4576 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4577 }
4578 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4579 *err = U_ZERO_ERROR;
4580 ucnv_cbToUWriteSub(toArgs,0,err);
4581 }
4582
4583 enum { kEmptySegmentToUCharsMax = 64 };
4584 static void TestJitterbug6175(void) {
4585 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4586 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4587 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4588 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4589 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4590 static const EmptySegmentTest emptySegmentTests[] = {
4591 /* converterName inputText inputTextLength */
4592 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4593 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4594 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4595 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4596 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4597 /* terminator: */
4598 { NULL, NULL, 0, }
4599 };
4600 const EmptySegmentTest * testPtr;
4601 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4602 UErrorCode err = U_ZERO_ERROR;
4603 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4604 if (U_FAILURE(err)) {
4605 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4606 return;
4607 }
4608 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4609 if (U_FAILURE(err)) {
4610 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4611 ucnv_close(cnv);
4612 return;
4613 }
4614 {
4615 UChar toUChars[kEmptySegmentToUCharsMax];
4616 UChar * toUCharsPtr = toUChars;
4617 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4618 const char * inCharsPtr = testPtr->inputText;
4619 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4620 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4621 }
4622 ucnv_close(cnv);
4623 }
4624 }
4625
4626 static void
4627 TestEBCDIC_STATEFUL() {
4628 /* test input */
4629 static const uint8_t in[]={
4630 0x61,
4631 0x1a,
4632 0x0f, 0x4b,
4633 0x42,
4634 0x40,
4635 0x36,
4636 };
4637
4638 /* expected test results */
4639 static const int32_t results[]={
4640 /* number of bytes read, code point */
4641 1, 0x002f,
4642 1, 0x0092,
4643 2, 0x002e,
4644 1, 0xff62,
4645 1, 0x0020,
4646 1, 0x0096,
4647
4648 };
4649 static const uint8_t in2[]={
4650 0x0f,
4651 0xa1,
4652 0x01
4653 };
4654
4655 /* expected test results */
4656 static const int32_t results2[]={
4657 /* number of bytes read, code point */
4658 2, 0x203E,
4659 1, 0x0001,
4660 };
4661
4662 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4663 UErrorCode errorCode=U_ZERO_ERROR;
4664 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4665 if(U_FAILURE(errorCode)) {
4666 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4667 return;
4668 }
4669 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4670 ucnv_reset(cnv);
4671 /* Test the condition when source >= sourceLimit */
4672 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4673 ucnv_reset(cnv);
4674 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4675 {
4676 static const uint8_t source1[]={0x0f};
4677 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4678 }
4679 /*Test for the condition where there is an invalid character*/
4680 ucnv_reset(cnv);
4681 {
4682 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4683 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4684 }
4685 ucnv_reset(cnv);
4686 source=(const char*)in2;
4687 limit=(const char*)in2+sizeof(in2);
4688 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4689 ucnv_close(cnv);
4690
4691 }
4692
4693 static void
4694 TestGB18030() {
4695 /* test input */
4696 static const uint8_t in[]={
4697 0x24,
4698 0x7f,
4699 0x81, 0x30, 0x81, 0x30,
4700 0xa8, 0xbf,
4701 0xa2, 0xe3,
4702 0xd2, 0xbb,
4703 0x82, 0x35, 0x8f, 0x33,
4704 0x84, 0x31, 0xa4, 0x39,
4705 0x90, 0x30, 0x81, 0x30,
4706 0xe3, 0x32, 0x9a, 0x35
4707 #if 0
4708 /*
4709 * Feature removed markus 2000-oct-26
4710 * Only some codepages must match surrogate pairs into supplementary code points -
4711 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4712 * GB 18030 provides direct encodings for supplementary code points, therefore
4713 * it must not combine two single-encoded surrogates into one code point.
4714 */
4715 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4716 #endif
4717 };
4718
4719 /* expected test results */
4720 static const int32_t results[]={
4721 /* number of bytes read, code point */
4722 1, 0x24,
4723 1, 0x7f,
4724 4, 0x80,
4725 2, 0x1f9,
4726 2, 0x20ac,
4727 2, 0x4e00,
4728 4, 0x9fa6,
4729 4, 0xffff,
4730 4, 0x10000,
4731 4, 0x10ffff
4732 #if 0
4733 /* Feature removed. See comment above. */
4734 8, 0x10000
4735 #endif
4736 };
4737
4738 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4739 UErrorCode errorCode=U_ZERO_ERROR;
4740 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4741 if(U_FAILURE(errorCode)) {
4742 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4743 return;
4744 }
4745 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4746 ucnv_close(cnv);
4747 }
4748
4749 static void
4750 TestLMBCS() {
4751 /* LMBCS-1 string */
4752 static const uint8_t pszLMBCS[]={
4753 0x61,
4754 0x01, 0x29,
4755 0x81,
4756 0xA0,
4757 0x0F, 0x27,
4758 0x0F, 0x91,
4759 0x14, 0x0a, 0x74,
4760 0x14, 0xF6, 0x02,
4761 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4762 0x10, 0x88, 0xA0,
4763 };
4764
4765 /* Unicode UChar32 equivalents */
4766 static const UChar32 pszUnicode32[]={
4767 /* code point */
4768 0x00000061,
4769 0x00002013,
4770 0x000000FC,
4771 0x000000E1,
4772 0x00000007,
4773 0x00000091,
4774 0x00000a74,
4775 0x00000200,
4776 0x00023456, /* code point for surrogate pair */
4777 0x00005516
4778 };
4779
4780 /* Unicode UChar equivalents */
4781 static const UChar pszUnicode[]={
4782 /* code point */
4783 0x0061,
4784 0x2013,
4785 0x00FC,
4786 0x00E1,
4787 0x0007,
4788 0x0091,
4789 0x0a74,
4790 0x0200,
4791 0xD84D, /* low surrogate */
4792 0xDC56, /* high surrogate */
4793 0x5516
4794 };
4795
4796 /* expected test results */
4797 static const int offsets32[]={
4798 /* number of bytes read, code point */
4799 0,
4800 1,
4801 3,
4802 4,
4803 5,
4804 7,
4805 9,
4806 12,
4807 15,
4808 21,
4809 24
4810 };
4811
4812 /* expected test results */
4813 static const int offsets[]={
4814 /* number of bytes read, code point */
4815 0,
4816 1,
4817 3,
4818 4,
4819 5,
4820 7,
4821 9,
4822 12,
4823 15,
4824 18,
4825 21,
4826 24
4827 };
4828
4829
4830 UConverter *cnv;
4831
4832 #define NAME_LMBCS_1 "LMBCS-1"
4833 #define NAME_LMBCS_2 "LMBCS-2"
4834
4835
4836 /* Some basic open/close/property tests on some LMBCS converters */
4837 {
4838
4839 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4840 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4841 char get_subchars [1];
4842 const char * get_name;
4843 UConverter *cnv1;
4844 UConverter *cnv2;
4845
4846 int8_t len = sizeof(get_subchars);
4847
4848 UErrorCode errorCode=U_ZERO_ERROR;
4849
4850 /* Open */
4851 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4852 if(U_FAILURE(errorCode)) {
4853 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4854 return;
4855 }
4856 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4857 if(U_FAILURE(errorCode)) {
4858 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4859 return;
4860 }
4861
4862 /* Name */
4863 get_name = ucnv_getName (cnv1, &errorCode);
4864 if (strcmp(NAME_LMBCS_1,get_name)){
4865 log_err("Unexpected converter name: %s\n", get_name);
4866 }
4867 get_name = ucnv_getName (cnv2, &errorCode);
4868 if (strcmp(NAME_LMBCS_2,get_name)){
4869 log_err("Unexpected converter name: %s\n", get_name);
4870 }
4871
4872 /* substitution chars */
4873 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4874 if(U_FAILURE(errorCode)) {
4875 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4876 }
4877 if (len!=1){
4878 log_err("Unexpected length of sub chars\n");
4879 }
4880 if (get_subchars[0] != expected_subchars[0]){
4881 log_err("Unexpected value of sub chars\n");
4882 }
4883 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4884 if(U_FAILURE(errorCode)) {
4885 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4886 }
4887 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4888 if(U_FAILURE(errorCode)) {
4889 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4890 }
4891 if (len!=1){
4892 log_err("Unexpected length of sub chars\n");
4893 }
4894 if (get_subchars[0] != new_subchars[0]){
4895 log_err("Unexpected value of sub chars\n");
4896 }
4897 ucnv_close(cnv1);
4898 ucnv_close(cnv2);
4899
4900 }
4901
4902 /* LMBCS to Unicode - offsets */
4903 {
4904 UErrorCode errorCode=U_ZERO_ERROR;
4905
4906 const char * pSource = (const char *)pszLMBCS;
4907 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4908
4909 UChar Out [sizeof(pszUnicode) + 1];
4910 UChar * pOut = Out;
4911 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4912
4913 int32_t off [sizeof(offsets)];
4914
4915 /* last 'offset' in expected results is just the final size.
4916 (Makes other tests easier). Compensate here: */
4917
4918 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4919
4920
4921
4922 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4923 if(U_FAILURE(errorCode)) {
4924 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4925 return;
4926 }
4927
4928
4929
4930 ucnv_toUnicode (cnv,
4931 &pOut,
4932 OutLimit,
4933 &pSource,
4934 sourceLimit,
4935 off,
4936 TRUE,
4937 &errorCode);
4938
4939
4940 if (memcmp(off,offsets,sizeof(offsets)))
4941 {
4942 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4943 }
4944 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4945 {
4946 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4947 }
4948 ucnv_close(cnv);
4949 }
4950 {
4951 /* LMBCS to Unicode - getNextUChar */
4952 const char * sourceStart;
4953 const char *source=(const char *)pszLMBCS;
4954 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4955 const UChar32 *results= pszUnicode32;
4956 const int *off = offsets32;
4957
4958 UErrorCode errorCode=U_ZERO_ERROR;
4959 UChar32 uniChar;
4960
4961 cnv=ucnv_open("LMBCS-1", &errorCode);
4962 if(U_FAILURE(errorCode)) {
4963 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4964 return;
4965 }
4966 else
4967 {
4968
4969 while(source<limit) {
4970 sourceStart=source;
4971 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4972 if(U_FAILURE(errorCode)) {
4973 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4974 break;
4975 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4976 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4977 uniChar, (source-sourceStart), *results, *off);
4978 break;
4979 }
4980 results++;
4981 off++;
4982 }
4983 }
4984 ucnv_close(cnv);
4985 }
4986 { /* test locale & optimization group operations: Unicode to LMBCS */
4987
4988 UErrorCode errorCode=U_ZERO_ERROR;
4989 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4990 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4991 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4992 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4993 const UChar * pUniOut = uniString;
4994 UChar * pUniIn = uniString;
4995 uint8_t lmbcsString [4];
4996 const char * pLMBCSOut = (const char *)lmbcsString;
4997 char * pLMBCSIn = (char *)lmbcsString;
4998
4999 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5000 ucnv_fromUnicode (cnv16he,
5001 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5002 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5003 NULL, 1, &errorCode);
5004
5005 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5006 {
5007 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5008 }
5009
5010 pLMBCSIn= (char *)lmbcsString;
5011 pUniOut = uniString;
5012 ucnv_fromUnicode (cnv01us,
5013 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5014 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5015 NULL, 1, &errorCode);
5016
5017 if (lmbcsString[0] != 0x9F)
5018 {
5019 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5020 }
5021
5022 /* single byte char from mbcs char set */
5023 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5024 pLMBCSOut = (const char *)lmbcsString;
5025 pUniIn = uniString;
5026 ucnv_toUnicode (cnv16jp,
5027 &pUniIn, pUniIn + 1,
5028 &pLMBCSOut, (pLMBCSOut + 1),
5029 NULL, 1, &errorCode);
5030 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5031 {
5032 log_err("Unexpected results from LMBCS-16 single byte char\n");
5033 }
5034 /* convert to group 1: should be 3 bytes */
5035 pLMBCSIn = (char *)lmbcsString;
5036 pUniOut = uniString;
5037 ucnv_fromUnicode (cnv01us,
5038 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5039 &pUniOut, pUniOut + 1,
5040 NULL, 1, &errorCode);
5041 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5042 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5043 {
5044 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5045 }
5046 pLMBCSOut = (const char *)lmbcsString;
5047 pUniIn = uniString;
5048 ucnv_toUnicode (cnv01us,
5049 &pUniIn, pUniIn + 1,
5050 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5051 NULL, 1, &errorCode);
5052 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5053 {
5054 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5055 }
5056 pLMBCSIn = (char *)lmbcsString;
5057 pUniOut = uniString;
5058 ucnv_fromUnicode (cnv16jp,
5059 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5060 &pUniOut, pUniOut + 1,
5061 NULL, 1, &errorCode);
5062 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5063 {
5064 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5065 }
5066 ucnv_close(cnv16he);
5067 ucnv_close(cnv16jp);
5068 ucnv_close(cnv01us);
5069 }
5070 {
5071 /* Small source buffer testing, LMBCS -> Unicode */
5072
5073 UErrorCode errorCode=U_ZERO_ERROR;
5074
5075 const char * pSource = (const char *)pszLMBCS;
5076 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5077 int codepointCount = 0;
5078
5079 UChar Out [sizeof(pszUnicode) + 1];
5080 UChar * pOut = Out;
5081 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5082
5083
5084 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5085 if(U_FAILURE(errorCode)) {
5086 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5087 return;
5088 }
5089
5090
5091 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5092 {
5093 ucnv_toUnicode (cnv,
5094 &pOut,
5095 OutLimit,
5096 &pSource,
5097 (pSource+1), /* claim that this is a 1- byte buffer */
5098 NULL,
5099 FALSE, /* FALSE means there might be more chars in the next buffer */
5100 &errorCode);
5101
5102 if (U_SUCCESS (errorCode))
5103 {
5104 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5105 {
5106 /* we are on to the next code point: check value */
5107
5108 if (Out[0] != pszUnicode[codepointCount]){
5109 log_err("LMBCS->Uni result %lx should have been %lx \n",
5110 Out[0], pszUnicode[codepointCount]);
5111 }
5112
5113 pOut = Out; /* reset for accumulating next code point */
5114 codepointCount++;
5115 }
5116 }
5117 else
5118 {
5119 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5120 }
5121 }
5122 {
5123 /* limits & surrogate error testing */
5124 char LIn [sizeof(pszLMBCS)];
5125 const char * pLIn = LIn;
5126
5127 char LOut [sizeof(pszLMBCS)];
5128 char * pLOut = LOut;
5129
5130 UChar UOut [sizeof(pszUnicode)];
5131 UChar * pUOut = UOut;
5132
5133 UChar UIn [sizeof(pszUnicode)];
5134 const UChar * pUIn = UIn;
5135
5136 int32_t off [sizeof(offsets)];
5137 UChar32 uniChar;
5138
5139 errorCode=U_ZERO_ERROR;
5140
5141 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5142 pUIn++;
5143 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5144 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5145 {
5146 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5147 }
5148 pUIn--;
5149
5150 errorCode=U_ZERO_ERROR;
5151 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5152 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5153 {
5154 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5155 }
5156 errorCode=U_ZERO_ERROR;
5157
5158 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5159 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5160 {
5161 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5162 }
5163 errorCode=U_ZERO_ERROR;
5164
5165 /* 0 byte source request - no error, no pointer movement */
5166 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5167 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5168 if(U_FAILURE(errorCode)) {
5169 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5170 }
5171 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5172 {
5173 log_err("Unexpected pointer move in 0 byte source request \n");
5174 }
5175 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5176 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5177 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5178 {
5179 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5180 }
5181 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5182 {
5183 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5184 }
5185 errorCode = U_ZERO_ERROR;
5186
5187 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5188
5189 pUIn = pszUnicode;
5190 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5191 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5192 {
5193 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5194 }
5195
5196 errorCode = U_ZERO_ERROR;
5197
5198 pLIn = (const char *)pszLMBCS;
5199 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5200 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5201 {
5202 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5203 }
5204
5205 /* unpaired or chopped LMBCS surrogates */
5206
5207 /* OK high surrogate, Low surrogate is chopped */
5208 LIn [0] = (char)0x14;
5209 LIn [1] = (char)0xD8;
5210 LIn [2] = (char)0x01;
5211 LIn [3] = (char)0x14;
5212 LIn [4] = (char)0xDC;
5213 pLIn = LIn;
5214 errorCode = U_ZERO_ERROR;
5215 pUOut = UOut;
5216
5217 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5218 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5219 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5220 {
5221 log_err("Unexpected results on chopped low surrogate\n");
5222 }
5223
5224 /* chopped at surrogate boundary */
5225 LIn [0] = (char)0x14;
5226 LIn [1] = (char)0xD8;
5227 LIn [2] = (char)0x01;
5228 pLIn = LIn;
5229 errorCode = U_ZERO_ERROR;
5230 pUOut = UOut;
5231
5232 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5233 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5234 {
5235 log_err("Unexpected results on chopped at surrogate boundary \n");
5236 }
5237
5238 /* unpaired surrogate plus valid Unichar */
5239 LIn [0] = (char)0x14;
5240 LIn [1] = (char)0xD8;
5241 LIn [2] = (char)0x01;
5242 LIn [3] = (char)0x14;
5243 LIn [4] = (char)0xC9;
5244 LIn [5] = (char)0xD0;
5245 pLIn = LIn;
5246 errorCode = U_ZERO_ERROR;
5247 pUOut = UOut;
5248
5249 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5250 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5251 {
5252 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5253 }
5254
5255 /* unpaired surrogate plus chopped Unichar */
5256 LIn [0] = (char)0x14;
5257 LIn [1] = (char)0xD8;
5258 LIn [2] = (char)0x01;
5259 LIn [3] = (char)0x14;
5260 LIn [4] = (char)0xC9;
5261
5262 pLIn = LIn;
5263 errorCode = U_ZERO_ERROR;
5264 pUOut = UOut;
5265
5266 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5267 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5268 {
5269 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5270 }
5271
5272 /* unpaired surrogate plus valid non-Unichar */
5273 LIn [0] = (char)0x14;
5274 LIn [1] = (char)0xD8;
5275 LIn [2] = (char)0x01;
5276 LIn [3] = (char)0x0F;
5277 LIn [4] = (char)0x3B;
5278
5279 pLIn = LIn;
5280 errorCode = U_ZERO_ERROR;
5281 pUOut = UOut;
5282
5283 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5284 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5285 {
5286 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5287 }
5288
5289 /* unpaired surrogate plus chopped non-Unichar */
5290 LIn [0] = (char)0x14;
5291 LIn [1] = (char)0xD8;
5292 LIn [2] = (char)0x01;
5293 LIn [3] = (char)0x0F;
5294
5295 pLIn = LIn;
5296 errorCode = U_ZERO_ERROR;
5297 pUOut = UOut;
5298
5299 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5300
5301 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5302 {
5303 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5304 }
5305 }
5306 }
5307 ucnv_close(cnv); /* final cleanup */
5308 }
5309
5310
5311 static void TestJitterbug255()
5312 {
5313 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5314 const char *testBuffer = (const char *)testBytes;
5315 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5316 UErrorCode status = U_ZERO_ERROR;
5317 /*UChar32 result;*/
5318 UConverter *cnv = 0;
5319
5320 cnv = ucnv_open("shift-jis", &status);
5321 if (U_FAILURE(status) || cnv == 0) {
5322 log_data_err("Failed to open the converter for SJIS.\n");
5323 return;
5324 }
5325 while (testBuffer != testEnd)
5326 {
5327 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5328 if (U_FAILURE(status))
5329 {
5330 log_err("Failed to convert the next UChar for SJIS.\n");
5331 break;
5332 }
5333 }
5334 ucnv_close(cnv);
5335 }
5336
5337 static void TestEBCDICUS4XML()
5338 {
5339 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5340 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5341 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5342 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5343 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5344 UChar *unicodes = unicodes_x;
5345 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5346 char *target = target_x;
5347 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5348 UErrorCode status = U_ZERO_ERROR;
5349 UConverter *cnv = 0;
5350
5351 cnv = ucnv_open("ebcdic-xml-us", &status);
5352 if (U_FAILURE(status) || cnv == 0) {
5353 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5354 return;
5355 }
5356 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5357 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5358 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5359 u_errorName(status));
5360 printUSeqErr(unicodes_x, 3);
5361 printUSeqErr(toUnicodeMaps, 3);
5362 }
5363 status = U_ZERO_ERROR;
5364 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5365 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5366 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5367 u_errorName(status));
5368 printSeqErr((const unsigned char*)target_x, 3);
5369 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5370 }
5371 ucnv_close(cnv);
5372 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5374
5375 #if !UCONFIG_NO_COLLATION
5376
5377 static void TestJitterbug981(){
5378 const UChar* rules;
5379 int32_t rules_length, target_cap, bytes_needed, buff_size;
5380 UErrorCode status = U_ZERO_ERROR;
5381 UConverter *utf8cnv;
5382 UCollator* myCollator;
5383 char *buff;
5384 int numNeeded=0;
5385 utf8cnv = ucnv_open ("utf8", &status);
5386 if(U_FAILURE(status)){
5387 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5388 return;
5389 }
5390 myCollator = ucol_open("zh", &status);
5391 if(U_FAILURE(status)){
5392 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5393 ucnv_close(utf8cnv);
5394 return;
5395 }
5396
5397 rules = ucol_getRules(myCollator, &rules_length);
5398 if(rules_length == 0) {
5399 log_data_err("missing zh tailoring rule string\n");
5400 ucol_close(myCollator);
5401 ucnv_close(utf8cnv);
5402 return;
5403 }
5404 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5405 buff = malloc(buff_size);
5406
5407 target_cap = 0;
5408 do {
5409 ucnv_reset(utf8cnv);
5410 status = U_ZERO_ERROR;
5411 if(target_cap >= buff_size) {
5412 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5413 break;
5414 }
5415 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5416 rules, rules_length, &status);
5417 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5418 if(numNeeded!=0 && numNeeded!= bytes_needed){
5419 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5420 break;
5421 }
5422 numNeeded = bytes_needed;
5423 } while (status == U_BUFFER_OVERFLOW_ERROR);
5424 ucol_close(myCollator);
5425 ucnv_close(utf8cnv);
5426 free(buff);
5427 }
5428
5429 #endif
5430
5431 #if !UCONFIG_NO_LEGACY_CONVERSION
5432 static void TestJitterbug1293(){
5433 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5434 char target[256];
5435 UErrorCode status = U_ZERO_ERROR;
5436 UConverter* conv=NULL;
5437 int32_t target_cap, bytes_needed, numNeeded = 0;
5438 conv = ucnv_open("shift-jis",&status);
5439 if(U_FAILURE(status)){
5440 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5441 return;
5442 }
5443
5444 do{
5445 target_cap =0;
5446 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5447 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5448 if(numNeeded!=0 && numNeeded!= bytes_needed){
5449 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5450 }
5451 numNeeded = bytes_needed;
5452 } while (status == U_BUFFER_OVERFLOW_ERROR);
5453 if(U_FAILURE(status)){
5454 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5455 return;
5456 }
5457 ucnv_close(conv);
5458 }
5459 #endif
5460
5461 static void TestJB5275_1(){
5462
5463 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5464 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5465 /* Switch script: */
5466 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5467 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5468 "\xEF\x40\x3B\xB3\x0A";
5469 static const UChar expected[] ={
5470 0x003b, 0x0a15, 0x000a, /* Easy characters */
5471 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5472 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5473 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5474 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5475 };
5476
5477 UErrorCode status = U_ZERO_ERROR;
5478 UConverter* conv = ucnv_open("iscii-gur", &status);
5479 UChar dest[100] = {'\0'};
5480 UChar* target = dest;
5481 UChar* targetLimit = dest+100;
5482 const char* source = data;
5483 const char* sourceLimit = data+strlen(data);
5484 const UChar* exp = expected;
5485
5486 if (U_FAILURE(status)) {
5487 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5488 return;
5489 }
5490
5491 log_verbose("Testing switching back to default script when new line is encountered.\n");
5492 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5493 if(U_FAILURE(status)){
5494 log_err("conversion failed: %s \n", u_errorName(status));
5495 }
5496 targetLimit = target;
5497 target = dest;
5498 printUSeq(target, targetLimit-target);
5499 while(target<targetLimit){
5500 if(*exp!=*target){
5501 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5502 }
5503 target++;
5504 exp++;
5505 }
5506 ucnv_close(conv);
5507 }
5508
5509 static void TestJB5275(){
5510 static const char* data =
5511 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5512 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5513 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5514 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5515 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5516 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5517 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5518 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5519 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5520 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5521 static const UChar expected[] ={
5522 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5523 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5524 0x0038, 0x0C95, 0x000A, /* Kannada test */
5525 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5526 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5527 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5528 };
5529
5530 UErrorCode status = U_ZERO_ERROR;
5531 UConverter* conv = ucnv_open("iscii", &status);
5532 UChar dest[100] = {'\0'};
5533 UChar* target = dest;
5534 UChar* targetLimit = dest+100;
5535 const char* source = data;
5536 const char* sourceLimit = data+strlen(data);
5537 const UChar* exp = expected;
5538 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5539 if(U_FAILURE(status)){
5540 log_data_err("conversion failed: %s \n", u_errorName(status));
5541 }
5542 targetLimit = target;
5543 target = dest;
5544
5545 printUSeq(target, targetLimit-target);
5546
5547 while(target<targetLimit){
5548 if(*exp!=*target){
5549 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5550 }
5551 target++;
5552 exp++;
5553 }
5554 ucnv_close(conv);
5555 }
5556
5557 static void
5558 TestIsFixedWidth() {
5559 UErrorCode status = U_ZERO_ERROR;
5560 UConverter *cnv = NULL;
5561 int32_t i;
5562
5563 const char *fixedWidth[] = {
5564 "US-ASCII",
5565 "UTF32",
5566 "ibm-5478_P100-1995"
5567 };
5568
5569 const char *notFixedWidth[] = {
5570 "GB18030",
5571 "UTF8",
5572 "windows-949-2000",
5573 "UTF16"
5574 };
5575
5576 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5577 cnv = ucnv_open(fixedWidth[i], &status);
5578 if (cnv == NULL || U_FAILURE(status)) {
5579 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5580 continue;
5581 }
5582
5583 if (!ucnv_isFixedWidth(cnv, &status)) {
5584 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5585 }
5586 ucnv_close(cnv);
5587 }
5588
5589 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5590 cnv = ucnv_open(notFixedWidth[i], &status);
5591 if (cnv == NULL || U_FAILURE(status)) {
5592 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5593 continue;
5594 }
5595
5596 if (ucnv_isFixedWidth(cnv, &status)) {
5597 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5598 }
5599 ucnv_close(cnv);
5600 }
5601 }