]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/nucnvtst.c
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nucnvtst.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/********************************************************************
4 * COPYRIGHT:
2ca993e8 5 * Copyright (c) 1997-2016, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
73c04bcf 8/*******************************************************************************
b75a7d8f 9*
b331163b 10* File nucnvtst.c
b75a7d8f
A
11*
12* Modification History:
13* Name Description
14* Steven R. Loomis 7/8/1999 Adding input buffer test
73c04bcf 15********************************************************************************
b75a7d8f
A
16*/
17#include <stdio.h>
18#include "cstring.h"
19#include "unicode/uloc.h"
20#include "unicode/ucnv.h"
21#include "unicode/ucnv_err.h"
46f4442e 22#include "unicode/ucnv_cb.h"
b75a7d8f
A
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "unicode/ucol.h"
4388f060 27#include "unicode/utf16.h"
b75a7d8f 28#include "cmemory.h"
729e4ab9 29#include "nucnvtst.h"
b75a7d8f 30
374ca955 31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
b75a7d8f
A
32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33#if !UCONFIG_NO_COLLATION
34static void TestJitterbug981(void);
35#endif
51004dcb 36#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 37static void TestJitterbug1293(void);
51004dcb 38#endif
b75a7d8f
A
39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40static void TestConverterTypesAndStarters(void);
41static void TestAmbiguous(void);
42static void TestSignatureDetection(void);
43static void TestUTF7(void);
44static void TestIMAP(void);
45static void TestUTF8(void);
46static void TestCESU8(void);
47static void TestUTF16(void);
48static void TestUTF16BE(void);
49static void TestUTF16LE(void);
50static void TestUTF32(void);
51static void TestUTF32BE(void);
52static void TestUTF32LE(void);
53static void TestLATIN1(void);
73c04bcf
A
54
55#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
56static void TestSBCS(void);
57static void TestDBCS(void);
58static void TestMBCS(void);
729e4ab9
A
59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60static void TestICCRunout(void);
61#endif
73c04bcf 62
374ca955 63#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 64static void TestISO_2022(void);
374ca955 65#endif
73c04bcf 66
b75a7d8f
A
67static void TestISO_2022_JP(void);
68static void TestISO_2022_JP_1(void);
69static void TestISO_2022_JP_2(void);
70static void TestISO_2022_KR(void);
71static void TestISO_2022_KR_1(void);
72static void TestISO_2022_CN(void);
729e4ab9
A
73#if 0
74 /*
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76 */
b75a7d8f 77static void TestISO_2022_CN_EXT(void);
729e4ab9 78#endif
b75a7d8f
A
79static void TestJIS(void);
80static void TestHZ(void);
73c04bcf
A
81#endif
82
b75a7d8f 83static void TestSCSU(void);
73c04bcf
A
84
85#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
86static void TestEBCDIC_STATEFUL(void);
87static void TestGB18030(void);
88static void TestLMBCS(void);
89static void TestJitterbug255(void);
90static void TestEBCDICUS4XML(void);
729e4ab9
A
91#if 0
92 /*
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94 */
b75a7d8f 95static void TestJitterbug915(void);
729e4ab9 96#endif
b75a7d8f 97static void TestISCII(void);
73c04bcf
A
98
99static void TestCoverageMBCS(void);
100static void TestJitterbug2346(void);
101static void TestJitterbug2411(void);
46f4442e
A
102static void TestJB5275(void);
103static void TestJB5275_1(void);
d5d484b0 104static void TestJitterbug6175(void);
4388f060
A
105
106static void TestIsFixedWidth(void);
73c04bcf
A
107#endif
108
729e4ab9
A
109static void TestInBufSizes(void);
110
73c04bcf 111static void TestRoundTrippingAllUTF(void);
b75a7d8f
A
112static void TestConv(const uint16_t in[],
113 int len,
114 const char* conv,
115 const char* lang,
116 char byteArr[],
117 int byteArrLen);
b75a7d8f
A
118
119/* open a converter, using test data if it begins with '@' */
120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Number of elements in the scratch output/offset buffers of the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Per-call chunk limits for the conversion helpers; assigned by TestNewConvertWithBufferSizes(). */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently under test; written by setNuConvTestName(). */
static char gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Arguments are fully parenthesized so that expressions built from operators
 * with lower precedence than '<' (e.g. a & b) still compare as a whole.
 * An argument may be evaluated twice, so it must not have side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133 if(cnv && cnv[0] == '@') {
374ca955 134 return ucnv_openPackage(loadTestData(err), cnv+1, err);
b75a7d8f
A
135 } else {
136 return ucnv_open(cnv, err);
137 }
138}
139
/* Writes the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151 int i=0;
152 log_verbose("{U+");
153 while (i<len) log_verbose("0x%04x ", a[i++]);
154 log_verbose("}\n");
155}
156
/* Same output format as printSeq(), but written directly to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168 int i=0;
169 fprintf(stderr, "{U+");
170 while (i<len)
171 fprintf(stderr, "0x%04x ", a[i++]);
172 fprintf(stderr,"}\n");
173}
174
175static void
374ca955 176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
b75a7d8f
A
177{
178 const char* s0;
179 const char* s=(char*)source;
374ca955 180 const int32_t *r=results;
b75a7d8f 181 UErrorCode errorCode=U_ZERO_ERROR;
374ca955 182 UChar32 c;
b75a7d8f
A
183
184 while(s<limit) {
185 s0=s;
186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191 break;
374ca955
A
192 } else if(
193 /* test the expected number of input bytes only if >=0 */
194 (*r>=0 && (int32_t)(s-s0)!=*r) ||
195 c!=*(r+1)
196 ) {
b75a7d8f
A
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message, c, (s-s0), *(r+1), *r);
199 break;
200 }
201 r+=2;
202 }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208 const char* s=(char*)source;
209 UErrorCode errorCode=U_ZERO_ERROR;
210 uint32_t c;
211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212 if(errorCode != expected){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214 }
215 if(c != 0xFFFD && c != 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217 }
218
219}
220
221static void TestInBufSizes(void)
222{
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
729e4ab9 252#if !UCONFIG_NO_FILE_IO
b75a7d8f
A
253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
729e4ab9 255#endif
b75a7d8f
A
256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
374ca955
A
262
263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
b75a7d8f
A
264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
73c04bcf
A
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
374ca955 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
73c04bcf 274#endif
374ca955 275
b75a7d8f 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
73c04bcf
A
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
729e4ab9 280#if !UCONFIG_NO_FILE_IO
b75a7d8f 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
729e4ab9
A
282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
b75a7d8f 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
73c04bcf 285
374ca955 286#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
374ca955 288#endif
73c04bcf 289
b75a7d8f
A
290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
293 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
296 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
729e4ab9
A
297 /*
298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
b75a7d8f
A
299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
729e4ab9 301 */
b75a7d8f 302 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
73c04bcf
A
303#endif
304
b75a7d8f 305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
73c04bcf
A
306
307#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
b75a7d8f
A
310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
46f4442e
A
313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
b75a7d8f
A
315#if !UCONFIG_NO_COLLATION
316 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
317#endif
73c04bcf 318
b75a7d8f 319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
73c04bcf
A
320#endif
321
322
729e4ab9 323#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
b75a7d8f 324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
73c04bcf
A
325#endif
326
b75a7d8f 327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
73c04bcf
A
328
329#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
d5d484b0 332 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
46f4442e 333
4388f060
A
334 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
335#endif
b75a7d8f
A
336}
337
338
339/* Note that this test already makes use of statics, so it's not really
340 multithread safe.
341 This convenience function lets us make the error messages actually useful.
342*/
343
344static void setNuConvTestName(const char *codepage, const char *direction)
345{
374ca955
A
346 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347 codepage,
348 direction,
349 (int)gInBufferSize,
350 (int)gOutBufferSize);
b75a7d8f
A
351}
352
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* The test was OK. */
    TC_OK,
    /* Match failed; an error was printed. */
    TC_MISMATCH,
    /* Test failed; callers should not print an error because one was already printed. */
    TC_FAIL
} ETestConvertResult;
359
360/* Note: This function uses global variables and it will not do offset
361checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
362static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
363 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364{
365 UErrorCode status = U_ZERO_ERROR;
366 UConverter *conv = 0;
73c04bcf 367 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f 368 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf 369 char *p;
b75a7d8f 370 const UChar *src;
73c04bcf
A
371 char *end;
372 char *targ;
b75a7d8f
A
373 int32_t *offs;
374 int i;
375 int32_t realBufferSize;
73c04bcf 376 char *realBufferEnd;
b75a7d8f
A
377 const UChar *realSourceEnd;
378 const UChar *sourceLimit;
379 UBool checkOffsets = TRUE;
380 UBool doFlush;
381
382 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 383 junkout[i] = (char)0xF0;
b75a7d8f
A
384 for(i=0;i<NEW_MAX_BUFFER;i++)
385 junokout[i] = 0xFF;
386
387 setNuConvTestName(codepage, "FROM");
388
389 log_verbose("\n========= %s\n", gNuConvTestName);
390
391 conv = my_ucnv_open(codepage, &status);
392
393 if(U_FAILURE(status))
394 {
395 log_data_err("Couldn't open converter %s\n",codepage);
396 return TC_FAIL;
397 }
398 if(useFallback){
399 ucnv_setFallback(conv,useFallback);
400 }
401
402 log_verbose("Converter opened..\n");
403
404 src = source;
405 targ = junkout;
406 offs = junokout;
407
2ca993e8 408 realBufferSize = UPRV_LENGTHOF(junkout);
b75a7d8f
A
409 realBufferEnd = junkout + realBufferSize;
410 realSourceEnd = source + sourceLen;
411
412 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413 checkOffsets = FALSE;
414
415 do
416 {
417 end = nct_min(targ + gOutBufferSize, realBufferEnd);
418 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419
420 doFlush = (UBool)(sourceLimit == realSourceEnd);
421
422 if(targ == realBufferEnd) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424 return TC_FAIL;
425 }
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
427
428
429 status = U_ZERO_ERROR;
430
431 ucnv_fromUnicode (conv,
73c04bcf
A
432 &targ,
433 end,
b75a7d8f
A
434 &src,
435 sourceLimit,
436 checkOffsets ? offs : NULL,
437 doFlush, /* flush if we're at the end of the input data */
438 &status);
439 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440
441 if(U_FAILURE(status)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443 return TC_FAIL;
444 }
445
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen, targ-junkout);
448
729e4ab9 449 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
450 {
451 char junk[9999];
452 char offset_str[9999];
73c04bcf 453 char *ptr;
b75a7d8f
A
454
455 junk[0] = 0;
456 offset_str[0] = 0;
457 for(ptr = junkout;ptr<targ;ptr++) {
458 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460 }
461
462 log_verbose(junk);
463 printSeq((const uint8_t *)expect, expectLen);
464 if ( checkOffsets ) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str);
467 }
468 log_verbose("\n");
469 }
470 ucnv_close(conv);
471
472 if(expectLen != targ-junkout) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
4388f060 475 fprintf(stderr, "Got:\n");
73c04bcf 476 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
4388f060 477 fprintf(stderr, "Expected:\n");
b75a7d8f
A
478 printSeqErr((const unsigned char*)expect, expectLen);
479 return TC_MISMATCH;
480 }
481
482 if (checkOffsets && (expectOffsets != 0) ) {
483 log_verbose("comparing %d offsets..\n", targ-junkout);
484 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
73c04bcf 486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
487 log_err("\n");
488 log_err("Got : ");
489 for(p=junkout;p<targ;p++) {
490 log_err("%d,", junokout[p-junkout]);
491 }
492 log_err("\n");
493 log_err("Expected: ");
494 for(i=0; i<(targ-junkout); i++) {
495 log_err("%d,", expectOffsets[i]);
496 }
497 log_err("\n");
498 }
499 }
500
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout, expect, expectLen)) {
503 log_verbose("Matches!\n");
504 return TC_OK;
505 } else {
506 log_err("String does not match u->%s\n", gNuConvTestName);
507 printUSeqErr(source, sourceLen);
4388f060 508 fprintf(stderr, "Got:\n");
b75a7d8f 509 printSeqErr((const unsigned char *)junkout, expectLen);
4388f060 510 fprintf(stderr, "Expected:\n");
b75a7d8f
A
511 printSeqErr((const unsigned char *)expect, expectLen);
512
513 return TC_MISMATCH;
514 }
515}
516
517/* Note: This function uses global variables and it will not do offset
518checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
519static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521{
522 UErrorCode status = U_ZERO_ERROR;
523 UConverter *conv = 0;
524 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
525 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
526 const char *src;
527 const char *realSourceEnd;
528 const char *srcLimit;
b75a7d8f
A
529 UChar *p;
530 UChar *targ;
531 UChar *end;
532 int32_t *offs;
533 int i;
534 UBool checkOffsets = TRUE;
535
536 int32_t realBufferSize;
537 UChar *realBufferEnd;
538
539
540 for(i=0;i<NEW_MAX_BUFFER;i++)
541 junkout[i] = 0xFFFE;
542
543 for(i=0;i<NEW_MAX_BUFFER;i++)
544 junokout[i] = -1;
545
546 setNuConvTestName(codepage, "TO");
547
548 log_verbose("\n========= %s\n", gNuConvTestName);
549
550 conv = my_ucnv_open(codepage, &status);
551
552 if(U_FAILURE(status))
553 {
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555 return TC_FAIL;
556 }
557 if(useFallback){
558 ucnv_setFallback(conv,useFallback);
559 }
560 log_verbose("Converter opened..\n");
561
73c04bcf 562 src = (const char *)source;
b75a7d8f
A
563 targ = junkout;
564 offs = junokout;
565
2ca993e8 566 realBufferSize = UPRV_LENGTHOF(junkout);
b75a7d8f
A
567 realBufferEnd = junkout + realBufferSize;
568 realSourceEnd = src + sourcelen;
569
570 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
571 checkOffsets = FALSE;
572
573 do
574 {
575 end = nct_min( targ + gOutBufferSize, realBufferEnd);
576 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577
578 if(targ == realBufferEnd)
579 {
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581 return TC_FAIL;
582 }
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584
585 /* oldTarg = targ; */
586
587 status = U_ZERO_ERROR;
588
589 ucnv_toUnicode (conv,
590 &targ,
591 end,
73c04bcf
A
592 &src,
593 srcLimit,
b75a7d8f
A
594 checkOffsets ? offs : NULL,
595 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
596 &status);
597
598 /* offs += (targ-oldTarg); */
599
600 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601
602 if(U_FAILURE(status))
603 {
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605 return TC_FAIL;
606 }
607
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen, targ-junkout);
729e4ab9 610 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
611 {
612 char junk[9999];
613 char offset_str[9999];
614 UChar *ptr;
615
616 junk[0] = 0;
617 offset_str[0] = 0;
618
619 for(ptr = junkout;ptr<targ;ptr++)
620 {
621 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623 }
624
625 log_verbose(junk);
626 printUSeq(expect, expectlen);
627 if ( checkOffsets )
628 {
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str);
631 }
632 log_verbose("\n");
633 }
634 ucnv_close(conv);
635
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637
638 if (checkOffsets && (expectOffsets != 0))
639 {
640 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642 log_err("Got: ");
643 for(p=junkout;p<targ;p++) {
644 log_err("%d,", junokout[p-junkout]);
645 }
646 log_err("\n");
647 log_err("Expected: ");
648 for(i=0; i<(targ-junkout); i++) {
649 log_err("%d,", expectOffsets[i]);
650 }
651 log_err("\n");
652 log_err("output: ");
653 for(i=0; i<(targ-junkout); i++) {
654 log_err("%X,", junkout[i]);
655 }
656 log_err("\n");
657 log_err("input: ");
73c04bcf 658 for(i=0; i<(src-(const char *)source); i++) {
b75a7d8f
A
659 log_err("%X,", (unsigned char)source[i]);
660 }
661 log_err("\n");
662 }
663 }
664
665 if(!memcmp(junkout, expect, expectlen*2))
666 {
667 log_verbose("Matches!\n");
668 return TC_OK;
669 }
670 else
671 {
672 log_err("String does not match. %s\n", gNuConvTestName);
673 log_verbose("String does not match. %s\n", gNuConvTestName);
674 printf("\nGot:");
675 printUSeqErr(junkout, expectlen);
676 printf("\nExpected:");
677 printUSeqErr(expect, expectlen);
678 return TC_MISMATCH;
679 }
680}
681
682
683static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684{
685/** test chars #1 */
686 /* 1 2 3 1Han 2Han 3Han . */
73c04bcf 687 static const UChar sampleText[] =
729e4ab9
A
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
b75a7d8f
A
691
692
73c04bcf 693 static const uint8_t expectedUTF8[] =
729e4ab9 694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
73c04bcf 695 static const int32_t toUTF8Offs[] =
729e4ab9 696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
73c04bcf 697 static const int32_t fmUTF8Offs[] =
729e4ab9 698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
b75a7d8f 699
374ca955 700#ifdef U_ENABLE_GENERIC_ISO_2022
b75a7d8f 701 /* Same as UTF8, but with ^[%B preceeding */
73c04bcf 702 static const const uint8_t expectedISO2022[] =
b75a7d8f 703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
73c04bcf 704 static const int32_t toISO2022Offs[] =
b75a7d8f
A
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
73c04bcf 707 static const int32_t fmISO2022Offs[] =
b75a7d8f 708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
374ca955 709#endif
b75a7d8f
A
710
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
73c04bcf 712 static const uint8_t expectedIBM930[] =
729e4ab9 713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
73c04bcf 714 static const int32_t toIBM930Offs[] =
729e4ab9 715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
73c04bcf 716 static const int32_t fmIBM930Offs[] =
729e4ab9 717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
b75a7d8f
A
718
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
73c04bcf 720 static const uint8_t expectedIBM943[] =
729e4ab9 721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
73c04bcf 722 static const int32_t toIBM943Offs [] =
729e4ab9 723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
73c04bcf 724 static const int32_t fmIBM943Offs[] =
729e4ab9 725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
b75a7d8f
A
726
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
73c04bcf 728 static const uint8_t expectedIBM9027[] =
729e4ab9 729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
73c04bcf 730 static const int32_t toIBM9027Offs [] =
729e4ab9 731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
b75a7d8f
A
732
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
73c04bcf 734 static const uint8_t expectedIBM920[] =
729e4ab9 735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
73c04bcf 736 static const int32_t toIBM920Offs [] =
729e4ab9 737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
b75a7d8f
A
738
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
73c04bcf 740 static const uint8_t expectedISO88593[] =
729e4ab9 741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
73c04bcf 742 static const int32_t toISO88593Offs[] =
729e4ab9 743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
b75a7d8f 744
73c04bcf
A
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1[] =
729e4ab9 747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
73c04bcf 748 static const int32_t toLATIN1Offs[] =
729e4ab9 749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
b75a7d8f
A
750
751
752 /* etc */
73c04bcf 753 static const uint8_t expectedUTF16BE[] =
729e4ab9 754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
73c04bcf 755 static const int32_t toUTF16BEOffs[]=
729e4ab9 756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
73c04bcf 757 static const int32_t fmUTF16BEOffs[] =
729e4ab9 758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
b75a7d8f 759
73c04bcf 760 static const uint8_t expectedUTF16LE[] =
729e4ab9 761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
73c04bcf 762 static const int32_t toUTF16LEOffs[]=
729e4ab9 763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
73c04bcf 764 static const int32_t fmUTF16LEOffs[] =
729e4ab9 765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
b75a7d8f 766
73c04bcf 767 static const uint8_t expectedUTF32BE[] =
b75a7d8f
A
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
729e4ab9
A
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
73c04bcf 777 static const int32_t toUTF32BEOffs[]=
b75a7d8f
A
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
729e4ab9 786 0x08, 0x08, 0x08, 0x08,
b75a7d8f 787 0x08, 0x08, 0x08, 0x08 };
73c04bcf 788 static const int32_t fmUTF32BEOffs[] =
729e4ab9 789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
b75a7d8f 790
73c04bcf 791 static const uint8_t expectedUTF32LE[] =
b75a7d8f
A
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
729e4ab9
A
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
73c04bcf 801 static const int32_t toUTF32LEOffs[]=
b75a7d8f
A
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
729e4ab9 810 0x08, 0x08, 0x08, 0x08,
b75a7d8f 811 0x08, 0x08, 0x08, 0x08 };
73c04bcf 812 static const int32_t fmUTF32LEOffs[] =
729e4ab9 813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
b75a7d8f
A
814
815
816
817
818/** Test chars #2 **/
819
820 /* Sahha [health], slashed h's */
73c04bcf
A
821 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
b75a7d8f
A
823
824 /* LMBCS */
73c04bcf
A
825 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
b75a7d8f
A
829 /*********************************** START OF CODE finally *************/
830
73c04bcf
A
831 gInBufferSize = insize;
832 gOutBufferSize = outsize;
b75a7d8f 833
73c04bcf 834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
b75a7d8f
A
835
836
b75a7d8f 837 /*UTF-8*/
2ca993e8 838 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
839 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
840
841 log_verbose("Test surrogate behaviour for UTF8\n");
842 {
73c04bcf
A
843 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
b75a7d8f
A
845 0xf0, 0x90, 0x90, 0x81,
846 0xef, 0xbf, 0xbd
847 };
73c04bcf 848 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
2ca993e8 849 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
b75a7d8f
A
850 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
851
852
853 }
73c04bcf
A
854
855#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f 856 /*ISO-2022*/
2ca993e8 857 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f 858 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
374ca955 859#endif
73c04bcf 860
b75a7d8f 861 /*UTF16 LE*/
2ca993e8 862 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
863 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
864 /*UTF16 BE*/
2ca993e8 865 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
866 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
867 /*UTF32 LE*/
2ca993e8 868 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
869 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
870 /*UTF32 BE*/
2ca993e8 871 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f 872 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
73c04bcf 873
b75a7d8f 874 /*LATIN_1*/
2ca993e8 875 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f 876 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
73c04bcf
A
877
878#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 879 /*EBCDIC_STATEFUL*/
2ca993e8 880 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
881 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
882
2ca993e8 883 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
884 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
885
886 /*MBCS*/
887
2ca993e8 888 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
889 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
890 /*DBCS*/
2ca993e8 891 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
892 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
893 /*SBCS*/
2ca993e8 894 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f
A
895 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
896 /*SBCS*/
2ca993e8 897 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
b75a7d8f 898 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
73c04bcf 899#endif
b75a7d8f
A
900
901
902/****/
b75a7d8f 903
b75a7d8f
A
904 /*UTF-8*/
905 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
2ca993e8 906 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
73c04bcf 907#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f
A
908 /*ISO-2022*/
909 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
2ca993e8 910 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
374ca955 911#endif
73c04bcf 912
b75a7d8f
A
913 /*UTF16 LE*/
914 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
2ca993e8 915 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
b75a7d8f
A
916 /*UTF16 BE*/
917 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
2ca993e8 918 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
b75a7d8f
A
919 /*UTF32 LE*/
920 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
2ca993e8 921 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
b75a7d8f
A
922 /*UTF32 BE*/
923 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
2ca993e8 924 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
73c04bcf
A
925
926#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 927 /*EBCDIC_STATEFUL*/
729e4ab9 928 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
2ca993e8 929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
b75a7d8f 930 /*MBCS*/
729e4ab9 931 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
2ca993e8 932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
73c04bcf 933#endif
b75a7d8f
A
934
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
2ca993e8 937 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
b75a7d8f 938
73c04bcf 939#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 940 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
2ca993e8 941 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
b75a7d8f 942
2ca993e8 943 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
b75a7d8f
A
944 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
945
946 /*LMBCS*/
2ca993e8 947 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
b75a7d8f
A
948 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
949 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
2ca993e8 950 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
73c04bcf 951#endif
b75a7d8f
A
952
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954 {
955 /* encode directly set D and set O */
956 static const uint8_t utf7[] = {
957 /*
958 Hi Mom -+Jjo--!
959 A+ImIDkQ.
960 +-
4388f060 961 +ZeVnLIqe-
b75a7d8f
A
962 */
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965 0x2b, 0x2d,
4388f060 966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
b75a7d8f
A
967 };
968 static const UChar unicode[] = {
969 /*
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
972 +
973 [Japanese word "nihongo"]
974 */
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
977 0x2b,
978 0x65e5, 0x672c, 0x8a9e
979 };
980 static const int32_t toUnicodeOffsets[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982 15, 17, 19, 23,
983 24,
984 27, 29, 32
985 };
986 static const int32_t fromUnicodeOffsets[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
989 15, 15,
4388f060 990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
b75a7d8f
A
991 };
992
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted[] = {
995 /*
996 Hi Mom -+Jjo--+ACE-
997 A+ImIDkQ.
998 +-
4388f060 999 +ZeVnLIqe-
b75a7d8f
A
1000 */
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003 0x2b, 0x2d,
4388f060 1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
b75a7d8f
A
1005 };
1006 static const int32_t toUnicodeOffsetsR[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008 19, 21, 23, 27,
1009 28,
1010 31, 33, 36
1011 };
1012 static const int32_t fromUnicodeOffsetsR[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1015 15, 15,
4388f060 1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
b75a7d8f
A
1017 };
1018
2ca993e8 1019 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
b75a7d8f 1020
2ca993e8 1021 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
b75a7d8f 1022
2ca993e8 1023 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
b75a7d8f 1024
2ca993e8 1025 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
b75a7d8f
A
1026 }
1027
1028 /*
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1032 */
1033 {
1034 static const uint8_t imap[] = {
1035 /* Hi Mom -&Jjo--!
1036 A&ImIDkQ-.
1037 &-
1038 &ZeVnLIqe-
1039 \
1040 ~peter
1041 /mail
1042 /&ZeVnLIqe-
1043 /&U,BTFw-
1044 */
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047 0x26, 0x2d,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049 0x5c,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054 };
1055 static const UChar unicode[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1058 &
1059 [Japanese word "nihongo"]
1060 \
1061 ~peter
1062 /mail
1063 /<65e5, 672c, 8a9e>
1064 /<53f0, 5317>
1065 */
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1068 0x26,
1069 0x65e5, 0x672c, 0x8a9e,
1070 0x5c,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1075 };
1076 static const int32_t toUnicodeOffsets[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078 15, 17, 19, 24,
1079 25,
1080 28, 30, 33,
1081 37,
1082 38, 39, 40, 41, 42, 43,
1083 44, 45, 46, 47, 48,
1084 49, 51, 53, 56,
1085 60, 62, 64
1086 };
1087 static const int32_t fromUnicodeOffsets[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090 15, 15,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092 19,
1093 20, 21, 22, 23, 24, 25,
1094 26, 27, 28, 29, 30,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1097 };
1098
2ca993e8 1099 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
b75a7d8f 1100
2ca993e8 1101 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
b75a7d8f
A
1102 }
1103
1104 /* Test UTF-8 bad data handling*/
1105 {
1106 static const uint8_t utf8[]={
1107 0x61,
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1109 0x00,
1110 0x62,
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
0f5d89e8 1116 0xf4, 0x90, 0x80, 0x80, /* 110000 */
b75a7d8f
A
1117 0x02
1118 };
1119
1120 static const uint16_t utf8Expected[]={
1121 0x0061,
0f5d89e8 1122 0xfffd, 0xfffd, 0xfffd, 0xfffd,
b75a7d8f
A
1123 0x0000,
1124 0x0062,
0f5d89e8
A
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
b75a7d8f
A
1127 0xdbff, 0xdfff,
1128 0x07ff,
1129 0xfffd,
0f5d89e8 1130 0xfffd, 0xfffd, 0xfffd, 0xfffd,
b75a7d8f
A
1131 0x0002
1132 };
1133
1134 static const int32_t utf8Offsets[]={
0f5d89e8
A
1135 0,
1136 1, 2, 3, 4,
1137 5,
1138 6,
1139 7, 8, 9, 10, 11,
1140 12, 13, 14, 15, 16,
1141 17, 17,
1142 21,
1143 23,
1144 24, 25, 26, 27,
1145 28
b75a7d8f
A
1146 };
1147 testConvertToU(utf8, sizeof(utf8),
2ca993e8 1148 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
b75a7d8f
A
1149
1150 }
1151
1152 /* Test UTF-32BE bad data handling*/
1153 {
1154 static const uint8_t utf32[]={
1155 0x00, 0x00, 0x00, 0x61,
1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1158 0x00, 0x00, 0x00, 0x62,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1161 0x00, 0x00, 0x01, 0x62,
1162 0x00, 0x00, 0x02, 0x62
1163 };
b75a7d8f
A
1164 static const uint16_t utf32Expected[]={
1165 0x0061,
1166 0xfffd, /* 0x110000 out of range */
1167 0xDBFF, /* 0x10FFFF in range */
1168 0xDFFF,
1169 0x0062,
1170 0xfffd, /* 0xffffffff out of range */
1171 0xfffd, /* 0x7fffffff out of range */
1172 0x0162,
1173 0x0262
1174 };
b75a7d8f
A
1175 static const int32_t utf32Offsets[]={
1176 0, 4, 8, 8, 12, 16, 20, 24, 28
1177 };
73c04bcf
A
1178 static const uint8_t utf32ExpectedBack[]={
1179 0x00, 0x00, 0x00, 0x61,
1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1182 0x00, 0x00, 0x00, 0x62,
1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1185 0x00, 0x00, 0x01, 0x62,
1186 0x00, 0x00, 0x02, 0x62
1187 };
1188 static const int32_t utf32OffsetsBack[]={
1189 0,0,0,0,
1190 1,1,1,1,
1191 2,2,2,2,
1192 4,4,4,4,
1193 5,5,5,5,
1194 6,6,6,6,
1195 7,7,7,7,
1196 8,8,8,8
1197 };
1198
b75a7d8f 1199 testConvertToU(utf32, sizeof(utf32),
2ca993e8
A
1200 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1201 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
73c04bcf 1202 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
b75a7d8f
A
1203 }
1204
1205 /* Test UTF-32LE bad data handling*/
1206 {
1207 static const uint8_t utf32[]={
1208 0x61, 0x00, 0x00, 0x00,
1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1211 0x62, 0x00, 0x00, 0x00,
1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1214 0x62, 0x01, 0x00, 0x00,
1215 0x62, 0x02, 0x00, 0x00,
1216 };
1217
1218 static const uint16_t utf32Expected[]={
1219 0x0061,
1220 0xfffd, /* 0x110000 out of range */
1221 0xDBFF, /* 0x10FFFF in range */
1222 0xDFFF,
1223 0x0062,
1224 0xfffd, /* 0xffffffff out of range */
1225 0xfffd, /* 0x7fffffff out of range */
1226 0x0162,
1227 0x0262
1228 };
b75a7d8f
A
1229 static const int32_t utf32Offsets[]={
1230 0, 4, 8, 8, 12, 16, 20, 24, 28
1231 };
73c04bcf
A
1232 static const uint8_t utf32ExpectedBack[]={
1233 0x61, 0x00, 0x00, 0x00,
1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1236 0x62, 0x00, 0x00, 0x00,
1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1239 0x62, 0x01, 0x00, 0x00,
1240 0x62, 0x02, 0x00, 0x00
1241 };
1242 static const int32_t utf32OffsetsBack[]={
1243 0,0,0,0,
1244 1,1,1,1,
1245 2,2,2,2,
1246 4,4,4,4,
1247 5,5,5,5,
1248 6,6,6,6,
1249 7,7,7,7,
1250 8,8,8,8
1251 };
b75a7d8f 1252 testConvertToU(utf32, sizeof(utf32),
2ca993e8
A
1253 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1254 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
73c04bcf 1255 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
b75a7d8f
A
1256 }
1257}
1258
1259static void TestCoverageMBCS(){
1260#if 0
1261 UErrorCode status = U_ZERO_ERROR;
1262 const char *directory = loadTestData(&status);
1263 char* tdpath = NULL;
1264 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265 int len = strlen(directory);
1266 char* index=NULL;
1267
1268 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269 uprv_strcpy(saveDirectory,u_getDataDirectory());
1270 log_verbose("Retrieved data directory %s \n",saveDirectory);
1271 uprv_strcpy(tdpath,directory);
1272 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273
1274 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275 *(index+1)=0;
1276 }
1277 u_setDataDirectory(tdpath);
1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279#endif
1280
1281 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1282 which is test file for MBCS conversion with single-byte codepage data.*/
1283 {
1284
1285 /* MBCS with single byte codepage data test1.ucm*/
1286 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1289
b75a7d8f 1290 /*from Unicode*/
2ca993e8 1291 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
b75a7d8f 1292 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
b75a7d8f
A
1293 }
1294
1295 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1296 which is test file for MBCS conversion with three-byte codepage data.*/
1297 {
1298
1299 /* MBCS with three byte codepage data test3.ucm*/
1300 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1302 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303
1304 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307
1308 /*from Unicode*/
2ca993e8 1309 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
b75a7d8f
A
1310 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1311
1312 /*to Unicode*/
1313 testConvertToU(test3input, sizeof(test3input),
2ca993e8 1314 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
b75a7d8f
A
1315
1316 }
1317
1318 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1319 which is test file for MBCS conversion with four-byte codepage data.*/
1320 {
1321
1322 /* MBCS with three byte codepage data test4.ucm*/
1323 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1325 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326
1327 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330
1331 /*from Unicode*/
2ca993e8 1332 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
b75a7d8f
A
1333 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1334
1335 /*to Unicode*/
1336 testConvertToU(test4input, sizeof(test4input),
2ca993e8 1337 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
b75a7d8f
A
1338
1339 }
1340#if 0
1341 free(tdpath);
1342 /* restore the original data directory */
1343 log_verbose("Setting the data directory to %s \n", saveDirectory);
1344 u_setDataDirectory(saveDirectory);
1345 free(saveDirectory);
1346#endif
1347
1348}
1349
1350static void TestConverterType(const char *convName, UConverterType convType) {
1351 UConverter* myConverter;
1352 UErrorCode err = U_ZERO_ERROR;
1353
1354 myConverter = my_ucnv_open(convName, &err);
1355
1356 if (U_FAILURE(err)) {
1357 log_data_err("Failed to create an %s converter\n", convName);
1358 return;
1359 }
1360 else
1361 {
1362 if (ucnv_getType(myConverter)!=convType) {
1363 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1364 convName, convType);
1365 }
1366 else {
1367 log_verbose("ucnv_getType %s ok\n", convName);
1368 }
1369 }
1370 ucnv_close(myConverter);
1371}
1372
1373static void TestConverterTypesAndStarters()
1374{
73c04bcf 1375#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1376 UConverter* myConverter;
1377 UErrorCode err = U_ZERO_ERROR;
1378 UBool mystarters[256];
1379
1380/* const UBool expectedKSCstarters[256] = {
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1407
1408
1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1410
1411 myConverter = ucnv_open("ksc", &err);
1412 if (U_FAILURE(err)) {
1413 log_data_err("Failed to create an ibm-ksc converter\n");
1414 return;
1415 }
1416 else
1417 {
1418 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419 log_err("ucnv_getType Failed for ibm-949\n");
1420 else
1421 log_verbose("ucnv_getType ibm-949 ok\n");
1422
1423 if(myConverter!=NULL)
1424 ucnv_getStarters(myConverter, mystarters, &err);
1425
1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427 log_err("Failed ucnv_getStarters for ksc\n");
1428 else
1429 log_verbose("ucnv_getStarters ok\n");*/
1430
1431 }
1432 ucnv_close(myConverter);
1433
1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435 TestConverterType("ibm-878", UCNV_SBCS);
73c04bcf
A
1436#endif
1437
b75a7d8f 1438 TestConverterType("iso-8859-1", UCNV_LATIN_1);
73c04bcf 1439
b75a7d8f 1440 TestConverterType("ibm-1208", UCNV_UTF8);
73c04bcf 1441
b75a7d8f
A
1442 TestConverterType("utf-8", UCNV_UTF8);
1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
73c04bcf
A
1447
1448#if !UCONFIG_NO_LEGACY_CONVERSION
1449
1450#if defined(U_ENABLE_GENERIC_ISO_2022)
b75a7d8f 1451 TestConverterType("iso-2022", UCNV_ISO_2022);
374ca955 1452#endif
73c04bcf 1453
b75a7d8f 1454 TestConverterType("hz", UCNV_HZ);
73c04bcf
A
1455#endif
1456
b75a7d8f 1457 TestConverterType("scsu", UCNV_SCSU);
73c04bcf
A
1458
1459#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 1460 TestConverterType("x-iscii-de", UCNV_ISCII);
73c04bcf
A
1461#endif
1462
b75a7d8f
A
1463 TestConverterType("ascii", UCNV_US_ASCII);
1464 TestConverterType("utf-7", UCNV_UTF7);
1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466 TestConverterType("bocu-1", UCNV_BOCU1);
1467}
1468
1469static void
1470TestAmbiguousConverter(UConverter *cnv) {
46f4442e 1471 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
b75a7d8f
A
1472 UChar outUnicode[20]={ 0, 0, 0, 0 };
1473
1474 const char *s;
1475 UChar *u;
1476 UErrorCode errorCode;
1477 UBool isAmbiguous;
1478
46f4442e 1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
b75a7d8f
A
1480 errorCode=U_ZERO_ERROR;
1481 s=inBytes;
1482 u=outUnicode;
46f4442e 1483 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
b75a7d8f
A
1484 if(U_FAILURE(errorCode)) {
1485 /* we do not care about general failures in this test; the input may just not be mappable */
1486 return;
1487 }
1488
46f4442e
A
1489 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491 /* There are some encodings that are partially ASCII based,
1492 like the ISO-7 and GSM series of codepages, which we ignore. */
b75a7d8f
A
1493 return;
1494 }
1495
1496 isAmbiguous=ucnv_isAmbiguous(cnv);
1497
1498 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
46f4442e 1499 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
b75a7d8f 1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
46f4442e 1501 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
b75a7d8f
A
1502 return;
1503 }
1504
46f4442e 1505 if(outUnicode[2]!=0x5c) {
b75a7d8f
A
1506 /* needs fixup, fix it */
1507 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
46f4442e 1508 if(outUnicode[2]!=0x5c) {
b75a7d8f
A
1509 /* the fix failed */
1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511 return;
1512 }
1513 }
1514}
1515
1516static void TestAmbiguous()
1517{
1518 UErrorCode status = U_ZERO_ERROR;
1519 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
73c04bcf 1520 static const char target[] = {
b75a7d8f
A
1521 /* "\\usr\\local\\share\\data\\icutest.txt" */
1522 0x5c, 0x75, 0x73, 0x72,
1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525 0x5c, 0x64, 0x61, 0x74, 0x61,
1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527 0
1528 };
1529 UChar asciiResult[200], sjisResult[200];
729e4ab9 1530 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
b75a7d8f
A
1531 const char *name;
1532
1533 /* enumerate all converters */
1534 status=U_ZERO_ERROR;
1535 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536 cnv=ucnv_open(name, &status);
1537 if(U_SUCCESS(status)) {
1538 TestAmbiguousConverter(cnv);
1539 ucnv_close(cnv);
1540 } else {
1541 log_err("error: unable to open available converter \"%s\"\n", name);
1542 status=U_ZERO_ERROR;
1543 }
1544 }
1545
73c04bcf 1546#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1547 sjis_cnv = ucnv_open("ibm-943", &status);
1548 if (U_FAILURE(status))
1549 {
1550 log_data_err("Failed to create a SJIS converter\n");
1551 return;
1552 }
1553 ascii_cnv = ucnv_open("LATIN-1", &status);
1554 if (U_FAILURE(status))
1555 {
1556 log_data_err("Failed to create a LATIN-1 converter\n");
1557 ucnv_close(sjis_cnv);
1558 return;
1559 }
1560 /* convert target from SJIS to Unicode */
2ca993e8 1561 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
b75a7d8f
A
1562 if (U_FAILURE(status))
1563 {
1564 log_err("Failed to convert the SJIS string.\n");
1565 ucnv_close(sjis_cnv);
1566 ucnv_close(ascii_cnv);
1567 return;
1568 }
1569 /* convert target from Latin-1 to Unicode */
2ca993e8 1570 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
b75a7d8f
A
1571 if (U_FAILURE(status))
1572 {
1573 log_err("Failed to convert the Latin-1 string.\n");
b75a7d8f
A
1574 ucnv_close(sjis_cnv);
1575 ucnv_close(ascii_cnv);
1576 return;
1577 }
1578 if (!ucnv_isAmbiguous(sjis_cnv))
1579 {
1580 log_err("SJIS converter should contain ambiguous character mappings.\n");
b75a7d8f
A
1581 ucnv_close(sjis_cnv);
1582 ucnv_close(ascii_cnv);
1583 return;
1584 }
1585 if (u_strcmp(sjisResult, asciiResult) == 0)
1586 {
1587 log_err("File separators for SJIS don't need to be fixed.\n");
1588 }
1589 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1590 if (u_strcmp(sjisResult, asciiResult) != 0)
1591 {
1592 log_err("Fixing file separator for SJIS failed.\n");
1593 }
1594 ucnv_close(sjis_cnv);
1595 ucnv_close(ascii_cnv);
73c04bcf 1596#endif
b75a7d8f
A
1597}
1598
1599static void
1600TestSignatureDetection(){
1601 /* with null terminated strings */
1602 {
1603 static const char* data[] = {
1604 "\xFE\xFF\x00\x00", /* UTF-16BE */
1605 "\xFF\xFE\x00\x00", /* UTF-16LE */
1606 "\xEF\xBB\xBF\x00", /* UTF-8 */
1607 "\x0E\xFE\xFF\x00", /* SCSU */
1608
1609 "\xFE\xFF", /* UTF-16BE */
1610 "\xFF\xFE", /* UTF-16LE */
1611 "\xEF\xBB\xBF", /* UTF-8 */
1612 "\x0E\xFE\xFF", /* SCSU */
1613
1614 "\xFE\xFF\x41\x42", /* UTF-16BE */
1615 "\xFF\xFE\x41\x41", /* UTF-16LE */
1616 "\xEF\xBB\xBF\x41", /* UTF-8 */
1617 "\x0E\xFE\xFF\x41", /* SCSU */
1618
1619 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1620 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1621 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1622 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
374ca955
A
1623 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1624
1625 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
b75a7d8f
A
1626 };
1627 static const char* expected[] = {
1628 "UTF-16BE",
1629 "UTF-16LE",
1630 "UTF-8",
1631 "SCSU",
1632
1633 "UTF-16BE",
1634 "UTF-16LE",
1635 "UTF-8",
1636 "SCSU",
1637
1638 "UTF-16BE",
1639 "UTF-16LE",
1640 "UTF-8",
1641 "SCSU",
1642
1643 "UTF-7",
1644 "UTF-7",
1645 "UTF-7",
1646 "UTF-7",
374ca955
A
1647 "UTF-7",
1648 "UTF-EBCDIC"
b75a7d8f
A
1649 };
1650 static const int32_t expectedLength[] ={
1651 2,
1652 2,
1653 3,
1654 3,
1655
1656 2,
1657 2,
1658 3,
1659 3,
1660
1661 2,
1662 2,
1663 3,
1664 3,
1665
1666 5,
1667 4,
1668 4,
1669 4,
374ca955 1670 4,
b75a7d8f
A
1671 4
1672 };
1673 int i=0;
1674 UErrorCode err;
1675 int32_t signatureLength = -1;
1676 const char* source = NULL;
1677 const char* enc = NULL;
2ca993e8 1678 for( ; i<UPRV_LENGTHOF(data); i++){
b75a7d8f
A
1679 err = U_ZERO_ERROR;
1680 source = data[i];
1681 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1682 if(U_FAILURE(err)){
1683 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1684 continue;
1685 }
1686 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1687 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1688 continue;
1689 }
1690 if(signatureLength != expectedLength[i]){
1691 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1692 }
1693 }
1694 }
1695 {
1696 static const char* data[] = {
1697 "\xFE\xFF\x00", /* UTF-16BE */
1698 "\xFF\xFE\x00", /* UTF-16LE */
1699 "\xEF\xBB\xBF\x00", /* UTF-8 */
1700 "\x0E\xFE\xFF\x00", /* SCSU */
1701 "\x00\x00\xFE\xFF", /* UTF-32BE */
1702 "\xFF\xFE\x00\x00", /* UTF-32LE */
1703 "\xFE\xFF", /* UTF-16BE */
1704 "\xFF\xFE", /* UTF-16LE */
1705 "\xEF\xBB\xBF", /* UTF-8 */
1706 "\x0E\xFE\xFF", /* SCSU */
1707 "\x00\x00\xFE\xFF", /* UTF-32BE */
1708 "\xFF\xFE\x00\x00", /* UTF-32LE */
1709 "\xFE\xFF\x41\x42", /* UTF-16BE */
1710 "\xFF\xFE\x41\x41", /* UTF-16LE */
1711 "\xEF\xBB\xBF\x41", /* UTF-8 */
1712 "\x0E\xFE\xFF\x41", /* SCSU */
1713 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1714 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1715 "\xFB\xEE\x28", /* BOCU-1 */
1716 "\xFF\x41\x42" /* NULL */
1717 };
1718 static const int len[] = {
1719 3,
1720 3,
1721 4,
1722 4,
1723 4,
1724 4,
1725 2,
1726 2,
1727 3,
1728 3,
1729 4,
1730 4,
1731 4,
1732 4,
1733 4,
1734 4,
1735 5,
1736 5,
1737 3,
1738 3
1739 };
1740
1741 static const char* expected[] = {
1742 "UTF-16BE",
1743 "UTF-16LE",
1744 "UTF-8",
1745 "SCSU",
1746 "UTF-32BE",
1747 "UTF-32LE",
1748 "UTF-16BE",
1749 "UTF-16LE",
1750 "UTF-8",
1751 "SCSU",
1752 "UTF-32BE",
1753 "UTF-32LE",
1754 "UTF-16BE",
1755 "UTF-16LE",
1756 "UTF-8",
1757 "SCSU",
1758 "UTF-32BE",
1759 "UTF-32LE",
1760 "BOCU-1",
1761 NULL
1762 };
1763 static const int32_t expectedLength[] ={
1764 2,
1765 2,
1766 3,
1767 3,
1768 4,
1769 4,
1770 2,
1771 2,
1772 3,
1773 3,
1774 4,
1775 4,
1776 2,
1777 2,
1778 3,
1779 3,
1780 4,
1781 4,
1782 3,
1783 0
1784 };
1785 int i=0;
1786 UErrorCode err;
1787 int32_t signatureLength = -1;
1788 int32_t sourceLength=-1;
1789 const char* source = NULL;
1790 const char* enc = NULL;
2ca993e8 1791 for( ; i<UPRV_LENGTHOF(data); i++){
b75a7d8f
A
1792 err = U_ZERO_ERROR;
1793 source = data[i];
1794 sourceLength = len[i];
1795 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1796 if(U_FAILURE(err)){
1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1798 continue;
1799 }
1800 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1801 if(expected[i] !=NULL){
1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1803 continue;
1804 }
1805 }
1806 if(signatureLength != expectedLength[i]){
1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1808 }
1809 }
1810 }
1811}
1812
729e4ab9 1813static void TestUTF7() {
b75a7d8f
A
1814 /* test input */
1815 static const uint8_t in[]={
1816 /* H - +Jjo- - ! +- +2AHcAQ */
1817 0x48,
1818 0x2d,
1819 0x2b, 0x4a, 0x6a, 0x6f,
1820 0x2d, 0x2d,
1821 0x21,
1822 0x2b, 0x2d,
1823 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1824 };
1825
1826 /* expected test results */
374ca955 1827 static const int32_t results[]={
b75a7d8f
A
1828 /* number of bytes read, code point */
1829 1, 0x48,
1830 1, 0x2d,
1831 4, 0x263a, /* <WHITE SMILING FACE> */
1832 2, 0x2d,
1833 1, 0x21,
1834 2, 0x2b,
1835 7, 0x10401
1836 };
1837
1838 const char *cnvName;
1839 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1840 UErrorCode errorCode=U_ZERO_ERROR;
1841 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1842 if(U_FAILURE(errorCode)) {
b331163b 1843 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
1844 return;
1845 }
1846 TestNextUChar(cnv, source, limit, results, "UTF-7");
1847 /* Test the condition when source >= sourceLimit */
1848 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1849 cnvName = ucnv_getName(cnv, &errorCode);
1850 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1851 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1852 }
1853 ucnv_close(cnv);
1854}
1855
729e4ab9 1856static void TestIMAP() {
b75a7d8f
A
1857 /* test input */
1858 static const uint8_t in[]={
1859 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1860 0x48,
1861 0x2d,
1862 0x26, 0x4a, 0x6a, 0x6f,
1863 0x2d, 0x2d,
1864 0x21,
1865 0x26, 0x2d,
1866 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1867 };
1868
1869 /* expected test results */
374ca955 1870 static const int32_t results[]={
b75a7d8f
A
1871 /* number of bytes read, code point */
1872 1, 0x48,
1873 1, 0x2d,
1874 4, 0x263a, /* <WHITE SMILING FACE> */
1875 2, 0x2d,
1876 1, 0x21,
1877 2, 0x26,
1878 7, 0x10401
1879 };
1880
1881 const char *cnvName;
1882 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1883 UErrorCode errorCode=U_ZERO_ERROR;
1884 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1885 if(U_FAILURE(errorCode)) {
b331163b 1886 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
1887 return;
1888 }
1889 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1890 /* Test the condition when source >= sourceLimit */
1891 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1892 cnvName = ucnv_getName(cnv, &errorCode);
1893 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1894 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1895 }
1896 ucnv_close(cnv);
1897}
1898
729e4ab9 1899static void TestUTF8() {
b75a7d8f
A
1900 /* test input */
1901 static const uint8_t in[]={
1902 0x61,
1903 0xc2, 0x80,
1904 0xe0, 0xa0, 0x80,
1905 0xf0, 0x90, 0x80, 0x80,
1906 0xf4, 0x84, 0x8c, 0xa1,
1907 0xf0, 0x90, 0x90, 0x81
1908 };
1909
1910 /* expected test results */
374ca955 1911 static const int32_t results[]={
b75a7d8f
A
1912 /* number of bytes read, code point */
1913 1, 0x61,
1914 2, 0x80,
1915 3, 0x800,
1916 4, 0x10000,
1917 4, 0x104321,
1918 4, 0x10401
1919 };
1920
1921 /* error test input */
1922 static const uint8_t in2[]={
1923 0x61,
1924 0xc0, 0x80, /* illegal non-shortest form */
1925 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1926 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1927 0xc0, 0xc0, /* illegal trail byte */
1928 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1929 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1930 0xfe, /* illegal byte altogether */
1931 0x62
1932 };
1933
1934 /* expected error test results */
374ca955 1935 static const int32_t results2[]={
b75a7d8f
A
1936 /* number of bytes read, code point */
1937 1, 0x61,
1938 22, 0x62
1939 };
1940
1941 UConverterToUCallback cb;
1942 const void *p;
1943
1944 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1945 UErrorCode errorCode=U_ZERO_ERROR;
1946 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1947 if(U_FAILURE(errorCode)) {
1948 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1949 return;
1950 }
1951 TestNextUChar(cnv, source, limit, results, "UTF-8");
1952 /* Test the condition when source >= sourceLimit */
1953 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1954
1955 /* test error behavior with a skip callback */
1956 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1957 source=(const char *)in2;
1958 limit=(const char *)(in2+sizeof(in2));
1959 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1960
1961 ucnv_close(cnv);
1962}
1963
729e4ab9 1964static void TestCESU8() {
b75a7d8f
A
1965 /* test input */
1966 static const uint8_t in[]={
1967 0x61,
1968 0xc2, 0x80,
1969 0xe0, 0xa0, 0x80,
1970 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1971 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1972 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1973 0xef, 0xbf, 0xbc
1974 };
1975
1976 /* expected test results */
374ca955 1977 static const int32_t results[]={
b75a7d8f
A
1978 /* number of bytes read, code point */
1979 1, 0x61,
1980 2, 0x80,
1981 3, 0x800,
1982 6, 0x10000,
1983 3, 0xdc01,
374ca955
A
1984 -1,0xd802, /* may read 3 or 6 bytes */
1985 -1,0x10ffff,/* may read 0 or 3 bytes */
b75a7d8f
A
1986 3, 0xfffc
1987 };
1988
1989 /* error test input */
1990 static const uint8_t in2[]={
1991 0x61,
1992 0xc0, 0x80, /* illegal non-shortest form */
1993 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1994 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1995 0xc0, 0xc0, /* illegal trail byte */
1996 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1997 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1998 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
1999 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2000 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2001 0xfe, /* illegal byte altogether */
2002 0x62
2003 };
2004
2005 /* expected error test results */
374ca955 2006 static const int32_t results2[]={
b75a7d8f
A
2007 /* number of bytes read, code point */
2008 1, 0x61,
2009 34, 0x62
2010 };
2011
2012 UConverterToUCallback cb;
2013 const void *p;
2014
2015 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2016 UErrorCode errorCode=U_ZERO_ERROR;
2017 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2018 if(U_FAILURE(errorCode)) {
b331163b 2019 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2020 return;
2021 }
2022 TestNextUChar(cnv, source, limit, results, "CESU-8");
2023 /* Test the condition when source >= sourceLimit */
2024 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2025
2026 /* test error behavior with a skip callback */
2027 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2028 source=(const char *)in2;
2029 limit=(const char *)(in2+sizeof(in2));
2030 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2031
2032 ucnv_close(cnv);
2033}
2034
729e4ab9 2035static void TestUTF16() {
b75a7d8f
A
2036 /* test input */
2037 static const uint8_t in1[]={
2038 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2039 };
2040 static const uint8_t in2[]={
2041 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2042 };
2043 static const uint8_t in3[]={
2044 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2045 };
2046
2047 /* expected test results */
374ca955 2048 static const int32_t results1[]={
b75a7d8f
A
2049 /* number of bytes read, code point */
2050 4, 0x4e00,
2051 2, 0xfeff
2052 };
374ca955 2053 static const int32_t results2[]={
b75a7d8f
A
2054 /* number of bytes read, code point */
2055 4, 0x004e,
2056 2, 0xfffe
2057 };
374ca955 2058 static const int32_t results3[]={
b75a7d8f
A
2059 /* number of bytes read, code point */
2060 2, 0xfefe,
2061 2, 0x4e00,
2062 2, 0xfeff,
2063 4, 0x20001
2064 };
2065
2066 const char *source, *limit;
2067
2068 UErrorCode errorCode=U_ZERO_ERROR;
2069 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2070 if(U_FAILURE(errorCode)) {
2071 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2072 return;
2073 }
2074
2075 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2076 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2077
2078 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2079 ucnv_resetToUnicode(cnv);
2080 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2081
2082 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2083 ucnv_resetToUnicode(cnv);
2084 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2085
2086 /* Test the condition when source >= sourceLimit */
2087 ucnv_resetToUnicode(cnv);
2088 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2089
2090 ucnv_close(cnv);
2091}
2092
729e4ab9 2093static void TestUTF16BE() {
b75a7d8f
A
2094 /* test input */
2095 static const uint8_t in[]={
2096 0x00, 0x61,
2097 0x00, 0xc0,
2098 0x00, 0x31,
2099 0x00, 0xf4,
2100 0xce, 0xfe,
2101 0xd8, 0x01, 0xdc, 0x01
2102 };
2103
2104 /* expected test results */
374ca955 2105 static const int32_t results[]={
b75a7d8f
A
2106 /* number of bytes read, code point */
2107 2, 0x61,
2108 2, 0xc0,
2109 2, 0x31,
2110 2, 0xf4,
2111 2, 0xcefe,
2112 4, 0x10401
2113 };
2114
2115 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2116 UErrorCode errorCode=U_ZERO_ERROR;
2117 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2118 if(U_FAILURE(errorCode)) {
2119 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2120 return;
2121 }
2122 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2123 /* Test the condition when source >= sourceLimit */
2124 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2125 /*Test for the condition where there is an invalid character*/
2126 {
2127 static const uint8_t source2[]={0x61};
374ca955 2128 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2129 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2130 }
374ca955
A
2131#if 0
2132 /*
2133 * Test disabled because currently the UTF-16BE/LE converters are supposed
2134 * to not set errors for unpaired surrogates.
2135 * This may change with
2136 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2137 */
2138
b75a7d8f
A
2139 /*Test for the condition where there is a surrogate pair*/
2140 {
2141 const uint8_t source2[]={0xd8, 0x01};
2142 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2143 }
374ca955 2144#endif
b75a7d8f
A
2145 ucnv_close(cnv);
2146}
2147
2148static void
2149TestUTF16LE() {
2150 /* test input */
2151 static const uint8_t in[]={
2152 0x61, 0x00,
2153 0x31, 0x00,
2154 0x4e, 0x2e,
2155 0x4e, 0x00,
2156 0x01, 0xd8, 0x01, 0xdc
2157 };
2158
2159 /* expected test results */
374ca955 2160 static const int32_t results[]={
b75a7d8f
A
2161 /* number of bytes read, code point */
2162 2, 0x61,
2163 2, 0x31,
2164 2, 0x2e4e,
2165 2, 0x4e,
2166 4, 0x10401
2167 };
2168
2169 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2170 UErrorCode errorCode=U_ZERO_ERROR;
2171 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2172 if(U_FAILURE(errorCode)) {
2173 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2174 return;
2175 }
2176 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2177 /* Test the condition when source >= sourceLimit */
2178 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2179 /*Test for the condition where there is an invalid character*/
2180 {
2181 static const uint8_t source2[]={0x61};
374ca955 2182 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2183 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2184 }
374ca955
A
2185#if 0
2186 /*
2187 * Test disabled because currently the UTF-16BE/LE converters are supposed
2188 * to not set errors for unpaired surrogates.
2189 * This may change with
2190 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2191 */
2192
b75a7d8f
A
2193 /*Test for the condition where there is a surrogate character*/
2194 {
2195 static const uint8_t source2[]={0x01, 0xd8};
2196 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2197 }
374ca955 2198#endif
b75a7d8f
A
2199
2200 ucnv_close(cnv);
2201}
2202
729e4ab9 2203static void TestUTF32() {
b75a7d8f
A
2204 /* test input */
2205 static const uint8_t in1[]={
2206 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2207 };
2208 static const uint8_t in2[]={
2209 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2210 };
2211 static const uint8_t in3[]={
2212 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2213 };
2214
2215 /* expected test results */
374ca955 2216 static const int32_t results1[]={
b75a7d8f
A
2217 /* number of bytes read, code point */
2218 8, 0x100f00,
2219 4, 0xfeff
2220 };
374ca955 2221 static const int32_t results2[]={
b75a7d8f
A
2222 /* number of bytes read, code point */
2223 8, 0x0f1000,
2224 4, 0xfffe
2225 };
374ca955 2226 static const int32_t results3[]={
b75a7d8f
A
2227 /* number of bytes read, code point */
2228 4, 0xfefe,
2229 4, 0x100f00,
374ca955
A
2230 4, 0xfffd, /* unmatched surrogate */
2231 4, 0xfffd /* unmatched surrogate */
b75a7d8f
A
2232 };
2233
2234 const char *source, *limit;
2235
2236 UErrorCode errorCode=U_ZERO_ERROR;
2237 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2238 if(U_FAILURE(errorCode)) {
b331163b 2239 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2240 return;
2241 }
2242
2243 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2244 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2245
2246 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2247 ucnv_resetToUnicode(cnv);
2248 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2249
2250 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2251 ucnv_resetToUnicode(cnv);
2252 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2253
2254 /* Test the condition when source >= sourceLimit */
2255 ucnv_resetToUnicode(cnv);
2256 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2257
2258 ucnv_close(cnv);
2259}
2260
2261static void
2262TestUTF32BE() {
2263 /* test input */
2264 static const uint8_t in[]={
2265 0x00, 0x00, 0x00, 0x61,
374ca955 2266 0x00, 0x00, 0x30, 0x61,
b75a7d8f
A
2267 0x00, 0x00, 0xdc, 0x00,
2268 0x00, 0x00, 0xd8, 0x00,
2269 0x00, 0x00, 0xdf, 0xff,
374ca955 2270 0x00, 0x00, 0xff, 0xfe,
b75a7d8f
A
2271 0x00, 0x10, 0xab, 0xcd,
2272 0x00, 0x10, 0xff, 0xff
2273 };
2274
2275 /* expected test results */
374ca955 2276 static const int32_t results[]={
b75a7d8f
A
2277 /* number of bytes read, code point */
2278 4, 0x61,
374ca955
A
2279 4, 0x3061,
2280 4, 0xfffd,
2281 4, 0xfffd,
b75a7d8f 2282 4, 0xfffd,
374ca955 2283 4, 0xfffe,
b75a7d8f
A
2284 4, 0x10abcd,
2285 4, 0x10ffff
2286 };
2287
2288 /* error test input */
2289 static const uint8_t in2[]={
2290 0x00, 0x00, 0x00, 0x61,
2291 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2292 0x00, 0x00, 0x00, 0x62,
2293 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2294 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2295 0x00, 0x00, 0x01, 0x62,
2296 0x00, 0x00, 0x02, 0x62
2297 };
2298
2299 /* expected error test results */
374ca955 2300 static const int32_t results2[]={
b75a7d8f
A
2301 /* number of bytes read, code point */
2302 4, 0x61,
2303 8, 0x62,
2304 12, 0x162,
2305 4, 0x262
2306 };
2307
2308 UConverterToUCallback cb;
2309 const void *p;
2310
2311 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2312 UErrorCode errorCode=U_ZERO_ERROR;
2313 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2314 if(U_FAILURE(errorCode)) {
b331163b 2315 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2316 return;
2317 }
2318 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2319
2320 /* Test the condition when source >= sourceLimit */
2321 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2322
2323 /* test error behavior with a skip callback */
2324 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2325 source=(const char *)in2;
2326 limit=(const char *)(in2+sizeof(in2));
2327 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2328
2329 ucnv_close(cnv);
2330}
2331
2332static void
2333TestUTF32LE() {
2334 /* test input */
2335 static const uint8_t in[]={
2336 0x61, 0x00, 0x00, 0x00,
374ca955 2337 0x61, 0x30, 0x00, 0x00,
b75a7d8f
A
2338 0x00, 0xdc, 0x00, 0x00,
2339 0x00, 0xd8, 0x00, 0x00,
2340 0xff, 0xdf, 0x00, 0x00,
374ca955 2341 0xfe, 0xff, 0x00, 0x00,
b75a7d8f
A
2342 0xcd, 0xab, 0x10, 0x00,
2343 0xff, 0xff, 0x10, 0x00
2344 };
2345
2346 /* expected test results */
374ca955 2347 static const int32_t results[]={
b75a7d8f
A
2348 /* number of bytes read, code point */
2349 4, 0x61,
374ca955 2350 4, 0x3061,
b75a7d8f 2351 4, 0xfffd,
374ca955
A
2352 4, 0xfffd,
2353 4, 0xfffd,
2354 4, 0xfffe,
b75a7d8f
A
2355 4, 0x10abcd,
2356 4, 0x10ffff
2357 };
2358
2359 /* error test input */
2360 static const uint8_t in2[]={
2361 0x61, 0x00, 0x00, 0x00,
2362 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2363 0x62, 0x00, 0x00, 0x00,
2364 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2365 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2366 0x62, 0x01, 0x00, 0x00,
2367 0x62, 0x02, 0x00, 0x00,
2368 };
2369
2370 /* expected error test results */
374ca955 2371 static const int32_t results2[]={
b75a7d8f
A
2372 /* number of bytes read, code point */
2373 4, 0x61,
2374 8, 0x62,
2375 12, 0x162,
2376 4, 0x262,
2377 };
2378
2379 UConverterToUCallback cb;
2380 const void *p;
2381
2382 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2383 UErrorCode errorCode=U_ZERO_ERROR;
2384 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2385 if(U_FAILURE(errorCode)) {
b331163b 2386 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2387 return;
2388 }
2389 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2390
2391 /* Test the condition when source >= sourceLimit */
2392 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2393
2394 /* test error behavior with a skip callback */
2395 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2396 source=(const char *)in2;
2397 limit=(const char *)(in2+sizeof(in2));
2398 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2399
2400 ucnv_close(cnv);
2401}
2402
2403static void
2404TestLATIN1() {
2405 /* test input */
2406 static const uint8_t in[]={
2407 0x61,
2408 0x31,
2409 0x32,
2410 0xc0,
2411 0xf0,
2412 0xf4,
2413 };
2414
2415 /* expected test results */
374ca955 2416 static const int32_t results[]={
b75a7d8f
A
2417 /* number of bytes read, code point */
2418 1, 0x61,
2419 1, 0x31,
2420 1, 0x32,
2421 1, 0xc0,
2422 1, 0xf0,
2423 1, 0xf4,
2424 };
2425 static const uint16_t in1[] = {
2426 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2427 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2428 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2429 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2430 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2431 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2432 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2433 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2434 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2435 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2436 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2437 0xcb, 0x82
2438 };
2439 static const uint8_t out1[] = {
2440 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2441 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2442 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2443 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2444 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2445 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2446 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2447 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2448 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2449 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2450 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2451 0xcb, 0x82
2452 };
2453 static const uint16_t in2[]={
2454 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2455 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2456 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2457 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2458 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2459 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2460 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2461 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2462 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2463 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2464 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2465 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2466 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2467 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2468 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2471 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2472 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2473 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2474 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2475 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2476 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2477 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2478 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2479 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2480 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2481 0x37, 0x20, 0x2A, 0x2F,
2482 };
2483 static const unsigned char out2[]={
2484 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2485 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2486 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2487 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2488 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2489 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2490 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2491 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2492 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2493 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2494 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2495 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2496 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2497 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2498 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2499 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2500 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2501 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2502 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2503 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2504 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2505 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2506 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2507 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2508 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2509 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2510 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2511 0x37, 0x20, 0x2A, 0x2F,
2512 };
2513 const char *source=(const char *)in;
2514 const char *limit=(const char *)in+sizeof(in);
2515
2516 UErrorCode errorCode=U_ZERO_ERROR;
2517 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2518 if(U_FAILURE(errorCode)) {
729e4ab9 2519 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2520 return;
2521 }
2522 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2523 /* Test the condition when source >= sourceLimit */
2524 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2525 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2526 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2527
2528 ucnv_close(cnv);
2529}
2530
2531static void
2532TestSBCS() {
2533 /* test input */
2534 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2535 /* expected test results */
374ca955 2536 static const int32_t results[]={
b75a7d8f
A
2537 /* number of bytes read, code point */
2538 1, 0x61,
2539 1, 0xbf,
2540 1, 0xc4,
2541 1, 0x2021,
2542 1, 0xf8ff,
2543 1, 0x00d9
2544 };
2545
2546 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2547 UErrorCode errorCode=U_ZERO_ERROR;
374ca955 2548 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
b75a7d8f 2549 if(U_FAILURE(errorCode)) {
374ca955 2550 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
2551 return;
2552 }
374ca955 2553 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
b75a7d8f
A
2554 /* Test the condition when source >= sourceLimit */
2555 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2556 /*Test for Illegal character */ /*
2557 {
2558 static const uint8_t input1[]={ 0xA1 };
2559 const char* illegalsource=(const char*)input1;
2560 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2561 }
2562 */
2563 ucnv_close(cnv);
2564}
2565
2566static void
2567TestDBCS() {
2568 /* test input */
2569 static const uint8_t in[]={
2570 0x44, 0x6a,
2571 0xc4, 0x9c,
2572 0x7a, 0x74,
2573 0x46, 0xab,
2574 0x42, 0x5b,
2575
2576 };
2577
2578 /* expected test results */
374ca955 2579 static const int32_t results[]={
b75a7d8f
A
2580 /* number of bytes read, code point */
2581 2, 0x00a7,
2582 2, 0xe1d2,
2583 2, 0x6962,
2584 2, 0xf842,
2585 2, 0xffe5,
2586 };
2587
2588 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2589 UErrorCode errorCode=U_ZERO_ERROR;
2590
2591 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2592 if(U_FAILURE(errorCode)) {
2593 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2594 return;
2595 }
2596 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2597 /* Test the condition when source >= sourceLimit */
2598 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
b75a7d8f
A
2599 /*Test for the condition where there is an invalid character*/
2600 {
2601 static const uint8_t source2[]={0x1a, 0x1b};
2602 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2603 }
374ca955
A
2604 /*Test for the condition where we have a truncated char*/
2605 {
2606 static const uint8_t source1[]={0xc4};
2607 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2608 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2609 }
b75a7d8f
A
2610 ucnv_close(cnv);
2611}
2612
2613static void
2614TestMBCS() {
2615 /* test input */
2616 static const uint8_t in[]={
2617 0x01,
2618 0xa6, 0xa3,
2619 0x00,
2620 0xa6, 0xa1,
2621 0x08,
2622 0xc2, 0x76,
2623 0xc2, 0x78,
2624
2625 };
2626
2627 /* expected test results */
374ca955 2628 static const int32_t results[]={
b75a7d8f
A
2629 /* number of bytes read, code point */
2630 1, 0x0001,
2631 2, 0x250c,
2632 1, 0x0000,
2633 2, 0x2500,
2634 1, 0x0008,
2635 2, 0xd60c,
2636 2, 0xd60e,
2637 };
2638
2639 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2640 UErrorCode errorCode=U_ZERO_ERROR;
2641
2642 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2643 if(U_FAILURE(errorCode)) {
2644 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2645 return;
2646 }
2647 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2648 /* Test the condition when source >= sourceLimit */
2649 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
b75a7d8f
A
2650 /*Test for the condition where there is an invalid character*/
2651 {
fd0068a8 2652 static const uint8_t source2[]={0xa1, 0x80};
b75a7d8f
A
2653 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2654 }
374ca955
A
2655 /*Test for the condition where we have a truncated char*/
2656 {
2657 static const uint8_t source1[]={0xc4};
2658 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2659 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2660 }
b75a7d8f
A
2661 ucnv_close(cnv);
2662
2663}
2664
729e4ab9
A
2665#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2666static void
2667TestICCRunout() {
2668/* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2669
2670 const char *cnvName = "ibm-1363";
2671 UErrorCode status = U_ZERO_ERROR;
2672 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2673 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2674 const char *source = sourceData;
2675 const char *sourceLim = sourceData+sizeof(sourceData);
2676 UChar c1, c2, c3;
2677 UConverter *cnv=ucnv_open(cnvName, &status);
2678 if(U_FAILURE(status)) {
2679 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2680 return;
2681 }
2682
2683#if 0
2684 {
2685 UChar targetBuf[256];
2686 UChar *target = targetBuf;
2687 UChar *targetLim = target+256;
2688 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2689
2690 log_info("After convert: target@%d, source@%d, status%s\n",
2691 target-targetBuf, source-sourceData, u_errorName(status));
2692
2693 if(U_FAILURE(status)) {
2694 log_err("Failed to convert: %s\n", u_errorName(status));
2695 } else {
2696
2697 }
2698 }
2699#endif
2700
2701 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2702 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2703
2704 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2705 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2706
2707 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2708 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2709
2710 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2711 log_verbose("OK\n");
2712 } else {
2713 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2714 }
2715
2716 ucnv_close(cnv);
2717
2718}
2719#endif
2720
374ca955
A
2721#ifdef U_ENABLE_GENERIC_ISO_2022
2722
b75a7d8f
A
2723static void
2724TestISO_2022() {
2725 /* test input */
2726 static const uint8_t in[]={
374ca955
A
2727 0x1b, 0x25, 0x42,
2728 0x31,
b75a7d8f
A
2729 0x32,
2730 0x61,
2731 0xc2, 0x80,
2732 0xe0, 0xa0, 0x80,
2733 0xf0, 0x90, 0x80, 0x80
2734 };
2735
2736
2737
2738 /* expected test results */
374ca955 2739 static const int32_t results[]={
b75a7d8f 2740 /* number of bytes read, code point */
374ca955 2741 4, 0x0031, /* 4 bytes including the escape sequence */
b75a7d8f
A
2742 1, 0x0032,
2743 1, 0x61,
2744 2, 0x80,
2745 3, 0x800,
374ca955 2746 4, 0x10000
b75a7d8f
A
2747 };
2748
2749 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2750 UErrorCode errorCode=U_ZERO_ERROR;
2751 UConverter *cnv;
2752
2753 cnv=ucnv_open("ISO_2022", &errorCode);
2754 if(U_FAILURE(errorCode)) {
2755 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2756 return;
2757 }
2758 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2759
2760 /* Test the condition when source >= sourceLimit */
2761 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2762 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2763 /*Test for the condition where we have a truncated char*/
2764 {
2765 static const uint8_t source1[]={0xc4};
374ca955 2766 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
2767 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2768 }
2769 /*Test for the condition where there is an invalid character*/
2770 {
2771 static const uint8_t source2[]={0xa1, 0x01};
374ca955 2772 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
b75a7d8f
A
2773 }
2774 ucnv_close(cnv);
2775}
2776
374ca955
A
2777#endif
2778
b75a7d8f
A
2779static void
2780TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2781 const UChar* uSource;
2782 const UChar* uSourceLimit;
2783 const char* cSource;
2784 const char* cSourceLimit;
2785 UChar *uTargetLimit =NULL;
2786 UChar *uTarget;
2787 char *cTarget;
2788 const char *cTargetLimit;
2789 char *cBuf;
729e4ab9 2790 UChar *uBuf; /*,*test;*/
b75a7d8f
A
2791 int32_t uBufSize = 120;
2792 int len=0;
2793 int i=2;
2794 UErrorCode errorCode=U_ZERO_ERROR;
2795 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2796 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2797 ucnv_reset(cnv);
2798 for(;--i>0; ){
2799 uSource = (UChar*) source;
2800 uSourceLimit=(const UChar*)sourceLimit;
2801 cTarget = cBuf;
2802 uTarget = uBuf;
2803 cSource = cBuf;
2804 cTargetLimit = cBuf;
2805 uTargetLimit = uBuf;
2806
2807 do{
2808
2809 cTargetLimit = cTargetLimit+ i;
2810 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2811 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2812 errorCode=U_ZERO_ERROR;
2813 continue;
2814 }
2815
2816 if(U_FAILURE(errorCode)){
2817 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2818 return;
2819 }
2820
2821 }while (uSource<uSourceLimit);
2822
2823 cSourceLimit =cTarget;
2824 do{
2825 uTargetLimit=uTargetLimit+i;
2826 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2827 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2828 errorCode=U_ZERO_ERROR;
2829 continue;
2830 }
2831 if(U_FAILURE(errorCode)){
2832 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2833 return;
2834 }
2835 }while(cSource<cSourceLimit);
2836
2837 uSource = source;
729e4ab9 2838 /*test =uBuf;*/
b75a7d8f
A
2839 for(len=0;len<(int)(source - sourceLimit);len++){
2840 if(uBuf[len]!=uSource[len]){
2841 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2842 }
2843 }
2844 }
2845 free(uBuf);
2846 free(cBuf);
2847}
2848/* Test for Jitterbug 778 */
2849static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2850 const UChar* uSource;
2851 const UChar* uSourceLimit;
2852 const char* cSource;
2853 UChar *uTargetLimit =NULL;
2854 UChar *uTarget;
2855 char *cTarget;
2856 const char *cTargetLimit;
2857 char *cBuf;
2858 UChar *uBuf,*test;
2859 int32_t uBufSize = 120;
2860 int numCharsInTarget=0;
2861 UErrorCode errorCode=U_ZERO_ERROR;
2862 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2863 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2864 uSource = source;
2865 uSourceLimit=sourceLimit;
2866 cTarget = cBuf;
2867 cTargetLimit = cBuf +uBufSize*5;
2868 uTarget = uBuf;
2869 uTargetLimit = uBuf+ uBufSize*5;
2870 ucnv_reset(cnv);
73c04bcf 2871 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
b75a7d8f
A
2872 if(U_FAILURE(errorCode)){
2873 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2874 return;
2875 }
2876 cSource = cBuf;
2877 test =uBuf;
73c04bcf 2878 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
b75a7d8f
A
2879 if(U_FAILURE(errorCode)){
2880 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2881 return;
2882 }
2883 uSource = source;
2884 while(uSource<uSourceLimit){
2885 if(*test!=*uSource){
2886
2887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2888 }
2889 uSource++;
2890 test++;
2891 }
2892 free(uBuf);
2893 free(cBuf);
2894}
2895
2896static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2897 const UChar* uSource;
2898 const UChar* uSourceLimit;
2899 const char* cSource;
2900 const char* cSourceLimit;
2901 UChar *uTargetLimit =NULL;
2902 UChar *uTarget;
2903 char *cTarget;
2904 const char *cTargetLimit;
2905 char *cBuf;
729e4ab9 2906 UChar *uBuf; /*,*test;*/
b75a7d8f
A
2907 int32_t uBufSize = 120;
2908 int len=0;
2909 int i=2;
2910 const UChar *temp = sourceLimit;
2911 UErrorCode errorCode=U_ZERO_ERROR;
2912 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2913 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2914
2915 ucnv_reset(cnv);
2916 for(;--i>0;){
2917 uSource = (UChar*) source;
2918 cTarget = cBuf;
2919 uTarget = uBuf;
2920 cSource = cBuf;
2921 cTargetLimit = cBuf;
2922 uTargetLimit = uBuf+uBufSize*5;
2923 cTargetLimit = cTargetLimit+uBufSize*10;
2924 uSourceLimit=uSource;
2925 do{
2926
2927 if (uSourceLimit < sourceLimit) {
2928 uSourceLimit = uSourceLimit+1;
2929 }
2930 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2931 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2932 errorCode=U_ZERO_ERROR;
2933 continue;
2934 }
2935
2936 if(U_FAILURE(errorCode)){
2937 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2938 return;
2939 }
2940
2941 }while (uSource<temp);
2942
2943 cSourceLimit =cBuf;
2944 do{
2945 if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2946 cSourceLimit = cSourceLimit+1;
2947 }
2948 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2949 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2950 errorCode=U_ZERO_ERROR;
2951 continue;
2952 }
2953 if(U_FAILURE(errorCode)){
2954 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2955 return;
2956 }
2957 }while(cSource<cTarget);
2958
2959 uSource = source;
729e4ab9 2960 /*test =uBuf;*/
b75a7d8f
A
2961 for(;len<(int)(source - sourceLimit);len++){
2962 if(uBuf[len]!=uSource[len]){
2963 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2964 }
2965 }
2966 }
2967 free(uBuf);
2968 free(cBuf);
2969}
2970static void
2971TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2972 const uint16_t results[], const char* message){
729e4ab9 2973/* const char* s0; */
b75a7d8f
A
2974 const char* s=(char*)source;
2975 const uint16_t *r=results;
2976 UErrorCode errorCode=U_ZERO_ERROR;
2977 uint32_t c,exC;
2978 ucnv_reset(cnv);
2979 while(s<limit) {
729e4ab9 2980 /* s0=s; */
b75a7d8f
A
2981 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2982 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2983 break; /* no more significant input */
2984 } else if(U_FAILURE(errorCode)) {
2985 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2986 break;
2987 } else {
4388f060 2988 if(U16_IS_LEAD(*r)){
b75a7d8f 2989 int i =0, len = 2;
4388f060 2990 U16_NEXT(r, i, len, exC);
b75a7d8f
A
2991 r++;
2992 }else{
2993 exC = *r;
2994 }
2995 if(c!=(uint32_t)(exC))
2996 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2997 }
2998 r++;
2999 }
3000}
3001
3002static int TestJitterbug930(const char* enc){
3003 UErrorCode err = U_ZERO_ERROR;
3004 UConverter*converter;
3005 char out[80];
3006 char*target = out;
3007 UChar in[4];
3008 const UChar*source = in;
3009 int32_t off[80];
3010 int32_t* offsets = off;
3011 int numOffWritten=0;
3012 UBool flush = 0;
3013 converter = my_ucnv_open(enc, &err);
3014
3015 in[0] = 0x41; /* 0x4E00;*/
3016 in[1] = 0x4E01;
3017 in[2] = 0x4E02;
3018 in[3] = 0x4E03;
3019
3020 memset(off, '*', sizeof(off));
3021
3022 ucnv_fromUnicode (converter,
3023 &target,
3024 target+2,
3025 &source,
3026 source+3,
3027 offsets,
3028 flush,
3029 &err);
3030
3031 /* writes three bytes into the output buffer: 41 1B 24
3032 * but offsets contains 0 1 1
3033 */
3034 while(*offsets< off[10]){
3035 numOffWritten++;
3036 offsets++;
3037 }
3038 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3039 if(numOffWritten!= (int)(target-out)){
3040 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3041 }
3042
3043 err = U_ZERO_ERROR;
3044
3045 memset(off,'*' , sizeof(off));
3046
3047 flush = 1;
3048 offsets=off;
3049 ucnv_fromUnicode (converter,
3050 &target,
3051 target+4,
3052 &source,
3053 source,
3054 offsets,
3055 flush,
3056 &err);
3057 numOffWritten=0;
3058 while(*offsets< off[10]){
3059 numOffWritten++;
3060 if(*offsets!= -1){
3061 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3062 }
3063 offsets++;
3064 }
3065
3066 /* writes 42 43 7A into output buffer,
3067 * offsets contains -1 -1 -1
3068 */
3069 ucnv_close(converter);
3070 return 0;
3071}
3072
3073static void
3074TestHZ() {
3075 /* test input */
3076 static const uint16_t in[]={
3077 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3078 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3079 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3080 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3081 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3082 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3083 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3084 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3085 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3086 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3087 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3088 0x005A, 0x005B, 0x005C, 0x000A
3089 };
3090 const UChar* uSource;
3091 const UChar* uSourceLimit;
3092 const char* cSource;
3093 const char* cSourceLimit;
3094 UChar *uTargetLimit =NULL;
3095 UChar *uTarget;
3096 char *cTarget;
3097 const char *cTargetLimit;
3098 char *cBuf;
3099 UChar *uBuf,*test;
3100 int32_t uBufSize = 120;
3101 UErrorCode errorCode=U_ZERO_ERROR;
3102 UConverter *cnv;
3103 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3104 int32_t* myOff= offsets;
3105 cnv=ucnv_open("HZ", &errorCode);
3106 if(U_FAILURE(errorCode)) {
3107 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3108 return;
3109 }
3110
3111 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3112 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 3113 uSource = (const UChar*)in;
2ca993e8 3114 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
3115 cTarget = cBuf;
3116 cTargetLimit = cBuf +uBufSize*5;
3117 uTarget = uBuf;
3118 uTargetLimit = uBuf+ uBufSize*5;
3119 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3120 if(U_FAILURE(errorCode)){
3121 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3122 return;
3123 }
3124 cSource = cBuf;
3125 cSourceLimit =cTarget;
3126 test =uBuf;
3127 myOff=offsets;
3128 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3129 if(U_FAILURE(errorCode)){
3130 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3131 return;
3132 }
73c04bcf 3133 uSource = (const UChar*)in;
b75a7d8f
A
3134 while(uSource<uSourceLimit){
3135 if(*test!=*uSource){
3136
3137 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3138 }
3139 uSource++;
3140 test++;
3141 }
3142 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
2ca993e8
A
3143 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3144 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3145 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
3146 TestJitterbug930("csISO2022JP");
3147 ucnv_close(cnv);
3148 free(offsets);
3149 free(uBuf);
3150 free(cBuf);
3151}
3152
3153static void
3154TestISCII(){
3155 /* test input */
3156 static const uint16_t in[]={
3157 /* test full range of Devanagari */
3158 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3159 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3160 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3161 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3162 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3163 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3164 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3165 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3166 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3167 0x096D,0x096E,0x096F,
3168 /* test Soft halant*/
3169 0x0915,0x094d, 0x200D,
3170 /* test explicit halant */
3171 0x0915,0x094d, 0x200c,
3172 /* test double danda */
3173 0x965,
3174 /* test ASCII */
3175 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3176 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3177 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3178 /* tests from Lotus */
3179 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3180 0x0930,0x094D,0x200D,
3181 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3182 0x0915,0x0921,0x002B,0x095F,
3183 /* tamil range */
3184 0x0B86, 0xB87, 0xB88,
3185 /* telugu range */
3186 0x0C05, 0x0C02, 0x0C03,0x0c31,
3187 /* kannada range */
3188 0x0C85, 0xC82, 0x0C83,
3189 /* test Abbr sign and Anudatta */
3190 0x0970, 0x952,
3191 /* 0x0958,
3192 0x0959,
3193 0x095A,
3194 0x095B,
3195 0x095C,
3196 0x095D,
3197 0x095E,
3198 0x095F,*/
3199 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3200 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3201 0x090C ,
3202 0x0962,
3203 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3204 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3205 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3206 0x093D /* Avagraha 0xEA, 0xE9*/,
3207 0x0958,
3208 0x0959,
3209 0x095A,
3210 0x095B,
3211 0x095C,
3212 0x095D,
3213 0x095E,
3214 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3215 };
3216 static const unsigned char byteArr[]={
3217
3218 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3219 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3220 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3221 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3222 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3223 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3224 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3225 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3226 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3227 0xf8,0xf9,0xfa,
3228 /* test soft halant */
3229 0xb3, 0xE8, 0xE9,
3230 /* test explicit halant */
3231 0xb3, 0xE8, 0xE8,
3232 /* test double danda */
3233 0xea, 0xea,
3234 /* test ASCII */
3235 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3236 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3237 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3238 /* test ATR code */
3239
3240 /* tests from Lotus */
3241 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3242 0xEF,0x42,0xCF,0xE8,0xD9,
3243 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3244 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3245 /* tamil range */
3246 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3247 /* telugu range */
3248 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3249 /* kannada range */
3250 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3251 /* anudatta and abbreviation sign */
3252 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3253
3254
3255 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3256
3257 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3258
3259 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3260
3261 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3262
3263 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3264
3265 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3266
3267 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3268
3269 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3270
3271 0xB3, 0xE9, /* Ka + NUKTA */
3272
3273 0xB4, 0xE9, /* Kha + NUKTA */
3274
3275 0xB5, 0xE9, /* Ga + NUKTA */
3276
3277 0xBA, 0xE9,
3278
3279 0xBF, 0xE9,
3280
3281 0xC0, 0xE9,
3282
3283 0xC9, 0xE9,
3284 /* INV halant RA */
3285 0xD9, 0xE8, 0xCF,
3286 0x00, 0x00A0,
3287 /* just consume unhandled codepoints */
3288 0xEF, 0x30,
3289
3290 };
2ca993e8 3291 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
b75a7d8f
A
3292 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3293
3294}
3295
3296static void
3297TestISO_2022_JP() {
3298 /* test input */
3299 static const uint16_t in[]={
3300 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3301 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3302 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3303 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
46f4442e 3304 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
b75a7d8f
A
3305 0x201D, 0x3014, 0x000D, 0x000A,
3306 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3307 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3308 };
3309 const UChar* uSource;
3310 const UChar* uSourceLimit;
3311 const char* cSource;
3312 const char* cSourceLimit;
3313 UChar *uTargetLimit =NULL;
3314 UChar *uTarget;
3315 char *cTarget;
3316 const char *cTargetLimit;
3317 char *cBuf;
3318 UChar *uBuf,*test;
3319 int32_t uBufSize = 120;
3320 UErrorCode errorCode=U_ZERO_ERROR;
3321 UConverter *cnv;
3322 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3323 int32_t* myOff= offsets;
3324 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3325 if(U_FAILURE(errorCode)) {
374ca955 3326 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
b75a7d8f
A
3327 return;
3328 }
3329
3330 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3331 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 3332 uSource = (const UChar*)in;
2ca993e8 3333 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
3334 cTarget = cBuf;
3335 cTargetLimit = cBuf +uBufSize*5;
3336 uTarget = uBuf;
3337 uTargetLimit = uBuf+ uBufSize*5;
3338 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3339 if(U_FAILURE(errorCode)){
3340 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3341 return;
3342 }
3343 cSource = cBuf;
3344 cSourceLimit =cTarget;
3345 test =uBuf;
3346 myOff=offsets;
3347 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3348 if(U_FAILURE(errorCode)){
3349 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3350 return;
3351 }
3352
73c04bcf 3353 uSource = (const UChar*)in;
b75a7d8f
A
3354 while(uSource<uSourceLimit){
3355 if(*test!=*uSource){
3356
3357 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3358 }
3359 uSource++;
3360 test++;
3361 }
3362
2ca993e8
A
3363 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3364 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f 3365 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
2ca993e8 3366 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
3367 TestJitterbug930("csISO2022JP");
3368 ucnv_close(cnv);
3369 free(uBuf);
3370 free(cBuf);
3371 free(offsets);
3372}
3373
3374static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3375 const UChar* uSource;
3376 const UChar* uSourceLimit;
3377 const char* cSource;
3378 const char* cSourceLimit;
3379 UChar *uTargetLimit =NULL;
3380 UChar *uTarget;
3381 char *cTarget;
3382 const char *cTargetLimit;
3383 char *cBuf;
3384 UChar *uBuf,*test;
3385 int32_t uBufSize = 120*10;
3386 UErrorCode errorCode=U_ZERO_ERROR;
3387 UConverter *cnv;
3388 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3389 int32_t* myOff= offsets;
3390 cnv=my_ucnv_open(conv, &errorCode);
3391 if(U_FAILURE(errorCode)) {
3392 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3393 return;
3394 }
3395
3396 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3397 cBuf =(char*)malloc(uBufSize * sizeof(char));
73c04bcf 3398 uSource = (const UChar*)in;
b75a7d8f
A
3399 uSourceLimit=uSource+len;
3400 cTarget = cBuf;
3401 cTargetLimit = cBuf +uBufSize;
3402 uTarget = uBuf;
3403 uTargetLimit = uBuf+ uBufSize;
3404 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3405 if(U_FAILURE(errorCode)){
3406 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3407 return;
3408 }
3409 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3410 cSource = cBuf;
3411 cSourceLimit =cTarget;
3412 test =uBuf;
3413 myOff=offsets;
3414 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3415 if(U_FAILURE(errorCode)){
3416 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3417 return;
3418 }
3419
73c04bcf 3420 uSource = (const UChar*)in;
b75a7d8f
A
3421 while(uSource<uSourceLimit){
3422 if(*test!=*uSource){
3423 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3424 }
3425 uSource++;
3426 test++;
3427 }
73c04bcf
A
3428 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3429 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
b75a7d8f
A
3430 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3431 if(byteArr && byteArrLen!=0){
3432 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
73c04bcf 3433 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
b75a7d8f
A
3434 {
3435 cSource = byteArr;
3436 cSourceLimit = cSource+byteArrLen;
3437 test=uBuf;
3438 myOff = offsets;
3439 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3440 if(U_FAILURE(errorCode)){
3441 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3442 return;
3443 }
3444
73c04bcf 3445 uSource = (const UChar*)in;
b75a7d8f
A
3446 while(uSource<uSourceLimit){
3447 if(*test!=*uSource){
3448 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3449 }
3450 uSource++;
3451 test++;
3452 }
3453 }
3454 }
3455
3456 ucnv_close(cnv);
3457 free(uBuf);
3458 free(cBuf);
3459 free(offsets);
3460}
3461static UChar U_CALLCONV
3462_charAt(int32_t offset, void *context) {
3463 return ((char*)context)[offset];
3464}
3465
3466static int32_t
3467unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3468 int32_t srcIndex=0;
3469 int32_t dstIndex=0;
3470 if(U_FAILURE(*status)){
3471 return 0;
3472 }
3473 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3474 *status = U_ILLEGAL_ARGUMENT_ERROR;
3475 return 0;
3476 }
3477 if(srcLen==-1){
73c04bcf 3478 srcLen = (int32_t)uprv_strlen(src);
b75a7d8f
A
3479 }
3480
3481 for (; srcIndex<srcLen; ) {
3482 UChar32 c = src[srcIndex++];
3483 if (c == 0x005C /*'\\'*/) {
3484 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3485 if (c == (UChar32)0xFFFFFFFF) {
3486 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3487 break; /* invalid escape sequence */
3488 }
3489 }
3490 if(dstIndex < dstLen){
3491 if(c>0xFFFF){
4388f060 3492 dst[dstIndex++] = U16_LEAD(c);
b75a7d8f 3493 if(dstIndex<dstLen){
4388f060 3494 dst[dstIndex]=U16_TRAIL(c);
b75a7d8f
A
3495 }else{
3496 *status=U_BUFFER_OVERFLOW_ERROR;
3497 }
3498 }else{
3499 dst[dstIndex]=(UChar)c;
3500 }
3501
3502 }else{
3503 *status = U_BUFFER_OVERFLOW_ERROR;
3504 }
3505 dstIndex++; /* for preflighting */
3506 }
3507 return dstIndex;
3508}
3509
3510static void
3511TestFullRoundtrip(const char* cp){
3512 UChar usource[10] ={0};
3513 UChar nsrc[10] = {0};
3514 uint32_t i=1;
3515 int len=0, ulen;
3516 nsrc[0]=0x0061;
3517 /* Test codepoint 0 */
3518 TestConv(usource,1,cp,"",NULL,0);
3519 TestConv(usource,2,cp,"",NULL,0);
3520 nsrc[2]=0x5555;
3521 TestConv(nsrc,3,cp,"",NULL,0);
3522
3523 for(;i<=0x10FFFF;i++){
3524 if(i==0xD800){
3525 i=0xDFFF;
3526 continue;
3527 }
3528 if(i<=0xFFFF){
3529 usource[0] =(UChar) i;
3530 len=1;
3531 }else{
4388f060
A
3532 usource[0]=U16_LEAD(i);
3533 usource[1]=U16_TRAIL(i);
b75a7d8f
A
3534 len=2;
3535 }
3536 ulen=len;
3537 if(i==0x80) {
3538 usource[2]=0;
3539 }
3540 /* Test only single code points */
3541 TestConv(usource,ulen,cp,"",NULL,0);
3542 /* Test codepoint repeated twice */
3543 usource[ulen]=usource[0];
3544 usource[ulen+1]=usource[1];
3545 ulen+=len;
3546 TestConv(usource,ulen,cp,"",NULL,0);
3547 /* Test codepoint repeated 3 times */
3548 usource[ulen]=usource[0];
3549 usource[ulen+1]=usource[1];
3550 ulen+=len;
3551 TestConv(usource,ulen,cp,"",NULL,0);
3552 /* Test codepoint in between 2 codepoints */
3553 nsrc[1]=usource[0];
3554 nsrc[2]=usource[1];
3555 nsrc[len+1]=0x5555;
3556 TestConv(nsrc,len+2,cp,"",NULL,0);
3557 uprv_memset(usource,0,sizeof(UChar)*10);
3558 }
3559}
3560
3561static void
3562TestRoundTrippingAllUTF(void){
729e4ab9 3563 if(!getTestOption(QUICK_OPTION)){
b75a7d8f
A
3564 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3565 TestFullRoundtrip("BOCU-1");
3566 log_verbose("Running exhaustive round trip test for SCSU\n");
3567 TestFullRoundtrip("SCSU");
3568 log_verbose("Running exhaustive round trip test for UTF-8\n");
3569 TestFullRoundtrip("UTF-8");
3570 log_verbose("Running exhaustive round trip test for CESU-8\n");
3571 TestFullRoundtrip("CESU-8");
3572 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3573 TestFullRoundtrip("UTF-16BE");
3574 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3575 TestFullRoundtrip("UTF-16LE");
3576 log_verbose("Running exhaustive round trip test for UTF-16\n");
3577 TestFullRoundtrip("UTF-16");
3578 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3579 TestFullRoundtrip("UTF-32BE");
3580 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3581 TestFullRoundtrip("UTF-32LE");
3582 log_verbose("Running exhaustive round trip test for UTF-32\n");
3583 TestFullRoundtrip("UTF-32");
3584 log_verbose("Running exhaustive round trip test for UTF-7\n");
3585 TestFullRoundtrip("UTF-7");
3586 log_verbose("Running exhaustive round trip test for UTF-7\n");
3587 TestFullRoundtrip("UTF-7,version=1");
3588 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3589 TestFullRoundtrip("IMAP-mailbox-name");
4388f060
A
3590 /*
3591 *
3592 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3593 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3594 * The old mappings remain as fallbacks.
3595 * This test may be reintroduced at a later time.
3596 *
3597 * 110118 - mow
3598 */
3599 /*
3600 log_verbose("Running exhaustive round trip test for GB18030\n");
3601 TestFullRoundtrip("GB18030");
3602 */
b75a7d8f
A
3603 }
3604}
3605
3606static void
3607TestSCSU() {
3608
3609 static const uint16_t germanUTF16[]={
3610 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3611 };
3612
3613 static const uint8_t germanSCSU[]={
3614 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3615 };
3616
3617 static const uint16_t russianUTF16[]={
3618 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3619 };
3620
3621 static const uint8_t russianSCSU[]={
3622 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3623 };
3624
3625 static const uint16_t japaneseUTF16[]={
3626 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3627 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3628 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3629 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3630 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3631 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3632 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3633 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3634 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3635 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3636 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3637 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3638 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3639 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3640 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3641 };
3642
3643 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3644 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3645 static const uint8_t japaneseSCSU[]={
3646 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3647 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3648 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3649 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3650 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3651 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3652 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3653 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3654 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3655 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3656 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3657 0xcb, 0x82
3658 };
3659
3660 static const uint16_t allFeaturesUTF16[]={
3661 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3662 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3663 0x01df, 0xf000, 0xdbff, 0xdfff
3664 };
3665
3666 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3667 * result here (34B vs. 35B)
3668 */
3669 static const uint8_t allFeaturesSCSU[]={
3670 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3671 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3672 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3673 0xdf, 0x14, 0x80, 0x15, 0xff
3674 };
3675 static const uint16_t monkeyIn[]={
3676 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3677 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3678 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3679 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3680 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3681 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3682 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3683 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3684 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3685 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3686 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3687 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3688 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3689 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3690 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3691 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3692 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3693 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3694 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3695 /* test non-BMP code points */
3696 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3697 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3698 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3699 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3700 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3701 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3702 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3703 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3704 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3705 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3706 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3707
3708
3709 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3710 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3711 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3712 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3713 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3714 };
3715 static const char *fTestCases [] = {
3716 "\\ud800\\udc00", /* smallest surrogate*/
3717 "\\ud8ff\\udcff",
3718 "\\udBff\\udFff", /* largest surrogate pair*/
3719 "\\ud834\\udc00",
3720 "\\U0010FFFF",
3721 "Hello \\u9292 \\u9192 World!",
3722 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3724
3725 "\\u0648\\u06c8", /* catch missing reset*/
3726 "\\u0648\\u06c8",
3727
3728 "\\u4444\\uE001", /* lowest quotable*/
3729 "\\u4444\\uf2FF", /* highest quotable*/
3730 "\\u4444\\uf188\\u4444",
3731 "\\u4444\\uf188\\uf288",
3732 "\\u4444\\uf188abc\\u0429\\uf288",
3733 "\\u9292\\u2222",
3734 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3735 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3736 "Hello World!123456",
3737 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3738
3739 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3740 "abc\\u4411d", /* uses SQU*/
3741 "abc\\u4411\\u4412d",/* uses SCU*/
3742 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3743 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3744 "\\u9292\\u2222",
3745 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3746 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3747 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3748
3749 "", /* empty input*/
3750 "\\u0000", /* smallest BMP character*/
3751 "\\uFFFF", /* largest BMP character*/
3752
3753 /* regression tests*/
3754 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3755 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3756 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3757 "\\u0041\\u00df\\u0401\\u015f",
3758 "\\u9066\\u2123abc",
3759 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3760 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3761 };
3762 int i=0;
2ca993e8 3763 for(;i<UPRV_LENGTHOF(fTestCases);i++){
b75a7d8f
A
3764 const char* cSrc = fTestCases[i];
3765 UErrorCode status = U_ZERO_ERROR;
3766 int32_t cSrcLen,srcLen;
3767 UChar* src;
3768 /* UConverter* cnv = ucnv_open("SCSU",&status); */
73c04bcf 3769 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
b75a7d8f
A
3770 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3771 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3772 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3773 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3774 free(src);
3775 }
3776 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3777 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3778 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3779 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3780 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3781 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3782 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3783}
73c04bcf
A
3784
3785#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
3786static void TestJitterbug2346(){
3787 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3788 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3789 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3790
3791 UChar uTarget[500]={'\0'};
3792 UChar* utarget=uTarget;
3793 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3794
3795 char cTarget[500]={'\0'};
3796 char* ctarget=cTarget;
3797 char* ctargetLimit=cTarget+sizeof(cTarget);
3798 const char* csource=source;
3799 UChar* temp = expected;
3800 UErrorCode err=U_ZERO_ERROR;
3801
3802 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3803 if(U_FAILURE(err)) {
3804 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3805 return;
3806 }
3807 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3808 if(U_FAILURE(err)) {
3809 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3810 return;
3811 }
3812 utargetLimit=utarget;
3813 utarget = uTarget;
3814 while(utarget<utargetLimit){
3815 if(*temp!=*utarget){
3816
3817 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3818 }
3819 utarget++;
3820 temp++;
3821 }
3822 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3823 if(U_FAILURE(err)) {
3824 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3825 return;
3826 }
3827 ctargetLimit=ctarget;
3828 ctarget =cTarget;
3829 ucnv_close(conv);
3830
3831
3832}
73c04bcf 3833
b75a7d8f
A
3834static void
3835TestISO_2022_JP_1() {
3836 /* test input */
3837 static const uint16_t in[]={
3838 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3839 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3840 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3841 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3842 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
46f4442e 3843 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
b75a7d8f
A
3844 0x201D, 0x000D, 0x000A,
3845 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3846 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3847 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3848 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3849 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3850 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3851 };
3852 const UChar* uSource;
3853 const UChar* uSourceLimit;
3854 const char* cSource;
3855 const char* cSourceLimit;
3856 UChar *uTargetLimit =NULL;
3857 UChar *uTarget;
3858 char *cTarget;
3859 const char *cTargetLimit;
3860 char *cBuf;
3861 UChar *uBuf,*test;
3862 int32_t uBufSize = 120;
3863 UErrorCode errorCode=U_ZERO_ERROR;
3864 UConverter *cnv;
3865
3866 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3867 if(U_FAILURE(errorCode)) {
3868 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3869 return;
3870 }
3871
3872 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3873 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 3874 uSource = (const UChar*)in;
2ca993e8 3875 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
3876 cTarget = cBuf;
3877 cTargetLimit = cBuf +uBufSize*5;
3878 uTarget = uBuf;
3879 uTargetLimit = uBuf+ uBufSize*5;
3880 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3881 if(U_FAILURE(errorCode)){
3882 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3883 return;
3884 }
3885 cSource = cBuf;
3886 cSourceLimit =cTarget;
3887 test =uBuf;
3888 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3889 if(U_FAILURE(errorCode)){
3890 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3891 return;
3892 }
73c04bcf 3893 uSource = (const UChar*)in;
b75a7d8f
A
3894 while(uSource<uSourceLimit){
3895 if(*test!=*uSource){
3896
3897 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3898 }
3899 uSource++;
3900 test++;
3901 }
3902 /*ucnv_close(cnv);
3903 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3904 /*Test for the condition where there is an invalid character*/
3905 ucnv_reset(cnv);
3906 {
3907 static const uint8_t source2[]={0x0e,0x24,0x053};
3908 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3909 }
2ca993e8
A
3910 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3911 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
3912 ucnv_close(cnv);
3913 free(uBuf);
3914 free(cBuf);
3915}
3916
3917static void
3918TestISO_2022_JP_2() {
3919 /* test input */
3920 static const uint16_t in[]={
3921 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3922 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3923 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3924 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3925 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3926 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3927 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3928 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3929 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3930 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3931 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3932 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3933 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3934 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3935 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3936 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3937 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3938 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3939 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3940 };
3941 const UChar* uSource;
3942 const UChar* uSourceLimit;
3943 const char* cSource;
3944 const char* cSourceLimit;
3945 UChar *uTargetLimit =NULL;
3946 UChar *uTarget;
3947 char *cTarget;
3948 const char *cTargetLimit;
3949 char *cBuf;
3950 UChar *uBuf,*test;
3951 int32_t uBufSize = 120;
3952 UErrorCode errorCode=U_ZERO_ERROR;
3953 UConverter *cnv;
3954 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3955 int32_t* myOff= offsets;
3956 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3957 if(U_FAILURE(errorCode)) {
3958 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3959 return;
3960 }
3961
3962 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3963 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 3964 uSource = (const UChar*)in;
2ca993e8 3965 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
3966 cTarget = cBuf;
3967 cTargetLimit = cBuf +uBufSize*5;
3968 uTarget = uBuf;
3969 uTargetLimit = uBuf+ uBufSize*5;
3970 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3971 if(U_FAILURE(errorCode)){
3972 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3973 return;
3974 }
3975 cSource = cBuf;
3976 cSourceLimit =cTarget;
3977 test =uBuf;
3978 myOff=offsets;
3979 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3980 if(U_FAILURE(errorCode)){
3981 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3982 return;
3983 }
73c04bcf 3984 uSource = (const UChar*)in;
b75a7d8f
A
3985 while(uSource<uSourceLimit){
3986 if(*test!=*uSource){
3987
3988 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3989 }
3990 uSource++;
3991 test++;
3992 }
2ca993e8
A
3993 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3994 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3995 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
3996 /*Test for the condition where there is an invalid character*/
3997 ucnv_reset(cnv);
3998 {
3999 static const uint8_t source2[]={0x0e,0x24,0x053};
4000 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4001 }
4002 ucnv_close(cnv);
4003 free(uBuf);
4004 free(cBuf);
4005 free(offsets);
4006}
4007
4008static void
4009TestISO_2022_KR() {
4010 /* test input */
4011 static const uint16_t in[]={
fd0068a8
A
4012 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4013 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
b75a7d8f
A
4014 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4015 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
fd0068a8 4016 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
b75a7d8f
A
4017 ,0x53E3,0x53E4,0x000A,0x000D};
4018 const UChar* uSource;
4019 const UChar* uSourceLimit;
4020 const char* cSource;
4021 const char* cSourceLimit;
4022 UChar *uTargetLimit =NULL;
4023 UChar *uTarget;
4024 char *cTarget;
4025 const char *cTargetLimit;
4026 char *cBuf;
4027 UChar *uBuf,*test;
4028 int32_t uBufSize = 120;
4029 UErrorCode errorCode=U_ZERO_ERROR;
4030 UConverter *cnv;
4031 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4032 int32_t* myOff= offsets;
4033 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4034 if(U_FAILURE(errorCode)) {
4035 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4036 return;
4037 }
4038
4039 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4040 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 4041 uSource = (const UChar*)in;
2ca993e8 4042 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
4043 cTarget = cBuf;
4044 cTargetLimit = cBuf +uBufSize*5;
4045 uTarget = uBuf;
4046 uTargetLimit = uBuf+ uBufSize*5;
4047 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4048 if(U_FAILURE(errorCode)){
4049 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4050 return;
4051 }
4052 cSource = cBuf;
4053 cSourceLimit =cTarget;
4054 test =uBuf;
4055 myOff=offsets;
4056 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4057 if(U_FAILURE(errorCode)){
4058 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4059 return;
4060 }
73c04bcf 4061 uSource = (const UChar*)in;
b75a7d8f
A
4062 while(uSource<uSourceLimit){
4063 if(*test!=*uSource){
4064 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4065 }
4066 uSource++;
4067 test++;
4068 }
4069 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
2ca993e8
A
4070 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4071 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4072 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
4073 TestJitterbug930("csISO2022KR");
4074 /*Test for the condition where there is an invalid character*/
4075 ucnv_reset(cnv);
4076 {
4077 static const uint8_t source2[]={0x1b,0x24,0x053};
374ca955 4078 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
4079 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4080 }
4081 ucnv_close(cnv);
4082 free(uBuf);
4083 free(cBuf);
4084 free(offsets);
4085}
4086
4087static void
4088TestISO_2022_KR_1() {
4089 /* test input */
4090 static const uint16_t in[]={
4091 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4092 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4093 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4094 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4095 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4096 ,0x53E3,0x53E4,0x000A,0x000D};
4097 const UChar* uSource;
4098 const UChar* uSourceLimit;
4099 const char* cSource;
4100 const char* cSourceLimit;
4101 UChar *uTargetLimit =NULL;
4102 UChar *uTarget;
4103 char *cTarget;
4104 const char *cTargetLimit;
4105 char *cBuf;
4106 UChar *uBuf,*test;
4107 int32_t uBufSize = 120;
4108 UErrorCode errorCode=U_ZERO_ERROR;
4109 UConverter *cnv;
4110 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4111 int32_t* myOff= offsets;
4112 cnv=ucnv_open("ibm-25546", &errorCode);
4113 if(U_FAILURE(errorCode)) {
4114 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4115 return;
4116 }
4117
4118 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4119 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
73c04bcf 4120 uSource = (const UChar*)in;
2ca993e8 4121 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
4122 cTarget = cBuf;
4123 cTargetLimit = cBuf +uBufSize*5;
4124 uTarget = uBuf;
4125 uTargetLimit = uBuf+ uBufSize*5;
4126 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4127 if(U_FAILURE(errorCode)){
4128 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4129 return;
4130 }
4131 cSource = cBuf;
4132 cSourceLimit =cTarget;
4133 test =uBuf;
4134 myOff=offsets;
4135 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4136 if(U_FAILURE(errorCode)){
4137 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4138 return;
4139 }
73c04bcf 4140 uSource = (const UChar*)in;
b75a7d8f
A
4141 while(uSource<uSourceLimit){
4142 if(*test!=*uSource){
4143 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4144 }
4145 uSource++;
4146 test++;
4147 }
4148 ucnv_reset(cnv);
4149 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
2ca993e8
A
4150 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4151 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f 4152 ucnv_reset(cnv);
2ca993e8 4153 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
4154 /*Test for the condition where there is an invalid character*/
4155 ucnv_reset(cnv);
4156 {
4157 static const uint8_t source2[]={0x1b,0x24,0x053};
374ca955 4158 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
b75a7d8f
A
4159 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4160 }
4161 ucnv_close(cnv);
4162 free(uBuf);
4163 free(cBuf);
4164 free(offsets);
4165}
4166
4167static void TestJitterbug2411(){
73c04bcf 4168 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
b75a7d8f
A
4169 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4170 UConverter* kr=NULL, *kr1=NULL;
4171 UErrorCode errorCode = U_ZERO_ERROR;
4172 UChar tgt[100]={'\0'};
4173 UChar* target = tgt;
4174 UChar* targetLimit = target+100;
4175 kr=ucnv_open("iso-2022-kr", &errorCode);
4176 if(U_FAILURE(errorCode)) {
4177 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4178 return;
4179 }
4180 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4181 if(U_FAILURE(errorCode)) {
4182 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4183 return;
4184 }
4185 kr1 = ucnv_open("ibm-25546", &errorCode);
4186 if(U_FAILURE(errorCode)) {
4187 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4188 return;
4189 }
4190 target = tgt;
4191 targetLimit = target+100;
4192 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4193
4194 if(U_FAILURE(errorCode)) {
4195 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4196 return;
4197 }
4198
4199 ucnv_close(kr);
4200 ucnv_close(kr1);
4201
4202}
4203
4204static void
4205TestJIS(){
374ca955 4206 /* From Unicode moved to testdata/conversion.txt */
b75a7d8f
A
4207 /*To Unicode*/
4208 {
73c04bcf 4209 static const uint8_t sampleTextJIS[] = {
b75a7d8f
A
4210 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4211 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4212 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4213 };
73c04bcf 4214 static const uint16_t expectedISO2022JIS[] = {
b75a7d8f
A
4215 0x0041, 0x0042,
4216 0xFF81, 0xFF82,
4217 0x3000
4218 };
73c04bcf 4219 static const int32_t toISO2022JISOffs[]={
b75a7d8f
A
4220 3,4,
4221 8,9,
4222 16
4223 };
4224
73c04bcf 4225 static const uint8_t sampleTextJIS7[] = {
b75a7d8f
A
4226 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4227 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4228 0x1b,0x24,0x42,0x21,0x21,
4229 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4230 0x21,0x22,
4231 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4232 };
73c04bcf 4233 static const uint16_t expectedISO2022JIS7[] = {
b75a7d8f
A
4234 0x0041, 0x0042,
4235 0xFF81, 0xFF82,
4236 0x3000,
4237 0xFF81, 0xFF82,
4238 0x3001,
4239 0x3000
4240 };
73c04bcf 4241 static const int32_t toISO2022JIS7Offs[]={
b75a7d8f
A
4242 3,4,
4243 8,9,
4244 13,16,
4245 17,
4246 19,27
4247 };
73c04bcf 4248 static const uint8_t sampleTextJIS8[] = {
b75a7d8f
A
4249 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4250 0xa1,0xc8,0xd9,/*Katakana Set*/
4251 0x1b,0x28,0x42,
4252 0x41,0x42,
4253 0xb1,0xc3, /*Katakana Set*/
4254 0x1b,0x24,0x42,0x21,0x21
4255 };
73c04bcf 4256 static const uint16_t expectedISO2022JIS8[] = {
b75a7d8f
A
4257 0x0041, 0x0042,
4258 0xff61, 0xff88, 0xff99,
4259 0x0041, 0x0042,
4260 0xff71, 0xff83,
4261 0x3000
4262 };
73c04bcf 4263 static const int32_t toISO2022JIS8Offs[]={
b75a7d8f
A
4264 3, 4, 5, 6,
4265 7, 11, 12, 13,
4266 14, 18,
4267 };
4268
4269 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
2ca993e8 4270 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
b75a7d8f 4271 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
2ca993e8 4272 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
b75a7d8f 4273 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
2ca993e8 4274 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
b75a7d8f
A
4275 }
4276
4277}
4278
729e4ab9
A
4279
4280#if 0
4281 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4282
b75a7d8f
A
4283static void TestJitterbug915(){
4284/* tests for roundtripping of the below sequence
4285\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4286\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4287\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4288\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4289\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4290\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4291\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4292*/
73c04bcf 4293 static const char cSource[]={
b75a7d8f
A
4294 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4295 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4296 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4297 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4298 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4299 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
374ca955 4300 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
b75a7d8f
A
4301 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4302 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4303 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
374ca955 4304 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
b75a7d8f
A
4305 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4306 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4307 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
374ca955 4308 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
b75a7d8f
A
4309 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4310 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4311 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
374ca955 4312 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
b75a7d8f
A
4313 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4314 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4315 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
374ca955 4316 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
b75a7d8f
A
4317 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4318 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4319 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
374ca955
A
4320 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4321 0x37, 0x20, 0x2A, 0x2F
b75a7d8f
A
4322 };
4323 UChar uTarget[500]={'\0'};
4324 UChar* utarget=uTarget;
4325 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4326
4327 char cTarget[500]={'\0'};
4328 char* ctarget=cTarget;
4329 char* ctargetLimit=cTarget+sizeof(cTarget);
4330 const char* csource=cSource;
73c04bcf 4331 const char* tempSrc = cSource;
b75a7d8f
A
4332 UErrorCode err=U_ZERO_ERROR;
4333
4334 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4335 if(U_FAILURE(err)) {
4336 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4337 return;
4338 }
4339 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4340 if(U_FAILURE(err)) {
4341 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4342 return;
4343 }
4344 utargetLimit=utarget;
4345 utarget = uTarget;
4346 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4347 if(U_FAILURE(err)) {
4348 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4349 return;
4350 }
4351 ctargetLimit=ctarget;
4352 ctarget =cTarget;
4353 while(ctarget<ctargetLimit){
374ca955
A
4354 if(*ctarget != *tempSrc){
4355 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
b75a7d8f 4356 }
374ca955
A
4357 ++ctarget;
4358 ++tempSrc;
b75a7d8f
A
4359 }
4360
4361 ucnv_close(conv);
4362}
4363
4364static void
4365TestISO_2022_CN_EXT() {
4366 /* test input */
4367 static const uint16_t in[]={
4368 /* test Non-BMP code points */
4369 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4370 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4371 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4372 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4373 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4374 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4375 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4376 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4377 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4378 0xD869, 0xDED5,
4379
4380 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4381 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4382 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4383 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4384 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4385 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4386 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4387 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4388 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4389 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4390 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4391 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4392 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4393 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4394 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4395 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4396 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4397 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4398
4399 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4400
4401 };
4402
4403 const UChar* uSource;
4404 const UChar* uSourceLimit;
4405 const char* cSource;
4406 const char* cSourceLimit;
4407 UChar *uTargetLimit =NULL;
4408 UChar *uTarget;
4409 char *cTarget;
4410 const char *cTargetLimit;
4411 char *cBuf;
4412 UChar *uBuf,*test;
4413 int32_t uBufSize = 180;
4414 UErrorCode errorCode=U_ZERO_ERROR;
4415 UConverter *cnv;
4416 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4417 int32_t* myOff= offsets;
4418 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4419 if(U_FAILURE(errorCode)) {
4420 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4421 return;
4422 }
4423
4424 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4425 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
73c04bcf 4426 uSource = (const UChar*)in;
2ca993e8 4427 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
4428 cTarget = cBuf;
4429 cTargetLimit = cBuf +uBufSize*5;
4430 uTarget = uBuf;
4431 uTargetLimit = uBuf+ uBufSize*5;
4432 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4433 if(U_FAILURE(errorCode)){
4434 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4435 return;
4436 }
4437 cSource = cBuf;
4438 cSourceLimit =cTarget;
4439 test =uBuf;
4440 myOff=offsets;
4441 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4442 if(U_FAILURE(errorCode)){
4443 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4444 return;
4445 }
73c04bcf 4446 uSource = (const UChar*)in;
b75a7d8f
A
4447 while(uSource<uSourceLimit){
4448 if(*test!=*uSource){
4449 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4450 }
4451 else{
4452 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4453 }
4454 uSource++;
4455 test++;
4456 }
2ca993e8
A
4457 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4458 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
4459 /*Test for the condition where there is an invalid character*/
4460 ucnv_reset(cnv);
4461 {
4462 static const uint8_t source2[]={0x0e,0x24,0x053};
4463 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4464 }
4465 ucnv_close(cnv);
4466 free(uBuf);
4467 free(cBuf);
4468 free(offsets);
4469}
729e4ab9 4470#endif
b75a7d8f
A
4471
4472static void
4473TestISO_2022_CN() {
4474 /* test input */
4475 static const uint16_t in[]={
4476 /* jitterbug 951 */
4477 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4478 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4479 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4480 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4481 0x0020, 0x0045, 0x004e, 0x0044,
4482 /**/
4483 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4484 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4485 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4486 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4487 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4488 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4489 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4490 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4491 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4492 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4493 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4494 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4495 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4496 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4497 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4498 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4499 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4500
4501 };
4502 const UChar* uSource;
4503 const UChar* uSourceLimit;
4504 const char* cSource;
4505 const char* cSourceLimit;
4506 UChar *uTargetLimit =NULL;
4507 UChar *uTarget;
4508 char *cTarget;
4509 const char *cTargetLimit;
4510 char *cBuf;
4511 UChar *uBuf,*test;
4512 int32_t uBufSize = 180;
4513 UErrorCode errorCode=U_ZERO_ERROR;
4514 UConverter *cnv;
4515 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4516 int32_t* myOff= offsets;
4517 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4518 if(U_FAILURE(errorCode)) {
4519 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4520 return;
4521 }
4522
4523 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4524 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
73c04bcf 4525 uSource = (const UChar*)in;
2ca993e8 4526 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
b75a7d8f
A
4527 cTarget = cBuf;
4528 cTargetLimit = cBuf +uBufSize*5;
4529 uTarget = uBuf;
4530 uTargetLimit = uBuf+ uBufSize*5;
4531 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4532 if(U_FAILURE(errorCode)){
4533 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4534 return;
4535 }
4536 cSource = cBuf;
4537 cSourceLimit =cTarget;
4538 test =uBuf;
4539 myOff=offsets;
4540 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4541 if(U_FAILURE(errorCode)){
4542 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4543 return;
4544 }
73c04bcf 4545 uSource = (const UChar*)in;
b75a7d8f
A
4546 while(uSource<uSourceLimit){
4547 if(*test!=*uSource){
4548 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4549 }
4550 else{
4551 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4552 }
4553 uSource++;
4554 test++;
4555 }
4556 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
2ca993e8
A
4557 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4558 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4559 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
b75a7d8f
A
4560 TestJitterbug930("csISO2022CN");
4561 /*Test for the condition where there is an invalid character*/
4562 ucnv_reset(cnv);
4563 {
4564 static const uint8_t source2[]={0x0e,0x24,0x053};
4565 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4566 }
4567
4568 ucnv_close(cnv);
4569 free(uBuf);
4570 free(cBuf);
4571 free(offsets);
4572}
4573
d5d484b0
A
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that
 * UConverterCallbackReason is UCNV_IRREGULAR.
 *
 * One entry of the test table used by TestJitterbug6175.
 */
typedef struct EmptySegmentTest {
    const char *converterName;   /* converter name handed to ucnv_open(); NULL ends the table */
    const char *inputText;       /* start of the input bytes to convert */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4580
4581/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4582static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4583 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
46f4442e 4584 if (reason > UCNV_IRREGULAR) {
d5d484b0 4585 return;
46f4442e
A
4586 }
4587 if (reason != UCNV_IRREGULAR) {
d5d484b0 4588 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
46f4442e 4589 }
d5d484b0
A
4590 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4591 *err = U_ZERO_ERROR;
4592 ucnv_cbToUWriteSub(toArgs,0,err);
4593}
4594
4595enum { kEmptySegmentToUCharsMax = 64 };
4596static void TestJitterbug6175(void) {
4597 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4598 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4599 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4600 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4601 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4602 static const EmptySegmentTest emptySegmentTests[] = {
4603 /* converterName inputText inputTextLength */
4604 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4605 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4606 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4607 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4608 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4609 /* terminator: */
4610 { NULL, NULL, 0, }
4611 };
4612 const EmptySegmentTest * testPtr;
4613 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4614 UErrorCode err = U_ZERO_ERROR;
4615 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4616 if (U_FAILURE(err)) {
4617 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4618 return;
4619 }
4620 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4621 if (U_FAILURE(err)) {
4622 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4623 ucnv_close(cnv);
4624 return;
4625 }
4626 {
4627 UChar toUChars[kEmptySegmentToUCharsMax];
4628 UChar * toUCharsPtr = toUChars;
4629 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4630 const char * inCharsPtr = testPtr->inputText;
4631 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4632 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4633 }
4634 ucnv_close(cnv);
4635 }
4636}
4637
b75a7d8f
A
4638static void
4639TestEBCDIC_STATEFUL() {
4640 /* test input */
4641 static const uint8_t in[]={
4642 0x61,
4643 0x1a,
4644 0x0f, 0x4b,
4645 0x42,
4646 0x40,
4647 0x36,
4648 };
4649
4650 /* expected test results */
374ca955 4651 static const int32_t results[]={
b75a7d8f
A
4652 /* number of bytes read, code point */
4653 1, 0x002f,
4654 1, 0x0092,
4655 2, 0x002e,
4656 1, 0xff62,
4657 1, 0x0020,
4658 1, 0x0096,
4659
4660 };
4661 static const uint8_t in2[]={
4662 0x0f,
4663 0xa1,
4664 0x01
4665 };
4666
4667 /* expected test results */
374ca955 4668 static const int32_t results2[]={
b75a7d8f
A
4669 /* number of bytes read, code point */
4670 2, 0x203E,
4671 1, 0x0001,
4672 };
4673
4674 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4675 UErrorCode errorCode=U_ZERO_ERROR;
4676 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4677 if(U_FAILURE(errorCode)) {
4678 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4679 return;
4680 }
4681 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4682 ucnv_reset(cnv);
4683 /* Test the condition when source >= sourceLimit */
4684 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4685 ucnv_reset(cnv);
4686 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4687 {
4688 static const uint8_t source1[]={0x0f};
4689 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4690 }
4691 /*Test for the condition where there is an invalid character*/
4692 ucnv_reset(cnv);
4693 {
4694 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4695 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4696 }
4697 ucnv_reset(cnv);
4698 source=(const char*)in2;
4699 limit=(const char*)in2+sizeof(in2);
4700 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4701 ucnv_close(cnv);
4702
4703}
4704
4705static void
4706TestGB18030() {
4707 /* test input */
4708 static const uint8_t in[]={
4709 0x24,
4710 0x7f,
4711 0x81, 0x30, 0x81, 0x30,
4712 0xa8, 0xbf,
4713 0xa2, 0xe3,
4714 0xd2, 0xbb,
4715 0x82, 0x35, 0x8f, 0x33,
4716 0x84, 0x31, 0xa4, 0x39,
4717 0x90, 0x30, 0x81, 0x30,
4718 0xe3, 0x32, 0x9a, 0x35
4719#if 0
4720 /*
4721 * Feature removed markus 2000-oct-26
4722 * Only some codepages must match surrogate pairs into supplementary code points -
4723 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4724 * GB 18030 provides direct encodings for supplementary code points, therefore
4725 * it must not combine two single-encoded surrogates into one code point.
4726 */
4727 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4728#endif
4729 };
4730
4731 /* expected test results */
374ca955 4732 static const int32_t results[]={
b75a7d8f
A
4733 /* number of bytes read, code point */
4734 1, 0x24,
4735 1, 0x7f,
4736 4, 0x80,
4737 2, 0x1f9,
4738 2, 0x20ac,
4739 2, 0x4e00,
4740 4, 0x9fa6,
4741 4, 0xffff,
4742 4, 0x10000,
4743 4, 0x10ffff
4744#if 0
4745 /* Feature removed. See comment above. */
4746 8, 0x10000
4747#endif
4748 };
4749
4750/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4751 UErrorCode errorCode=U_ZERO_ERROR;
4752 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4753 if(U_FAILURE(errorCode)) {
4754 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4755 return;
4756 }
4757 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4758 ucnv_close(cnv);
4759}
4760
4761static void
4762TestLMBCS() {
4763 /* LMBCS-1 string */
4764 static const uint8_t pszLMBCS[]={
4765 0x61,
4766 0x01, 0x29,
4767 0x81,
4768 0xA0,
4769 0x0F, 0x27,
4770 0x0F, 0x91,
4771 0x14, 0x0a, 0x74,
4772 0x14, 0xF6, 0x02,
4773 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4774 0x10, 0x88, 0xA0,
4775 };
4776
4777 /* Unicode UChar32 equivalents */
4778 static const UChar32 pszUnicode32[]={
4779 /* code point */
4780 0x00000061,
4781 0x00002013,
4782 0x000000FC,
4783 0x000000E1,
4784 0x00000007,
4785 0x00000091,
4786 0x00000a74,
4787 0x00000200,
4788 0x00023456, /* code point for surrogate pair */
4789 0x00005516
4790 };
4791
4792/* Unicode UChar equivalents */
4793 static const UChar pszUnicode[]={
4794 /* code point */
4795 0x0061,
4796 0x2013,
4797 0x00FC,
4798 0x00E1,
4799 0x0007,
4800 0x0091,
4801 0x0a74,
4802 0x0200,
4803 0xD84D, /* low surrogate */
4804 0xDC56, /* high surrogate */
4805 0x5516
4806 };
4807
4808/* expected test results */
4809 static const int offsets32[]={
4810 /* number of bytes read, code point */
4811 0,
4812 1,
4813 3,
4814 4,
4815 5,
4816 7,
4817 9,
4818 12,
4819 15,
4820 21,
4821 24
4822 };
4823
4824/* expected test results */
4825 static const int offsets[]={
4826 /* number of bytes read, code point */
4827 0,
4828 1,
4829 3,
4830 4,
4831 5,
4832 7,
4833 9,
4834 12,
4835 15,
4836 18,
4837 21,
4838 24
4839 };
4840
4841
4842 UConverter *cnv;
4843
4844#define NAME_LMBCS_1 "LMBCS-1"
4845#define NAME_LMBCS_2 "LMBCS-2"
4846
4847
4848 /* Some basic open/close/property tests on some LMBCS converters */
4849 {
4850
4851 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4852 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4853 char get_subchars [1];
4854 const char * get_name;
4855 UConverter *cnv1;
4856 UConverter *cnv2;
4857
4858 int8_t len = sizeof(get_subchars);
4859
4860 UErrorCode errorCode=U_ZERO_ERROR;
4861
4862 /* Open */
4863 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4864 if(U_FAILURE(errorCode)) {
4865 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4866 return;
4867 }
4868 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4869 if(U_FAILURE(errorCode)) {
4870 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4871 return;
4872 }
4873
4874 /* Name */
4875 get_name = ucnv_getName (cnv1, &errorCode);
4876 if (strcmp(NAME_LMBCS_1,get_name)){
4877 log_err("Unexpected converter name: %s\n", get_name);
4878 }
4879 get_name = ucnv_getName (cnv2, &errorCode);
4880 if (strcmp(NAME_LMBCS_2,get_name)){
4881 log_err("Unexpected converter name: %s\n", get_name);
4882 }
4883
4884 /* substitution chars */
4885 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4886 if(U_FAILURE(errorCode)) {
4887 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4888 }
4889 if (len!=1){
4890 log_err("Unexpected length of sub chars\n");
4891 }
4892 if (get_subchars[0] != expected_subchars[0]){
4893 log_err("Unexpected value of sub chars\n");
4894 }
4895 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4896 if(U_FAILURE(errorCode)) {
4897 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4898 }
4899 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4900 if(U_FAILURE(errorCode)) {
4901 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4902 }
4903 if (len!=1){
4904 log_err("Unexpected length of sub chars\n");
4905 }
4906 if (get_subchars[0] != new_subchars[0]){
4907 log_err("Unexpected value of sub chars\n");
4908 }
4909 ucnv_close(cnv1);
4910 ucnv_close(cnv2);
4911
4912 }
4913
4914 /* LMBCS to Unicode - offsets */
4915 {
4916 UErrorCode errorCode=U_ZERO_ERROR;
4917
73c04bcf
A
4918 const char * pSource = (const char *)pszLMBCS;
4919 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
b75a7d8f
A
4920
4921 UChar Out [sizeof(pszUnicode) + 1];
4922 UChar * pOut = Out;
2ca993e8 4923 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
b75a7d8f
A
4924
4925 int32_t off [sizeof(offsets)];
4926
4927 /* last 'offset' in expected results is just the final size.
4928 (Makes other tests easier). Compensate here: */
4929
2ca993e8 4930 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
b75a7d8f
A
4931
4932
4933
4934 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4935 if(U_FAILURE(errorCode)) {
4936 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4937 return;
4938 }
4939
4940
4941
4942 ucnv_toUnicode (cnv,
4943 &pOut,
4944 OutLimit,
73c04bcf
A
4945 &pSource,
4946 sourceLimit,
b75a7d8f
A
4947 off,
4948 TRUE,
4949 &errorCode);
4950
4951
4952 if (memcmp(off,offsets,sizeof(offsets)))
4953 {
4954 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4955 }
4956 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4957 {
4958 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4959 }
4960 ucnv_close(cnv);
4961 }
4962 {
4963 /* LMBCS to Unicode - getNextUChar */
4964 const char * sourceStart;
4965 const char *source=(const char *)pszLMBCS;
4966 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4967 const UChar32 *results= pszUnicode32;
4968 const int *off = offsets32;
4969
4970 UErrorCode errorCode=U_ZERO_ERROR;
4971 UChar32 uniChar;
4972
4973 cnv=ucnv_open("LMBCS-1", &errorCode);
4974 if(U_FAILURE(errorCode)) {
4975 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4976 return;
4977 }
4978 else
4979 {
4980
4981 while(source<limit) {
4982 sourceStart=source;
4983 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4984 if(U_FAILURE(errorCode)) {
4985 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4986 break;
4987 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4988 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4989 uniChar, (source-sourceStart), *results, *off);
4990 break;
4991 }
4992 results++;
4993 off++;
4994 }
4995 }
4996 ucnv_close(cnv);
4997 }
4998 { /* test locale & optimization group operations: Unicode to LMBCS */
4999
5000 UErrorCode errorCode=U_ZERO_ERROR;
5001 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5002 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5003 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5004 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5005 const UChar * pUniOut = uniString;
5006 UChar * pUniIn = uniString;
5007 uint8_t lmbcsString [4];
73c04bcf
A
5008 const char * pLMBCSOut = (const char *)lmbcsString;
5009 char * pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
5010
5011 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5012 ucnv_fromUnicode (cnv16he,
2ca993e8
A
5013 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5014 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
b75a7d8f
A
5015 NULL, 1, &errorCode);
5016
5017 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5018 {
5019 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5020 }
5021
73c04bcf 5022 pLMBCSIn= (char *)lmbcsString;
b75a7d8f
A
5023 pUniOut = uniString;
5024 ucnv_fromUnicode (cnv01us,
2ca993e8
A
5025 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5026 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
b75a7d8f
A
5027 NULL, 1, &errorCode);
5028
5029 if (lmbcsString[0] != 0x9F)
5030 {
5031 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5032 }
5033
5034 /* single byte char from mbcs char set */
5035 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
73c04bcf 5036 pLMBCSOut = (const char *)lmbcsString;
b75a7d8f
A
5037 pUniIn = uniString;
5038 ucnv_toUnicode (cnv16jp,
5039 &pUniIn, pUniIn + 1,
73c04bcf 5040 &pLMBCSOut, (pLMBCSOut + 1),
b75a7d8f 5041 NULL, 1, &errorCode);
73c04bcf 5042 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
b75a7d8f
A
5043 {
5044 log_err("Unexpected results from LMBCS-16 single byte char\n");
5045 }
5046 /* convert to group 1: should be 3 bytes */
73c04bcf 5047 pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
5048 pUniOut = uniString;
5049 ucnv_fromUnicode (cnv01us,
73c04bcf 5050 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
b75a7d8f
A
5051 &pUniOut, pUniOut + 1,
5052 NULL, 1, &errorCode);
73c04bcf 5053 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
b75a7d8f
A
5054 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5055 {
5056 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5057 }
73c04bcf 5058 pLMBCSOut = (const char *)lmbcsString;
b75a7d8f
A
5059 pUniIn = uniString;
5060 ucnv_toUnicode (cnv01us,
5061 &pUniIn, pUniIn + 1,
73c04bcf 5062 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
b75a7d8f 5063 NULL, 1, &errorCode);
73c04bcf 5064 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
b75a7d8f
A
5065 {
5066 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5067 }
73c04bcf 5068 pLMBCSIn = (char *)lmbcsString;
b75a7d8f
A
5069 pUniOut = uniString;
5070 ucnv_fromUnicode (cnv16jp,
73c04bcf 5071 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
b75a7d8f
A
5072 &pUniOut, pUniOut + 1,
5073 NULL, 1, &errorCode);
73c04bcf 5074 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
b75a7d8f
A
5075 {
5076 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5077 }
5078 ucnv_close(cnv16he);
5079 ucnv_close(cnv16jp);
5080 ucnv_close(cnv01us);
5081 }
5082 {
5083 /* Small source buffer testing, LMBCS -> Unicode */
5084
5085 UErrorCode errorCode=U_ZERO_ERROR;
5086
73c04bcf
A
5087 const char * pSource = (const char *)pszLMBCS;
5088 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
b75a7d8f
A
5089 int codepointCount = 0;
5090
5091 UChar Out [sizeof(pszUnicode) + 1];
5092 UChar * pOut = Out;
2ca993e8 5093 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
b75a7d8f
A
5094
5095
5096 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5097 if(U_FAILURE(errorCode)) {
5098 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5099 return;
5100 }
5101
5102
5103 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5104 {
5105 ucnv_toUnicode (cnv,
5106 &pOut,
5107 OutLimit,
73c04bcf
A
5108 &pSource,
5109 (pSource+1), /* claim that this is a 1- byte buffer */
b75a7d8f
A
5110 NULL,
5111 FALSE, /* FALSE means there might be more chars in the next buffer */
5112 &errorCode);
5113
5114 if (U_SUCCESS (errorCode))
5115 {
73c04bcf 5116 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
b75a7d8f
A
5117 {
5118 /* we are on to the next code point: check value */
5119
5120 if (Out[0] != pszUnicode[codepointCount]){
5121 log_err("LMBCS->Uni result %lx should have been %lx \n",
5122 Out[0], pszUnicode[codepointCount]);
5123 }
5124
5125 pOut = Out; /* reset for accumulating next code point */
5126 codepointCount++;
5127 }
5128 }
5129 else
5130 {
5131 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5132 }
5133 }
5134 {
5135 /* limits & surrogate error testing */
73c04bcf
A
5136 char LIn [sizeof(pszLMBCS)];
5137 const char * pLIn = LIn;
b75a7d8f
A
5138
5139 char LOut [sizeof(pszLMBCS)];
5140 char * pLOut = LOut;
5141
5142 UChar UOut [sizeof(pszUnicode)];
5143 UChar * pUOut = UOut;
5144
5145 UChar UIn [sizeof(pszUnicode)];
5146 const UChar * pUIn = UIn;
5147
5148 int32_t off [sizeof(offsets)];
5149 UChar32 uniChar;
5150
5151 errorCode=U_ZERO_ERROR;
5152
5153 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
729e4ab9
A
5154 pUIn++;
5155 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
b75a7d8f
A
5156 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5157 {
5158 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5159 }
729e4ab9
A
5160 pUIn--;
5161
b75a7d8f
A
5162 errorCode=U_ZERO_ERROR;
5163 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5164 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5165 {
5166 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5167 }
5168 errorCode=U_ZERO_ERROR;
5169
5170 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5171 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5172 {
5173 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5174 }
5175 errorCode=U_ZERO_ERROR;
5176
5177 /* 0 byte source request - no error, no pointer movement */
5178 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5179 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5180 if(U_FAILURE(errorCode)) {
5181 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5182 }
5183 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5184 {
5185 log_err("Unexpected pointer move in 0 byte source request \n");
5186 }
5187 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5188 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
374ca955 5189 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
b75a7d8f
A
5190 {
5191 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5192 }
5193 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5194 {
5195 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5196 }
5197 errorCode = U_ZERO_ERROR;
5198
5199 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5200
5201 pUIn = pszUnicode;
2ca993e8 5202 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
b75a7d8f
A
5203 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5204 {
5205 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5206 }
5207
5208 errorCode = U_ZERO_ERROR;
5209
73c04bcf
A
5210 pLIn = (const char *)pszLMBCS;
5211 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5212 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
b75a7d8f
A
5213 {
5214 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5215 }
5216
5217 /* unpaired or chopped LMBCS surrogates */
5218
5219 /* OK high surrogate, Low surrogate is chopped */
73c04bcf
A
5220 LIn [0] = (char)0x14;
5221 LIn [1] = (char)0xD8;
5222 LIn [2] = (char)0x01;
5223 LIn [3] = (char)0x14;
5224 LIn [4] = (char)0xDC;
b75a7d8f
A
5225 pLIn = LIn;
5226 errorCode = U_ZERO_ERROR;
5227 pUOut = UOut;
5228
374ca955 5229 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2ca993e8 5230 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
b75a7d8f
A
5231 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5232 {
5233 log_err("Unexpected results on chopped low surrogate\n");
5234 }
5235
5236 /* chopped at surrogate boundary */
73c04bcf
A
5237 LIn [0] = (char)0x14;
5238 LIn [1] = (char)0xD8;
5239 LIn [2] = (char)0x01;
b75a7d8f
A
5240 pLIn = LIn;
5241 errorCode = U_ZERO_ERROR;
5242 pUOut = UOut;
5243
2ca993e8 5244 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
b75a7d8f
A
5245 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5246 {
5247 log_err("Unexpected results on chopped at surrogate boundary \n");
5248 }
5249
5250 /* unpaired surrogate plus valid Unichar */
73c04bcf
A
5251 LIn [0] = (char)0x14;
5252 LIn [1] = (char)0xD8;
5253 LIn [2] = (char)0x01;
5254 LIn [3] = (char)0x14;
5255 LIn [4] = (char)0xC9;
5256 LIn [5] = (char)0xD0;
b75a7d8f
A
5257 pLIn = LIn;
5258 errorCode = U_ZERO_ERROR;
5259 pUOut = UOut;
5260
2ca993e8 5261 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
b75a7d8f
A
5262 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5263 {
5264 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5265 }
5266
5267 /* unpaired surrogate plus chopped Unichar */
73c04bcf
A
5268 LIn [0] = (char)0x14;
5269 LIn [1] = (char)0xD8;
5270 LIn [2] = (char)0x01;
5271 LIn [3] = (char)0x14;
5272 LIn [4] = (char)0xC9;
b75a7d8f
A
5273
5274 pLIn = LIn;
5275 errorCode = U_ZERO_ERROR;
5276 pUOut = UOut;
5277
2ca993e8 5278 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
b75a7d8f
A
5279 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5280 {
5281 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5282 }
5283
5284 /* unpaired surrogate plus valid non-Unichar */
73c04bcf
A
5285 LIn [0] = (char)0x14;
5286 LIn [1] = (char)0xD8;
5287 LIn [2] = (char)0x01;
5288 LIn [3] = (char)0x0F;
5289 LIn [4] = (char)0x3B;
b75a7d8f
A
5290
5291 pLIn = LIn;
5292 errorCode = U_ZERO_ERROR;
5293 pUOut = UOut;
5294
2ca993e8 5295 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
b75a7d8f
A
5296 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5297 {
5298 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5299 }
5300
5301 /* unpaired surrogate plus chopped non-Unichar */
73c04bcf
A
5302 LIn [0] = (char)0x14;
5303 LIn [1] = (char)0xD8;
5304 LIn [2] = (char)0x01;
5305 LIn [3] = (char)0x0F;
b75a7d8f
A
5306
5307 pLIn = LIn;
5308 errorCode = U_ZERO_ERROR;
5309 pUOut = UOut;
5310
2ca993e8 5311 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
b75a7d8f
A
5312
5313 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5314 {
5315 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5316 }
5317 }
5318 }
5319 ucnv_close(cnv); /* final cleanup */
5320}
5321
5322
5323static void TestJitterbug255()
5324{
73c04bcf
A
5325 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5326 const char *testBuffer = (const char *)testBytes;
5327 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
b75a7d8f 5328 UErrorCode status = U_ZERO_ERROR;
729e4ab9 5329 /*UChar32 result;*/
b75a7d8f
A
5330 UConverter *cnv = 0;
5331
5332 cnv = ucnv_open("shift-jis", &status);
5333 if (U_FAILURE(status) || cnv == 0) {
5334 log_data_err("Failed to open the converter for SJIS.\n");
5335 return;
5336 }
5337 while (testBuffer != testEnd)
5338 {
729e4ab9 5339 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
b75a7d8f
A
5340 if (U_FAILURE(status))
5341 {
5342 log_err("Failed to convert the next UChar for SJIS.\n");
5343 break;
5344 }
5345 }
5346 ucnv_close(cnv);
5347}
5348
5349static void TestEBCDICUS4XML()
5350{
5351 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5352 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5353 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5354 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5355 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5356 UChar *unicodes = unicodes_x;
5357 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5358 char *target = target_x;
5359 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5360 UErrorCode status = U_ZERO_ERROR;
5361 UConverter *cnv = 0;
5362
5363 cnv = ucnv_open("ebcdic-xml-us", &status);
5364 if (U_FAILURE(status) || cnv == 0) {
5365 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5366 return;
5367 }
5368 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5369 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5370 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5371 u_errorName(status));
5372 printUSeqErr(unicodes_x, 3);
5373 printUSeqErr(toUnicodeMaps, 3);
5374 }
5375 status = U_ZERO_ERROR;
5376 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5377 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5378 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5379 u_errorName(status));
5380 printSeqErr((const unsigned char*)target_x, 3);
5381 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5382 }
5383 ucnv_close(cnv);
5384}
73c04bcf 5385#endif /* #if !UCONFIG_NO_LEGACY_COLLATION */
b75a7d8f
A
5386
5387#if !UCONFIG_NO_COLLATION
5388
5389static void TestJitterbug981(){
374ca955
A
5390 const UChar* rules;
5391 int32_t rules_length, target_cap, bytes_needed, buff_size;
5392 UErrorCode status = U_ZERO_ERROR;
5393 UConverter *utf8cnv;
5394 UCollator* myCollator;
5395 char *buff;
5396 int numNeeded=0;
5397 utf8cnv = ucnv_open ("utf8", &status);
5398 if(U_FAILURE(status)){
46f4442e 5399 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
374ca955
A
5400 return;
5401 }
5402 myCollator = ucol_open("zh", &status);
5403 if(U_FAILURE(status)){
729e4ab9 5404 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
46f4442e 5405 ucnv_close(utf8cnv);
374ca955
A
5406 return;
5407 }
b75a7d8f 5408
374ca955 5409 rules = ucol_getRules(myCollator, &rules_length);
b331163b
A
5410 if(rules_length == 0) {
5411 log_data_err("missing zh tailoring rule string\n");
5412 ucol_close(myCollator);
5413 ucnv_close(utf8cnv);
5414 return;
5415 }
374ca955
A
5416 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5417 buff = malloc(buff_size);
b75a7d8f 5418
374ca955
A
5419 target_cap = 0;
5420 do {
5421 ucnv_reset(utf8cnv);
5422 status = U_ZERO_ERROR;
5423 if(target_cap >= buff_size) {
5424 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
46f4442e 5425 break;
374ca955
A
5426 }
5427 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5428 rules, rules_length, &status);
5429 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5430 if(numNeeded!=0 && numNeeded!= bytes_needed){
5431 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
46f4442e 5432 break;
374ca955
A
5433 }
5434 numNeeded = bytes_needed;
5435 } while (status == U_BUFFER_OVERFLOW_ERROR);
5436 ucol_close(myCollator);
5437 ucnv_close(utf8cnv);
5438 free(buff);
b75a7d8f
A
5439}
5440
5441#endif
5442
51004dcb 5443#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 5444static void TestJitterbug1293(){
73c04bcf 5445 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
b75a7d8f
A
5446 char target[256];
5447 UErrorCode status = U_ZERO_ERROR;
5448 UConverter* conv=NULL;
5449 int32_t target_cap, bytes_needed, numNeeded = 0;
5450 conv = ucnv_open("shift-jis",&status);
5451 if(U_FAILURE(status)){
5452 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5453 return;
5454 }
5455
5456 do{
5457 target_cap =0;
5458 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5459 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5460 if(numNeeded!=0 && numNeeded!= bytes_needed){
5461 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5462 }
5463 numNeeded = bytes_needed;
5464 } while (status == U_BUFFER_OVERFLOW_ERROR);
5465 if(U_FAILURE(status)){
5466 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5467 return;
5468 }
5469 ucnv_close(conv);
5470}
51004dcb
A
5471#endif
5472
46f4442e
A
5473static void TestJB5275_1(){
5474
5475 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5476 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5477 /* Switch script: */
5478 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5479 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5480 "\xEF\x40\x3B\xB3\x0A";
5481 static const UChar expected[] ={
5482 0x003b, 0x0a15, 0x000a, /* Easy characters */
729e4ab9 5483 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
46f4442e
A
5484 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5485 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5486 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5487 };
5488
5489 UErrorCode status = U_ZERO_ERROR;
5490 UConverter* conv = ucnv_open("iscii-gur", &status);
5491 UChar dest[100] = {'\0'};
5492 UChar* target = dest;
5493 UChar* targetLimit = dest+100;
5494 const char* source = data;
5495 const char* sourceLimit = data+strlen(data);
5496 const UChar* exp = expected;
729e4ab9
A
5497
5498 if (U_FAILURE(status)) {
5499 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5500 return;
5501 }
5502
46f4442e
A
5503 log_verbose("Testing switching back to default script when new line is encountered.\n");
5504 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5505 if(U_FAILURE(status)){
5506 log_err("conversion failed: %s \n", u_errorName(status));
5507 }
5508 targetLimit = target;
5509 target = dest;
3d1f044b 5510 printUSeq(target, (int)(targetLimit-target));
46f4442e
A
5511 while(target<targetLimit){
5512 if(*exp!=*target){
5513 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5514 }
5515 target++;
5516 exp++;
5517 }
5518 ucnv_close(conv);
5519}
5520
5521static void TestJB5275(){
5522 static const char* data =
5523 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5524 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5525 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5526 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5527 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5528 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5529 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5530 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5531 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5532 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5533 static const UChar expected[] ={
729e4ab9 5534 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
46f4442e
A
5535 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5536 0x0038, 0x0C95, 0x000A, /* Kannada test */
5537 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5538 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5539 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5540 };
5541
5542 UErrorCode status = U_ZERO_ERROR;
5543 UConverter* conv = ucnv_open("iscii", &status);
5544 UChar dest[100] = {'\0'};
5545 UChar* target = dest;
5546 UChar* targetLimit = dest+100;
5547 const char* source = data;
5548 const char* sourceLimit = data+strlen(data);
5549 const UChar* exp = expected;
5550 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5551 if(U_FAILURE(status)){
b331163b 5552 log_data_err("conversion failed: %s \n", u_errorName(status));
46f4442e
A
5553 }
5554 targetLimit = target;
5555 target = dest;
b75a7d8f 5556
3d1f044b 5557 printUSeq(target, (int)(targetLimit-target));
46f4442e
A
5558
5559 while(target<targetLimit){
5560 if(*exp!=*target){
5561 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5562 }
5563 target++;
5564 exp++;
5565 }
5566 ucnv_close(conv);
5567}
4388f060
A
5568
5569static void
5570TestIsFixedWidth() {
5571 UErrorCode status = U_ZERO_ERROR;
5572 UConverter *cnv = NULL;
5573 int32_t i;
5574
5575 const char *fixedWidth[] = {
5576 "US-ASCII",
5577 "UTF32",
5578 "ibm-5478_P100-1995"
5579 };
5580
5581 const char *notFixedWidth[] = {
5582 "GB18030",
5583 "UTF8",
5584 "windows-949-2000",
5585 "UTF16"
5586 };
5587
b331163b 5588 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
4388f060
A
5589 cnv = ucnv_open(fixedWidth[i], &status);
5590 if (cnv == NULL || U_FAILURE(status)) {
5591 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5592 continue;
5593 }
5594
5595 if (!ucnv_isFixedWidth(cnv, &status)) {
5596 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5597 }
5598 ucnv_close(cnv);
5599 }
5600
b331163b 5601 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
4388f060
A
5602 cnv = ucnv_open(notFixedWidth[i], &status);
5603 if (cnv == NULL || U_FAILURE(status)) {
5604 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5605 continue;
5606 }
5607
5608 if (ucnv_isFixedWidth(cnv, &status)) {
5609 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5610 }
5611 ucnv_close(cnv);
5612 }
5613}