1 /********************************************************************
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 ********************************************************************************
10 * Modification History:
12 * Madhu Katragadda 7/21/1999 Testing error callback routines
13 ********************************************************************************
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
27 #include "unicode/ucnv_cb.h"
28 #include "unicode/utf16.h"
/* Buffer size used for the "large buffer" runs; the Test*CallBack drivers in
 * this file pass it alongside 1 as input/output buffer sizes. */
#define NEW_MAX_BUFFER 999

/* Smaller of x and y.
 * Every use of an argument is parenthesised so that operands containing
 * low-precedence operators (e.g. `a & b`, `c ? d : e`) compare and expand
 * correctly. Each argument may still be evaluated twice, so do not pass
 * expressions with side effects. */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Number of elements in a true array (not a pointer or array parameter). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))
/* File-scope test state.
 * gInBufferSize / gOutBufferSize: set from the inputsize / outputsize
 * arguments at the start of TestSkip and TestStop.
 * gNuConvTestName: 1024-byte scratch buffer that setNuConvTestName formats
 * the current test description into. */
35 static int32_t gInBufferSize
= 0;
36 static int32_t gOutBufferSize
= 0;
37 static char gNuConvTestName
[1024];
/* Logs bytes from `a` through log_verbose, each formatted as "0x%02X, ".
 * Only the per-element call is visible in this excerpt; the surrounding loop
 * is not shown, so `len` is presumably the element count that bounds it -
 * TODO confirm against the full source. */
39 static void printSeq(const uint8_t* a
, int len
)
44 log_verbose("0x%02X, ", a
[i
++]);
/* Logs UChar code units from `a` through log_verbose, each formatted as
 * " 0x%04x, ".
 * The loop around the visible call is not shown in this excerpt; `len` is
 * presumably its bound - TODO confirm. */
48 static void printUSeq(const UChar
* a
, int len
)
53 log_verbose(" 0x%04x, ", a
[i
++]);
/* Writes bytes from `a` directly to stderr, each formatted as " 0x%02x, ",
 * followed by a closing "}" and newline.
 * The loop around the per-element fprintf is not visible in this excerpt;
 * `len` is presumably its bound - TODO confirm. */
57 static void printSeqErr(const uint8_t* a
, int len
)
62 fprintf(stderr
, " 0x%02x, ", a
[i
++]);
63 fprintf(stderr
, "}\n");
/* Writes UChar code units from `a` directly to stderr, each formatted as
 * "0x%04x, ", followed by a closing "}" and newline.
 * The loop around the per-element fprintf is not visible in this excerpt;
 * `len` is presumably its bound - TODO confirm. */
66 static void printUSeqErr(const UChar
* a
, int len
)
71 fprintf(stderr
, "0x%04x, ", a
[i
++]);
72 fprintf(stderr
,"}\n");
/* Formats a description of the current test into the file-scope buffer
 * gNuConvTestName using the pattern
 * "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]".
 * The argument list of the sprintf call is cut off in this excerpt; the two
 * %s fields are presumably `codepage` and `direction` and the two %d fields
 * the global buffer sizes - TODO confirm.
 * NOTE(review): sprintf is unbounded while gNuConvTestName is declared as
 * char[1024]; a very long codepage/direction string could overflow it.
 * Consider a bounded formatter once the full call is visible. */
75 static void setNuConvTestName(const char *codepage
, const char *direction
)
77 sprintf(gNuConvTestName
, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
85 static void TestCallBackFailure(void);
/* Registers this file's callback tests on the test tree `root` via addTest,
 * each under a "tsconv/nccbtst/<TestName>" path.
 * A prototype precedes the definition because the function has external
 * linkage. */
87 void addTestConvertErrorCallBack(TestNode
** root
);
89 void addTestConvertErrorCallBack(TestNode
** root
)
91 addTest(root
, &TestSkipCallBack
, "tsconv/nccbtst/TestSkipCallBack");
92 addTest(root
, &TestStopCallBack
, "tsconv/nccbtst/TestStopCallBack");
93 addTest(root
, &TestSubCallBack
, "tsconv/nccbtst/TestSubCallBack");
94 addTest(root
, &TestSubWithValueCallBack
, "tsconv/nccbtst/TestSubWithValueCallBack");
/* Registrations that follow the #if are conditional on legacy converters
 * being compiled in; the matching #endif is not visible in this excerpt, so
 * the exact extent of the guarded region cannot be confirmed here. */
96 #if !UCONFIG_NO_LEGACY_CONVERSION
97 addTest(root
, &TestLegalAndOtherCallBack
, "tsconv/nccbtst/TestLegalAndOtherCallBack");
98 addTest(root
, &TestSingleByteCallBack
, "tsconv/nccbtst/TestSingleByteCallBack");
101 addTest(root
, &TestCallBackFailure
, "tsconv/nccbtst/TestCallBackFailure");
/* Driver for the SKIP-callback tests: calls TestSkip(inputsize, outputsize)
 * with combinations of NEW_MAX_BUFFER and 1 as buffer sizes.
 * Some lines of the original body are missing from this excerpt, so further
 * combinations may exist. */
104 static void TestSkipCallBack()
106 TestSkip(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
107 TestSkip(1,NEW_MAX_BUFFER
);
109 TestSkip(NEW_MAX_BUFFER
, 1);
/* Driver for the STOP-callback tests: calls TestStop(inputsize, outputsize)
 * with combinations of NEW_MAX_BUFFER and 1 as buffer sizes.
 * Some lines of the original body are missing from this excerpt, so further
 * combinations may exist. */
112 static void TestStopCallBack()
114 TestStop(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
115 TestStop(1,NEW_MAX_BUFFER
);
117 TestStop(NEW_MAX_BUFFER
, 1);
/* Driver for the substitution-callback tests: calls TestSub with
 * combinations of NEW_MAX_BUFFER and 1 (presumably input size, output size,
 * as for TestSkip - TestSub's signature is not visible here).
 * Some lines of the original body are missing from this excerpt. */
120 static void TestSubCallBack()
122 TestSub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
123 TestSub(1,NEW_MAX_BUFFER
);
125 TestSub(NEW_MAX_BUFFER
, 1);
/* With legacy converters available, also run the EBCDIC_STATEFUL variant for
 * all four combinations of 1 and NEW_MAX_BUFFER. The matching #endif is not
 * visible in this excerpt. */
127 #if !UCONFIG_NO_LEGACY_CONVERSION
128 TestEBCDIC_STATEFUL_Sub(1, 1);
129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER
);
130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, 1);
131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
/* Driver for the substitute-with-value callback tests: calls
 * TestSubWithValue for all four combinations of 1 and NEW_MAX_BUFFER
 * (presumably input size, output size - the callee's signature is not
 * visible in this excerpt). */
135 static void TestSubWithValueCallBack()
137 TestSubWithValue(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
138 TestSubWithValue(1,NEW_MAX_BUFFER
);
139 TestSubWithValue(1,1);
140 TestSubWithValue(NEW_MAX_BUFFER
, 1);
143 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Driver that calls TestLegalAndOthers for all four combinations of 1 and
 * NEW_MAX_BUFFER (presumably input size, output size - the callee's
 * signature is not visible in this excerpt).
 * Defined after an `#if !UCONFIG_NO_LEGACY_CONVERSION` line, so it is only
 * compiled when legacy converters are enabled. */
144 static void TestLegalAndOtherCallBack()
146 TestLegalAndOthers(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
147 TestLegalAndOthers(1,NEW_MAX_BUFFER
);
148 TestLegalAndOthers(1,1);
149 TestLegalAndOthers(NEW_MAX_BUFFER
, 1);
/* Driver that calls TestSingleByte with combinations of NEW_MAX_BUFFER and 1
 * (presumably input size, output size - the callee's signature is not
 * visible in this excerpt).
 * Some lines of the original body are missing from this excerpt, so further
 * combinations may exist. */
152 static void TestSingleByteCallBack()
154 TestSingleByte(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
155 TestSingleByte(1,NEW_MAX_BUFFER
);
157 TestSingleByte(NEW_MAX_BUFFER
, 1);
161 static void TestSkip(int32_t inputsize
, int32_t outputsize
)
163 static const uint8_t expskipIBM_949
[]= {
164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
166 static const uint8_t expskipIBM_943
[] = {
167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
169 static const uint8_t expskipIBM_930
[] = {
170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
172 gInBufferSize
= inputsize
;
173 gOutBufferSize
= outputsize
;
176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
178 #if !UCONFIG_NO_LEGACY_CONVERSION
180 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
181 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
183 static const int32_t toIBM949Offsskip
[] = { 0, 1, 1, 2, 2, 4, 4 };
184 static const int32_t toIBM943Offsskip
[] = { 0, 0, 1, 1, 3, 3 };
186 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
187 expskipIBM_949
, sizeof(expskipIBM_949
), "ibm-949",
188 UCNV_FROM_U_CALLBACK_SKIP
, toIBM949Offsskip
, NULL
, 0 ))
189 log_err("u-> ibm-949 with skip did not match.\n");
190 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
191 expskipIBM_943
, sizeof(expskipIBM_943
), "ibm-943",
192 UCNV_FROM_U_CALLBACK_SKIP
, toIBM943Offsskip
, NULL
, 0 ))
193 log_err("u-> ibm-943 with skip did not match.\n");
197 static const UChar fromU
[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
198 static const uint8_t fromUBytes
[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
199 static const int32_t fromUOffsets
[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
202 if(!testConvertFromUnicode(fromU
, sizeof(fromU
)/U_SIZEOF_UCHAR
,
203 fromUBytes
, sizeof(fromUBytes
),
205 UCNV_FROM_U_CALLBACK_SKIP
, fromUOffsets
,
208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
214 static const UChar usasciiFromU
[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
215 static const uint8_t usasciiFromUBytes
[] = { 0x61, 0x31, 0x39 };
216 static const int32_t usasciiFromUOffsets
[] = { 0, 3, 6 };
218 static const UChar latin1FromU
[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219 static const uint8_t latin1FromUBytes
[] = { 0x61, 0xa0, 0x31, 0x39 };
220 static const int32_t latin1FromUOffsets
[] = { 0, 1, 3, 6 };
223 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
224 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
226 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
229 log_err("u->US-ASCII with skip did not match.\n");
232 #if !UCONFIG_NO_LEGACY_CONVERSION
233 /* SBCS NLTC codepage 367 for US-ASCII */
234 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
235 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
237 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
240 log_err("u->ibm-367 with skip did not match.\n");
245 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
246 latin1FromUBytes
, sizeof(latin1FromUBytes
),
248 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
251 log_err("u->LATIN_1 with skip did not match.\n");
254 #if !UCONFIG_NO_LEGACY_CONVERSION
256 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
257 latin1FromUBytes
, sizeof(latin1FromUBytes
),
259 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
262 log_err("u->windows-1252 with skip did not match.\n");
267 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
268 static const uint8_t toIBM943
[]= { 0x61, 0x61 };
269 static const int32_t offset
[]= {0, 4};
272 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
273 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
276 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
279 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
280 static const uint8_t to_euc_tw
[]={
281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
282 0x61, 0xe6, 0xca, 0x8a,
284 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
287 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
288 static const uint8_t to_iso_2022_jp
[]={
293 static const int32_t from_iso_2022_jpOffs
[] ={0,2};
296 UChar
const iso_2022_jp_inputText2
[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
297 static const uint8_t to_iso_2022_jp2
[]={
302 static const int32_t from_iso_2022_jpOffs2
[] ={0,2};
305 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
306 static const uint8_t to_iso_2022_cn
[]={
309 static const int32_t from_iso_2022_cnOffs
[] ={
314 static const UChar iso_2022_cn_inputText1
[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
315 static const uint8_t to_iso_2022_cn1
[]={
319 static const int32_t from_iso_2022_cnOffs1
[] ={ 0, 2 };
322 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
323 static const uint8_t to_iso_2022_kr
[]={
324 0x1b, 0x24, 0x29, 0x43,
330 static const int32_t from_iso_2022_krOffs
[] ={
339 static const UChar iso_2022_kr_inputText1
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
340 static const uint8_t to_iso_2022_kr1
[]={
341 0x1b, 0x24, 0x29, 0x43,
347 static const int32_t from_iso_2022_krOffs1
[] ={
355 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
357 static const uint8_t to_hz
[]={
359 0x7e, 0x7b, 0x26, 0x30,
364 static const int32_t from_hzOffs
[] ={
371 static const UChar hz_inputText1
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
373 static const uint8_t to_hz1
[]={
375 0x7e, 0x7b, 0x26, 0x30,
380 static const int32_t from_hzOffs1
[] ={
389 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
391 static const uint8_t to_SCSU
[]={
397 static const int32_t from_SCSUOffs
[] ={
403 #if !UCONFIG_NO_LEGACY_CONVERSION
405 static const UChar iscii_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
406 static const uint8_t to_iscii
[]={
410 static const int32_t from_isciiOffs
[] ={
415 static const UChar iscii_inputText1
[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
416 static const uint8_t to_iscii1
[]={
421 static const int32_t from_isciiOffs1
[] ={0,2};
423 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
424 toIBM943
, sizeof(toIBM943
), "ibm-943",
425 UCNV_FROM_U_CALLBACK_SKIP
, offset
, NULL
, 0 ))
426 log_err("u-> ibm-943 with skip did not match.\n");
428 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
429 to_euc_jp
, sizeof(to_euc_jp
), "IBM-eucJP",
430 UCNV_FROM_U_CALLBACK_SKIP
, fromEUC_JPOffs
, NULL
, 0 ))
431 log_err("u-> euc-jp with skip did not match.\n");
433 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
434 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
435 UCNV_FROM_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0 ))
436 log_err("u-> euc-tw with skip did not match.\n");
439 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
440 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
441 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0 ))
442 log_err("u-> iso-2022-jp with skip did not match.\n");
445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
446 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
447 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs2
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
452 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
453 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0 ))
454 log_err("u-> iso-2022-cn with skip did not match.\n");
456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1
, sizeof(iso_2022_cn_inputText1
)/sizeof(iso_2022_cn_inputText1
[0]),
457 to_iso_2022_cn1
, sizeof(to_iso_2022_cn1
), "iso-2022-cn",
458 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
463 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
464 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0 ))
465 log_err("u-> iso-2022-kr with skip did not match.\n");
467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1
, sizeof(iso_2022_kr_inputText1
)/sizeof(iso_2022_kr_inputText1
[0]),
468 to_iso_2022_kr1
, sizeof(to_iso_2022_kr1
), "iso-2022-kr",
469 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
473 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
474 to_hz
, sizeof(to_hz
), "HZ",
475 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0 ))
476 log_err("u-> HZ with skip did not match.\n");
478 if(!testConvertFromUnicodeWithContext(hz_inputText1
, sizeof(hz_inputText1
)/sizeof(hz_inputText1
[0]),
479 to_hz1
, sizeof(to_hz1
), "hz",
480 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
485 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
486 to_SCSU
, sizeof(to_SCSU
), "SCSU",
487 UCNV_FROM_U_CALLBACK_SKIP
, from_SCSUOffs
, NULL
, 0 ))
488 log_err("u-> SCSU with skip did not match.\n");
490 #if !UCONFIG_NO_LEGACY_CONVERSION
492 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
493 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
494 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0 ))
495 log_err("u-> iscii with skip did not match.\n");
497 if(!testConvertFromUnicodeWithContext(iscii_inputText1
, sizeof(iscii_inputText1
)/sizeof(iscii_inputText1
[0]),
498 to_iscii1
, sizeof(to_iscii1
), "ISCII,version=0",
499 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
506 static const uint8_t sampleText
[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
507 0xFB, 0xEE, 0x28, /* from source offset 0 */
525 0xF9, 0x28, /* from 16 */
534 0xFA, 0x83, /* from 24 */
543 0xF9, 0xA2, /* from 32 */
545 0xFE, 0x16, 0x3A, 0x8C,
554 static const UChar expected
[]={
555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
556 0x0063, 0x0061, 0x000D, 0x000A,
558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
559 0x0930, 0x0020, 0x0918, 0x0909,
561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
562 0x4000, 0x4E00, 0x7777, 0x0020,
564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
565 0x0020, 0xD7A3, 0xDC00, 0xD800,
567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
573 static const int32_t offsets
[]={
574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
583 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
584 sampleText
, sizeof(sampleText
),
586 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
588 log_err("u->BOCU-1 with skip did not match.\n");
592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
594 const uint8_t sampleText
[]={
596 0xc4, 0xb5, /* U+0135 */
597 0xed, 0x80, 0xa0, /* Hangul U+d020 */
598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
599 0xee, 0x80, 0x80, /* PUA U+e000 */
600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
603 0xd0, 0x80 /* U+0400 */
628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
630 /* without offsets */
631 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
632 sampleText
, sizeof(sampleText
),
634 UCNV_FROM_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
636 log_err("u->CESU-8 with skip did not match.\n");
640 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
641 sampleText
, sizeof(sampleText
),
643 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
645 log_err("u->CESU-8 with skip did not match.\n");
650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
652 #if !UCONFIG_NO_LEGACY_CONVERSION
655 static const UChar IBM_949skiptoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
656 static const UChar IBM_943skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
657 static const UChar IBM_930skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
659 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5};
660 static const int32_t fromIBM943Offs
[] = { 0, 2, 4};
661 static const int32_t fromIBM930Offs
[] = { 1, 3, 5};
663 if(!testConvertToUnicode(expskipIBM_949
, sizeof(expskipIBM_949
),
664 IBM_949skiptoUnicode
, sizeof(IBM_949skiptoUnicode
)/sizeof(IBM_949skiptoUnicode
),"ibm-949",
665 UCNV_TO_U_CALLBACK_SKIP
, fromIBM949Offs
, NULL
, 0 ))
666 log_err("ibm-949->u with skip did not match.\n");
667 if(!testConvertToUnicode(expskipIBM_943
, sizeof(expskipIBM_943
),
668 IBM_943skiptoUnicode
, sizeof(IBM_943skiptoUnicode
)/sizeof(IBM_943skiptoUnicode
[0]),"ibm-943",
669 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offs
, NULL
, 0 ))
670 log_err("ibm-943->u with skip did not match.\n");
673 if(!testConvertToUnicode(expskipIBM_930
, sizeof(expskipIBM_930
),
674 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
675 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0 ))
676 log_err("ibm-930->u with skip did not match.\n");
679 if(!testConvertToUnicodeWithContext(expskipIBM_930
, sizeof(expskipIBM_930
),
680 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
681 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
682 log_err("ibm-930->u with skip did not match.\n");
687 static const uint8_t usasciiToUBytes
[] = { 0x61, 0x80, 0x31 };
688 static const UChar usasciiToU
[] = { 0x61, 0x31 };
689 static const int32_t usasciiToUOffsets
[] = { 0, 2 };
691 static const uint8_t latin1ToUBytes
[] = { 0x61, 0xa0, 0x31 };
692 static const UChar latin1ToU
[] = { 0x61, 0xa0, 0x31 };
693 static const int32_t latin1ToUOffsets
[] = { 0, 1, 2 };
696 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
697 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
699 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
702 log_err("US-ASCII->u with skip did not match.\n");
705 #if !UCONFIG_NO_LEGACY_CONVERSION
706 /* SBCS NLTC codepage 367 for US-ASCII */
707 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
708 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
710 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
713 log_err("ibm-367->u with skip did not match.\n");
718 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
719 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
721 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
724 log_err("LATIN_1->u with skip did not match.\n");
727 #if !UCONFIG_NO_LEGACY_CONVERSION
729 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
730 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
732 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
735 log_err("windows-1252->u with skip did not match.\n");
740 #if !UCONFIG_NO_LEGACY_CONVERSION
742 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
745 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0x03b4
747 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 5};
751 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
752 0x8f, 0xda, 0xa1, /*unassigned*/
755 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
756 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 9};
759 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
763 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
764 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3, 11, 13};
766 static const uint8_t sampleTxt_iso_2022_jp
[]={
768 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
769 0x1b, 0x28, 0x42, 0x42,
772 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x42 };
773 static const int32_t from_iso_2022_jpOffs
[] ={ 0,9 };
776 static const uint8_t sampleTxt_iso_2022_cn
[]={
778 0x1B, 0x24, 0x29, 0x47,
779 0x0E, 0x40, 0x6f, /*unassigned*/
784 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x42 };
785 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 11 };
788 static const uint8_t sampleTxt_iso_2022_kr
[]={
789 0x1b, 0x24, 0x29, 0x43,
797 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x03A0,0x51, 0x42,0x43};
798 static const int32_t from_iso_2022_krOffs
[] ={ 4, 9, 12, 13 , 14 };
801 static const uint8_t sampleTxt_hz
[]={
803 0x7e, 0x7b, 0x26, 0x30,
804 0x7f, 0x1E, /*unassigned*/
807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
810 static const UChar hztoUnicode
[]={
817 static const int32_t from_hzOffs
[] ={0,3,7,11,18, };
820 static const uint8_t sampleTxt_iscii
[]={
830 static const UChar isciitoUnicode
[]={
839 static const int32_t from_isciiOffs
[] ={0,1,3,4,5,7 };
842 static const uint8_t sampleTxtLMBCS
[]={ 0x12, 0xc9, 0x50,
843 0x12, 0x92, 0xa0, /*unassigned*/
846 static const UChar LMBCSToUnicode
[]={ 0x4e2e, 0xe5c4};
847 static const int32_t fromLMBCS
[] = {0, 6};
849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
850 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
851 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
855 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
856 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
859 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
860 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP",
861 UCNV_TO_U_CALLBACK_SKIP
, from_euc_jpOffs
, NULL
, 0))
862 log_err("euc-jp->u with skip did not match.\n");
866 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
867 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
868 UCNV_TO_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0))
869 log_err("euc-tw->u with skip did not match.\n");
872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
873 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
874 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0))
875 log_err("iso-2022-jp->u with skip did not match.\n");
877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
878 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
879 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0))
880 log_err("iso-2022-cn->u with skip did not match.\n");
882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
883 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
884 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0))
885 log_err("iso-2022-kr->u with skip did not match.\n");
887 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
888 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
889 UCNV_TO_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0))
890 log_err("HZ->u with skip did not match.\n");
892 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
893 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
894 UCNV_TO_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0))
895 log_err("iscii->u with skip did not match.\n");
897 if(!testConvertToUnicode(sampleTxtLMBCS
, sizeof(sampleTxtLMBCS
),
898 LMBCSToUnicode
, sizeof(LMBCSToUnicode
)/sizeof(LMBCSToUnicode
[0]),"LMBCS-1",
899 UCNV_TO_U_CALLBACK_SKIP
, fromLMBCS
, NULL
, 0))
900 log_err("LMBCS->u with skip did not match.\n");
905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
907 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
909 UChar expected1
[] = { 0x0031, 0x4e8c, 0x0061};
910 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0006};
912 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
913 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
914 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
915 log_err("utf8->u with skip did not match.\n");;
918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
920 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
921 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
922 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
924 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
925 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
926 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
927 log_err("scsu->u with skip did not match.\n");
930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
932 const uint8_t sampleText
[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
934 0x24, 0x1E, 0x52, /* 3 */
937 0x40, 0x07, /* 8 - wrong trail byte */
940 0xD0, 0x20, /* 12 - wrong trail byte */
961 0xFB, 0x16, 0x87, /* 42 */
968 0xFC, 0x10, 0x3E, /* 56 */
969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
971 0xFC, 0x03, 0xAC, /* 64 */
972 0xFF, /* 67 - FF just resets the state without encoding anything */
979 0xFEFF, 0x0061, 0x0062, 0x0020,
980 0x0063, 0x0061, 0x000D, 0x000A,
981 0x0020, 0x0000, 0x00DF, 0x00E6,
982 0x0930, 0x0020, 0x0918, 0x0909,
983 0x3086, 0x304D, 0x0020, 0x3053,
984 0x4000, 0x4E00, 0x7777, 0x0020,
985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
986 0x0020, 0xD7A3, 0xDC00, 0xD800,
987 0xD800, 0xDC00, 0xD845, 0xDDDD,
988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
989 0xDFFF, 0x0001, 0x0E40, 0x0020,
993 0, 3, 6, 7, /* skip 8, */
994 10, 11, /* skip 12, */
996 20, 21, 23, 24, 25, 26, 28, 29,
997 30, 31, 33, 35, 37, 38,
999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1000 63, 64, /* trail */ 64, /* reset only 67, */
1005 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1006 expected
, ARRAY_LENGTH(expected
), "BOCU-1",
1007 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1009 log_err("BOCU-1->u with skip did not match.\n");
1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1015 const uint8_t sampleText
[]={
1017 0xc0, 0x80, /* 1 non-shortest form */
1018 0xc4, 0xb5, /* 3 U+0135 */
1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1026 0xed, 0xa0, /* 28 incomplete sequence */
1027 0xd0, 0x80 /* 30 U+0400 */
1057 /* without offsets */
1058 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1059 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1060 UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
1062 log_err("CESU-8->u with skip did not match.\n");
1066 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1067 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1068 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1070 log_err("CESU-8->u with skip did not match.\n");
1075 static void TestStop(int32_t inputsize
, int32_t outputsize
)
1077 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1078 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1080 static const uint8_t expstopIBM_949
[]= {
1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1083 static const uint8_t expstopIBM_943
[] = {
1084 0x9f, 0xaf, 0x9f, 0xb1};
1086 static const uint8_t expstopIBM_930
[] = {
1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1089 static const UChar IBM_949stoptoUnicode
[]= {0x0000, 0xAC00, 0xAC01};
1090 static const UChar IBM_943stoptoUnicode
[]= { 0x6D63, 0x6D64};
1091 static const UChar IBM_930stoptoUnicode
[]= { 0x6D63, 0x6D64};
1094 static const int32_t toIBM949Offsstop
[] = { 0, 1, 1, 2, 2};
1095 static const int32_t toIBM943Offsstop
[] = { 0, 0, 1, 1};
1096 static const int32_t toIBM930Offsstop
[] = { 0, 0, 0, 1, 1};
1098 static const int32_t fromIBM949Offs
[] = { 0, 1, 3};
1099 static const int32_t fromIBM943Offs
[] = { 0, 2};
1100 static const int32_t fromIBM930Offs
[] = { 1, 3};
1102 gInBufferSize
= inputsize
;
1103 gOutBufferSize
= outputsize
;
1107 #if !UCONFIG_NO_LEGACY_CONVERSION
1108 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1109 expstopIBM_949
, sizeof(expstopIBM_949
), "ibm-949",
1110 UCNV_FROM_U_CALLBACK_STOP
, toIBM949Offsstop
, NULL
, 0 ))
1111 log_err("u-> ibm-949 with stop did not match.\n");
1112 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1113 expstopIBM_943
, sizeof(expstopIBM_943
), "ibm-943",
1114 UCNV_FROM_U_CALLBACK_STOP
, toIBM943Offsstop
, NULL
, 0))
1115 log_err("u-> ibm-943 with stop did not match.\n");
1116 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1117 expstopIBM_930
, sizeof(expstopIBM_930
), "ibm-930",
1118 UCNV_FROM_U_CALLBACK_STOP
, toIBM930Offsstop
, NULL
, 0 ))
1119 log_err("u-> ibm-930 with stop did not match.\n");
1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1123 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1124 static const uint8_t toIBM943
[]= { 0x61,};
1125 static const int32_t offset
[]= {0,} ;
1128 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1129 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1130 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,};
1133 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1134 static const uint8_t to_euc_tw
[]={
1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1136 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,};
1139 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9, 0x0042, };
1140 static const uint8_t to_iso_2022_jp
[]={
1144 static const int32_t from_iso_2022_jpOffs
[] ={0,};
1147 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1148 static const uint8_t to_iso_2022_cn
[]={
1152 static const int32_t from_iso_2022_cnOffs
[] ={
1158 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1159 static const uint8_t to_iso_2022_kr
[]={
1160 0x1b, 0x24, 0x29, 0x43,
1164 static const int32_t from_iso_2022_krOffs
[] ={
1171 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1173 static const uint8_t to_hz
[]={
1175 0x7e, 0x7b, 0x26, 0x30,
1178 static const int32_t from_hzOffs
[] ={
1184 static const UChar iscii_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1185 static const uint8_t to_iscii
[]={
1188 static const int32_t from_isciiOffs
[] ={
1192 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1193 toIBM943
, sizeof(toIBM943
), "ibm-943",
1194 UCNV_FROM_U_CALLBACK_STOP
, offset
, NULL
, 0 ))
1195 log_err("u-> ibm-943 with stop did not match.\n");
1197 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1198 to_euc_jp
, sizeof(to_euc_jp
), "IBM-eucJP",
1199 UCNV_FROM_U_CALLBACK_STOP
, fromEUC_JPOffs
, NULL
, 0 ))
1200 log_err("u-> euc-jp with stop did not match.\n");
1202 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1203 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1204 UCNV_FROM_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1205 log_err("u-> euc-tw with stop did not match.\n");
1207 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1208 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1209 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1210 log_err("u-> iso-2022-jp with stop did not match.\n");
1212 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1213 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1214 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1215 log_err("u-> iso-2022-jp with stop did not match.\n");
1217 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
1218 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
1219 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_cnOffs
, NULL
, 0 ))
1220 log_err("u-> iso-2022-cn with stop did not match.\n");
1222 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
1223 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
1224 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_krOffs
, NULL
, 0 ))
1225 log_err("u-> iso-2022-kr with stop did not match.\n");
1227 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
1228 to_hz
, sizeof(to_hz
), "HZ",
1229 UCNV_FROM_U_CALLBACK_STOP
, from_hzOffs
, NULL
, 0 ))
1230 log_err("u-> HZ with stop did not match.\n");\
1232 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
1233 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
1234 UCNV_FROM_U_CALLBACK_STOP
, from_isciiOffs
, NULL
, 0 ))
1235 log_err("u-> iscii with stop did not match.\n");
1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1243 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1245 static const uint8_t to_SCSU
[]={
1249 int32_t from_SCSUOffs
[] ={
1253 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1254 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1255 UCNV_FROM_U_CALLBACK_STOP
, from_SCSUOffs
, NULL
, 0 ))
1256 log_err("u-> SCSU with skip did not match.\n");
1262 #if !UCONFIG_NO_LEGACY_CONVERSION
1263 if(!testConvertToUnicode(expstopIBM_949
, sizeof(expstopIBM_949
),
1264 IBM_949stoptoUnicode
, sizeof(IBM_949stoptoUnicode
)/sizeof(IBM_949stoptoUnicode
[0]),"ibm-949",
1265 UCNV_TO_U_CALLBACK_STOP
, fromIBM949Offs
, NULL
, 0 ))
1266 log_err("ibm-949->u with stop did not match.\n");
1267 if(!testConvertToUnicode(expstopIBM_943
, sizeof(expstopIBM_943
),
1268 IBM_943stoptoUnicode
, sizeof(IBM_943stoptoUnicode
)/sizeof(IBM_943stoptoUnicode
[0]),"ibm-943",
1269 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offs
, NULL
, 0 ))
1270 log_err("ibm-943->u with stop did not match.\n");
1271 if(!testConvertToUnicode(expstopIBM_930
, sizeof(expstopIBM_930
),
1272 IBM_930stoptoUnicode
, sizeof(IBM_930stoptoUnicode
)/sizeof(IBM_930stoptoUnicode
[0]),"ibm-930",
1273 UCNV_TO_U_CALLBACK_STOP
, fromIBM930Offs
, NULL
, 0 ))
1274 log_err("ibm-930->u with stop did not match.\n");
1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1282 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63 };
1283 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1};
1287 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1288 0x8f, 0xda, 0xa1, /*unassigned*/
1291 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec};
1292 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3};
1295 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1299 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2};
1300 int32_t from_euc_twOffs
[] ={ 0, 1, 3};
1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1305 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1306 UCNV_TO_U_CALLBACK_STOP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1309 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1310 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP",
1311 UCNV_TO_U_CALLBACK_STOP
, from_euc_jpOffs
, NULL
, 0))
1312 log_err("euc-jp->u with stop did not match.\n");
1314 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1315 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1316 UCNV_TO_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1317 log_err("euc-tw->u with stop did not match.\n");
1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1323 static const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1325 static const UChar expected1
[] = { 0x0031, 0x4e8c,};
1326 static const int32_t offsets1
[] = { 0x0000, 0x0001};
1328 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1329 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1330 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1331 log_err("utf8->u with stop did not match.\n");;
1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1335 static const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1336 static const UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1337 static const int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003};
1339 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1340 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1341 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1342 log_err("scsu->u with stop did not match.\n");;
/*
 * TestSub: runs the converter test cases that use the substitute callbacks
 * (UCNV_FROM_U_CALLBACK_SUBSTITUTE and UCNV_TO_U_CALLBACK_SUBSTITUTE).
 *
 * inputsize / outputsize are stored in the file-level gInBufferSize and
 * gOutBufferSize before any conversion is attempted (presumably the
 * testConvert* helpers read them as chunk sizes -- confirm against the
 * helper definitions).
 *
 * Every case hands an input array, the expected output array and the
 * expected offsets to testConvertFromUnicode / testConvertToUnicode (or the
 * ...WithContext variants) and calls log_err when the helper returns a
 * false value.
 */
1347 static void TestSub(int32_t inputsize
, int32_t outputsize
)
/* Shared input text and expected results for the ibm-949 / ibm-943 / ibm-930 cases. */
1349 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1350 static const UChar sampleText2
[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1352 static const uint8_t expsubIBM_949
[] =
1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1355 static const uint8_t expsubIBM_943
[] = {
1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1358 static const uint8_t expsubIBM_930
[] = {
1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1361 static const UChar IBM_949subtoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1362 static const UChar IBM_943subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1363 static const UChar IBM_930subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
/* Expected offsets for the fromUnicode direction (one entry per output byte). */
1365 static const int32_t toIBM949Offssub
[] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1366 static const int32_t toIBM943Offssub
[] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1367 static const int32_t toIBM930Offssub
[] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
/* Expected offsets for the toUnicode direction (one entry per output UChar). */
1369 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5, 7 };
1370 static const int32_t fromIBM943Offs
[] = { 0, 2, 4, 6 };
1371 static const int32_t fromIBM930Offs
[] = { 1, 3, 5, 7 };
/* Publish the requested buffer sizes through the file-level globals. */
1373 gInBufferSize
= inputsize
;
1374 gOutBufferSize
= outputsize
;
/* fromUnicode, legacy code pages, substitute callback. */
1378 #if !UCONFIG_NO_LEGACY_CONVERSION
1379 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1380 expsubIBM_949
, sizeof(expsubIBM_949
), "ibm-949",
1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM949Offssub
, NULL
, 0 ))
1382 log_err("u-> ibm-949 with subst did not match.\n");
1383 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1384 expsubIBM_943
, sizeof(expsubIBM_943
), "ibm-943",
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM943Offssub
, NULL
, 0))
1386 log_err("u-> ibm-943 with subst did not match.\n");
1387 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1388 expsubIBM_930
, sizeof(expsubIBM_930
), "ibm-930",
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM930Offssub
, NULL
, 0 ))
1390 log_err("u-> ibm-930 with subst did not match.\n");
1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
/* Inputs containing surrogate code units (0xd801, 0xdc01) for ibm-943, IBM-eucJP and euc-tw. */
1394 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1395 static const uint8_t toIBM943
[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1396 static const int32_t offset
[]= {0, 1, 1, 3, 3, 4};
1400 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1401 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1402 0xf4, 0xfe, 0xf4, 0xfe,
1405 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1408 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1409 static const uint8_t to_euc_tw
[]={
1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1411 0xfd, 0xfe, 0xfd, 0xfe,
1412 0x61, 0xe6, 0xca, 0x8a,
1415 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1417 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1418 toIBM943
, sizeof(toIBM943
), "ibm-943",
1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset
, NULL
, 0 ))
1420 log_err("u-> ibm-943 with substitute did not match.\n");
1422 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1423 to_euc_jp
, sizeof(to_euc_jp
), "IBM-eucJP",
1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, fromEUC_JPOffs
, NULL
, 0 ))
1425 log_err("u-> euc-jp with substitute did not match.\n");
1427 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1428 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1430 log_err("u-> euc-tw with substitute did not match.\n");
1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
/* SCSU is checked twice: plain call, then the WithContext variant with context "i". */
1436 UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1438 const uint8_t to_SCSU
[]={
1445 int32_t from_SCSUOffs
[] ={
1451 const uint8_t to_SCSU_1
[]={
1455 int32_t from_SCSUOffs_1
[] ={
1459 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1460 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs
, NULL
, 0 ))
1462 log_err("u-> SCSU with substitute did not match.\n");
1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1465 to_SCSU_1
, sizeof(to_SCSU_1
), "SCSU",
1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs_1
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
1467 log_err("u-> SCSU with substitute did not match.\n");
1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1472 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1473 static const uint8_t expectedUTF8
[]= { 0xe2, 0x82, 0xac,
1474 0xf0, 0x90, 0x90, 0x81,
1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1476 0xef, 0xbf, 0xbf, 0x61,
1479 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1480 if(!testConvertFromUnicode(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
1481 expectedUTF8
, sizeof(expectedUTF8
), "utf8",
1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0 )) {
/* Message names the callback actually under test (substitute, not stop). */
1483 log_err("u-> utf8 with substitute did not match.\n");
1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1489 static const UChar in
[]={ 0x0041, 0xfeff };
1491 static const uint8_t out
[]={
1502 static const int32_t offsets
[]={
1506 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1507 out
, sizeof(out
), "UTF-16",
1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1510 log_err("u->UTF-16 with substitute did not match.\n");
1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1516 static const UChar in
[]={ 0x0041, 0xfeff };
1518 static const uint8_t out
[]={
1520 0x00, 0x00, 0xfe, 0xff,
1521 0x00, 0x00, 0x00, 0x41,
1522 0x00, 0x00, 0xfe, 0xff
1524 0xff, 0xfe, 0x00, 0x00,
1525 0x41, 0x00, 0x00, 0x00,
1526 0xff, 0xfe, 0x00, 0x00
1529 static const int32_t offsets
[]={
1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1533 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1534 out
, sizeof(out
), "UTF-32",
1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1537 log_err("u->UTF-32 with substitute did not match.\n");
/* toUnicode, legacy code pages, substitute callback. */
1543 #if !UCONFIG_NO_LEGACY_CONVERSION
1544 if(!testConvertToUnicode(expsubIBM_949
, sizeof(expsubIBM_949
),
1545 IBM_949subtoUnicode
, sizeof(IBM_949subtoUnicode
)/sizeof(IBM_949subtoUnicode
[0]),"ibm-949",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM949Offs
, NULL
, 0 ))
1547 log_err("ibm-949->u with substitute did not match.\n");
1548 if(!testConvertToUnicode(expsubIBM_943
, sizeof(expsubIBM_943
),
1549 IBM_943subtoUnicode
, sizeof(IBM_943subtoUnicode
)/sizeof(IBM_943subtoUnicode
[0]),"ibm-943",
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offs
, NULL
, 0 ))
1551 log_err("ibm-943->u with substitute did not match.\n");
1552 if(!testConvertToUnicode(expsubIBM_930
, sizeof(expsubIBM_930
),
1553 IBM_930subtoUnicode
, sizeof(IBM_930subtoUnicode
)/sizeof(IBM_930subtoUnicode
[0]),"ibm-930",
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM930Offs
, NULL
, 0 ))
1555 log_err("ibm-930->u with substitute did not match.\n");
1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
/* Byte inputs for ibm-930, IBM-eucJP and euc-tw; expected output contains 0xfffd. */
1560 const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1563 UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0xfffd, 0x03b4
1565 int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 3, 5};
1569 const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1570 0x8f, 0xda, 0xa1, /*unassigned*/
1573 UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1574 int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 6, 9, 11 };
1577 const uint8_t sampleTxt_euc_tw
[]={
1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1582 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1583 int32_t from_euc_twOffs
[] ={ 0, 1, 3, 7, 11, 13};
1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1587 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1588 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1592 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1593 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP",
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ))
1595 log_err("euc-jp->u with substitute did not match.\n");
1598 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1599 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1601 log_err("euc-tw->u with substitute did not match.\n");
/* Same euc-jp data again through the WithContext variant with context "i". */
1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1605 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP",
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ,"i", U_ILLEGAL_CHAR_FOUND
))
1607 log_err("euc-jp->u with substitute did not match.\n");
1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1613 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1615 UChar expected1
[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1616 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1618 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1619 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1620 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1621 log_err("utf8->u with substitute did not match.\n");
1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1625 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1626 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1627 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1629 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1630 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1631 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
/* Message names the callback actually under test (substitute, not stop). */
1632 log_err("scsu->u with substitute did not match.\n");
/* ibm-930 round trip: s1 and s2 differ in their substitution bytes (0xfe,0xfe vs 0xfc,0xfc). */
1635 #if !UCONFIG_NO_LEGACY_CONVERSION
1636 log_verbose("Testing ibm-930 subchar/subchar1\n");
1638 static const UChar u1
[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1639 static const uint8_t s1
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1640 static const int32_t offsets1
[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1642 static const UChar u2
[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1643 static const uint8_t s2
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1644 static const int32_t offsets2
[]={ 1, 3, 5, 7, 10 };
1646 if(!testConvertFromUnicode(u1
, ARRAY_LENGTH(u1
), s1
, ARRAY_LENGTH(s1
), "ibm-930",
1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1652 if(!testConvertToUnicode(s2
, ARRAY_LENGTH(s2
), u2
, ARRAY_LENGTH(u2
), "ibm-930",
1653 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1655 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1659 log_verbose("Testing GB 18030 with substitute callbacks\n");
1661 static const UChar u2
[]={
1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1663 static const uint8_t gb2
[]={
1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1665 static const int32_t offsets2
[]={
1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1668 if(!testConvertToUnicode(gb2
, ARRAY_LENGTH(gb2
), u2
, ARRAY_LENGTH(u2
), "gb18030",
1669 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1671 log_err("gb18030->u with substitute did not match.\n");
1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1678 static const uint8_t utf7
[]={
1679 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1682 static const UChar unicode
[]={
1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1685 static const int32_t offsets
[]={
1686 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1689 if(!testConvertToUnicode(utf7
, ARRAY_LENGTH(utf7
), unicode
, ARRAY_LENGTH(unicode
), "UTF-7",
1690 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1692 log_err("UTF-7->u with substitute did not match.\n");
1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
/* Three UTF-16 inputs; the log messages below label them BE BOM, LE BOM and no BOM. */
1698 static const uint8_t
1699 in1
[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1700 in2
[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1701 in3
[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1704 out1
[]={ 0x4e00, 0xfeff },
1705 out2
[]={ 0x004e, 0xfffe },
1706 out3
[]={ 0xfefd, 0x4e00, 0xfeff };
1708 static const int32_t
1709 offsets1
[]={ 2, 4 },
1710 offsets2
[]={ 2, 4 },
1711 offsets3
[]={ 0, 2, 4 };
1713 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-16",
1714 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1719 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-16",
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1725 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-16",
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
/* Four UTF-32 inputs; the log messages below label them BE BOM, LE BOM, no BOM and no BOM with error. */
1734 static const uint8_t
1735 in1
[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1736 in2
[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1737 in3
[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1738 in4
[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1741 out1
[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1742 out2
[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1743 out3
[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1744 out4
[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1746 static const int32_t
1747 offsets1
[]={ 4, 4, 8 },
1748 offsets2
[]={ 4, 4, 8 },
1749 offsets3
[]={ 0, 4, 4, 8, 12 },
1750 offsets4
[]={ 0, 0, 4, 8 };
1752 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-32",
1753 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1758 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-32",
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1764 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-32",
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1770 if(!testConvertToUnicode(in4
, ARRAY_LENGTH(in4
), out4
, ARRAY_LENGTH(out4
), "UTF-32",
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets4
, NULL
, 0)
1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1778 static void TestSubWithValue(int32_t inputsize
, int32_t outputsize
)
1780 UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1781 UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1783 const uint8_t expsubwvalIBM_949
[]= {
1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1787 const uint8_t expsubwvalIBM_943
[]= {
1788 0x9f, 0xaf, 0x9f, 0xb1,
1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1791 const uint8_t expsubwvalIBM_930
[] = {
1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1794 int32_t toIBM949Offs
[] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1795 int32_t toIBM943Offs
[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1796 int32_t toIBM930Offs
[] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1798 gInBufferSize
= inputsize
;
1799 gOutBufferSize
= outputsize
;
1803 #if !UCONFIG_NO_LEGACY_CONVERSION
1804 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1805 expsubwvalIBM_949
, sizeof(expsubwvalIBM_949
), "ibm-949",
1806 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM949Offs
, NULL
, 0 ))
1807 log_err("u-> ibm-949 with subst with value did not match.\n");
1809 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1810 expsubwvalIBM_943
, sizeof(expsubwvalIBM_943
), "ibm-943",
1811 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM943Offs
, NULL
, 0 ))
1812 log_err("u-> ibm-943 with sub with value did not match.\n");
1814 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1815 expsubwvalIBM_930
, sizeof(expsubwvalIBM_930
), "ibm-930",
1816 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM930Offs
, NULL
, 0 ))
1817 log_err("u-> ibm-930 with subst with value did not match.\n");
1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1822 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1823 static const uint8_t toIBM943
[]= { 0x61,
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1828 static const int32_t offset
[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1832 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1833 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,
1847 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1848 static const uint8_t to_euc_tw
[]={
1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x61, 0xe6, 0xca, 0x8a,
1855 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,
1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1860 static const UChar iso_2022_jp_inputText1
[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1861 static const uint8_t to_iso_2022_jp1
[]={
1862 0x1b, 0x24, 0x42, 0x21, 0x21,
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1864 0x1b, 0x24, 0x42, 0x21, 0x22,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1869 static const int32_t from_iso_2022_jpOffs1
[] ={
1877 static const UChar iso_2022_jp_inputText2
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1878 static const uint8_t to_iso_2022_jp2
[]={
1879 0x1b, 0x24, 0x42, 0x21, 0x21,
1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1882 0x1b, 0x24, 0x42, 0x21, 0x22,
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1887 static const int32_t from_iso_2022_jpOffs2
[] ={
1898 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1899 static const uint8_t to_iso_2022_cn
[]={
1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1904 static const int32_t from_iso_2022_cnOffs
[] ={
1910 static const UChar iso_2022_cn_inputText4
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1912 static const uint8_t to_iso_2022_cn4
[]={
1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1921 static const int32_t from_iso_2022_cnOffs4
[] ={
1933 static const UChar iso_2022_kr_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1934 static const uint8_t to_iso_2022_kr2
[]={
1935 0x1b, 0x24, 0x29, 0x43,
1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1946 static const int32_t from_iso_2022_krOffs2
[] ={
1959 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1960 static const uint8_t to_iso_2022_kr
[]={
1961 0x1b, 0x24, 0x29, 0x43,
1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1972 static const int32_t from_iso_2022_krOffs
[] ={
1983 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1985 static const uint8_t to_hz
[]={
1987 0x7e, 0x7b, 0x26, 0x30,
1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1989 0x7e, 0x7b, 0x26, 0x30,
1993 static const int32_t from_hzOffs
[] ={
2001 static const UChar hz_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2002 static const uint8_t to_hz2
[]={
2004 0x7e, 0x7b, 0x26, 0x30,
2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2007 0x7e, 0x7b, 0x26, 0x30,
2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2013 static const int32_t from_hzOffs2
[] ={
2026 static const UChar iscii_inputText
[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2027 static const uint8_t to_iscii
[]={
2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2038 static const int32_t from_isciiOffs
[] ={
2048 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
2049 toIBM943
, sizeof(toIBM943
), "ibm-943",
2050 UCNV_FROM_U_CALLBACK_ESCAPE
, offset
, NULL
, 0 ))
2051 log_err("u-> ibm-943 with subst with value did not match.\n");
2053 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
2054 to_euc_jp
, sizeof(to_euc_jp
), "IBM-eucJP",
2055 UCNV_FROM_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0 ))
2056 log_err("u-> euc-jp with subst with value did not match.\n");
2058 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
2059 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
2060 UCNV_FROM_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0 ))
2061 log_err("u-> euc-tw with subst with value did not match.\n");
2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2064 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2065 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2066 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2069 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2070 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2071 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
2074 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
2075 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs2
, NULL
, 0 ))
2076 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2080 static const UChar iso_2022_jp_inputText3
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2081 static const uint8_t to_iso_2022_jp3_v2
[]={
2082 0x1b, 0x24, 0x42, 0x21, 0x21,
2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2085 0x1b, 0x24, 0x42, 0x21, 0x22,
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2092 static const int32_t from_iso_2022_jpOffs3_v2
[] ={
2094 1,1,1,1,1,1,1,1,1,1,1,1,
2097 4,4,4,4,4,4,4,4,4,4,4,4,
2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3
, sizeof(iso_2022_jp_inputText3
)/sizeof(iso_2022_jp_inputText3
[0]),
2104 to_iso_2022_jp3_v2
, sizeof(to_iso_2022_jp3_v2
), "iso-2022-jp",
2105 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs3_v2
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2109 static const UChar iso_2022_cn_inputText5
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2110 static const uint8_t to_iso_2022_cn5_v2
[]={
2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2120 static const int32_t from_iso_2022_cnOffs5_v2
[] ={
2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5
, sizeof(iso_2022_cn_inputText5
)/sizeof(iso_2022_cn_inputText5
[0]),
2131 to_iso_2022_cn5_v2
, sizeof(to_iso_2022_cn5_v2
), "iso-2022-cn",
2132 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs5_v2
, NULL
, 0,UCNV_ESCAPE_JAVA
,U_ZERO_ERROR
))
2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2137 static const UChar iso_2022_cn_inputText6
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2138 static const uint8_t to_iso_2022_cn6_v2
[]={
2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2146 static const int32_t from_iso_2022_cnOffs6_v2
[] ={
2147 0, 0, 0, 0, 0, 0, 0,
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2152 7, 7, 7, 7, 7, 7, 7, 7,
2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6
, sizeof(iso_2022_cn_inputText6
)/sizeof(iso_2022_cn_inputText6
[0]),
2155 to_iso_2022_cn6_v2
, sizeof(to_iso_2022_cn6_v2
), "iso-2022-cn",
2156 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs6_v2
, NULL
, 0,UCNV_ESCAPE_UNICODE
,U_ZERO_ERROR
))
2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2161 static const UChar iso_2022_cn_inputText7
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2162 static const uint8_t to_iso_2022_cn7_v2
[]={
2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2169 static const int32_t from_iso_2022_cnOffs7_v2
[] ={
2170 0, 0, 0, 0, 0, 0, 0,
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7
, sizeof(iso_2022_cn_inputText7
)/sizeof(iso_2022_cn_inputText7
[0]),
2178 to_iso_2022_cn7_v2
, sizeof(to_iso_2022_cn7_v2
), "iso-2022-cn",
2179 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs7_v2
, NULL
, 0,"K" ,U_ZERO_ERROR
))
2180 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2184 static const UChar iso_2022_cn_inputText8
[]={
2192 static const uint8_t to_iso_2022_cn8_v2
[]={
2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2199 0x5c, 0x39, 0x30, 0x32, 0x20
2201 static const int32_t from_iso_2022_cnOffs8_v2
[] ={
2202 0, 0, 0, 0, 0, 0, 0,
2203 1, 1, 1, 1, 1, 1, 1, 1,
2205 4, 4, 4, 4, 4, 4, 4, 4,
2206 6, 6, 6, 6, 6, 6, 6, 6,
2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8
, sizeof(iso_2022_cn_inputText8
)/sizeof(iso_2022_cn_inputText8
[0]),
2211 to_iso_2022_cn8_v2
, sizeof(to_iso_2022_cn8_v2
), "iso-2022-cn",
2212 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs8_v2
, NULL
, 0,UCNV_ESCAPE_CSS2
,U_ZERO_ERROR
))
2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2217 static const uint8_t to_iso_2022_cn4_v3
[]={
2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2226 static const int32_t from_iso_2022_cnOffs4_v3
[] ={
2228 1,1,1,1,1,1,1,1,1,1,1,
2231 4,4,4,4,4,4,4,4,4,4,4,
2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2237 to_iso_2022_cn4_v3
, sizeof(to_iso_2022_cn4_v3
), "iso-2022-cn",
2238 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4_v3
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2243 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
2244 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
2245 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0 ))
2246 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2249 to_iso_2022_cn4
, sizeof(to_iso_2022_cn4
), "iso-2022-cn",
2250 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4
, NULL
, 0 ))
2251 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2252 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
2253 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
2254 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0 ))
2255 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2
, sizeof(iso_2022_kr_inputText2
)/sizeof(iso_2022_kr_inputText2
[0]),
2257 to_iso_2022_kr2
, sizeof(to_iso_2022_kr2
), "iso-2022-kr",
2258 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs2
, NULL
, 0 ))
2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2260 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
2261 to_hz
, sizeof(to_hz
), "HZ",
2262 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0 ))
2263 log_err("u-> hz with subst with value did not match.\n");
2264 if(!testConvertFromUnicode(hz_inputText2
, sizeof(hz_inputText2
)/sizeof(hz_inputText2
[0]),
2265 to_hz2
, sizeof(to_hz2
), "HZ",
2266 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs2
, NULL
, 0 ))
2267 log_err("u-> hz with subst with value did not match.\n");
2269 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
2270 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
2271 UCNV_FROM_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0 ))
2272 log_err("u-> iscii with subst with value did not match.\n");
2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2279 #if !UCONFIG_NO_LEGACY_CONVERSION
2280 static const uint8_t sampleTxtToU
[]= { 0x00, 0x9f, 0xaf,
2281 0x81, 0xad, /*unassigned*/
2283 static const UChar IBM_943toUnicode
[] = { 0x0000, 0x6D63,
2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2286 static const int32_t fromIBM943Offs
[] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2289 static const uint8_t sampleTxt_EUC_JP
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2290 0x8f, 0xda, 0xa1, /*unassigned*/
2293 static const UChar EUC_JPtoUnicode
[]={ 0x0061, 0x4edd, 0x5bec,
2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2296 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 3,
2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2302 static const uint8_t sampleTxt_euc_tw
[]={
2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2307 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2,
2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2310 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3,
2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2315 static const uint8_t sampleTxt_iso_2022_jp
[]={
2316 0x1b, 0x28, 0x42, 0x41,
2317 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2318 0x1b, 0x28, 0x42, 0x42,
2321 /* A % X 3 A % X 1 A B */
2322 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2323 static const int32_t from_iso_2022_jpOffs
[] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2326 static const uint8_t sampleTxt_iso_2022_cn
[]={
2328 0x1B, 0x24, 0x29, 0x47,
2329 0x0E, 0x40, 0x6c, /*unassigned*/
2333 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2334 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2337 static const uint8_t sampleTxt_iso_2022_kr
[]={
2338 0x1b, 0x24, 0x29, 0x43,
2346 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2347 static const int32_t from_iso_2022_krOffs
[] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2350 static const uint8_t sampleTxt_hz
[]={
2352 0x7e, 0x7b, 0x26, 0x30,
2353 0x7f, 0x1E, /*unassigned*/
2356 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2359 static const UChar hztoUnicode
[]={
2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2368 static const int32_t from_hzOffs
[] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2372 static const uint8_t sampleTxt_iscii
[]={
2375 0xEB, /*unassigned*/
2378 0xEC, /*unassigned*/
2381 static const UChar isciitoUnicode
[]={
2384 0x25, 0x58, 0x45, 0x42,
2387 0x25, 0x58, 0x45, 0x43,
2390 static const int32_t from_isciiOffs
[] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2394 static const uint8_t sampleTxtUTF8
[]={
2396 0xC2, 0x7E, /* truncated char */
2398 0xE0, 0xB5, 0x7E, /* truncated char */
2401 static const UChar UTF8ToUnicode
[]={
2402 0x0020, 0x0064, 0x0050,
2403 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2405 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2408 static const int32_t fromUTF8
[] = {
2412 6, 6, 6, 6, 6, 6, 6, 6, 8,
2415 static const UChar UTF8ToUnicodeXML_DEC
[]={
2416 0x0020, 0x0064, 0x0050,
2417 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2419 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2422 static const int32_t fromUTF8XML_DEC
[] = {
2424 3, 3, 3, 3, 3, 3, 4,
2426 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2431 #if !UCONFIG_NO_LEGACY_CONVERSION
2432 if(!testConvertToUnicode(sampleTxtToU
, sizeof(sampleTxtToU
),
2433 IBM_943toUnicode
, sizeof(IBM_943toUnicode
)/sizeof(IBM_943toUnicode
[0]),"ibm-943",
2434 UCNV_TO_U_CALLBACK_ESCAPE
, fromIBM943Offs
, NULL
, 0 ))
2435 log_err("ibm-943->u with substitute with value did not match.\n");
2437 if(!testConvertToUnicode(sampleTxt_EUC_JP
, sizeof(sampleTxt_EUC_JP
),
2438 EUC_JPtoUnicode
, sizeof(EUC_JPtoUnicode
)/sizeof(EUC_JPtoUnicode
[0]),"IBM-eucJP",
2439 UCNV_TO_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0))
2440 log_err("euc-jp->u with substitute with value did not match.\n");
2442 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
2443 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
2444 UCNV_TO_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0))
2445 log_err("euc-tw->u with substitute with value did not match.\n");
2447 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2448 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2449 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0))
2450 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2452 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2453 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2454 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0,"K",U_ZERO_ERROR
))
2455 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2457 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2459 static const UChar iso_2022_jptoUnicodeDec
[]={
2462 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2463 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2465 static const int32_t from_iso_2022_jpOffsDec
[] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2466 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2467 iso_2022_jptoUnicodeDec
, sizeof(iso_2022_jptoUnicodeDec
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2468 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsDec
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2469 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2472 static const UChar iso_2022_jptoUnicodeHex
[]={
2475 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2476 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2478 static const int32_t from_iso_2022_jpOffsHex
[] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2479 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2480 iso_2022_jptoUnicodeHex
, sizeof(iso_2022_jptoUnicodeHex
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2481 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsHex
, NULL
, 0,UCNV_ESCAPE_XML_HEX
,U_ZERO_ERROR
))
2482 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2485 static const UChar iso_2022_jptoUnicodeC
[]={
2487 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2488 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2490 int32_t from_iso_2022_jpOffsC
[] ={ 3,7,7,7,7,7,7,7,7,12 };
2491 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2492 iso_2022_jptoUnicodeC
, sizeof(iso_2022_jptoUnicodeC
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2493 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsC
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2494 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2497 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
2498 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
2499 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0))
2500 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2502 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
2503 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
2504 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0))
2505 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2507 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
2508 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
2509 UCNV_TO_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0))
2510 log_err("hz->u with substitute with value did not match.\n");
2512 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
2513 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
2514 UCNV_TO_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0))
2515 log_err("ISCII ->u with substitute with value did not match.\n");
2518 if(!testConvertToUnicode(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2519 UTF8ToUnicode
, sizeof(UTF8ToUnicode
)/sizeof(UTF8ToUnicode
[0]),"UTF-8",
2520 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8
, NULL
, 0))
2521 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2522 if(!testConvertToUnicodeWithContext(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2523 UTF8ToUnicodeXML_DEC
, sizeof(UTF8ToUnicodeXML_DEC
)/sizeof(UTF8ToUnicodeXML_DEC
[0]),"UTF-8",
2524 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8XML_DEC
, NULL
, 0, UCNV_ESCAPE_XML_DEC
, U_ZERO_ERROR
))
2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2529 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestLegalAndOthers: runs one from-Unicode and three to-Unicode conversions
 * with the chunk sizes given by the caller.
 *
 *   inputsize  - stored in gInBufferSize before the conversions run
 *   outputsize - stored in gOutBufferSize before the conversions run
 *
 * The cases are: legal text to ibm-949 with the SKIP callback, and the same
 * ibm-943 byte sequence (text943) to Unicode with the SUBSTITUTE, SKIP and
 * STOP callbacks.  Each case logs an error through log_err when the helper
 * returns a false value.
 */
2530 static void TestLegalAndOthers(int32_t inputsize
, int32_t outputsize
)
/* Input, expected ibm-949 bytes and expected offsets for the legal-text case. */
2532 static const UChar legalText
[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2533 static const uint8_t templegal949
[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2534 static const int32_t to949legal
[] = {0, 1, 1, 2, 2, 3, 3};
/* ibm-943 input shared by the three to-Unicode cases below. */
2537 static const uint8_t text943
[] = {
2538 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
/* Expected UChars per callback: "sub" has an extra 0x1a, "skip" omits it, "stop" has only the first unit. */
2539 static const UChar toUnicode943sub
[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2540 static const UChar toUnicode943skip
[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2541 static const UChar toUnicode943stop
[]= { 0x304b};
/* Expected offsets matching the three expected outputs above. */
2543 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 7 };
2544 static const int32_t fromIBM943Offsskip
[] = { 0, 3, 4, 5, 7 };
2545 static const int32_t fromIBM943Offsstop
[] = { 0};
/* Publish the requested chunk sizes through the file-level globals read by the helpers. */
2547 gInBufferSize
= inputsize
;
2548 gOutBufferSize
= outputsize
;
2549 /* checking with a legal value */
2550 if(!testConvertFromUnicode(legalText
, sizeof(legalText
)/sizeof(legalText
[0]),
2551 templegal949
, sizeof(templegal949
), "ibm-949",
2552 UCNV_FROM_U_CALLBACK_SKIP
, to949legal
, NULL
, 0 ))
2553 log_err("u-> ibm-949 with skip did not match.\n");
2555 /* checking illegal value for ibm-943 with substitute */
2556 if(!testConvertToUnicode(text943
, sizeof(text943
),
2557 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2558 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2559 log_err("ibm-943->u with subst did not match.\n");
2560 /* checking illegal value for ibm-943 with skip */
2561 if(!testConvertToUnicode(text943
, sizeof(text943
),
2562 toUnicode943skip
, sizeof(toUnicode943skip
)/sizeof(toUnicode943skip
[0]),"ibm-943",
2563 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offsskip
, NULL
, 0 ))
2564 log_err("ibm-943->u with skip did not match.\n");
2566 /* checking illegal value for ibm-943 with stop */
2567 if(!testConvertToUnicode(text943
, sizeof(text943
),
2568 toUnicode943stop
, sizeof(toUnicode943stop
)/sizeof(toUnicode943stop
[0]),"ibm-943",
2569 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offsstop
, NULL
, 0 ))
2570 log_err("ibm-943->u with stop did not match.\n");
/*
 * TestSingleByte: converts sampleText from ibm-943 to Unicode with the
 * SUBSTITUTE callback and checks the result against toUnicode943sub and
 * fromIBM943Offssub.
 *
 *   inputsize  - stored in gInBufferSize before the conversion runs
 *   outputsize - stored in gOutBufferSize before the conversion runs
 *
 * An error is logged through log_err when the helper returns a false value.
 */
2574 static void TestSingleByte(int32_t inputsize
, int32_t outputsize
)
/* NOTE(review): only the first row of sampleText is visible here; the rest of the initializer is outside this view. */
2576 static const uint8_t sampleText
[] = {
2577 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
/* Expected output; it contains two 0x1a units, presumably the substitution character - confirm. */
2579 static const UChar toUnicode943sub
[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2580 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2581 /* checking illegal value for ibm-943 with substitute */
2582 gInBufferSize
= inputsize
;
2583 gOutBufferSize
= outputsize
;
2585 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
2586 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2587 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2588 log_err("ibm-943->u with subst did not match.\n");
/*
 * TestEBCDIC_STATEFUL_Sub: converts ebcdic_inputTest from Unicode to ibm-930
 * with the SUBSTITUTE callback, twice:
 *   1. with no substitution character supplied (NULL, 0), expecting toIBM930;
 *   2. with mySubChar (one byte, 0x3f) supplied, expecting toIBM930_subvaried.
 *
 *   inputsize  - stored in gInBufferSize before the conversions run
 *   outputsize - stored in gOutBufferSize before the conversions run
 *
 * Each case logs an error through log_err when the helper returns a false value.
 */
2591 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize
, int32_t outputsize
)
2594 static const UChar ebcdic_inputTest
[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2595 static const uint8_t toIBM930
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2596 static const int32_t offset_930
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2597 /* s SO doubl SI sng s SO fe fe SI s */
2599 /* EBCDIC_STATEFUL with subChar=3f: the expected output has a single 0x3f where toIBM930 has 0x0e 0xfe 0xfe 0x0f */
2600 static const uint8_t toIBM930_subvaried
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2601 static const int32_t offset_930_subvaried
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2602 static const char mySubChar
[]={ 0x3f};
2604 gInBufferSize
= inputsize
;
2605 gOutBufferSize
= outputsize
;
/* Case 1: no substitution character passed to the helper. */
2607 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2608 toIBM930
, sizeof(toIBM930
), "ibm-930",
2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930
, NULL
, 0 ))
2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
/* Case 2: same input, with the one-byte substitution character mySubChar. */
2612 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2613 toIBM930_subvaried
, sizeof(toIBM930_subvaried
), "ibm-930",
2614 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930_subvaried
, mySubChar
, 1 ))
2615 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
/*
 * testConvertFromUnicode: converts a UChar buffer to a codepage with
 * ucnv_fromUnicode and compares the produced bytes (and optionally the
 * produced offsets) with the expected values.
 *
 *   source, sourceLen - input UChars and their count
 *   expect, expectLen - expected output bytes and their count
 *   codepage          - converter name passed to ucnv_open
 *   callback          - from-Unicode error callback, installed with a NULL context
 *   expectOffsets     - expected offsets; compared only when non-null and
 *                       offset checking is enabled (see checkOffsets below)
 *   mySubChar, len    - when mySubChar is not NULL, passed to ucnv_setSubstChars
 *
 * Callers in this file treat a false return value as "did not match".
 * The conversion is done in passes limited by the globals gInBufferSize
 * (input units) and gOutBufferSize (output bytes).
 */
2619 UBool
testConvertFromUnicode(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
2620 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
2621 const char *mySubChar
, int8_t len
)
2625 UErrorCode status
= U_ZERO_ERROR
;
2626 UConverter
*conv
= 0;
2627 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
2628 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2634 int32_t realBufferSize
;
2635 char *realBufferEnd
;
2636 const UChar
*realSourceEnd
;
2637 const UChar
*sourceLimit
;
2638 UBool checkOffsets
= TRUE
;
2641 char offset_str
[9999];
2643 UConverterFromUCallback oldAction
= NULL
;
2644 const void* oldContext
= NULL
;
/* Pre-fill the whole output buffer with the byte 0xF0. */
2647 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2648 junkout
[i
] = (char)0xF0;
/* NOTE(review): the body of this second loop is not visible here; presumably it pre-fills junokout - confirm. */
2649 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2651 setNuConvTestName(codepage
, "FROM");
2653 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
2656 conv
= ucnv_open(codepage
, &status
);
2657 if(U_FAILURE(status
))
2659 log_data_err("Couldn't open converter %s\n",codepage
);
2663 log_verbose("Converter opened..\n");
2665 /*----setting the callback routine----*/
2666 ucnv_setFromUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2667 if (U_FAILURE(status
))
2669 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2671 /*------------------------*/
2672 /*setting the subChar*/
2673 if(mySubChar
!= NULL
){
2674 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2675 if (U_FAILURE(status
)) {
/* NOTE(review): this message reports a callback failure although the failing call is ucnv_setSubstChars. */
2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Bounds of the real output buffer and of the input. */
2685 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2686 realBufferEnd
= junkout
+ realBufferSize
;
2687 realSourceEnd
= source
+ sourceLen
;
/* Offsets are verified only when gOutBufferSize equals the full output buffer size
   and gInBufferSize equals NEW_MAX_BUFFER; either check below disables it otherwise. */
2689 if ( gOutBufferSize
!= realBufferSize
)
2690 checkOffsets
= FALSE
;
2692 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2693 checkOffsets
= FALSE
;
/* Limits for this pass: at most gOutBufferSize bytes of output and gInBufferSize
   units of input, clamped to the real buffer end and the real source end. */
2697 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
2698 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
/* Flush only when this pass reaches the end of the input. */
2700 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
2702 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given a pointer argument here and in the log_verbose below;
   if these loggers are printf-style, %p would be the matching specifier - confirm. */
2704 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
2707 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
2710 status
= U_ZERO_ERROR
;
/* Offsets are requested from the converter only when they will be checked. */
2712 ucnv_fromUnicode (conv
,
2717 checkOffsets
? offs
: NULL
,
2718 doFlush
, /* flush if we're at the end of the input data */
/* Run another pass after a buffer overflow, or after success with input still remaining. */
2720 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
/* On an invalid/illegal character error, ask the converter for the offending UChars
   and compare them with the errLen units immediately before src. */
2723 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2724 UChar errChars
[50]; /* should be sufficient */
2726 UErrorCode err
= U_ZERO_ERROR
;
2727 const UChar
* start
= NULL
;
2728 ucnv_getInvalidUChars(conv
,errChars
, &errLen
, &err
);
2730 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err
));
2732 /* the invalid chars are expected to be the errLen units that end at src */
2733 start
= src
- errLen
;
2734 if(u_strncmp(errChars
,start
,errLen
)!=0){
2735 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2738 /* allow failure codes for the stop callback: any failure is reported unless the
   callback is STOP and the status is an invalid/illegal character error */
2739 if(U_FAILURE(status
) &&
2740 (callback
!= UCNV_FROM_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
)))
2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2747 sourceLen
, targ
-junkout
);
/* In verbose mode, format the produced bytes and their offsets as hex text for logging. */
2748 if(getTestOption(VERBOSITY_OPTION
))
2753 for(p
= junkout
;p
<targ
;p
++)
2755 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
2756 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
2760 printSeq(expect
, expectLen
);
2763 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format argument; "%s" with offset_str as data would be safer. */
2764 log_verbose(offset_str
);
/* Check 1: the number of bytes produced must equal expectLen. */
2771 if(expectLen
!= targ
-junkout
)
2773 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2775 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2776 printSeqErr(expect
, expectLen
);
/* Check 2 (optional): compare one int32_t offset per produced byte with expectOffsets. */
2780 if (checkOffsets
&& (expectOffsets
!= 0) )
2782 log_verbose("comparing %d offsets..\n", targ
-junkout
);
2783 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
2784 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2785 log_err("Got Output : ");
2786 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2787 log_err("Got Offsets: ");
2788 for(p
=junkout
;p
<targ
;p
++)
2789 log_err("%d,", junokout
[p
-junkout
]);
2791 log_err("Expected Offsets: ");
2792 for(i
=0; i
<(targ
-junkout
); i
++)
2793 log_err("%d,", expectOffsets
[i
]);
/* Check 3: the first expectLen produced bytes must equal the expected bytes. */
2799 if(!memcmp(junkout
, expect
, expectLen
))
2801 log_verbose("String matches! %s\n", gNuConvTestName
);
2806 log_err("String does not match. %s\n", gNuConvTestName
);
2807 log_err("source: ");
2808 printUSeqErr(source
, sourceLen
);
2810 printSeqErr((const uint8_t *)junkout
, expectLen
);
2811 log_err("Expected: ");
2812 printSeqErr(expect
, expectLen
);
/*
 * testConvertToUnicode: converts a codepage byte buffer to UChars with
 * ucnv_toUnicode and compares the produced UChars (and optionally the
 * produced offsets) with the expected values.
 *
 *   source, sourcelen - input bytes and their count
 *   expect, expectlen - expected output UChars and their count
 *   codepage          - converter name passed to ucnv_open
 *   callback          - to-Unicode error callback, installed with a NULL context
 *   expectOffsets     - expected offsets; compared only when non-null and
 *                       offset checking is enabled (see checkOffsets below)
 *   mySubChar, len    - when mySubChar is not NULL, passed to ucnv_setSubstChars
 *
 * Callers in this file treat a false return value as "did not match".
 * The conversion is done in passes limited by the globals gInBufferSize
 * (input bytes) and gOutBufferSize (output units).
 */
2817 UBool
testConvertToUnicode( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
2818 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
2819 const char *mySubChar
, int8_t len
)
2821 UErrorCode status
= U_ZERO_ERROR
;
2822 UConverter
*conv
= 0;
2823 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
2824 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2826 const char *realSourceEnd
;
2827 const char *srcLimit
;
2832 UBool checkOffsets
= TRUE
;
2834 char offset_str
[9999];
2836 UConverterToUCallback oldAction
= NULL
;
2837 const void* oldContext
= NULL
;
2839 int32_t realBufferSize
;
2840 UChar
*realBufferEnd
;
/* Pre-fill the whole output buffer with the value 0xFFFE. */
2843 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2844 junkout
[i
] = 0xFFFE;
/* NOTE(review): the body of this second loop is not visible here; presumably it pre-fills junokout - confirm. */
2846 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2849 setNuConvTestName(codepage
, "TO");
2851 log_verbose("\n========= %s\n", gNuConvTestName
);
2853 conv
= ucnv_open(codepage
, &status
);
2854 if(U_FAILURE(status
))
2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
2860 log_verbose("Converter opened..\n");
/* The input is walked as a char pointer. */
2862 src
= (const char *)source
;
/* Bounds of the real output buffer and of the input. */
2866 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2867 realBufferEnd
= junkout
+ realBufferSize
;
2868 realSourceEnd
= src
+ sourcelen
;
2869 /*----setting the callback routine----*/
2870 ucnv_setToUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2871 if (U_FAILURE(status
))
2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2875 /*-------------------------------------*/
2876 /*setting the subChar*/
2877 if(mySubChar
!= NULL
){
2878 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2879 if (U_FAILURE(status
)) {
/* NOTE(review): this message reports a callback failure although the failing call is ucnv_setSubstChars. */
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Offsets are verified only when gOutBufferSize equals the full output buffer size
   and gInBufferSize equals NEW_MAX_BUFFER; either check below disables it otherwise. */
2886 if ( gOutBufferSize
!= realBufferSize
)
2887 checkOffsets
= FALSE
;
2889 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2890 checkOffsets
= FALSE
;
/* Limits for this pass: at most gOutBufferSize units of output and gInBufferSize
   bytes of input, clamped to the real buffer end and the real source end. */
2894 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
2895 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
2897 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given a pointer argument here and in the log_verbose below;
   if these loggers are printf-style, %p would be the matching specifier - confirm. */
2899 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
2906 status
= U_ZERO_ERROR
;
/* Offsets are requested from the converter only when they will be checked. */
2908 ucnv_toUnicode (conv
,
2911 (const char **)&src
,
2912 (const char *)srcLimit
,
2913 checkOffsets
? offs
: NULL
,
2914 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
2916 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
/* On an invalid/illegal character error, ask the converter for the offending bytes
   and compare them with the errLen bytes immediately before src. */
2918 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2919 char errChars
[50]; /* should be sufficient */
2921 UErrorCode err
= U_ZERO_ERROR
;
2922 const char* start
= NULL
;
2923 ucnv_getInvalidChars(conv
,errChars
, &errLen
, &err
);
2925 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err
));
2927 /* the invalid bytes are expected to be the errLen bytes that end at src */
2928 start
= src
- errLen
;
2929 if(uprv_strncmp(errChars
,start
,errLen
)!=0){
2930 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2933 /* allow failure codes for the stop callback: any failure is reported unless the
   callback is STOP and the status is an invalid, illegal or truncated character error */
2934 if(U_FAILURE(status
) &&
2935 (callback
!= UCNV_TO_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
&& status
!= U_TRUNCATED_CHAR_FOUND
)))
2937 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2941 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2942 sourcelen
, targ
-junkout
);
/* In verbose mode, format the produced UChars and their offsets as hex text for logging. */
2943 if(getTestOption(VERBOSITY_OPTION
))
2949 for(p
= junkout
;p
<targ
;p
++)
2951 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
2952 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
2956 printUSeq(expect
, expectlen
);
2959 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format argument; "%s" with offset_str as data would be safer. */
2960 log_verbose(offset_str
);
2966 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Optional check: compare one int32_t offset per produced UChar with expectOffsets. */
2968 if (checkOffsets
&& (expectOffsets
!= 0))
2970 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
2972 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2973 log_err("Got offsets: ");
2974 for(p
=junkout
;p
<targ
;p
++)
2975 log_err(" %2d,", junokout
[p
-junkout
]);
2977 log_err("Expected offsets: ");
2978 for(i
=0; i
<(targ
-junkout
); i
++)
2979 log_err(" %2d,", expectOffsets
[i
]);
2981 log_err("Got output: ");
2982 for(i
=0; i
<(targ
-junkout
); i
++)
2983 log_err("0x%04x,", junkout
[i
]);
2985 log_err("From source: ");
2986 for(i
=0; i
<(src
-(const char *)source
); i
++)
2987 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* Content check: compares expectlen*2 bytes, i.e. it relies on a UChar being two bytes wide. */
2992 if(!memcmp(junkout
, expect
, expectlen
*2))
2994 log_verbose("Matches!\n");
2999 log_err("String does not match. %s\n", gNuConvTestName
);
3000 log_verbose("String does not match. %s\n", gNuConvTestName
);
3002 printUSeqErr(junkout
, expectlen
);
3003 log_err("Expected: ");
3004 printUSeqErr(expect
, expectlen
);
/*
 * testConvertFromUnicodeWithContext
 *
 * Test helper for the from-Unicode direction. It opens a converter for
 * `codepage`, installs `callback` together with `context` through
 * ucnv_setFromUCallBack, optionally sets substitution characters, runs
 * ucnv_fromUnicode over `source`, and then compares what was produced with
 * the caller's expectations.
 *
 * source, sourceLen   UTF-16 input and its length in UChars
 *                     (realSourceEnd = source + sourceLen).
 * expect, expectLen   expected output bytes and their count; expectLen is
 *                     compared with targ-junkout and used as the memcmp size.
 * codepage            converter name handed to ucnv_open.
 * callback, context   from-Unicode callback and the context passed with it.
 * expectOffsets       expected offsets; the offset comparison is skipped
 *                     when this is 0 or when checkOffsets was cleared.
 * mySubChar, len      substitution characters for ucnv_setSubstChars;
 *                     skipped when mySubChar is NULL.
 * expectedError       the one failure status that is tolerated after the
 *                     conversion loop.
 *
 * Returns TRUE when the converter cannot be opened (the problem has already
 * been reported through log_data_err). Presumably TRUE on a full match and
 * FALSE on any mismatch -- TODO confirm against the return statements.
 */
3010 UBool
testConvertFromUnicodeWithContext(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
3011 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
3012 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3016 UErrorCode status
= U_ZERO_ERROR
;
3017 UConverter
*conv
= 0;
/* junkout receives the converted bytes; junokout receives one offset per output byte. */
3018 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
3019 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3025 int32_t realBufferSize
;
3026 char *realBufferEnd
;
3027 const UChar
*realSourceEnd
;
3028 const UChar
*sourceLimit
;
3029 UBool checkOffsets
= TRUE
;
/* Text buffer that accumulates the hex dump of the offsets for verbose output. */
3032 char offset_str
[9999];
/* Receive the previously installed callback/context from ucnv_setFromUCallBack. */
3034 UConverterFromUCallback oldAction
= NULL
;
3035 const void* oldContext
= NULL
;
/* Pre-fill the output buffer with 0xF0 (presumably a sentinel for untouched bytes). */
3038 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3039 junkout
[i
] = (char)0xF0;
/* Second pass over the same index range (presumably initialises junokout -- TODO confirm). */
3040 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Compose gNuConvTestName, which is appended to the log messages below. */
3042 setNuConvTestName(codepage
, "FROM");
3044 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
3047 conv
= ucnv_open(codepage
, &status
);
3048 if(U_FAILURE(status
))
3050 log_data_err("Couldn't open converter %s\n",codepage
);
3051 return TRUE
; /* Because the err has already been logged. */
3054 log_verbose("Converter opened..\n");
3056 /*----setting the callback routine----*/
3057 ucnv_setFromUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3058 if (U_FAILURE(status
))
3060 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3062 /*------------------------*/
3063 /*setting the subChar*/
3064 if(mySubChar
!= NULL
){
3065 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3066 if (U_FAILURE(status
)) {
3067 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
/* Element count of junkout and the one-past-the-end pointers of output and input. */
3076 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
3077 realBufferEnd
= junkout
+ realBufferSize
;
3078 realSourceEnd
= source
+ sourceLen
;
/*
 * Offset checking is switched off unless gOutBufferSize equals the real
 * buffer size and gInBufferSize equals NEW_MAX_BUFFER.
 */
3080 if ( gOutBufferSize
!= realBufferSize
)
3081 checkOffsets
= FALSE
;
3083 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3084 checkOffsets
= FALSE
;
/*
 * Limits for this pass: at most gOutBufferSize output units and
 * gInBufferSize input units, clamped to the real buffer and source ends.
 */
3088 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
3089 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
/* Flush only when this pass reaches the end of the source. */
3091 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
/* No room left in junkout: report instead of converting further. */
3093 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is paired with pointer arguments here and in the next
 * log_verbose call; assuming printf-style formatting, %p would match -- confirm. */
3095 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
3098 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
/* Reset status before every conversion call. */
3101 status
= U_ZERO_ERROR
;
/* Offsets are requested only while checkOffsets is still TRUE. */
3103 ucnv_fromUnicode (conv
,
3108 checkOffsets
? offs
: NULL
,
3109 doFlush
, /* flush if we're at the end of the input data */
/*
 * Keep going while the last call overflowed the output window, or while it
 * succeeded but source data remains beyond sourceLimit.
 */
3111 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
3113 /* allow failure codes for the stop callback */
3114 if(U_FAILURE(status
) && status
!= expectedError
)
3116 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3120 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3121 sourceLen
, targ
-junkout
);
/* The hex dumps are only built when verbose output is enabled. */
3122 if(getTestOption(VERBOSITY_OPTION
))
/* Append each output byte, and the offset stored for it, as hex text. */
3127 for(p
= junkout
;p
<targ
;p
++)
3129 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
3130 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
3134 printSeq(expect
, expectLen
);
3137 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed in the format-string position. */
3138 log_verbose(offset_str
);
/* Check 1: number of bytes produced must equal expectLen. */
3145 if(expectLen
!= targ
-junkout
)
3147 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3148 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3149 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3150 printSeqErr(expect
, expectLen
);
/* Check 2: offsets, only when still enabled and expectations were supplied. */
3154 if (checkOffsets
&& (expectOffsets
!= 0) )
3156 log_verbose("comparing %d offsets..\n", targ
-junkout
);
/* One int32_t offset per output byte is compared. */
3157 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
3158 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3159 log_err("Got Output : ");
3160 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3161 log_err("Got Offsets: ");
3162 for(p
=junkout
;p
<targ
;p
++)
3163 log_err("%d,", junokout
[p
-junkout
]);
3165 log_err("Expected Offsets: ");
3166 for(i
=0; i
<(targ
-junkout
); i
++)
3167 log_err("%d,", expectOffsets
[i
]);
/* Check 3: the produced bytes must equal the expected bytes. */
3173 if(!memcmp(junkout
, expect
, expectLen
))
3175 log_verbose("String matches! %s\n", gNuConvTestName
);
/* Mismatch: dump the source, the produced bytes and the expected bytes. */
3180 log_err("String does not match. %s\n", gNuConvTestName
);
3181 log_err("source: ");
3182 printUSeqErr(source
, sourceLen
);
3184 printSeqErr((const uint8_t *)junkout
, expectLen
);
3185 log_err("Expected: ");
3186 printSeqErr(expect
, expectLen
);
/*
 * testConvertToUnicodeWithContext
 *
 * Test helper for the to-Unicode direction. It opens a converter for
 * `codepage`, installs `callback` together with `context` through
 * ucnv_setToUCallBack, optionally sets substitution characters, runs
 * ucnv_toUnicode over `source`, and then compares what was produced with
 * the caller's expectations.
 *
 * source, sourcelen   input bytes and their count
 *                     (realSourceEnd = src + sourcelen, src being a char pointer).
 * expect, expectlen   expected UChars; the comparison covers expectlen*2 bytes.
 * codepage            converter name handed to ucnv_open.
 * callback, context   to-Unicode callback and the context passed with it.
 * expectOffsets       expected offsets; the offset comparison is skipped
 *                     when this is 0 or when checkOffsets was cleared.
 * mySubChar, len      substitution characters for ucnv_setSubstChars;
 *                     skipped when mySubChar is NULL.
 * expectedError       the one failure status that is tolerated after the
 *                     conversion loop.
 *
 * Presumably returns TRUE on a match and FALSE on a mismatch -- TODO confirm
 * against the return statements.
 */
3190 UBool
testConvertToUnicodeWithContext( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
3191 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
3192 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3194 UErrorCode status
= U_ZERO_ERROR
;
3195 UConverter
*conv
= 0;
/* junkout receives the converted UChars; junokout receives one offset per output UChar. */
3196 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
3197 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3199 const char *realSourceEnd
;
3200 const char *srcLimit
;
3205 UBool checkOffsets
= TRUE
;
/* Text buffer that accumulates the hex dump of the offsets for verbose output. */
3207 char offset_str
[9999];
/* Receive the previously installed callback/context from ucnv_setToUCallBack. */
3209 UConverterToUCallback oldAction
= NULL
;
3210 const void* oldContext
= NULL
;
3212 int32_t realBufferSize
;
3213 UChar
*realBufferEnd
;
/* Pre-fill the output buffer with 0xFFFE (presumably a sentinel for untouched units). */
3216 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3217 junkout
[i
] = 0xFFFE;
/* Second pass over the same index range (presumably initialises junokout -- TODO confirm). */
3219 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Compose gNuConvTestName, which is appended to the log messages below. */
3222 setNuConvTestName(codepage
, "TO");
3224 log_verbose("\n========= %s\n", gNuConvTestName
);
3226 conv
= ucnv_open(codepage
, &status
);
3227 if(U_FAILURE(status
))
/* Reports the composed test name (gNuConvTestName) rather than the bare codepage. */
3229 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
3233 log_verbose("Converter opened..\n");
/* The byte input is walked through a char pointer. */
3235 src
= (const char *)source
;
/* Element count of junkout and the one-past-the-end pointers of output and input. */
3239 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
3240 realBufferEnd
= junkout
+ realBufferSize
;
3241 realSourceEnd
= src
+ sourcelen
;
3242 /*----setting the callback routine----*/
3243 ucnv_setToUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3244 if (U_FAILURE(status
))
3246 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3248 /*-------------------------------------*/
3249 /*setting the subChar*/
3250 if(mySubChar
!= NULL
){
3251 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3252 if (U_FAILURE(status
)) {
/* NOTE(review): this branch reports a ucnv_setSubstChars failure, yet the text
 * repeats the callback-failure wording; testConvertFromUnicodeWithContext says
 * "substitution chars" at the same point. */
3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/*
 * Offset checking is switched off unless gOutBufferSize equals the real
 * buffer size and gInBufferSize equals NEW_MAX_BUFFER.
 */
3259 if ( gOutBufferSize
!= realBufferSize
)
3260 checkOffsets
= FALSE
;
3262 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3263 checkOffsets
= FALSE
;
/*
 * Limits for this pass: at most gOutBufferSize output units and
 * gInBufferSize input bytes, clamped to the real buffer and source ends.
 */
3267 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
3268 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
/* No room left in junkout: report instead of converting further. */
3270 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is paired with pointer arguments here and in the next
 * log_verbose call; assuming printf-style formatting, %p would match -- confirm. */
3272 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
3275 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
/* Reset status before every conversion call. */
3279 status
= U_ZERO_ERROR
;
/* Offsets are requested only while checkOffsets is still TRUE. */
3281 ucnv_toUnicode (conv
,
3284 (const char **)&src
,
3285 (const char *)srcLimit
,
3286 checkOffsets
? offs
: NULL
,
3287 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
3289 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
3291 /* allow failure codes for the stop callback */
3292 if(U_FAILURE(status
) && status
!=expectedError
)
3294 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3298 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3299 sourcelen
, targ
-junkout
);
/* The hex dumps are only built when verbose output is enabled. */
3300 if(getTestOption(VERBOSITY_OPTION
))
/* Append each output UChar, and the offset stored for it, as hex text. */
3306 for(p
= junkout
;p
<targ
;p
++)
3308 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
3309 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
3313 printUSeq(expect
, expectlen
);
3316 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed in the format-string position. */
3317 log_verbose(offset_str
);
3323 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Check 1: offsets, only when still enabled and expectations were supplied. */
3325 if (checkOffsets
&& (expectOffsets
!= 0))
/* One int32_t offset per output UChar is compared. */
3327 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
3329 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3330 log_err("Got offsets: ");
3331 for(p
=junkout
;p
<targ
;p
++)
3332 log_err(" %2d,", junokout
[p
-junkout
]);
3334 log_err("Expected offsets: ");
3335 for(i
=0; i
<(targ
-junkout
); i
++)
3336 log_err(" %2d,", expectOffsets
[i
]);
3338 log_err("Got output: ");
3339 for(i
=0; i
<(targ
-junkout
); i
++)
3340 log_err("0x%04x,", junkout
[i
]);
/* Dump only the input bytes consumed so far (up to src). */
3342 log_err("From source: ");
3343 for(i
=0; i
<(src
-(const char *)source
); i
++)
3344 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* Check 2: compare the output with the expectation over expectlen*2 bytes
 * (assumes a UChar occupies 2 bytes -- TODO confirm). */
3349 if(!memcmp(junkout
, expect
, expectlen
*2))
3351 log_verbose("Matches!\n");
/* Mismatch: report through both log channels, then dump produced and expected UChars. */
3356 log_err("String does not match. %s\n", gNuConvTestName
);
3357 log_verbose("String does not match. %s\n", gNuConvTestName
);
3359 printUSeqErr(junkout
, expectlen
);
3360 log_err("Expected: ");
3361 printUSeqErr(expect
, expectlen
);
3367 static void TestCallBackFailure(void) {
3368 UErrorCode status
= U_USELESS_COLLATOR_ERROR
;
3369 ucnv_cbFromUWriteBytes(NULL
, NULL
, -1, -1, &status
);
3370 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3371 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3373 ucnv_cbFromUWriteUChars(NULL
, NULL
, NULL
, -1, &status
);
3374 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3375 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3377 ucnv_cbFromUWriteSub(NULL
, -1, &status
);
3378 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3379 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3381 ucnv_cbToUWriteUChars(NULL
, NULL
, -1, -1, &status
);
3382 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3383 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");