1 /********************************************************************
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 ********************************************************************************
10 * Modification History:
12 * Madhu Katragadda 7/21/1999 Testing error callback routines
13 ********************************************************************************
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
27 #include "unicode/ucnv_cb.h"
28 #include "unicode/utf16.h"
30 #define NEW_MAX_BUFFER 999
/*
 * nct_min(x, y): evaluates to the smaller of x and y.
 * Every use of a parameter is parenthesized so that operator precedence in
 * the caller's argument expressions cannot change the comparison.
 * Note: each argument may be evaluated twice; do not pass expressions with
 * side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
33 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))
35 static int32_t gInBufferSize
= 0;
36 static int32_t gOutBufferSize
= 0;
37 static char gNuConvTestName
[1024];
/*
 * printSeq: writes elements of the byte array `a` through log_verbose,
 * each formatted as "0x%02X, ", advancing the index `i` after every element.
 * NOTE(review): the loop header, the declaration of `i` and the braces are
 * not visible in this listing; presumably `len` bounds the loop - confirm.
 */
39 static void printSeq(const uint8_t* a
, int len
)
44 log_verbose("0x%02X, ", a
[i
++]);
/*
 * printUSeq: writes elements of the UChar array `a` through log_verbose,
 * each formatted as " 0x%04x, ", advancing the index `i` after every element.
 * NOTE(review): the loop header, the declaration of `i` and the braces are
 * not visible in this listing; presumably `len` bounds the loop - confirm.
 */
48 static void printUSeq(const UChar
* a
, int len
)
53 log_verbose(" 0x%04x, ", a
[i
++]);
/*
 * printSeqErr: writes elements of the byte array `a` to stderr, each
 * formatted as " 0x%02x, ", then prints a closing "}" and newline.
 * Unlike printSeq this goes straight to stderr rather than log_verbose.
 * NOTE(review): the loop header, the declaration of `i` and the braces are
 * not visible in this listing; presumably `len` bounds the loop - confirm.
 */
57 static void printSeqErr(const uint8_t* a
, int len
)
62 fprintf(stderr
, " 0x%02x, ", a
[i
++]);
63 fprintf(stderr
, "}\n");
/*
 * printUSeqErr: writes elements of the UChar array `a` to stderr, each
 * formatted as "0x%04x, ", then prints a closing "}" and newline.
 * Unlike printUSeq this goes straight to stderr rather than log_verbose.
 * NOTE(review): the loop header, the declaration of `i` and the braces are
 * not visible in this listing; presumably `len` bounds the loop - confirm.
 */
66 static void printUSeqErr(const UChar
* a
, int len
)
71 fprintf(stderr
, "0x%04x, ", a
[i
++]);
72 fprintf(stderr
,"}\n");
/*
 * setNuConvTestName: formats a descriptive test label into the file-scope
 * buffer gNuConvTestName. The format string takes two strings and two
 * integers ("[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]").
 * NOTE(review): the sprintf argument list is cut off in this listing;
 * presumably it passes codepage, direction, gInBufferSize and
 * gOutBufferSize - confirm.
 * NOTE(review): sprintf does not bound the write to the 1024-byte
 * gNuConvTestName buffer; a very long codepage/direction string would
 * overflow it. Consider snprintf(gNuConvTestName, sizeof(gNuConvTestName), ...).
 */
75 static void setNuConvTestName(const char *codepage
, const char *direction
)
77 sprintf(gNuConvTestName
, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
85 static void TestCallBackFailure(void);
87 void addTestConvertErrorCallBack(TestNode
** root
);
/*
 * addTestConvertErrorCallBack: registers this file's callback tests with
 * addTest on the tree passed in `root`, each under a "tsconv/nccbtst/" path.
 * TestLegalAndOtherCallBack and TestSingleByteCallBack follow an
 * "#if !UCONFIG_NO_LEGACY_CONVERSION" and so are registered only when
 * legacy conversion is compiled in.
 * NOTE(review): the function braces and the #endif that closes the
 * conditional are not visible in this listing; presumably the #endif sits
 * before the TestCallBackFailure registration - confirm.
 */
89 void addTestConvertErrorCallBack(TestNode
** root
)
91 addTest(root
, &TestSkipCallBack
, "tsconv/nccbtst/TestSkipCallBack");
92 addTest(root
, &TestStopCallBack
, "tsconv/nccbtst/TestStopCallBack");
93 addTest(root
, &TestSubCallBack
, "tsconv/nccbtst/TestSubCallBack");
94 addTest(root
, &TestSubWithValueCallBack
, "tsconv/nccbtst/TestSubWithValueCallBack");
96 #if !UCONFIG_NO_LEGACY_CONVERSION
97 addTest(root
, &TestLegalAndOtherCallBack
, "tsconv/nccbtst/TestLegalAndOtherCallBack");
98 addTest(root
, &TestSingleByteCallBack
, "tsconv/nccbtst/TestSingleByteCallBack");
101 addTest(root
, &TestCallBackFailure
, "tsconv/nccbtst/TestCallBackFailure");
/*
 * TestSkipCallBack: runs TestSkip(inputsize, outputsize) with several
 * buffer-size combinations. The visible calls are (NEW_MAX_BUFFER,
 * NEW_MAX_BUFFER), (1, NEW_MAX_BUFFER) and (NEW_MAX_BUFFER, 1).
 * NOTE(review): the original numbering skips a line between the second and
 * third call, so one more call may exist that this listing does not show.
 */
104 static void TestSkipCallBack()
106 TestSkip(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
107 TestSkip(1,NEW_MAX_BUFFER
);
109 TestSkip(NEW_MAX_BUFFER
, 1);
/*
 * TestStopCallBack: runs TestStop(inputsize, outputsize) with several
 * buffer-size combinations. The visible calls are (NEW_MAX_BUFFER,
 * NEW_MAX_BUFFER), (1, NEW_MAX_BUFFER) and (NEW_MAX_BUFFER, 1).
 * NOTE(review): the original numbering skips a line between the second and
 * third call, so one more call may exist that this listing does not show.
 */
112 static void TestStopCallBack()
114 TestStop(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
115 TestStop(1,NEW_MAX_BUFFER
);
117 TestStop(NEW_MAX_BUFFER
, 1);
/*
 * TestSubCallBack: runs TestSub with several buffer-size combinations and,
 * when legacy conversion is compiled in (!UCONFIG_NO_LEGACY_CONVERSION),
 * runs TestEBCDIC_STATEFUL_Sub with all four combinations of 1 and
 * NEW_MAX_BUFFER.
 * NOTE(review): the signatures of TestSub and TestEBCDIC_STATEFUL_Sub are
 * not visible here; presumably (inputsize, outputsize) like TestSkip -
 * confirm. The original numbering skips a line between the second and third
 * TestSub call, and the closing #endif is not shown in this listing.
 */
120 static void TestSubCallBack()
122 TestSub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
123 TestSub(1,NEW_MAX_BUFFER
);
125 TestSub(NEW_MAX_BUFFER
, 1);
127 #if !UCONFIG_NO_LEGACY_CONVERSION
128 TestEBCDIC_STATEFUL_Sub(1, 1);
129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER
);
130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, 1);
131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
/*
 * TestSubWithValueCallBack: runs TestSubWithValue with all four
 * combinations of 1 and NEW_MAX_BUFFER as its two arguments.
 * NOTE(review): the signature of TestSubWithValue is not visible here;
 * presumably (inputsize, outputsize) like TestSkip - confirm.
 */
135 static void TestSubWithValueCallBack()
137 TestSubWithValue(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
138 TestSubWithValue(1,NEW_MAX_BUFFER
);
139 TestSubWithValue(1,1);
140 TestSubWithValue(NEW_MAX_BUFFER
, 1);
143 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestLegalAndOtherCallBack: runs TestLegalAndOthers with all four
 * combinations of 1 and NEW_MAX_BUFFER as its two arguments.
 * NOTE(review): the signature of TestLegalAndOthers is not visible here;
 * presumably (inputsize, outputsize) like TestSkip - confirm.
 */
144 static void TestLegalAndOtherCallBack()
146 TestLegalAndOthers(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
147 TestLegalAndOthers(1,NEW_MAX_BUFFER
);
148 TestLegalAndOthers(1,1);
149 TestLegalAndOthers(NEW_MAX_BUFFER
, 1);
/*
 * TestSingleByteCallBack: runs TestSingleByte with several buffer-size
 * combinations. The visible calls are (NEW_MAX_BUFFER, NEW_MAX_BUFFER),
 * (1, NEW_MAX_BUFFER) and (NEW_MAX_BUFFER, 1).
 * NOTE(review): the signature of TestSingleByte is not visible here;
 * presumably (inputsize, outputsize) like TestSkip - confirm. The original
 * numbering skips a line between the second and third call, so one more
 * call may exist that this listing does not show.
 */
152 static void TestSingleByteCallBack()
154 TestSingleByte(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
155 TestSingleByte(1,NEW_MAX_BUFFER
);
157 TestSingleByte(NEW_MAX_BUFFER
, 1);
161 static void TestSkip(int32_t inputsize
, int32_t outputsize
)
163 static const uint8_t expskipIBM_949
[]= {
164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
166 static const uint8_t expskipIBM_943
[] = {
167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
169 static const uint8_t expskipIBM_930
[] = {
170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
172 gInBufferSize
= inputsize
;
173 gOutBufferSize
= outputsize
;
176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
178 #if !UCONFIG_NO_LEGACY_CONVERSION
180 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
181 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
183 static const int32_t toIBM949Offsskip
[] = { 0, 1, 1, 2, 2, 4, 4 };
184 static const int32_t toIBM943Offsskip
[] = { 0, 0, 1, 1, 3, 3 };
186 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
187 expskipIBM_949
, sizeof(expskipIBM_949
), "ibm-949",
188 UCNV_FROM_U_CALLBACK_SKIP
, toIBM949Offsskip
, NULL
, 0 ))
189 log_err("u-> ibm-949 with skip did not match.\n");
190 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
191 expskipIBM_943
, sizeof(expskipIBM_943
), "ibm-943",
192 UCNV_FROM_U_CALLBACK_SKIP
, toIBM943Offsskip
, NULL
, 0 ))
193 log_err("u-> ibm-943 with skip did not match.\n");
197 static const UChar fromU
[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
198 static const uint8_t fromUBytes
[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
199 static const int32_t fromUOffsets
[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
202 if(!testConvertFromUnicode(fromU
, sizeof(fromU
)/U_SIZEOF_UCHAR
,
203 fromUBytes
, sizeof(fromUBytes
),
205 UCNV_FROM_U_CALLBACK_SKIP
, fromUOffsets
,
208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
214 static const UChar usasciiFromU
[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
215 static const uint8_t usasciiFromUBytes
[] = { 0x61, 0x31, 0x39 };
216 static const int32_t usasciiFromUOffsets
[] = { 0, 3, 6 };
218 static const UChar latin1FromU
[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219 static const uint8_t latin1FromUBytes
[] = { 0x61, 0xa0, 0x31, 0x39 };
220 static const int32_t latin1FromUOffsets
[] = { 0, 1, 3, 6 };
223 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
224 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
226 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
229 log_err("u->US-ASCII with skip did not match.\n");
232 #if !UCONFIG_NO_LEGACY_CONVERSION
233 /* SBCS NLTC codepage 367 for US-ASCII */
234 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
235 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
237 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
240 log_err("u->ibm-367 with skip did not match.\n");
245 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
246 latin1FromUBytes
, sizeof(latin1FromUBytes
),
248 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
251 log_err("u->LATIN_1 with skip did not match.\n");
254 #if !UCONFIG_NO_LEGACY_CONVERSION
256 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
257 latin1FromUBytes
, sizeof(latin1FromUBytes
),
259 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
262 log_err("u->windows-1252 with skip did not match.\n");
267 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
268 static const uint8_t toIBM943
[]= { 0x61, 0x61 };
269 static const int32_t offset
[]= {0, 4};
272 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
273 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
276 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
279 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
280 static const uint8_t to_euc_tw
[]={
281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
282 0x61, 0xe6, 0xca, 0x8a,
284 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
287 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
288 static const uint8_t to_iso_2022_jp
[]={
293 static const int32_t from_iso_2022_jpOffs
[] ={0,2};
296 UChar
const iso_2022_jp_inputText2
[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
297 static const uint8_t to_iso_2022_jp2
[]={
302 static const int32_t from_iso_2022_jpOffs2
[] ={0,2};
305 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
306 static const uint8_t to_iso_2022_cn
[]={
309 static const int32_t from_iso_2022_cnOffs
[] ={
314 static const UChar iso_2022_cn_inputText1
[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
315 static const uint8_t to_iso_2022_cn1
[]={
319 static const int32_t from_iso_2022_cnOffs1
[] ={ 0, 2 };
322 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
323 static const uint8_t to_iso_2022_kr
[]={
324 0x1b, 0x24, 0x29, 0x43,
330 static const int32_t from_iso_2022_krOffs
[] ={
339 static const UChar iso_2022_kr_inputText1
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
340 static const uint8_t to_iso_2022_kr1
[]={
341 0x1b, 0x24, 0x29, 0x43,
347 static const int32_t from_iso_2022_krOffs1
[] ={
355 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
357 static const uint8_t to_hz
[]={
359 0x7e, 0x7b, 0x26, 0x30,
364 static const int32_t from_hzOffs
[] ={
371 static const UChar hz_inputText1
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
373 static const uint8_t to_hz1
[]={
375 0x7e, 0x7b, 0x26, 0x30,
380 static const int32_t from_hzOffs1
[] ={
389 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
391 static const uint8_t to_SCSU
[]={
397 static const int32_t from_SCSUOffs
[] ={
403 #if !UCONFIG_NO_LEGACY_CONVERSION
405 static const UChar iscii_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
406 static const uint8_t to_iscii
[]={
410 static const int32_t from_isciiOffs
[] ={
415 static const UChar iscii_inputText1
[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
416 static const uint8_t to_iscii1
[]={
421 static const int32_t from_isciiOffs1
[] ={0,2};
423 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
424 toIBM943
, sizeof(toIBM943
), "ibm-943",
425 UCNV_FROM_U_CALLBACK_SKIP
, offset
, NULL
, 0 ))
426 log_err("u-> ibm-943 with skip did not match.\n");
428 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
429 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
430 UCNV_FROM_U_CALLBACK_SKIP
, fromEUC_JPOffs
, NULL
, 0 ))
431 log_err("u-> euc-jp with skip did not match.\n");
433 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
434 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
435 UCNV_FROM_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0 ))
436 log_err("u-> euc-tw with skip did not match.\n");
439 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
440 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
441 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0 ))
442 log_err("u-> iso-2022-jp with skip did not match.\n");
445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
446 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
447 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs2
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
452 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
453 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0 ))
454 log_err("u-> iso-2022-cn with skip did not match.\n");
456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1
, sizeof(iso_2022_cn_inputText1
)/sizeof(iso_2022_cn_inputText1
[0]),
457 to_iso_2022_cn1
, sizeof(to_iso_2022_cn1
), "iso-2022-cn",
458 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
463 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
464 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0 ))
465 log_err("u-> iso-2022-kr with skip did not match.\n");
467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1
, sizeof(iso_2022_kr_inputText1
)/sizeof(iso_2022_kr_inputText1
[0]),
468 to_iso_2022_kr1
, sizeof(to_iso_2022_kr1
), "iso-2022-kr",
469 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
473 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
474 to_hz
, sizeof(to_hz
), "HZ",
475 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0 ))
476 log_err("u-> HZ with skip did not match.\n");
478 if(!testConvertFromUnicodeWithContext(hz_inputText1
, sizeof(hz_inputText1
)/sizeof(hz_inputText1
[0]),
479 to_hz1
, sizeof(to_hz1
), "hz",
480 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
485 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
486 to_SCSU
, sizeof(to_SCSU
), "SCSU",
487 UCNV_FROM_U_CALLBACK_SKIP
, from_SCSUOffs
, NULL
, 0 ))
488 log_err("u-> SCSU with skip did not match.\n");
490 #if !UCONFIG_NO_LEGACY_CONVERSION
492 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
493 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
494 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0 ))
495 log_err("u-> iscii with skip did not match.\n");
497 if(!testConvertFromUnicodeWithContext(iscii_inputText1
, sizeof(iscii_inputText1
)/sizeof(iscii_inputText1
[0]),
498 to_iscii1
, sizeof(to_iscii1
), "ISCII,version=0",
499 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
506 static const uint8_t sampleText
[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
507 0xFB, 0xEE, 0x28, /* from source offset 0 */
525 0xF9, 0x28, /* from 16 */
534 0xFA, 0x83, /* from 24 */
543 0xF9, 0xA2, /* from 32 */
545 0xFE, 0x16, 0x3A, 0x8C,
554 static const UChar expected
[]={
555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
556 0x0063, 0x0061, 0x000D, 0x000A,
558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
559 0x0930, 0x0020, 0x0918, 0x0909,
561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
562 0x4000, 0x4E00, 0x7777, 0x0020,
564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
565 0x0020, 0xD7A3, 0xDC00, 0xD800,
567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
573 static const int32_t offsets
[]={
574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
583 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
584 sampleText
, sizeof(sampleText
),
586 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
588 log_err("u->BOCU-1 with skip did not match.\n");
592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
594 const uint8_t sampleText
[]={
596 0xc4, 0xb5, /* U+0135 */
597 0xed, 0x80, 0xa0, /* Hangul U+d020 */
598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
599 0xee, 0x80, 0x80, /* PUA U+e000 */
600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
603 0xd0, 0x80 /* U+0400 */
628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
630 /* without offsets */
631 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
632 sampleText
, sizeof(sampleText
),
634 UCNV_FROM_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
636 log_err("u->CESU-8 with skip did not match.\n");
640 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
641 sampleText
, sizeof(sampleText
),
643 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
645 log_err("u->CESU-8 with skip did not match.\n");
650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
652 #if !UCONFIG_NO_LEGACY_CONVERSION
655 static const UChar IBM_949skiptoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
656 static const UChar IBM_943skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
657 static const UChar IBM_930skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
659 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5};
660 static const int32_t fromIBM943Offs
[] = { 0, 2, 4};
661 static const int32_t fromIBM930Offs
[] = { 1, 3, 5};
663 if(!testConvertToUnicode(expskipIBM_949
, sizeof(expskipIBM_949
),
664 IBM_949skiptoUnicode
, sizeof(IBM_949skiptoUnicode
)/sizeof(IBM_949skiptoUnicode
),"ibm-949",
665 UCNV_TO_U_CALLBACK_SKIP
, fromIBM949Offs
, NULL
, 0 ))
666 log_err("ibm-949->u with skip did not match.\n");
667 if(!testConvertToUnicode(expskipIBM_943
, sizeof(expskipIBM_943
),
668 IBM_943skiptoUnicode
, sizeof(IBM_943skiptoUnicode
)/sizeof(IBM_943skiptoUnicode
[0]),"ibm-943",
669 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offs
, NULL
, 0 ))
670 log_err("ibm-943->u with skip did not match.\n");
673 if(!testConvertToUnicode(expskipIBM_930
, sizeof(expskipIBM_930
),
674 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
675 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0 ))
676 log_err("ibm-930->u with skip did not match.\n");
679 if(!testConvertToUnicodeWithContext(expskipIBM_930
, sizeof(expskipIBM_930
),
680 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
681 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
682 log_err("ibm-930->u with skip did not match.\n");
687 static const uint8_t usasciiToUBytes
[] = { 0x61, 0x80, 0x31 };
688 static const UChar usasciiToU
[] = { 0x61, 0x31 };
689 static const int32_t usasciiToUOffsets
[] = { 0, 2 };
691 static const uint8_t latin1ToUBytes
[] = { 0x61, 0xa0, 0x31 };
692 static const UChar latin1ToU
[] = { 0x61, 0xa0, 0x31 };
693 static const int32_t latin1ToUOffsets
[] = { 0, 1, 2 };
696 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
697 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
699 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
702 log_err("US-ASCII->u with skip did not match.\n");
705 #if !UCONFIG_NO_LEGACY_CONVERSION
706 /* SBCS NLTC codepage 367 for US-ASCII */
707 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
708 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
710 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
713 log_err("ibm-367->u with skip did not match.\n");
718 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
719 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
721 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
724 log_err("LATIN_1->u with skip did not match.\n");
727 #if !UCONFIG_NO_LEGACY_CONVERSION
729 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
730 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
732 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
735 log_err("windows-1252->u with skip did not match.\n");
740 #if !UCONFIG_NO_LEGACY_CONVERSION
742 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
745 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0x03b4
747 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 5};
751 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
752 0x8f, 0xda, 0xa1, /*unassigned*/
755 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
756 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 9};
759 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
763 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
764 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3, 11, 13};
766 static const uint8_t sampleTxt_iso_2022_jp
[]={
768 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
769 0x1b, 0x28, 0x42, 0x42,
772 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x42 };
773 static const int32_t from_iso_2022_jpOffs
[] ={ 0,9 };
776 static const uint8_t sampleTxt_iso_2022_cn
[]={
778 0x1B, 0x24, 0x29, 0x47,
779 0x0E, 0x40, 0x6f, /*unassigned*/
784 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x42 };
785 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 11 };
788 static const uint8_t sampleTxt_iso_2022_kr
[]={
789 0x1b, 0x24, 0x29, 0x43,
797 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x03A0,0x51, 0x42,0x43};
798 static const int32_t from_iso_2022_krOffs
[] ={ 4, 9, 12, 13 , 14 };
801 static const uint8_t sampleTxt_hz
[]={
803 0x7e, 0x7b, 0x26, 0x30,
804 0x7f, 0x1E, /*unassigned*/
807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
810 static const UChar hztoUnicode
[]={
817 static const int32_t from_hzOffs
[] ={0,3,7,11,18, };
820 static const uint8_t sampleTxt_iscii
[]={
830 static const UChar isciitoUnicode
[]={
839 static const int32_t from_isciiOffs
[] ={0,1,3,4,5,7 };
842 static const uint8_t sampleTxtLMBCS
[]={ 0x12, 0xc9, 0x50,
843 0x12, 0x92, 0xa0, /*unassigned*/
846 static const UChar LMBCSToUnicode
[]={ 0x4e2e, 0xe5c4};
847 static const int32_t fromLMBCS
[] = {0, 6};
849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
850 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
851 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
855 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
856 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
859 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
860 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
861 UCNV_TO_U_CALLBACK_SKIP
, from_euc_jpOffs
, NULL
, 0))
862 log_err("euc-jp->u with skip did not match.\n");
866 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
867 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
868 UCNV_TO_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0))
869 log_err("euc-tw->u with skip did not match.\n");
872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
873 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
874 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0))
875 log_err("iso-2022-jp->u with skip did not match.\n");
877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
878 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
879 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0))
880 log_err("iso-2022-cn->u with skip did not match.\n");
882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
883 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
884 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0))
885 log_err("iso-2022-kr->u with skip did not match.\n");
887 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
888 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
889 UCNV_TO_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0))
890 log_err("HZ->u with skip did not match.\n");
892 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
893 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
894 UCNV_TO_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0))
895 log_err("iscii->u with skip did not match.\n");
897 if(!testConvertToUnicode(sampleTxtLMBCS
, sizeof(sampleTxtLMBCS
),
898 LMBCSToUnicode
, sizeof(LMBCSToUnicode
)/sizeof(LMBCSToUnicode
[0]),"LMBCS-1",
899 UCNV_TO_U_CALLBACK_SKIP
, fromLMBCS
, NULL
, 0))
900 log_err("LMBCS->u with skip did not match.\n");
905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
907 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
909 UChar expected1
[] = { 0x0031, 0x4e8c, 0x0061};
910 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0006};
912 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
913 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
914 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
915 log_err("utf8->u with skip did not match.\n");;
918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
920 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
921 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
922 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
924 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
925 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
926 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
927 log_err("scsu->u with skip did not match.\n");
930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
932 const uint8_t sampleText
[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
934 0x24, 0x1E, 0x52, /* 3 */
937 0x40, 0x07, /* 8 - wrong trail byte */
940 0xD0, 0x20, /* 12 - wrong trail byte */
961 0xFB, 0x16, 0x87, /* 42 */
968 0xFC, 0x10, 0x3E, /* 56 */
969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
971 0xFC, 0x03, 0xAC, /* 64 */
972 0xFF, /* 67 - FF just resets the state without encoding anything */
979 0xFEFF, 0x0061, 0x0062, 0x0020,
980 0x0063, 0x0061, 0x000D, 0x000A,
981 0x0020, 0x0000, 0x00DF, 0x00E6,
982 0x0930, 0x0020, 0x0918, 0x0909,
983 0x3086, 0x304D, 0x0020, 0x3053,
984 0x4000, 0x4E00, 0x7777, 0x0020,
985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
986 0x0020, 0xD7A3, 0xDC00, 0xD800,
987 0xD800, 0xDC00, 0xD845, 0xDDDD,
988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
989 0xDFFF, 0x0001, 0x0E40, 0x0020,
993 0, 3, 6, 7, /* skip 8, */
994 10, 11, /* skip 12, */
996 20, 21, 23, 24, 25, 26, 28, 29,
997 30, 31, 33, 35, 37, 38,
999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1000 63, 64, /* trail */ 64, /* reset only 67, */
1005 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1006 expected
, ARRAY_LENGTH(expected
), "BOCU-1",
1007 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1009 log_err("BOCU-1->u with skip did not match.\n");
1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1015 const uint8_t sampleText
[]={
1017 0xc0, 0x80, /* 1 non-shortest form */
1018 0xc4, 0xb5, /* 3 U+0135 */
1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1026 0xed, 0xa0, /* 28 incomplete sequence */
1027 0xd0, 0x80 /* 30 U+0400 */
1057 /* without offsets */
1058 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1059 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1060 UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
1062 log_err("CESU-8->u with skip did not match.\n");
1066 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1067 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1068 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1070 log_err("CESU-8->u with skip did not match.\n");
1075 static void TestStop(int32_t inputsize
, int32_t outputsize
)
1077 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1078 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1080 static const uint8_t expstopIBM_949
[]= {
1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1083 static const uint8_t expstopIBM_943
[] = {
1084 0x9f, 0xaf, 0x9f, 0xb1};
1086 static const uint8_t expstopIBM_930
[] = {
1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1089 static const UChar IBM_949stoptoUnicode
[]= {0x0000, 0xAC00, 0xAC01};
1090 static const UChar IBM_943stoptoUnicode
[]= { 0x6D63, 0x6D64};
1091 static const UChar IBM_930stoptoUnicode
[]= { 0x6D63, 0x6D64};
1094 static const int32_t toIBM949Offsstop
[] = { 0, 1, 1, 2, 2};
1095 static const int32_t toIBM943Offsstop
[] = { 0, 0, 1, 1};
1096 static const int32_t toIBM930Offsstop
[] = { 0, 0, 0, 1, 1};
1098 static const int32_t fromIBM949Offs
[] = { 0, 1, 3};
1099 static const int32_t fromIBM943Offs
[] = { 0, 2};
1100 static const int32_t fromIBM930Offs
[] = { 1, 3};
1102 gInBufferSize
= inputsize
;
1103 gOutBufferSize
= outputsize
;
1107 #if !UCONFIG_NO_LEGACY_CONVERSION
1108 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1109 expstopIBM_949
, sizeof(expstopIBM_949
), "ibm-949",
1110 UCNV_FROM_U_CALLBACK_STOP
, toIBM949Offsstop
, NULL
, 0 ))
1111 log_err("u-> ibm-949 with stop did not match.\n");
1112 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1113 expstopIBM_943
, sizeof(expstopIBM_943
), "ibm-943",
1114 UCNV_FROM_U_CALLBACK_STOP
, toIBM943Offsstop
, NULL
, 0))
1115 log_err("u-> ibm-943 with stop did not match.\n");
1116 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1117 expstopIBM_930
, sizeof(expstopIBM_930
), "ibm-930",
1118 UCNV_FROM_U_CALLBACK_STOP
, toIBM930Offsstop
, NULL
, 0 ))
1119 log_err("u-> ibm-930 with stop did not match.\n");
1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1123 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1124 static const uint8_t toIBM943
[]= { 0x61,};
1125 static const int32_t offset
[]= {0,} ;
1128 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1129 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1130 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,};
1133 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1134 static const uint8_t to_euc_tw
[]={
1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1136 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,};
1139 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9, 0x0042, };
1140 static const uint8_t to_iso_2022_jp
[]={
1144 static const int32_t from_iso_2022_jpOffs
[] ={0,};
1147 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1148 static const uint8_t to_iso_2022_cn
[]={
1152 static const int32_t from_iso_2022_cnOffs
[] ={
1158 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1159 static const uint8_t to_iso_2022_kr
[]={
1160 0x1b, 0x24, 0x29, 0x43,
1164 static const int32_t from_iso_2022_krOffs
[] ={
1171 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1173 static const uint8_t to_hz
[]={
1175 0x7e, 0x7b, 0x26, 0x30,
1178 static const int32_t from_hzOffs
[] ={
1184 static const UChar iscii_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1185 static const uint8_t to_iscii
[]={
1188 static const int32_t from_isciiOffs
[] ={
1192 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1193 toIBM943
, sizeof(toIBM943
), "ibm-943",
1194 UCNV_FROM_U_CALLBACK_STOP
, offset
, NULL
, 0 ))
1195 log_err("u-> ibm-943 with stop did not match.\n");
1197 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1198 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
1199 UCNV_FROM_U_CALLBACK_STOP
, fromEUC_JPOffs
, NULL
, 0 ))
1200 log_err("u-> euc-jp with stop did not match.\n");
1202 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1203 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1204 UCNV_FROM_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1205 log_err("u-> euc-tw with stop did not match.\n");
1207 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1208 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1209 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1210 log_err("u-> iso-2022-jp with stop did not match.\n");
1212 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1213 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1214 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1215 log_err("u-> iso-2022-jp with stop did not match.\n");
1217 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
1218 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
1219 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_cnOffs
, NULL
, 0 ))
1220 log_err("u-> iso-2022-cn with stop did not match.\n");
1222 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
1223 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
1224 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_krOffs
, NULL
, 0 ))
1225 log_err("u-> iso-2022-kr with stop did not match.\n");
1227 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
1228 to_hz
, sizeof(to_hz
), "HZ",
1229 UCNV_FROM_U_CALLBACK_STOP
, from_hzOffs
, NULL
, 0 ))
1230 log_err("u-> HZ with stop did not match.\n");\
1232 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
1233 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
1234 UCNV_FROM_U_CALLBACK_STOP
, from_isciiOffs
, NULL
, 0 ))
1235 log_err("u-> iscii with stop did not match.\n");
1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1243 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1245 static const uint8_t to_SCSU
[]={
1249 int32_t from_SCSUOffs
[] ={
1253 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1254 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1255 UCNV_FROM_U_CALLBACK_STOP
, from_SCSUOffs
, NULL
, 0 ))
1256 log_err("u-> SCSU with skip did not match.\n");
1262 #if !UCONFIG_NO_LEGACY_CONVERSION
1263 if(!testConvertToUnicode(expstopIBM_949
, sizeof(expstopIBM_949
),
1264 IBM_949stoptoUnicode
, sizeof(IBM_949stoptoUnicode
)/sizeof(IBM_949stoptoUnicode
[0]),"ibm-949",
1265 UCNV_TO_U_CALLBACK_STOP
, fromIBM949Offs
, NULL
, 0 ))
1266 log_err("ibm-949->u with stop did not match.\n");
1267 if(!testConvertToUnicode(expstopIBM_943
, sizeof(expstopIBM_943
),
1268 IBM_943stoptoUnicode
, sizeof(IBM_943stoptoUnicode
)/sizeof(IBM_943stoptoUnicode
[0]),"ibm-943",
1269 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offs
, NULL
, 0 ))
1270 log_err("ibm-943->u with stop did not match.\n");
1271 if(!testConvertToUnicode(expstopIBM_930
, sizeof(expstopIBM_930
),
1272 IBM_930stoptoUnicode
, sizeof(IBM_930stoptoUnicode
)/sizeof(IBM_930stoptoUnicode
[0]),"ibm-930",
1273 UCNV_TO_U_CALLBACK_STOP
, fromIBM930Offs
, NULL
, 0 ))
1274 log_err("ibm-930->u with stop did not match.\n");
1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1282 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63 };
1283 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1};
1287 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1288 0x8f, 0xda, 0xa1, /*unassigned*/
1291 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec};
1292 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3};
1295 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1299 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2};
1300 int32_t from_euc_twOffs
[] ={ 0, 1, 3};
1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1305 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1306 UCNV_TO_U_CALLBACK_STOP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1309 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1310 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1311 UCNV_TO_U_CALLBACK_STOP
, from_euc_jpOffs
, NULL
, 0))
1312 log_err("euc-jp->u with stop did not match.\n");
1314 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1315 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1316 UCNV_TO_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1317 log_err("euc-tw->u with stop did not match.\n");
1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1323 static const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1325 static const UChar expected1
[] = { 0x0031, 0x4e8c,};
1326 static const int32_t offsets1
[] = { 0x0000, 0x0001};
1328 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1329 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1330 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1331 log_err("utf8->u with stop did not match.\n");;
1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1335 static const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1336 static const UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1337 static const int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003};
1339 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1340 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1341 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1342 log_err("scsu->u with stop did not match.\n");;
/*
 * TestSub
 *
 * Exercises the SUBSTITUTE error callbacks in both directions:
 *   - UCNV_FROM_U_CALLBACK_SUBSTITUTE through testConvertFromUnicode() /
 *     testConvertFromUnicodeWithContext()
 *   - UCNV_TO_U_CALLBACK_SUBSTITUTE through testConvertToUnicode() /
 *     testConvertToUnicodeWithContext()
 *
 * Each case passes an input array, the expected output array and the
 * expected offsets array for one named converter; when the helper
 * returns false a failure is reported with log_err().
 *
 * inputsize  - stored into the file-level gInBufferSize
 * outputsize - stored into the file-level gOutBufferSize
 *
 * The legacy-codepage cases are guarded by !UCONFIG_NO_LEGACY_CONVERSION.
 */
1347 static void TestSub(int32_t inputsize
, int32_t outputsize
)
/* Shared input text for the ibm-949 / ibm-943 / ibm-930 cases. */
1349 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1350 static const UChar sampleText2
[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
/* Expected codepage bytes when converting the sample text from Unicode. */
1352 static const uint8_t expsubIBM_949
[] =
1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1355 static const uint8_t expsubIBM_943
[] = {
1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1358 static const uint8_t expsubIBM_930
[] = {
1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
/* Expected Unicode when converting those bytes back (0xfffd where a substitution is expected). */
1361 static const UChar IBM_949subtoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1362 static const UChar IBM_943subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1363 static const UChar IBM_930subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
/* Expected offsets for the fromUnicode direction (one entry per output byte). */
1365 static const int32_t toIBM949Offssub
[] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1366 static const int32_t toIBM943Offssub
[] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1367 static const int32_t toIBM930Offssub
[] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
/* Expected offsets for the toUnicode direction (one entry per output UChar). */
1369 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5, 7 };
1370 static const int32_t fromIBM943Offs
[] = { 0, 2, 4, 6 };
1371 static const int32_t fromIBM930Offs
[] = { 1, 3, 5, 7 };
/* Record the caller's buffer sizes in the file-level globals before running any case. */
1373 gInBufferSize
= inputsize
;
1374 gOutBufferSize
= outputsize
;
/* ---- fromUnicode, legacy codepages ---- */
1378 #if !UCONFIG_NO_LEGACY_CONVERSION
1379 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1380 expsubIBM_949
, sizeof(expsubIBM_949
), "ibm-949",
1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM949Offssub
, NULL
, 0 ))
1382 log_err("u-> ibm-949 with subst did not match.\n");
1383 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1384 expsubIBM_943
, sizeof(expsubIBM_943
), "ibm-943",
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM943Offssub
, NULL
, 0))
1386 log_err("u-> ibm-943 with subst did not match.\n");
1387 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1388 expsubIBM_930
, sizeof(expsubIBM_930
), "ibm-930",
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM930Offssub
, NULL
, 0 ))
1390 log_err("u-> ibm-930 with subst did not match.\n");
/* Inputs containing surrogate code units (0xd801, 0xdc01) for ibm-943, euc-jp and euc-tw. */
1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1394 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1395 static const uint8_t toIBM943
[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1396 static const int32_t offset
[]= {0, 1, 1, 3, 3, 4};
1400 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1401 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1402 0xf4, 0xfe, 0xf4, 0xfe,
1405 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1408 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1409 static const uint8_t to_euc_tw
[]={
1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1411 0xfd, 0xfe, 0xfd, 0xfe,
1412 0x61, 0xe6, 0xca, 0x8a,
1415 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1417 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1418 toIBM943
, sizeof(toIBM943
), "ibm-943",
1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset
, NULL
, 0 ))
1420 log_err("u-> ibm-943 with substitute did not match.\n");
1422 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1423 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, fromEUC_JPOffs
, NULL
, 0 ))
1425 log_err("u-> euc-jp with substitute did not match.\n");
1427 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1428 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1430 log_err("u-> euc-tw with substitute did not match.\n");
/* SCSU: the same input is run once with a NULL context and once with context "i". */
1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1436 UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1438 const uint8_t to_SCSU
[]={
1445 int32_t from_SCSUOffs
[] ={
1451 const uint8_t to_SCSU_1
[]={
1455 int32_t from_SCSUOffs_1
[] ={
1459 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1460 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs
, NULL
, 0 ))
1462 log_err("u-> SCSU with substitute did not match.\n");
1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1465 to_SCSU_1
, sizeof(to_SCSU_1
), "SCSU",
1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs_1
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
1467 log_err("u-> SCSU with substitute did not match.\n");
/*
 * UTF-8 fromUnicode. The failure message names the substitute callback,
 * matching the UCNV_FROM_U_CALLBACK_SUBSTITUTE argument used by this case.
 */
1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1472 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1473 static const uint8_t expectedUTF8
[]= { 0xe2, 0x82, 0xac,
1474 0xf0, 0x90, 0x90, 0x81,
1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1476 0xef, 0xbf, 0xbf, 0x61,
1479 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1480 if(!testConvertFromUnicode(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
1481 expectedUTF8
, sizeof(expectedUTF8
), "utf8",
1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0 )) {
1483 log_err("u-> utf8 with substitute did not match.\n");
/* UTF-16 fromUnicode: input is 0x0041 followed by 0xfeff. */
1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1489 static const UChar in
[]={ 0x0041, 0xfeff };
1491 static const uint8_t out
[]={
1502 static const int32_t offsets
[]={
1506 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1507 out
, sizeof(out
), "UTF-16",
1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1510 log_err("u->UTF-16 with substitute did not match.\n");
/* UTF-32 fromUnicode: the first four expected offsets are -1. */
1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1516 static const UChar in
[]={ 0x0041, 0xfeff };
1518 static const uint8_t out
[]={
1520 0x00, 0x00, 0xfe, 0xff,
1521 0x00, 0x00, 0x00, 0x41,
1522 0x00, 0x00, 0xfe, 0xff
1524 0xff, 0xfe, 0x00, 0x00,
1525 0x41, 0x00, 0x00, 0x00,
1526 0xff, 0xfe, 0x00, 0x00
1529 static const int32_t offsets
[]={
1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1533 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1534 out
, sizeof(out
), "UTF-32",
1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1537 log_err("u->UTF-32 with substitute did not match.\n");
/* ---- toUnicode, legacy codepages: convert the expected bytes above back to Unicode ---- */
1543 #if !UCONFIG_NO_LEGACY_CONVERSION
1544 if(!testConvertToUnicode(expsubIBM_949
, sizeof(expsubIBM_949
),
1545 IBM_949subtoUnicode
, sizeof(IBM_949subtoUnicode
)/sizeof(IBM_949subtoUnicode
[0]),"ibm-949",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM949Offs
, NULL
, 0 ))
1547 log_err("ibm-949->u with substitute did not match.\n");
1548 if(!testConvertToUnicode(expsubIBM_943
, sizeof(expsubIBM_943
),
1549 IBM_943subtoUnicode
, sizeof(IBM_943subtoUnicode
)/sizeof(IBM_943subtoUnicode
[0]),"ibm-943",
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offs
, NULL
, 0 ))
1551 log_err("ibm-943->u with substitute did not match.\n");
1552 if(!testConvertToUnicode(expsubIBM_930
, sizeof(expsubIBM_930
),
1553 IBM_930subtoUnicode
, sizeof(IBM_930subtoUnicode
)/sizeof(IBM_930subtoUnicode
[0]),"ibm-930",
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM930Offs
, NULL
, 0 ))
1555 log_err("ibm-930->u with substitute did not match.\n");
/* toUnicode for ibm-930 (stateful), euc-jp and euc-tw byte sequences. */
1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1560 const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1563 UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0xfffd, 0x03b4
1565 int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 3, 5};
1569 const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1570 0x8f, 0xda, 0xa1, /*unassigned*/
1573 UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1574 int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 6, 9, 11 };
1577 const uint8_t sampleTxt_euc_tw
[]={
1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1582 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1583 int32_t from_euc_twOffs
[] ={ 0, 1, 3, 7, 11, 13};
1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1587 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1588 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1592 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1593 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ))
1595 log_err("euc-jp->u with substitute did not match.\n");
1598 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1599 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1601 log_err("euc-tw->u with substitute did not match.\n");
1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1605 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ,"i", U_ILLEGAL_CHAR_FOUND
))
1607 log_err("euc-jp->u with substitute did not match.\n");
/* UTF-8 toUnicode. */
1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1613 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1615 UChar expected1
[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1616 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1618 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1619 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1620 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1621 log_err("utf8->u with substitute did not match.\n");
/*
 * SCSU toUnicode. The failure message names the substitute callback,
 * matching the UCNV_TO_U_CALLBACK_SUBSTITUTE argument used by this case.
 */
1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1625 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1626 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1627 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1629 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1630 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1631 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1632 log_err("scsu->u with substitute did not match.\n");
/* ibm-930 subchar/subchar1: u1 -> s1 in the fromUnicode direction, s2 -> u2 in the toUnicode direction. */
1635 #if !UCONFIG_NO_LEGACY_CONVERSION
1636 log_verbose("Testing ibm-930 subchar/subchar1\n");
1638 static const UChar u1
[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1639 static const uint8_t s1
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1640 static const int32_t offsets1
[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1642 static const UChar u2
[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1643 static const uint8_t s2
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1644 static const int32_t offsets2
[]={ 1, 3, 5, 7, 10 };
1646 if(!testConvertFromUnicode(u1
, ARRAY_LENGTH(u1
), s1
, ARRAY_LENGTH(s1
), "ibm-930",
1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1652 if(!testConvertToUnicode(s2
, ARRAY_LENGTH(s2
), u2
, ARRAY_LENGTH(u2
), "ibm-930",
1653 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1655 log_err("ibm-930->u subchar/subchar1 did not match.\n");
/* GB 18030 toUnicode: gb2 bytes are expected to decode to u2 with offsets2. */
1659 log_verbose("Testing GB 18030 with substitute callbacks\n");
1661 static const UChar u2
[]={
1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1663 static const uint8_t gb2
[]={
1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1665 static const int32_t offsets2
[]={
1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1668 if(!testConvertToUnicode(gb2
, ARRAY_LENGTH(gb2
), u2
, ARRAY_LENGTH(u2
), "gb18030",
1669 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1671 log_err("gb18030->u with substitute did not match.\n");
/* UTF-7 toUnicode: the byte sequences are spelled out in the comment inside the utf7 array. */
1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1678 static const uint8_t utf7
[]={
1679 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1682 static const UChar unicode
[]={
1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1685 static const int32_t offsets
[]={
1686 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1689 if(!testConvertToUnicode(utf7
, ARRAY_LENGTH(utf7
), unicode
, ARRAY_LENGTH(unicode
), "UTF-7",
1690 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1692 log_err("UTF-7->u with substitute did not match.\n");
/* UTF-16 toUnicode: three inputs, reported below as BE BOM, LE BOM and no BOM. */
1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1698 static const uint8_t
1699 in1
[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1700 in2
[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1701 in3
[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1704 out1
[]={ 0x4e00, 0xfeff },
1705 out2
[]={ 0x004e, 0xfffe },
1706 out3
[]={ 0xfefd, 0x4e00, 0xfeff };
1708 static const int32_t
1709 offsets1
[]={ 2, 4 },
1710 offsets2
[]={ 2, 4 },
1711 offsets3
[]={ 0, 2, 4 };
1713 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-16",
1714 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1719 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-16",
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1725 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-16",
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
/* UTF-32 toUnicode: four inputs, reported below as BE BOM, LE BOM, no BOM, and no BOM with error. */
1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1734 static const uint8_t
1735 in1
[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1736 in2
[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1737 in3
[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1738 in4
[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1741 out1
[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1742 out2
[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1743 out3
[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1744 out4
[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1746 static const int32_t
1747 offsets1
[]={ 4, 4, 8 },
1748 offsets2
[]={ 4, 4, 8 },
1749 offsets3
[]={ 0, 4, 4, 8, 12 },
1750 offsets4
[]={ 0, 0, 4, 8 };
1752 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-32",
1753 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1758 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-32",
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1764 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-32",
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1770 if(!testConvertToUnicode(in4
, ARRAY_LENGTH(in4
), out4
, ARRAY_LENGTH(out4
), "UTF-32",
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets4
, NULL
, 0)
1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1778 static void TestSubWithValue(int32_t inputsize
, int32_t outputsize
)
1780 UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1781 UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1783 const uint8_t expsubwvalIBM_949
[]= {
1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1787 const uint8_t expsubwvalIBM_943
[]= {
1788 0x9f, 0xaf, 0x9f, 0xb1,
1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1791 const uint8_t expsubwvalIBM_930
[] = {
1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1794 int32_t toIBM949Offs
[] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1795 int32_t toIBM943Offs
[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1796 int32_t toIBM930Offs
[] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1798 gInBufferSize
= inputsize
;
1799 gOutBufferSize
= outputsize
;
1803 #if !UCONFIG_NO_LEGACY_CONVERSION
1804 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1805 expsubwvalIBM_949
, sizeof(expsubwvalIBM_949
), "ibm-949",
1806 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM949Offs
, NULL
, 0 ))
1807 log_err("u-> ibm-949 with subst with value did not match.\n");
1809 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1810 expsubwvalIBM_943
, sizeof(expsubwvalIBM_943
), "ibm-943",
1811 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM943Offs
, NULL
, 0 ))
1812 log_err("u-> ibm-943 with sub with value did not match.\n");
1814 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1815 expsubwvalIBM_930
, sizeof(expsubwvalIBM_930
), "ibm-930",
1816 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM930Offs
, NULL
, 0 ))
1817 log_err("u-> ibm-930 with subst with value did not match.\n");
1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1822 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1823 static const uint8_t toIBM943
[]= { 0x61,
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1828 static const int32_t offset
[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1832 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1833 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,
1847 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1848 static const uint8_t to_euc_tw
[]={
1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x61, 0xe6, 0xca, 0x8a,
1855 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,
1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1860 static const UChar iso_2022_jp_inputText1
[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1861 static const uint8_t to_iso_2022_jp1
[]={
1862 0x1b, 0x24, 0x42, 0x21, 0x21,
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1864 0x1b, 0x24, 0x42, 0x21, 0x22,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1869 static const int32_t from_iso_2022_jpOffs1
[] ={
1877 static const UChar iso_2022_jp_inputText2
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1878 static const uint8_t to_iso_2022_jp2
[]={
1879 0x1b, 0x24, 0x42, 0x21, 0x21,
1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1882 0x1b, 0x24, 0x42, 0x21, 0x22,
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1887 static const int32_t from_iso_2022_jpOffs2
[] ={
1898 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1899 static const uint8_t to_iso_2022_cn
[]={
1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1904 static const int32_t from_iso_2022_cnOffs
[] ={
1910 static const UChar iso_2022_cn_inputText4
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1912 static const uint8_t to_iso_2022_cn4
[]={
1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1921 static const int32_t from_iso_2022_cnOffs4
[] ={
1933 static const UChar iso_2022_kr_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1934 static const uint8_t to_iso_2022_kr2
[]={
1935 0x1b, 0x24, 0x29, 0x43,
1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1946 static const int32_t from_iso_2022_krOffs2
[] ={
1959 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1960 static const uint8_t to_iso_2022_kr
[]={
1961 0x1b, 0x24, 0x29, 0x43,
1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1972 static const int32_t from_iso_2022_krOffs
[] ={
1983 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1985 static const uint8_t to_hz
[]={
1987 0x7e, 0x7b, 0x26, 0x30,
1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1989 0x7e, 0x7b, 0x26, 0x30,
1993 static const int32_t from_hzOffs
[] ={
2001 static const UChar hz_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2002 static const uint8_t to_hz2
[]={
2004 0x7e, 0x7b, 0x26, 0x30,
2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2007 0x7e, 0x7b, 0x26, 0x30,
2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2013 static const int32_t from_hzOffs2
[] ={
2026 static const UChar iscii_inputText
[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2027 static const uint8_t to_iscii
[]={
2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2038 static const int32_t from_isciiOffs
[] ={
2048 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
2049 toIBM943
, sizeof(toIBM943
), "ibm-943",
2050 UCNV_FROM_U_CALLBACK_ESCAPE
, offset
, NULL
, 0 ))
2051 log_err("u-> ibm-943 with subst with value did not match.\n");
2053 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
2054 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
2055 UCNV_FROM_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0 ))
2056 log_err("u-> euc-jp with subst with value did not match.\n");
2058 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
2059 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
2060 UCNV_FROM_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0 ))
2061 log_err("u-> euc-tw with subst with value did not match.\n");
2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2064 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2065 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2066 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2069 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2070 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2071 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
2074 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
2075 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs2
, NULL
, 0 ))
2076 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2080 static const UChar iso_2022_jp_inputText3
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2081 static const uint8_t to_iso_2022_jp3_v2
[]={
2082 0x1b, 0x24, 0x42, 0x21, 0x21,
2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2085 0x1b, 0x24, 0x42, 0x21, 0x22,
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2092 static const int32_t from_iso_2022_jpOffs3_v2
[] ={
2094 1,1,1,1,1,1,1,1,1,1,1,1,
2097 4,4,4,4,4,4,4,4,4,4,4,4,
2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3
, sizeof(iso_2022_jp_inputText3
)/sizeof(iso_2022_jp_inputText3
[0]),
2104 to_iso_2022_jp3_v2
, sizeof(to_iso_2022_jp3_v2
), "iso-2022-jp",
2105 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs3_v2
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2109 static const UChar iso_2022_cn_inputText5
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2110 static const uint8_t to_iso_2022_cn5_v2
[]={
2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2120 static const int32_t from_iso_2022_cnOffs5_v2
[] ={
2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5
, sizeof(iso_2022_cn_inputText5
)/sizeof(iso_2022_cn_inputText5
[0]),
2131 to_iso_2022_cn5_v2
, sizeof(to_iso_2022_cn5_v2
), "iso-2022-cn",
2132 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs5_v2
, NULL
, 0,UCNV_ESCAPE_JAVA
,U_ZERO_ERROR
))
2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2137 static const UChar iso_2022_cn_inputText6
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2138 static const uint8_t to_iso_2022_cn6_v2
[]={
2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2146 static const int32_t from_iso_2022_cnOffs6_v2
[] ={
2147 0, 0, 0, 0, 0, 0, 0,
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2152 7, 7, 7, 7, 7, 7, 7, 7,
2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6
, sizeof(iso_2022_cn_inputText6
)/sizeof(iso_2022_cn_inputText6
[0]),
2155 to_iso_2022_cn6_v2
, sizeof(to_iso_2022_cn6_v2
), "iso-2022-cn",
2156 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs6_v2
, NULL
, 0,UCNV_ESCAPE_UNICODE
,U_ZERO_ERROR
))
2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2161 static const UChar iso_2022_cn_inputText7
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2162 static const uint8_t to_iso_2022_cn7_v2
[]={
2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2169 static const int32_t from_iso_2022_cnOffs7_v2
[] ={
2170 0, 0, 0, 0, 0, 0, 0,
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7
, sizeof(iso_2022_cn_inputText7
)/sizeof(iso_2022_cn_inputText7
[0]),
2178 to_iso_2022_cn7_v2
, sizeof(to_iso_2022_cn7_v2
), "iso-2022-cn",
2179 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs7_v2
, NULL
, 0,"K" ,U_ZERO_ERROR
))
2180 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2184 static const UChar iso_2022_cn_inputText8
[]={
2192 static const uint8_t to_iso_2022_cn8_v2
[]={
2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2199 0x5c, 0x39, 0x30, 0x32, 0x20
2201 static const int32_t from_iso_2022_cnOffs8_v2
[] ={
2202 0, 0, 0, 0, 0, 0, 0,
2203 1, 1, 1, 1, 1, 1, 1, 1,
2205 4, 4, 4, 4, 4, 4, 4, 4,
2206 6, 6, 6, 6, 6, 6, 6, 6,
2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8
, sizeof(iso_2022_cn_inputText8
)/sizeof(iso_2022_cn_inputText8
[0]),
2211 to_iso_2022_cn8_v2
, sizeof(to_iso_2022_cn8_v2
), "iso-2022-cn",
2212 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs8_v2
, NULL
, 0,UCNV_ESCAPE_CSS2
,U_ZERO_ERROR
))
2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2217 static const uint8_t to_iso_2022_cn4_v3
[]={
2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2226 static const int32_t from_iso_2022_cnOffs4_v3
[] ={
2228 1,1,1,1,1,1,1,1,1,1,1,
2231 4,4,4,4,4,4,4,4,4,4,4,
2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2237 to_iso_2022_cn4_v3
, sizeof(to_iso_2022_cn4_v3
), "iso-2022-cn",
2238 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4_v3
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2243 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
2244 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
2245 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0 ))
2246 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2249 to_iso_2022_cn4
, sizeof(to_iso_2022_cn4
), "iso-2022-cn",
2250 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4
, NULL
, 0 ))
2251 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2252 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
2253 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
2254 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0 ))
2255 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2
, sizeof(iso_2022_kr_inputText2
)/sizeof(iso_2022_kr_inputText2
[0]),
2257 to_iso_2022_kr2
, sizeof(to_iso_2022_kr2
), "iso-2022-kr",
2258 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs2
, NULL
, 0 ))
2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2260 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
2261 to_hz
, sizeof(to_hz
), "HZ",
2262 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0 ))
2263 log_err("u-> hz with subst with value did not match.\n");
2264 if(!testConvertFromUnicode(hz_inputText2
, sizeof(hz_inputText2
)/sizeof(hz_inputText2
[0]),
2265 to_hz2
, sizeof(to_hz2
), "HZ",
2266 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs2
, NULL
, 0 ))
2267 log_err("u-> hz with subst with value did not match.\n");
2269 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
2270 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
2271 UCNV_FROM_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0 ))
2272 log_err("u-> iscii with subst with value did not match.\n");
2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2279 #if !UCONFIG_NO_LEGACY_CONVERSION
2280 static const uint8_t sampleTxtToU
[]= { 0x00, 0x9f, 0xaf,
2281 0x81, 0xad, /*unassigned*/
2283 static const UChar IBM_943toUnicode
[] = { 0x0000, 0x6D63,
2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2286 static const int32_t fromIBM943Offs
[] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2289 static const uint8_t sampleTxt_EUC_JP
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2290 0x8f, 0xda, 0xa1, /*unassigned*/
2293 static const UChar EUC_JPtoUnicode
[]={ 0x0061, 0x4edd, 0x5bec,
2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2296 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 3,
2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2302 static const uint8_t sampleTxt_euc_tw
[]={
2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2307 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2,
2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2310 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3,
2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2315 static const uint8_t sampleTxt_iso_2022_jp
[]={
2316 0x1b, 0x28, 0x42, 0x41,
2317 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
2318 0x1b, 0x28, 0x42, 0x42,
2321 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2322 static const int32_t from_iso_2022_jpOffs
[] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2325 static const uint8_t sampleTxt_iso_2022_cn
[]={
2327 0x1B, 0x24, 0x29, 0x47,
2328 0x0E, 0x40, 0x6c, /*unassigned*/
2332 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2333 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2336 static const uint8_t sampleTxt_iso_2022_kr
[]={
2337 0x1b, 0x24, 0x29, 0x43,
2345 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2346 static const int32_t from_iso_2022_krOffs
[] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2349 static const uint8_t sampleTxt_hz
[]={
2351 0x7e, 0x7b, 0x26, 0x30,
2352 0x7f, 0x1E, /*unassigned*/
2355 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2358 static const UChar hztoUnicode
[]={
2361 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2364 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2367 static const int32_t from_hzOffs
[] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2371 static const uint8_t sampleTxt_iscii
[]={
2374 0xEB, /*unassigned*/
2377 0xEC, /*unassigned*/
2380 static const UChar isciitoUnicode
[]={
2383 0x25, 0x58, 0x45, 0x42,
2386 0x25, 0x58, 0x45, 0x43,
2389 static const int32_t from_isciiOffs
[] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2393 static const uint8_t sampleTxtUTF8
[]={
2395 0xC2, 0x7E, /* truncated char */
2397 0xE0, 0xB5, 0x7E, /* truncated char */
2400 static const UChar UTF8ToUnicode
[]={
2401 0x0020, 0x0064, 0x0050,
2402 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2404 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2407 static const int32_t fromUTF8
[] = {
2411 6, 6, 6, 6, 6, 6, 6, 6, 8,
2414 static const UChar UTF8ToUnicodeXML_DEC
[]={
2415 0x0020, 0x0064, 0x0050,
2416 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2418 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2421 static const int32_t fromUTF8XML_DEC
[] = {
2423 3, 3, 3, 3, 3, 3, 4,
2425 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2430 #if !UCONFIG_NO_LEGACY_CONVERSION
2431 if(!testConvertToUnicode(sampleTxtToU
, sizeof(sampleTxtToU
),
2432 IBM_943toUnicode
, sizeof(IBM_943toUnicode
)/sizeof(IBM_943toUnicode
[0]),"ibm-943",
2433 UCNV_TO_U_CALLBACK_ESCAPE
, fromIBM943Offs
, NULL
, 0 ))
2434 log_err("ibm-943->u with substitute with value did not match.\n");
2436 if(!testConvertToUnicode(sampleTxt_EUC_JP
, sizeof(sampleTxt_EUC_JP
),
2437 EUC_JPtoUnicode
, sizeof(EUC_JPtoUnicode
)/sizeof(EUC_JPtoUnicode
[0]),"euc-jp",
2438 UCNV_TO_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0))
2439 log_err("euc-jp->u with substitute with value did not match.\n");
2441 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
2442 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
2443 UCNV_TO_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0))
2444 log_err("euc-tw->u with substitute with value did not match.\n");
2446 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2447 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2448 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0))
2449 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2451 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2452 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2453 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0,"K",U_ZERO_ERROR
))
2454 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2456 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2458 static const UChar iso_2022_jptoUnicodeDec
[]={
2460 0x0026, 0x0023, 0x0034, 0x0032, 0x003b,
2461 0x0026, 0x0023, 0x0036, 0x0038, 0x003b,
2463 static const int32_t from_iso_2022_jpOffsDec
[] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2464 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2465 iso_2022_jptoUnicodeDec
, sizeof(iso_2022_jptoUnicodeDec
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2466 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsDec
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2467 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2470 static const UChar iso_2022_jptoUnicodeHex
[]={
2472 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2473 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2475 static const int32_t from_iso_2022_jpOffsHex
[] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2476 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2477 iso_2022_jptoUnicodeHex
, sizeof(iso_2022_jptoUnicodeHex
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2478 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsHex
, NULL
, 0,UCNV_ESCAPE_XML_HEX
,U_ZERO_ERROR
))
2479 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2482 static const UChar iso_2022_jptoUnicodeC
[]={
2484 0x005C, 0x0078, 0x0032, 0x0041,
2485 0x005C, 0x0078, 0x0034, 0x0034,
2487 int32_t from_iso_2022_jpOffsC
[] ={ 3,7,7,7,7,7,7,7,7,12 };
2488 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2489 iso_2022_jptoUnicodeC
, sizeof(iso_2022_jptoUnicodeC
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2490 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsC
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2491 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2494 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
2495 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
2496 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0))
2497 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2499 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
2500 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
2501 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0))
2502 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2504 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
2505 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
2506 UCNV_TO_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0))
2507 log_err("hz->u with substitute with value did not match.\n");
2509 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
2510 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
2511 UCNV_TO_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0))
2512 log_err("ISCII ->u with substitute with value did not match.\n");
2515 if(!testConvertToUnicode(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2516 UTF8ToUnicode
, sizeof(UTF8ToUnicode
)/sizeof(UTF8ToUnicode
[0]),"UTF-8",
2517 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8
, NULL
, 0))
2518 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2519 if(!testConvertToUnicodeWithContext(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2520 UTF8ToUnicodeXML_DEC
, sizeof(UTF8ToUnicodeXML_DEC
)/sizeof(UTF8ToUnicodeXML_DEC
[0]),"UTF-8",
2521 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8XML_DEC
, NULL
, 0, UCNV_ESCAPE_XML_DEC
, U_ZERO_ERROR
))
2522 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2526 #if !UCONFIG_NO_LEGACY_CONVERSION
2527 static void TestLegalAndOthers(int32_t inputsize
, int32_t outputsize
)
2529 static const UChar legalText
[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2530 static const uint8_t templegal949
[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2531 static const int32_t to949legal
[] = {0, 1, 1, 2, 2, 3, 3};
2534 static const uint8_t text943
[] = {
2535 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2536 static const UChar toUnicode943sub
[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2537 static const UChar toUnicode943skip
[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2538 static const UChar toUnicode943stop
[]= { 0x304b};
2540 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 7 };
2541 static const int32_t fromIBM943Offsskip
[] = { 0, 3, 4, 5, 7 };
2542 static const int32_t fromIBM943Offsstop
[] = { 0};
2544 gInBufferSize
= inputsize
;
2545 gOutBufferSize
= outputsize
;
2546 /*checking with a legal value*/
2547 if(!testConvertFromUnicode(legalText
, sizeof(legalText
)/sizeof(legalText
[0]),
2548 templegal949
, sizeof(templegal949
), "ibm-949",
2549 UCNV_FROM_U_CALLBACK_SKIP
, to949legal
, NULL
, 0 ))
2550 log_err("u-> ibm-949 with skip did not match.\n");
2552 /*checking illegal value for ibm-943 with substitute*/
2553 if(!testConvertToUnicode(text943
, sizeof(text943
),
2554 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2555 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2556 log_err("ibm-943->u with subst did not match.\n");
2557 /*checking illegal value for ibm-943 with skip */
2558 if(!testConvertToUnicode(text943
, sizeof(text943
),
2559 toUnicode943skip
, sizeof(toUnicode943skip
)/sizeof(toUnicode943skip
[0]),"ibm-943",
2560 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offsskip
, NULL
, 0 ))
2561 log_err("ibm-943->u with skip did not match.\n");
2563 /*checking illegal value for ibm-943 with stop */
2564 if(!testConvertToUnicode(text943
, sizeof(text943
),
2565 toUnicode943stop
, sizeof(toUnicode943stop
)/sizeof(toUnicode943stop
[0]),"ibm-943",
2566 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offsstop
, NULL
, 0 ))
2567 log_err("ibm-943->u with stop did not match.\n");
/*
 * TestSingleByte
 *
 * Stores inputsize / outputsize in gInBufferSize / gOutBufferSize and feeds
 * sampleText to testConvertToUnicode for "ibm-943" with
 * UCNV_TO_U_CALLBACK_SUBSTITUTE, expecting toUnicode943sub as output and
 * fromIBM943Offssub as offsets. A mismatch is reported through log_err.
 *
 * NOTE(review): the tail of the sampleText initializer and the function
 * braces are not visible in this view of the file.
 */
2571 static void TestSingleByte(int32_t inputsize
, int32_t outputsize
)
2573 static const uint8_t sampleText
[] = {
2574 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2576 static const UChar toUnicode943sub
[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2577 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2578 /*checking illegal value for ibm-943 with substitute*/
2579 gInBufferSize
= inputsize
;
2580 gOutBufferSize
= outputsize
;
2582 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
2583 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2584 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2585 log_err("ibm-943->u with subst did not match.\n");
2588 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize
, int32_t outputsize
)
2591 static const UChar ebcdic_inputTest
[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2592 static const uint8_t toIBM930
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2593 static const int32_t offset_930
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2594 /* s SO doubl SI sng s SO fe fe SI s */
2596 /*EBCDIC_STATEFUL with subChar=3f*/
2597 static const uint8_t toIBM930_subvaried
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2598 static const int32_t offset_930_subvaried
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2599 static const char mySubChar
[]={ 0x3f};
2601 gInBufferSize
= inputsize
;
2602 gOutBufferSize
= outputsize
;
2604 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2605 toIBM930
, sizeof(toIBM930
), "ibm-930",
2606 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930
, NULL
, 0 ))
2607 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2609 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2610 toIBM930_subvaried
, sizeof(toIBM930_subvaried
), "ibm-930",
2611 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930_subvaried
, mySubChar
, 1 ))
2612 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
/*
 * testConvertFromUnicode
 *
 * Converts `source` (sourceLen UChars) with a converter opened for `codepage`
 * and compares the result with `expect` (expectLen bytes) and, when offsets
 * are being checked, with `expectOffsets`.
 *
 * NOTE(review): this view of the function is lossy. Several declarations
 * (src, targ, end, offs, doFlush, junk, p, errLen, i), the loop opener, most
 * of the ucnv_fromUnicode argument list, the braces and the return statements
 * are not visible here; the notes below cover only the visible statements.
 *
 * Visible flow:
 *  - junkout is pre-filled with 0xF0.
 *  - ucnv_open(codepage); a failure is reported with log_data_err.
 *  - `callback` is installed via ucnv_setFromUCallBack with a NULL context.
 *  - if mySubChar is non-NULL, ucnv_setSubstChars(conv, mySubChar, len).
 *  - ucnv_fromUnicode is repeated while status is U_BUFFER_OVERFLOW_ERROR,
 *    or while status is a success code and sourceLimit < realSourceEnd.
 *  - on U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND the UChars returned by
 *    ucnv_getInvalidUChars are compared with the errLen UChars before src.
 *  - a failure status is logged unless the callback is
 *    UCNV_FROM_U_CALLBACK_STOP and status is one of those two codes.
 *  - output length, offsets and bytes are then compared with the expectations.
 */
2616 UBool
testConvertFromUnicode(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
2617 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
2618 const char *mySubChar
, int8_t len
)
2622 UErrorCode status
= U_ZERO_ERROR
;
2623 UConverter
*conv
= 0;
2624 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
2625 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2631 int32_t realBufferSize
;
2632 char *realBufferEnd
;
2633 const UChar
*realSourceEnd
;
2634 const UChar
*sourceLimit
;
2635 UBool checkOffsets
= TRUE
;
2638 char offset_str
[9999];
2640 UConverterFromUCallback oldAction
= NULL
;
2641 const void* oldContext
= NULL
;
2644 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2645 junkout
[i
] = (char)0xF0;
2646 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2648 setNuConvTestName(codepage
, "FROM");
2650 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
2653 conv
= ucnv_open(codepage
, &status
);
2654 if(U_FAILURE(status
))
2656 log_data_err("Couldn't open converter %s\n",codepage
);
2660 log_verbose("Converter opened..\n");
2662 /*----setting the callback routine----*/
2663 ucnv_setFromUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2664 if (U_FAILURE(status
))
2666 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2668 /*------------------------*/
2669 /*setting the subChar*/
2670 if(mySubChar
!= NULL
){
2671 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2672 if (U_FAILURE(status
)) {
/* NOTE(review): this branch reports a ucnv_setSubstChars failure, yet the
 * message text names the callback function; testConvertFromUnicodeWithContext
 * words the same branch as "setting substitution chars". */
2673 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2682 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2683 realBufferEnd
= junkout
+ realBufferSize
;
2684 realSourceEnd
= source
+ sourceLen
;
/* Offsets are verified only when gOutBufferSize equals the size of junkout
 * and gInBufferSize equals NEW_MAX_BUFFER; otherwise checkOffsets is cleared. */
2686 if ( gOutBufferSize
!= realBufferSize
)
2687 checkOffsets
= FALSE
;
2689 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2690 checkOffsets
= FALSE
;
2694 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
2695 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
2697 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
2699 if(targ
== realBufferEnd
)
2701 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
2704 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
2707 status
= U_ZERO_ERROR
;
2709 ucnv_fromUnicode (conv
,
2714 checkOffsets
? offs
: NULL
,
2715 doFlush
, /* flush if we're at the end of the input data */
2717 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
2720 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2721 UChar errChars
[50]; /* should be sufficient */
2723 UErrorCode err
= U_ZERO_ERROR
;
2724 const UChar
* limit
= NULL
;
2725 const UChar
* start
= NULL
;
2726 ucnv_getInvalidUChars(conv
,errChars
, &errLen
, &err
);
2728 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err
));
2730 /* src points to limit of invalid chars */
2732 /* length of in invalid chars should be equal to returned length*/
2733 start
= src
- errLen
;
2734 if(u_strncmp(errChars
,start
,errLen
)!=0){
2735 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2738 /* allow failure codes for the stop callback */
2739 if(U_FAILURE(status
) &&
2740 (callback
!= UCNV_FROM_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
)))
2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2747 sourceLen
, targ
-junkout
);
2748 if(getTestOption(VERBOSITY_OPTION
))
2753 for(p
= junkout
;p
<targ
;p
++)
2755 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
2756 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
2760 printSeq(expect
, expectLen
);
2763 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is handed to log_verbose as the format string
 * itself; the visible code only appends "0x%02x, " formatted items to it. */
2764 log_verbose(offset_str
);
2771 if(expectLen
!= targ
-junkout
)
2773 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2775 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2776 printSeqErr(expect
, expectLen
);
2780 if (checkOffsets
&& (expectOffsets
!= 0) )
2782 log_verbose("comparing %d offsets..\n", targ
-junkout
);
2783 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
2784 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2785 log_err("Got Output : ");
2786 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2787 log_err("Got Offsets: ");
2788 for(p
=junkout
;p
<targ
;p
++)
2789 log_err("%d,", junokout
[p
-junkout
]);
2791 log_err("Expected Offsets: ");
2792 for(i
=0; i
<(targ
-junkout
); i
++)
2793 log_err("%d,", expectOffsets
[i
]);
/* The byte comparison covers the first expectLen bytes of junkout. */
2799 if(!memcmp(junkout
, expect
, expectLen
))
2801 log_verbose("String matches! %s\n", gNuConvTestName
);
2806 log_err("String does not match. %s\n", gNuConvTestName
);
2807 log_err("source: ");
2808 printUSeqErr(source
, sourceLen
);
2810 printSeqErr((const uint8_t *)junkout
, expectLen
);
2811 log_err("Expected: ");
2812 printSeqErr(expect
, expectLen
);
/*
 * testConvertToUnicode
 *
 * Converts `source` (sourcelen bytes) to UChars with a converter opened for
 * `codepage` and compares the result with `expect` (expectlen UChars) and,
 * when offsets are being checked, with `expectOffsets`.
 *
 * NOTE(review): this view of the function is lossy. Several declarations
 * (src, targ, end, offs, junk, p, errLen, i), the loop opener, part of the
 * ucnv_toUnicode argument list, the braces and the return statements are not
 * visible here; the notes below cover only the visible statements.
 *
 * Visible flow:
 *  - junkout is pre-filled with 0xFFFE.
 *  - ucnv_open(codepage); a failure is reported with log_data_err.
 *  - `callback` is installed via ucnv_setToUCallBack with a NULL context.
 *  - if mySubChar is non-NULL, ucnv_setSubstChars(conv, mySubChar, len).
 *  - ucnv_toUnicode is repeated while status is U_BUFFER_OVERFLOW_ERROR, or
 *    while status is a success code and srcLimit < realSourceEnd; the flush
 *    argument is (srcLimit == realSourceEnd).
 *  - on U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND the bytes returned by
 *    ucnv_getInvalidChars are compared with the errLen bytes before src.
 *  - a failure status is logged unless the callback is
 *    UCNV_TO_U_CALLBACK_STOP and status is U_INVALID_CHAR_FOUND,
 *    U_ILLEGAL_CHAR_FOUND or U_TRUNCATED_CHAR_FOUND.
 *  - offsets and output are then compared with the expectations.
 */
2817 UBool
testConvertToUnicode( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
2818 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
2819 const char *mySubChar
, int8_t len
)
2821 UErrorCode status
= U_ZERO_ERROR
;
2822 UConverter
*conv
= 0;
2823 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
2824 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2826 const char *realSourceEnd
;
2827 const char *srcLimit
;
2832 UBool checkOffsets
= TRUE
;
2834 char offset_str
[9999];
2836 UConverterToUCallback oldAction
= NULL
;
2837 const void* oldContext
= NULL
;
2839 int32_t realBufferSize
;
2840 UChar
*realBufferEnd
;
2843 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2844 junkout
[i
] = 0xFFFE;
2846 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2849 setNuConvTestName(codepage
, "TO");
2851 log_verbose("\n========= %s\n", gNuConvTestName
);
2853 conv
= ucnv_open(codepage
, &status
);
2854 if(U_FAILURE(status
))
2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
2860 log_verbose("Converter opened..\n");
2862 src
= (const char *)source
;
2866 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2867 realBufferEnd
= junkout
+ realBufferSize
;
2868 realSourceEnd
= src
+ sourcelen
;
2869 /*----setting the callback routine----*/
2870 ucnv_setToUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2871 if (U_FAILURE(status
))
2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2875 /*-------------------------------------*/
2876 /*setting the subChar*/
2877 if(mySubChar
!= NULL
){
2878 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2879 if (U_FAILURE(status
)) {
/* NOTE(review): this branch reports a ucnv_setSubstChars failure, yet the
 * message text names the callback function. */
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Offsets are verified only when gOutBufferSize equals the element count of
 * junkout and gInBufferSize equals NEW_MAX_BUFFER; otherwise checkOffsets is
 * cleared. */
2886 if ( gOutBufferSize
!= realBufferSize
)
2887 checkOffsets
= FALSE
;
2889 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2890 checkOffsets
= FALSE
;
2894 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
2895 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
2897 if(targ
== realBufferEnd
)
2899 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
2906 status
= U_ZERO_ERROR
;
2908 ucnv_toUnicode (conv
,
2911 (const char **)&src
,
2912 (const char *)srcLimit
,
2913 checkOffsets
? offs
: NULL
,
2914 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
2916 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
2918 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2919 char errChars
[50]; /* should be sufficient */
2921 UErrorCode err
= U_ZERO_ERROR
;
2922 const char* limit
= NULL
;
2923 const char* start
= NULL
;
2924 ucnv_getInvalidChars(conv
,errChars
, &errLen
, &err
);
2926 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err
));
2928 /* src points to limit of invalid chars */
2930 /* length of in invalid chars should be equal to returned length*/
2931 start
= src
- errLen
;
2932 if(uprv_strncmp(errChars
,start
,errLen
)!=0){
2933 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2936 /* allow failure codes for the stop callback */
2937 if(U_FAILURE(status
) &&
2938 (callback
!= UCNV_TO_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
&& status
!= U_TRUNCATED_CHAR_FOUND
)))
2940 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2944 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2945 sourcelen
, targ
-junkout
);
2946 if(getTestOption(VERBOSITY_OPTION
))
2952 for(p
= junkout
;p
<targ
;p
++)
2954 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
2955 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
2959 printUSeq(expect
, expectlen
);
2962 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is handed to log_verbose as the format string
 * itself; the visible code only appends "0x%04x, " formatted items to it. */
2963 log_verbose(offset_str
);
2969 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
2971 if (checkOffsets
&& (expectOffsets
!= 0))
2973 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
2975 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2976 log_err("Got offsets: ");
2977 for(p
=junkout
;p
<targ
;p
++)
2978 log_err(" %2d,", junokout
[p
-junkout
]);
2980 log_err("Expected offsets: ");
2981 for(i
=0; i
<(targ
-junkout
); i
++)
2982 log_err(" %2d,", expectOffsets
[i
]);
2984 log_err("Got output: ");
2985 for(i
=0; i
<(targ
-junkout
); i
++)
2986 log_err("0x%04x,", junkout
[i
]);
2988 log_err("From source: ");
2989 for(i
=0; i
<(src
-(const char *)source
); i
++)
2990 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* The comparison length is expectlen*2 bytes, i.e. it counts two bytes per
 * expected UChar (matching the "(%d bytes)" figure logged above). */
2995 if(!memcmp(junkout
, expect
, expectlen
*2))
2997 log_verbose("Matches!\n");
3002 log_err("String does not match. %s\n", gNuConvTestName
);
3003 log_verbose("String does not match. %s\n", gNuConvTestName
);
3005 printUSeqErr(junkout
, expectlen
);
3006 log_err("Expected: ");
3007 printUSeqErr(expect
, expectlen
);
/*
 * testConvertFromUnicodeWithContext
 *
 * Test helper: opens a converter for `codepage`, installs `callback` (with
 * `context`) as its from-Unicode callback, optionally sets substitution
 * characters, converts `source` with ucnv_fromUnicode(), and compares the
 * produced bytes with `expect` and, when offsets are being checked, the
 * produced offsets with `expectOffsets`.
 *
 * source, sourceLen  - UChar input; its end is computed as source + sourceLen.
 * expect, expectLen  - expected output bytes and their count.
 * codepage           - converter name passed to ucnv_open().
 * callback, context  - passed to ucnv_setFromUCallBack().
 * expectOffsets      - expected offsets; the offset comparison is skipped
 *                      when this is 0.
 * mySubChar, len     - passed to ucnv_setSubstChars() when mySubChar is
 *                      not NULL.
 * expectedError      - a failing status equal to this value is not reported
 *                      as a conversion problem.
 *
 * The only return statement visible here is `return TRUE` when the converter
 * cannot be opened.
 *
 * NOTE(review): only part of this function is visible in this listing
 * (braces, several declarations, some call arguments and most return
 * statements are not shown); the comments below describe the visible
 * statements only.
 */
3013 UBool
testConvertFromUnicodeWithContext(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
3014 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
3015 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3019 UErrorCode status
= U_ZERO_ERROR
;
3020 UConverter
*conv
= 0;
/* junkout receives the converted bytes; junokout is the parallel array of
 * int32_t offsets that is later compared with expectOffsets. */
3021 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
3022 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3028 int32_t realBufferSize
;
3029 char *realBufferEnd
;
3030 const UChar
*realSourceEnd
;
3031 const UChar
*sourceLimit
;
/* Offsets are compared only while this stays TRUE (see the buffer-size
 * checks further down). */
3032 UBool checkOffsets
= TRUE
;
/* Text buffer that accumulates the hex dump of the offsets for verbose output. */
3035 char offset_str
[9999];
/* Receive the previously installed callback and context from
 * ucnv_setFromUCallBack(); no later use of them is visible here. */
3037 UConverterFromUCallback oldAction
= NULL
;
3038 const void* oldContext
= NULL
;
/* Pre-fill every element of the output buffer with the byte 0xF0. */
3041 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3042 junkout
[i
] = (char)0xF0;
/* Second pass over NEW_MAX_BUFFER elements; its body is not visible in this listing. */
3043 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Record the codepage/direction in gNuConvTestName for later log messages. */
3045 setNuConvTestName(codepage
, "FROM");
3047 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
3050 conv
= ucnv_open(codepage
, &status
);
3051 if(U_FAILURE(status
))
3053 log_data_err("Couldn't open converter %s\n",codepage
);
3054 return TRUE
; /* Because the err has already been logged. */
3057 log_verbose("Converter opened..\n");
3059 /*----setting the callback routine----*/
3060 ucnv_setFromUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3061 if (U_FAILURE(status
))
3063 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3065 /*------------------------*/
3066 /*setting the subChar*/
3067 if(mySubChar
!= NULL
){
3068 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3069 if (U_FAILURE(status
)) {
3070 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
/* realBufferSize is the element count of junkout; realBufferEnd and
 * realSourceEnd are the one-past-the-end pointers of the output buffer and
 * of the source text. */
3079 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
3080 realBufferEnd
= junkout
+ realBufferSize
;
3081 realSourceEnd
= source
+ sourceLen
;
/* Offsets are only compared when the global output chunk size equals the
 * whole output buffer and the global input chunk size equals
 * NEW_MAX_BUFFER; otherwise checkOffsets is cleared and NULL is passed as
 * the offsets argument below. */
3083 if ( gOutBufferSize
!= realBufferSize
)
3084 checkOffsets
= FALSE
;
3086 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3087 checkOffsets
= FALSE
;
/* Per-pass limits: advance by at most gOutBufferSize / gInBufferSize, but
 * never beyond the real end of the output buffer / source text. */
3091 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
3092 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
/* Flush only on the pass whose source limit is the true end of the source. */
3094 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
/* No room left in the real output buffer before calling the converter. */
3096 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is used with targ/src/sourceLimit/end, which look like
 * pointers from their use above - confirm the log functions' format handling
 * makes this safe on the supported platforms. */
3098 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
3101 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
/* Reset the status before each converter call. */
3104 status
= U_ZERO_ERROR
;
3106 ucnv_fromUnicode (conv
,
3111 checkOffsets
? offs
: NULL
,
3112 doFlush
, /* flush if we're at the end of the input data */
/* Keep converting while the converter reported a buffer overflow, or it
 * succeeded but this pass's source limit was short of the real source end. */
3114 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
3116 /* allow failure codes for the stop callback */
3117 if(U_FAILURE(status
) && status
!= expectedError
)
3119 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
/* NOTE(review): targ-junkout is passed for a %d conversion here and in the
 * messages below; if it is a pointer difference, confirm the width matches. */
3123 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3124 sourceLen
, targ
-junkout
);
/* Verbose mode only: build hex dumps of the output bytes and the offsets. */
3125 if(getTestOption(VERBOSITY_OPTION
))
3130 for(p
= junkout
;p
<targ
;p
++)
/* Each append re-scans the buffer with strlen() and is unbounded;
 * NOTE(review): confirm 9999 bytes always suffice for the output produced. */
3132 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
3133 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
3137 printSeq(expect
, expectLen
);
3140 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the first (format) argument; the text
 * appended above is generated from "0x%02x, ", so it holds hex digits, commas
 * and spaces only. */
3141 log_verbose(offset_str
);
/* Check 1: the number of bytes produced must equal expectLen. */
3148 if(expectLen
!= targ
-junkout
)
3150 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3151 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3152 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3153 printSeqErr(expect
, expectLen
);
/* Check 2: when offsets are being checked and expectOffsets was supplied,
 * compare one int32_t offset per produced byte. */
3157 if (checkOffsets
&& (expectOffsets
!= 0) )
3159 log_verbose("comparing %d offsets..\n", targ
-junkout
);
3160 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
3161 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3162 log_err("Got Output : ");
3163 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3164 log_err("Got Offsets: ");
3165 for(p
=junkout
;p
<targ
;p
++)
3166 log_err("%d,", junokout
[p
-junkout
]);
3168 log_err("Expected Offsets: ");
3169 for(i
=0; i
<(targ
-junkout
); i
++)
3170 log_err("%d,", expectOffsets
[i
]);
/* Check 3: the first expectLen output bytes must equal the expected bytes. */
3176 if(!memcmp(junkout
, expect
, expectLen
))
3178 log_verbose("String matches! %s\n", gNuConvTestName
);
/* Mismatch report: dump the source, then expectLen bytes of the actual
 * output, then the expected bytes. */
3183 log_err("String does not match. %s\n", gNuConvTestName
);
3184 log_err("source: ");
3185 printUSeqErr(source
, sourceLen
);
3187 printSeqErr((const uint8_t *)junkout
, expectLen
);
3188 log_err("Expected: ");
3189 printSeqErr(expect
, expectLen
);
/*
 * testConvertToUnicodeWithContext
 *
 * Test helper: opens a converter for `codepage`, installs `callback` (with
 * `context`) as its to-Unicode callback, optionally sets substitution
 * characters, converts the bytes in `source` with ucnv_toUnicode(), and
 * compares the produced UChars with `expect` and, when offsets are being
 * checked, the produced offsets with `expectOffsets`.
 *
 * source, sourcelen  - input bytes; the end is computed as src + sourcelen
 *                      after src is set to (const char *)source.
 * expect, expectlen  - expected UChars and their count.
 * codepage           - converter name passed to ucnv_open().
 * callback, context  - passed to ucnv_setToUCallBack().
 * expectOffsets      - expected offsets; the offset comparison is skipped
 *                      when this is 0.
 * mySubChar, len     - passed to ucnv_setSubstChars() when mySubChar is
 *                      not NULL.
 * expectedError      - a failing status equal to this value is not reported
 *                      as a conversion problem.
 *
 * NOTE(review): only part of this function is visible in this listing
 * (braces, several declarations, some call arguments and the return
 * statements are not shown); the comments below describe the visible
 * statements only.
 */
3193 UBool
testConvertToUnicodeWithContext( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
3194 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
3195 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3197 UErrorCode status
= U_ZERO_ERROR
;
3198 UConverter
*conv
= 0;
/* junkout receives the converted UChars; junokout is the parallel array of
 * int32_t offsets that is later compared with expectOffsets. */
3199 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
3200 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3202 const char *realSourceEnd
;
3203 const char *srcLimit
;
/* Offsets are compared only while this stays TRUE (see the buffer-size
 * checks further down). */
3208 UBool checkOffsets
= TRUE
;
/* Text buffer that accumulates the hex dump of the offsets for verbose output. */
3210 char offset_str
[9999];
/* Receive the previously installed callback and context from
 * ucnv_setToUCallBack(); no later use of them is visible here. */
3212 UConverterToUCallback oldAction
= NULL
;
3213 const void* oldContext
= NULL
;
3215 int32_t realBufferSize
;
3216 UChar
*realBufferEnd
;
/* Pre-fill every element of the output buffer with 0xFFFE. */
3219 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3220 junkout
[i
] = 0xFFFE;
/* Second pass over NEW_MAX_BUFFER elements; its body is not visible in this listing. */
3222 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Record the codepage/direction in gNuConvTestName for later log messages. */
3225 setNuConvTestName(codepage
, "TO");
3227 log_verbose("\n========= %s\n", gNuConvTestName
);
3229 conv
= ucnv_open(codepage
, &status
);
3230 if(U_FAILURE(status
))
/* NOTE(review): this message prints gNuConvTestName where the from-Unicode
 * counterpart prints codepage - confirm which is intended. */
3232 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
3236 log_verbose("Converter opened..\n");
/* The byte input is walked through a const char * cursor. */
3238 src
= (const char *)source
;
/* realBufferSize is the element count of junkout; realBufferEnd and
 * realSourceEnd are the one-past-the-end pointers of the output buffer and
 * of the source bytes. */
3242 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
3243 realBufferEnd
= junkout
+ realBufferSize
;
3244 realSourceEnd
= src
+ sourcelen
;
3245 /*----setting the callback routine----*/
3246 ucnv_setToUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3247 if (U_FAILURE(status
))
3249 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3251 /*-------------------------------------*/
3252 /*setting the subChar*/
3253 if(mySubChar
!= NULL
){
3254 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3255 if (U_FAILURE(status
)) {
/* NOTE(review): this failure follows ucnv_setSubstChars(), yet the message
 * text refers to setting the callback function - looks copy-pasted; confirm
 * and correct the wording. */
3256 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Offsets are only compared when the global output chunk size equals the
 * whole output buffer and the global input chunk size equals
 * NEW_MAX_BUFFER; otherwise checkOffsets is cleared and NULL is passed as
 * the offsets argument below. */
3262 if ( gOutBufferSize
!= realBufferSize
)
3263 checkOffsets
= FALSE
;
3265 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3266 checkOffsets
= FALSE
;
/* Per-pass limits: advance by at most gOutBufferSize / gInBufferSize, but
 * never beyond the real end of the output buffer / source bytes. */
3270 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
3271 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
/* No room left in the real output buffer before calling the converter. */
3273 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is used with targ/end, which look like pointers from
 * their use above - confirm the log functions' format handling makes this
 * safe on the supported platforms. */
3275 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
3278 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
/* Reset the status before each converter call. */
3282 status
= U_ZERO_ERROR
;
3284 ucnv_toUnicode (conv
,
3287 (const char **)&src
,
3288 (const char *)srcLimit
,
3289 checkOffsets
? offs
: NULL
,
3290 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
3292 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
3294 /* allow failure codes for the stop callback */
3295 if(U_FAILURE(status
) && status
!=expectedError
)
3297 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
/* NOTE(review): targ-junkout is passed for a %d conversion; if it is a
 * pointer difference, confirm the width matches. */
3301 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3302 sourcelen
, targ
-junkout
);
/* Verbose mode only: build hex dumps of the output UChars and the offsets. */
3303 if(getTestOption(VERBOSITY_OPTION
))
3309 for(p
= junkout
;p
<targ
;p
++)
/* Each append re-scans the buffer with strlen() and is unbounded;
 * NOTE(review): confirm 9999 bytes always suffice for the output produced. */
3311 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
3312 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
3316 printUSeq(expect
, expectlen
);
3319 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the first (format) argument; the text
 * appended above is generated from "0x%04x, ", so it holds hex digits, commas
 * and spaces only. */
3320 log_verbose(offset_str
);
3326 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Check 1: when offsets are being checked and expectOffsets was supplied,
 * compare one int32_t offset per produced UChar. */
3328 if (checkOffsets
&& (expectOffsets
!= 0))
3330 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
3332 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3333 log_err("Got offsets: ");
3334 for(p
=junkout
;p
<targ
;p
++)
3335 log_err(" %2d,", junokout
[p
-junkout
]);
3337 log_err("Expected offsets: ");
3338 for(i
=0; i
<(targ
-junkout
); i
++)
3339 log_err(" %2d,", expectOffsets
[i
]);
3341 log_err("Got output: ");
3342 for(i
=0; i
<(targ
-junkout
); i
++)
3343 log_err("0x%04x,", junkout
[i
]);
/* Dump the source bytes consumed so far (from source up to the src cursor). */
3345 log_err("From source: ");
3346 for(i
=0; i
<(src
-(const char *)source
); i
++)
3347 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* Check 2: compare expectlen UChars as raw bytes. The byte count is written
 * as expectlen*2 - NOTE(review): this presumes sizeof(UChar) == 2; confirm. */
3352 if(!memcmp(junkout
, expect
, expectlen
*2))
3354 log_verbose("Matches!\n");
/* Mismatch report: dump expectlen UChars of the actual output, then the
 * expected UChars. */
3359 log_err("String does not match. %s\n", gNuConvTestName
);
3360 log_verbose("String does not match. %s\n", gNuConvTestName
);
3362 printUSeqErr(junkout
, expectlen
);
3363 log_err("Expected: ");
3364 printUSeqErr(expect
, expectlen
);
3370 static void TestCallBackFailure(void) {
3371 UErrorCode status
= U_USELESS_COLLATOR_ERROR
;
3372 ucnv_cbFromUWriteBytes(NULL
, NULL
, -1, -1, &status
);
3373 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3374 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3376 ucnv_cbFromUWriteUChars(NULL
, NULL
, NULL
, -1, &status
);
3377 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3378 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3380 ucnv_cbFromUWriteSub(NULL
, -1, &status
);
3381 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3382 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3384 ucnv_cbToUWriteUChars(NULL
, NULL
, -1, -1, &status
);
3385 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3386 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");