1 /********************************************************************
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
9 * Modification History:
11 * Madhu Katragadda 7/21/1999 Testing error callback routines
12 **************************************************************************************
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
/* Large buffer size that the callback test wrappers in this file pass as an
 * input and/or output buffer size. */
#define NEW_MAX_BUFFER 999

/* Smaller of x and y.  Every use of an argument and the whole expansion are
 * parenthesized so that operators in the arguments (e.g. `a|b`) cannot bind
 * to the comparison.  Each argument is still evaluated twice, so do not pass
 * expressions with side effects. */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Input/output buffer sizes for the current test run; set by the Test*
 * functions from their inputsize/outputsize parameters. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;

/* Scratch buffer for the current test's description; written by
 * setNuConvTestName(). */
static char gNuConvTestName[1024];
/* printSeq: writes bytes of `a` to the verbose log, each formatted as
 * "0x%02X, " via log_verbose.
 * NOTE(review): only the signature and this one statement are visible in this
 * listing; the declaration of `i` and the loop bounded by `len` are not shown
 * here - confirm against the full file. */
35 static void printSeq(const uint8_t* a
, int len
)
40 log_verbose("0x%02X, ", a
[i
++]);
/* printUSeq: writes UChar code units of `a` to the verbose log, each
 * formatted as " 0x%04x, " via log_verbose.
 * NOTE(review): the declaration of `i` and the loop over `len` are not
 * visible in this listing - confirm against the full file. */
44 static void printUSeq(const UChar
* a
, int len
)
49 log_verbose(" 0x%04x, ", a
[i
++]);
/* printSeqErr: like printSeq, but prints to stderr with fprintf: each byte
 * of `a` as " 0x%02x, ", followed by a closing "}\n".
 * NOTE(review): the declaration of `i`, the loop over `len`, and any opening
 * output are not visible in this listing. */
53 static void printSeqErr(const uint8_t* a
, int len
)
58 fprintf(stderr
, " 0x%02x, ", a
[i
++]);
59 fprintf(stderr
, "}\n");
/* printUSeqErr: like printUSeq, but prints to stderr with fprintf: each
 * UChar of `a` as "0x%04x, ", followed by a closing "}\n".
 * NOTE(review): the declaration of `i`, the loop over `len`, and any opening
 * output are not visible in this listing. */
62 static void printUSeqErr(const UChar
* a
, int len
)
67 fprintf(stderr
, "0x%04x, ", a
[i
++]);
68 fprintf(stderr
,"}\n");
/* setNuConvTestName: formats a description of the current test into the
 * file-scope buffer gNuConvTestName.  The format string takes two strings
 * and two integers (labelled InputBufSiz and OutputBufSiz).
 * NOTE(review): the sprintf argument list is cut off in this listing, so
 * which values fill the placeholders is not visible here (presumably
 * codepage, direction and the two global buffer sizes - confirm).
 * NOTE(review): sprintf is unbounded; gNuConvTestName holds 1024 bytes. */
71 static void setNuConvTestName(const char *codepage
, const char *direction
)
73 sprintf(gNuConvTestName
, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
/* Prototype for the registration entry point defined directly below. */
81 void addTestConvertErrorCallBack(TestNode
** root
);
/* addTestConvertErrorCallBack: registers the six callback test functions of
 * this file on the test tree `root` via addTest, each under a path beginning
 * with "tsconv/nccbtst/". */
83 void addTestConvertErrorCallBack(TestNode
** root
)
85 addTest(root
, &TestSkipCallBack
, "tsconv/nccbtst/TestSkipCallBack");
86 addTest(root
, &TestStopCallBack
, "tsconv/nccbtst/TestStopCallBack");
87 addTest(root
, &TestSubCallBack
, "tsconv/nccbtst/TestSubCallBack");
88 addTest(root
, &TestSubWithValueCallBack
, "tsconv/nccbtst/TestSubWithValueCallBack");
89 addTest(root
, &TestLegalAndOtherCallBack
, "tsconv/nccbtst/TestLegalAndOtherCallBack");
90 addTest(root
, &TestSingleByteCallBack
, "tsconv/nccbtst/TestSingleByteCallBack");
/* TestSkipCallBack: runs TestSkip(inputsize, outputsize) with several buffer
 * size combinations: both large, input size 1, and output size 1.
 * NOTE(review): this listing may omit further calls. */
93 static void TestSkipCallBack()
95 TestSkip(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
96 TestSkip(1,NEW_MAX_BUFFER
);
98 TestSkip(NEW_MAX_BUFFER
, 1);
/* TestStopCallBack: runs TestStop(inputsize, outputsize) with several buffer
 * size combinations: both large, input size 1, and output size 1.
 * NOTE(review): this listing may omit further calls. */
101 static void TestStopCallBack()
103 TestStop(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
104 TestStop(1,NEW_MAX_BUFFER
);
106 TestStop(NEW_MAX_BUFFER
, 1);
/* TestSubCallBack: runs TestSub and then TestEBCDIC_STATEFUL_Sub with
 * combinations of buffer size 1 and NEW_MAX_BUFFER as their two arguments.
 * NOTE(review): this listing may omit further calls. */
109 static void TestSubCallBack()
111 TestSub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
112 TestSub(1,NEW_MAX_BUFFER
);
114 TestSub(NEW_MAX_BUFFER
, 1);
115 TestEBCDIC_STATEFUL_Sub(1, 1);
116 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER
);
117 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, 1);
118 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
/* TestSubWithValueCallBack: runs TestSubWithValue with all four combinations
 * of buffer size 1 and NEW_MAX_BUFFER as its two arguments. */
123 static void TestSubWithValueCallBack()
125 TestSubWithValue(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
126 TestSubWithValue(1,NEW_MAX_BUFFER
);
127 TestSubWithValue(1,1);
128 TestSubWithValue(NEW_MAX_BUFFER
, 1);
/* TestLegalAndOtherCallBack: runs TestLegalAndOthers with all four
 * combinations of buffer size 1 and NEW_MAX_BUFFER as its two arguments. */
131 static void TestLegalAndOtherCallBack()
133 TestLegalAndOthers(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
134 TestLegalAndOthers(1,NEW_MAX_BUFFER
);
135 TestLegalAndOthers(1,1);
136 TestLegalAndOthers(NEW_MAX_BUFFER
, 1);
/* TestSingleByteCallBack: runs TestSingleByte with several buffer size
 * combinations: both large, first argument 1, and second argument 1.
 * NOTE(review): this listing may omit further calls. */
141 static void TestSingleByteCallBack()
143 TestSingleByte(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
142 TestSingleByte(1,NEW_MAX_BUFFER
);
144 TestSingleByte(NEW_MAX_BUFFER
, 1);
/*
 * TestSkip: exercises the SKIP error callbacks (UCNV_FROM_U_CALLBACK_SKIP and
 * UCNV_TO_U_CALLBACK_SKIP) across many converters.
 *
 * inputsize / outputsize are stored in gInBufferSize / gOutBufferSize before
 * any conversion is run.  Every check calls testConvertFromUnicode /
 * testConvertToUnicode (or their ...WithContext variants) with the input, the
 * expected output, a converter name, the callback and the expected offsets,
 * and reports a mismatch through log_err.
 *
 * Fixes relative to the previous text:
 *  - the ibm-949 to-Unicode check passed
 *    sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode), i.e. 1, as
 *    the expected length; it now divides by the element size like its
 *    neighbours;
 *  - the BOCU-1 and CESU-8 fromUnicode log messages named the to-Unicode
 *    callback although the calls use UCNV_FROM_U_CALLBACK_SKIP;
 *  - a stray duplicate ';' after the utf8 log_err was removed.
 *
 * NOTE(review): this listing of the function is missing lines (braces, some
 * initializer rows and some call arguments); comments describe only what is
 * visible.
 */
147 static void TestSkip(int32_t inputsize
, int32_t outputsize
)
/* Expected byte output shared by the from-Unicode and to-Unicode checks. */
149 static const uint8_t expskipIBM_949
[]= {
150 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
152 static const uint8_t expskipIBM_943
[] = {
153 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
155 static const uint8_t expskipIBM_930
[] = {
156 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
/* Record the buffer sizes requested by the caller in the file-scope globals. */
158 gInBufferSize
= inputsize
;
159 gOutBufferSize
= outputsize
;
/* ---- from Unicode, SKIP callback ---- */
162 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
165 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
166 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
168 static const int32_t toIBM949Offsskip
[] = { 0, 1, 1, 2, 2, 4, 4 };
169 static const int32_t toIBM943Offsskip
[] = { 0, 0, 1, 1, 3, 3 };
171 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
172 expskipIBM_949
, sizeof(expskipIBM_949
), "ibm-949",
173 UCNV_FROM_U_CALLBACK_SKIP
, toIBM949Offsskip
, NULL
, 0 ))
174 log_err("u-> ibm-949 with skip did not match.\n");
175 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
176 expskipIBM_943
, sizeof(expskipIBM_943
), "ibm-943",
177 UCNV_FROM_U_CALLBACK_SKIP
, toIBM943Offsskip
, NULL
, 0 ))
178 log_err("u-> ibm-943 with skip did not match.\n");
182 static const UChar fromU
[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
183 static const uint8_t fromUBytes
[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
184 static const int32_t fromUOffsets
[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
186 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
187 if(!testConvertFromUnicode(fromU
, sizeof(fromU
)/U_SIZEOF_UCHAR
,
188 fromUBytes
, sizeof(fromUBytes
),
190 UCNV_FROM_U_CALLBACK_SKIP
, fromUOffsets
,
193 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
/* Single-byte targets: US-ASCII / ibm-367 and LATIN_1 / windows-1252 share
 * their input, expected bytes and offsets. */
198 static const UChar usasciiFromU
[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
199 static const uint8_t usasciiFromUBytes
[] = { 0x61, 0x31, 0x39 };
200 static const int32_t usasciiFromUOffsets
[] = { 0, 3, 6 };
202 static const UChar latin1FromU
[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
203 static const uint8_t latin1FromUBytes
[] = { 0x61, 0xa0, 0x31, 0x39 };
204 static const int32_t latin1FromUOffsets
[] = { 0, 1, 3, 6 };
207 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
208 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
210 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
213 log_err("u->US-ASCII with skip did not match.\n");
216 /* SBCS NLTC codepage 367 for US-ASCII */
217 if(!testConvertFromUnicode(usasciiFromU
, sizeof(usasciiFromU
)/U_SIZEOF_UCHAR
,
218 usasciiFromUBytes
, sizeof(usasciiFromUBytes
),
220 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
223 log_err("u->ibm-367 with skip did not match.\n");
227 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
228 latin1FromUBytes
, sizeof(latin1FromUBytes
),
230 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
233 log_err("u->LATIN_1 with skip did not match.\n");
237 if(!testConvertFromUnicode(latin1FromU
, sizeof(latin1FromU
)/U_SIZEOF_UCHAR
,
238 latin1FromUBytes
, sizeof(latin1FromUBytes
),
240 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
243 log_err("u->windows-1252 with skip did not match.\n");
/* Test data for the multi-byte and stateful converters checked below. */
248 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
249 static const uint8_t toIBM943
[]= { 0x61, 0x61 };
250 static const int32_t offset
[]= {0, 4};
253 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
254 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
257 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
260 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
261 static const uint8_t to_euc_tw
[]={
262 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
263 0x61, 0xe6, 0xca, 0x8a,
265 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
268 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
269 static const uint8_t to_iso_2022_jp
[]={
274 static const int32_t from_iso_2022_jpOffs
[] ={0,2};
277 UChar
const iso_2022_jp_inputText2
[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
278 static const uint8_t to_iso_2022_jp2
[]={
283 static const int32_t from_iso_2022_jpOffs2
[] ={0,2};
286 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
287 static const uint8_t to_iso_2022_cn
[]={
290 static const int32_t from_iso_2022_cnOffs
[] ={
295 static const UChar iso_2022_cn_inputText1
[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
296 static const uint8_t to_iso_2022_cn1
[]={
300 static const int32_t from_iso_2022_cnOffs1
[] ={ 0, 2 };
303 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
304 static const uint8_t to_iso_2022_kr
[]={
305 0x1b, 0x24, 0x29, 0x43,
311 static const int32_t from_iso_2022_krOffs
[] ={
320 static const UChar iso_2022_kr_inputText1
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
321 static const uint8_t to_iso_2022_kr1
[]={
322 0x1b, 0x24, 0x29, 0x43,
328 static const int32_t from_iso_2022_krOffs1
[] ={
336 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
338 static const uint8_t to_hz
[]={
340 0x7e, 0x7b, 0x26, 0x30,
345 static const int32_t from_hzOffs
[] ={
352 static const UChar hz_inputText1
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
354 static const uint8_t to_hz1
[]={
356 0x7e, 0x7b, 0x26, 0x30,
361 static const int32_t from_hzOffs1
[] ={
369 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
371 static const uint8_t to_SCSU
[]={
377 static const int32_t from_SCSUOffs
[] ={
383 static const UChar iscii_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
384 static const uint8_t to_iscii
[]={
388 static const int32_t from_isciiOffs
[] ={
393 static const UChar iscii_inputText1
[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
394 static const uint8_t to_iscii1
[]={
399 static const int32_t from_isciiOffs1
[] ={0,2};
/* Run the from-Unicode checks.  The ...WithContext calls additionally pass
 * UCNV_SKIP_STOP_ON_ILLEGAL and U_ILLEGAL_CHAR_FOUND. */
401 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
402 toIBM943
, sizeof(toIBM943
), "ibm-943",
403 UCNV_FROM_U_CALLBACK_SKIP
, offset
, NULL
, 0 ))
404 log_err("u-> ibm-943 with skip did not match.\n");
406 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
407 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
408 UCNV_FROM_U_CALLBACK_SKIP
, fromEUC_JPOffs
, NULL
, 0 ))
409 log_err("u-> euc-jp with skip did not match.\n");
411 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
412 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
413 UCNV_FROM_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0 ))
414 log_err("u-> euc-tw with skip did not match.\n");
417 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
418 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
419 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0 ))
420 log_err("u-> iso-2022-jp with skip did not match.\n");
423 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
424 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
425 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs2
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
426 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
429 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
430 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
431 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0 ))
432 log_err("u-> iso-2022-cn with skip did not match.\n");
434 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1
, sizeof(iso_2022_cn_inputText1
)/sizeof(iso_2022_cn_inputText1
[0]),
435 to_iso_2022_cn1
, sizeof(to_iso_2022_cn1
), "iso-2022-cn",
436 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
437 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
440 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
441 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
442 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0 ))
443 log_err("u-> iso-2022-kr with skip did not match.\n");
445 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1
, sizeof(iso_2022_kr_inputText1
)/sizeof(iso_2022_kr_inputText1
[0]),
446 to_iso_2022_kr1
, sizeof(to_iso_2022_kr1
), "iso-2022-kr",
447 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
448 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
452 to_hz
, sizeof(to_hz
), "HZ",
453 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0 ))
454 log_err("u-> HZ with skip did not match.\n");
456 if(!testConvertFromUnicodeWithContext(hz_inputText1
, sizeof(hz_inputText1
)/sizeof(hz_inputText1
[0]),
457 to_hz1
, sizeof(to_hz1
), "hz",
458 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
459 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
463 to_SCSU
, sizeof(to_SCSU
), "SCSU",
464 UCNV_FROM_U_CALLBACK_SKIP
, from_SCSUOffs
, NULL
, 0 ))
465 log_err("u-> SCSU with skip did not match.\n");
468 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
469 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
470 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0 ))
471 log_err("u-> iscii with skip did not match.\n");
473 if(!testConvertFromUnicodeWithContext(iscii_inputText1
, sizeof(iscii_inputText1
)/sizeof(iscii_inputText1
[0]),
474 to_iscii1
, sizeof(to_iscii1
), "ISCII,version=0",
475 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
476 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/* BOCU-1, from Unicode.  The message names the from-Unicode callback because
 * that is the callback actually passed below. */
480 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_FROM_U_CALLBACK_SKIP\n");
482 static const uint8_t sampleText
[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
483 0xFB, 0xEE, 0x28, /* from source offset 0 */
501 0xF9, 0x28, /* from 16 */
510 0xFA, 0x83, /* from 24 */
519 0xF9, 0xA2, /* from 32 */
521 0xFE, 0x16, 0x3A, 0x8C,
530 static const UChar expected
[]={
531 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
532 0x0063, 0x0061, 0x000D, 0x000A,
534 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
535 0x0930, 0x0020, 0x0918, 0x0909,
537 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
538 0x4000, 0x4E00, 0x7777, 0x0020,
540 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
541 0x0020, 0xD7A3, 0xDC00, 0xD800,
543 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
544 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
546 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
549 static const int32_t offsets
[]={
550 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
551 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
552 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
553 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
554 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
558 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
559 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
560 sampleText
, sizeof(sampleText
),
562 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
564 log_err("u->BOCU-1 with skip did not match.\n");
/* CESU-8, from Unicode: checked once without and once with offsets. */
568 log_verbose("Testing fromUnicode for CESU-8 with UCNV_FROM_U_CALLBACK_SKIP\n");
570 const uint8_t sampleText
[]={
572 0xc4, 0xb5, /* U+0135 */
573 0xed, 0x80, 0xa0, /* Hangul U+d020 */
574 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
575 0xee, 0x80, 0x80, /* PUA U+e000 */
576 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
578 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
579 0xd0, 0x80 /* U+0400 */
604 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
606 /* without offsets */
607 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
608 sampleText
, sizeof(sampleText
),
610 UCNV_FROM_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
612 log_err("u->CESU-8 with skip did not match.\n");
616 if(!testConvertFromUnicode(expected
, ARRAY_LENGTH(expected
),
617 sampleText
, sizeof(sampleText
),
619 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
621 log_err("u->CESU-8 with skip did not match.\n");
/* ---- to Unicode, SKIP callback ---- */
626 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
630 static const UChar IBM_949skiptoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
631 static const UChar IBM_943skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
632 static const UChar IBM_930skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
634 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5};
635 static const int32_t fromIBM943Offs
[] = { 0, 2, 4};
636 static const int32_t fromIBM930Offs
[] = { 1, 3, 5};
/* Expected length is the element count: sizeof(array)/sizeof(array[0]). */
638 if(!testConvertToUnicode(expskipIBM_949
, sizeof(expskipIBM_949
),
639 IBM_949skiptoUnicode
, sizeof(IBM_949skiptoUnicode
)/sizeof(IBM_949skiptoUnicode
[0]),"ibm-949",
640 UCNV_TO_U_CALLBACK_SKIP
, fromIBM949Offs
, NULL
, 0 ))
641 log_err("ibm-949->u with skip did not match.\n");
642 if(!testConvertToUnicode(expskipIBM_943
, sizeof(expskipIBM_943
),
643 IBM_943skiptoUnicode
, sizeof(IBM_943skiptoUnicode
)/sizeof(IBM_943skiptoUnicode
[0]),"ibm-943",
644 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offs
, NULL
, 0 ))
645 log_err("ibm-943->u with skip did not match.\n");
648 if(!testConvertToUnicode(expskipIBM_930
, sizeof(expskipIBM_930
),
649 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
650 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0 ))
651 log_err("ibm-930->u with skip did not match.\n");
/* Same ibm-930 data again through the WithContext variant, passing the
 * context string "i" and U_ILLEGAL_CHAR_FOUND. */
654 if(!testConvertToUnicodeWithContext(expskipIBM_930
, sizeof(expskipIBM_930
),
655 IBM_930skiptoUnicode
, sizeof(IBM_930skiptoUnicode
)/sizeof(IBM_930skiptoUnicode
[0]),"ibm-930",
656 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
657 log_err("ibm-930->u with skip did not match.\n");
/* Single-byte sources: US-ASCII / ibm-367 and LATIN_1 / windows-1252. */
661 static const uint8_t usasciiToUBytes
[] = { 0x61, 0x80, 0x31 };
662 static const UChar usasciiToU
[] = { 0x61, 0x31 };
663 static const int32_t usasciiToUOffsets
[] = { 0, 2 };
665 static const uint8_t latin1ToUBytes
[] = { 0x61, 0xa0, 0x31 };
666 static const UChar latin1ToU
[] = { 0x61, 0xa0, 0x31 };
667 static const int32_t latin1ToUOffsets
[] = { 0, 1, 2 };
670 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
671 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
673 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
676 log_err("US-ASCII->u with skip did not match.\n");
679 /* SBCS NLTC codepage 367 for US-ASCII */
680 if(!testConvertToUnicode(usasciiToUBytes
, sizeof(usasciiToUBytes
),
681 usasciiToU
, sizeof(usasciiToU
)/U_SIZEOF_UCHAR
,
683 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
686 log_err("ibm-367->u with skip did not match.\n");
690 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
691 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
693 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
696 log_err("LATIN_1->u with skip did not match.\n");
700 if(!testConvertToUnicode(latin1ToUBytes
, sizeof(latin1ToUBytes
),
701 latin1ToU
, sizeof(latin1ToU
)/U_SIZEOF_UCHAR
,
703 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
706 log_err("windows-1252->u with skip did not match.\n");
/* Test data for the multi-byte and stateful converters checked below. */
711 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
712 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
714 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0x03b4
716 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 5};
720 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
721 0x8f, 0xda, 0xa1, /*unassigned*/
724 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
725 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 9};
728 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
729 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
732 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
733 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3, 11, 13};
735 static const uint8_t sampleTxt_iso_2022_jp
[]={
737 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
738 0x1b, 0x28, 0x42, 0x42,
741 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x42 };
742 static const int32_t from_iso_2022_jpOffs
[] ={ 0,9 };
745 static const uint8_t sampleTxt_iso_2022_cn
[]={
747 0x1B, 0x24, 0x29, 0x47,
748 0x0E, 0x40, 0x6f, /*unassigned*/
753 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x42 };
754 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 11 };
757 static const uint8_t sampleTxt_iso_2022_kr
[]={
758 0x1b, 0x24, 0x29, 0x43,
766 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x03A0,0x51, 0x42,0x43};
767 static const int32_t from_iso_2022_krOffs
[] ={ 4, 9, 12, 13 , 14 };
770 static const uint8_t sampleTxt_hz
[]={
772 0x7e, 0x7b, 0x26, 0x30,
773 0x7f, 0x1E, /*unassigned*/
776 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
779 static const UChar hztoUnicode
[]={
786 static const int32_t from_hzOffs
[] ={0,3,7,11,18, };
789 static const uint8_t sampleTxt_iscii
[]={
799 static const UChar isciitoUnicode
[]={
808 static const int32_t from_isciiOffs
[] ={0,1,3,4,5,7 };
811 static const uint8_t sampleTxtLMBCS
[]={ 0x12, 0xc9, 0x50,
812 0x12, 0x92, 0xa0, /*unassigned*/
815 static const UChar LMBCSToUnicode
[]={ 0x4e2e, 0xe5c4};
816 static const int32_t fromLMBCS
[] = {0, 6};
/* Run the to-Unicode checks for the data above. */
818 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
819 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
820 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
821 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
823 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
824 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
825 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
826 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
828 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
829 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
830 UCNV_TO_U_CALLBACK_SKIP
, from_euc_jpOffs
, NULL
, 0))
831 log_err("euc-jp->u with skip did not match.\n");
835 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
836 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
837 UCNV_TO_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0))
838 log_err("euc-tw->u with skip did not match.\n");
841 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
842 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
843 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0))
844 log_err("iso-2022-jp->u with skip did not match.\n");
846 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
847 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
848 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0))
849 log_err("iso-2022-cn->u with skip did not match.\n");
851 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
852 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
853 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0))
854 log_err("iso-2022-kr->u with skip did not match.\n");
856 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
857 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
858 UCNV_TO_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0))
859 log_err("HZ->u with skip did not match.\n");
861 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
862 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
863 UCNV_TO_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0))
864 log_err("iscii->u with skip did not match.\n");
866 if(!testConvertToUnicode(sampleTxtLMBCS
, sizeof(sampleTxtLMBCS
),
867 LMBCSToUnicode
, sizeof(LMBCSToUnicode
)/sizeof(LMBCSToUnicode
[0]),"LMBCS-1",
868 UCNV_TO_U_CALLBACK_SKIP
, fromLMBCS
, NULL
, 0))
869 log_err("LMBCS->u with skip did not match.\n");
/* UTF-8, to Unicode. */
872 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
874 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
876 UChar expected1
[] = { 0x0031, 0x4e8c, 0x0061};
877 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0006};
879 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
880 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
881 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
882 log_err("utf8->u with skip did not match.\n");
/* SCSU, to Unicode. */
885 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
887 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
888 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
889 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
891 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
892 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
893 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
894 log_err("scsu->u with skip did not match.\n");
/* BOCU-1, to Unicode: the input contains wrong trail bytes and a state reset
 * (see the per-row comments). */
897 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
899 const uint8_t sampleText
[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
900 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
901 0x24, 0x1E, 0x52, /* 3 */
904 0x40, 0x07, /* 8 - wrong trail byte */
907 0xD0, 0x20, /* 12 - wrong trail byte */
928 0xFB, 0x16, 0x87, /* 42 */
935 0xFC, 0x10, 0x3E, /* 56 */
936 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
938 0xFC, 0x03, 0xAC, /* 64 */
939 0xFF, /* 67 - FF just resets the state without encoding anything */
946 0xFEFF, 0x0061, 0x0062, 0x0020,
947 0x0063, 0x0061, 0x000D, 0x000A,
948 0x0020, 0x0000, 0x00DF, 0x00E6,
949 0x0930, 0x0020, 0x0918, 0x0909,
950 0x3086, 0x304D, 0x0020, 0x3053,
951 0x4000, 0x4E00, 0x7777, 0x0020,
952 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
953 0x0020, 0xD7A3, 0xDC00, 0xD800,
954 0xD800, 0xDC00, 0xD845, 0xDDDD,
955 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
956 0xDFFF, 0x0001, 0x0E40, 0x0020,
960 0, 3, 6, 7, /* skip 8, */
961 10, 11, /* skip 12, */
963 20, 21, 23, 24, 25, 26, 28, 29,
964 30, 31, 33, 35, 37, 38,
966 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
967 63, 64, /* trail */ 64, /* reset only 67, */
972 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
973 expected
, ARRAY_LENGTH(expected
), "BOCU-1",
974 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
976 log_err("BOCU-1->u with skip did not match.\n");
/* CESU-8, to Unicode: the input contains non-shortest, illegal and incomplete
 * sequences (see the per-row comments); checked without and with offsets. */
980 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
982 const uint8_t sampleText
[]={
984 0xc0, 0x80, /* 1 non-shortest form */
985 0xc4, 0xb5, /* 3 U+0135 */
986 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
987 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
988 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
989 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
990 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
992 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
993 0xed, 0xa0, /* 28 incomplete sequence */
994 0xd0, 0x80 /* 30 U+0400 */
1024 /* without offsets */
1025 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1026 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1027 UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
1029 log_err("CESU-8->u with skip did not match.\n");
1033 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
1034 expected
, ARRAY_LENGTH(expected
), "CESU-8",
1035 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1037 log_err("CESU-8->u with skip did not match.\n");
1042 static void TestStop(int32_t inputsize
, int32_t outputsize
)
1044 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1045 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1047 static const uint8_t expstopIBM_949
[]= {
1048 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1050 static const uint8_t expstopIBM_943
[] = {
1051 0x9f, 0xaf, 0x9f, 0xb1};
1053 static const uint8_t expstopIBM_930
[] = {
1054 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1056 static const UChar IBM_949stoptoUnicode
[]= {0x0000, 0xAC00, 0xAC01};
1057 static const UChar IBM_943stoptoUnicode
[]= { 0x6D63, 0x6D64};
1058 static const UChar IBM_930stoptoUnicode
[]= { 0x6D63, 0x6D64};
1061 static const int32_t toIBM949Offsstop
[] = { 0, 1, 1, 2, 2};
1062 static const int32_t toIBM943Offsstop
[] = { 0, 0, 1, 1};
1063 static const int32_t toIBM930Offsstop
[] = { 0, 0, 0, 1, 1};
1065 static const int32_t fromIBM949Offs
[] = { 0, 1, 3};
1066 static const int32_t fromIBM943Offs
[] = { 0, 2};
1067 static const int32_t fromIBM930Offs
[] = { 1, 3};
1069 gInBufferSize
= inputsize
;
1070 gOutBufferSize
= outputsize
;
1072 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1073 expstopIBM_949
, sizeof(expstopIBM_949
), "ibm-949",
1074 UCNV_FROM_U_CALLBACK_STOP
, toIBM949Offsstop
, NULL
, 0 ))
1075 log_err("u-> ibm-949 with stop did not match.\n");
1076 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1077 expstopIBM_943
, sizeof(expstopIBM_943
), "ibm-943",
1078 UCNV_FROM_U_CALLBACK_STOP
, toIBM943Offsstop
, NULL
, 0))
1079 log_err("u-> ibm-943 with stop did not match.\n");
1080 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1081 expstopIBM_930
, sizeof(expstopIBM_930
), "ibm-930",
1082 UCNV_FROM_U_CALLBACK_STOP
, toIBM930Offsstop
, NULL
, 0 ))
1083 log_err("u-> ibm-930 with stop did not match.\n");
1085 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1087 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1088 static const uint8_t toIBM943
[]= { 0x61,};
1089 static const int32_t offset
[]= {0,} ;
1092 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1093 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1094 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,};
1097 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1098 static const uint8_t to_euc_tw
[]={
1099 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1100 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,};
1103 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9, 0x0042, };
1104 static const uint8_t to_iso_2022_jp
[]={
1108 static const int32_t from_iso_2022_jpOffs
[] ={0,};
1111 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1112 static const uint8_t to_iso_2022_cn
[]={
1116 static const int32_t from_iso_2022_cnOffs
[] ={
1122 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1123 static const uint8_t to_iso_2022_kr
[]={
1124 0x1b, 0x24, 0x29, 0x43,
1128 static const int32_t from_iso_2022_krOffs
[] ={
1135 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1137 static const uint8_t to_hz
[]={
1139 0x7e, 0x7b, 0x26, 0x30,
1142 static const int32_t from_hzOffs
[] ={
1148 static const UChar iscii_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1149 static const uint8_t to_iscii
[]={
1152 static const int32_t from_isciiOffs
[] ={
1156 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1157 toIBM943
, sizeof(toIBM943
), "ibm-943",
1158 UCNV_FROM_U_CALLBACK_STOP
, offset
, NULL
, 0 ))
1159 log_err("u-> ibm-943 with stop did not match.\n");
1161 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1162 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
1163 UCNV_FROM_U_CALLBACK_STOP
, fromEUC_JPOffs
, NULL
, 0 ))
1164 log_err("u-> euc-jp with stop did not match.\n");
1166 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1167 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1168 UCNV_FROM_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1169 log_err("u-> euc-tw with stop did not match.\n");
1171 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1172 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1173 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1174 log_err("u-> iso-2022-jp with stop did not match.\n");
1176 if(!testConvertFromUnicode(iso_2022_jp_inputText
, sizeof(iso_2022_jp_inputText
)/sizeof(iso_2022_jp_inputText
[0]),
1177 to_iso_2022_jp
, sizeof(to_iso_2022_jp
), "iso-2022-jp",
1178 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1179 log_err("u-> iso-2022-jp with stop did not match.\n");
1181 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
1182 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
1183 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_cnOffs
, NULL
, 0 ))
1184 log_err("u-> iso-2022-cn with stop did not match.\n");
1186 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
1187 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
1188 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_krOffs
, NULL
, 0 ))
1189 log_err("u-> iso-2022-kr with stop did not match.\n");
1191 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
1192 to_hz
, sizeof(to_hz
), "HZ",
1193 UCNV_FROM_U_CALLBACK_STOP
, from_hzOffs
, NULL
, 0 ))
1194 log_err("u-> HZ with stop did not match.\n");\
1196 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
1197 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
1198 UCNV_FROM_U_CALLBACK_STOP
, from_isciiOffs
, NULL
, 0 ))
1199 log_err("u-> iscii with stop did not match.\n");
1203 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1205 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1207 static const uint8_t to_SCSU
[]={
1211 int32_t from_SCSUOffs
[] ={
1215 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1216 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1217 UCNV_FROM_U_CALLBACK_STOP
, from_SCSUOffs
, NULL
, 0 ))
1218 log_err("u-> SCSU with skip did not match.\n");
1222 if(!testConvertToUnicode(expstopIBM_949
, sizeof(expstopIBM_949
),
1223 IBM_949stoptoUnicode
, sizeof(IBM_949stoptoUnicode
)/sizeof(IBM_949stoptoUnicode
[0]),"ibm-949",
1224 UCNV_TO_U_CALLBACK_STOP
, fromIBM949Offs
, NULL
, 0 ))
1225 log_err("ibm-949->u with stop did not match.\n");
1226 if(!testConvertToUnicode(expstopIBM_943
, sizeof(expstopIBM_943
),
1227 IBM_943stoptoUnicode
, sizeof(IBM_943stoptoUnicode
)/sizeof(IBM_943stoptoUnicode
[0]),"ibm-943",
1228 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offs
, NULL
, 0 ))
1229 log_err("ibm-943->u with stop did not match.\n");
1230 if(!testConvertToUnicode(expstopIBM_930
, sizeof(expstopIBM_930
),
1231 IBM_930stoptoUnicode
, sizeof(IBM_930stoptoUnicode
)/sizeof(IBM_930stoptoUnicode
[0]),"ibm-930",
1232 UCNV_TO_U_CALLBACK_STOP
, fromIBM930Offs
, NULL
, 0 ))
1233 log_err("ibm-930->u with stop did not match.\n");
1235 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1238 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1239 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1241 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63 };
1242 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1};
1246 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1247 0x8f, 0xda, 0xa1, /*unassigned*/
1250 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec};
1251 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3};
1254 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1255 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1258 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2};
1259 int32_t from_euc_twOffs
[] ={ 0, 1, 3};
1263 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1264 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1265 UCNV_TO_U_CALLBACK_STOP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1266 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1268 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1269 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1270 UCNV_TO_U_CALLBACK_STOP
, from_euc_jpOffs
, NULL
, 0))
1271 log_err("euc-jp->u with stop did not match.\n");
1273 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1274 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1275 UCNV_TO_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1276 log_err("euc-tw->u with stop did not match.\n");
1278 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1280 static const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1282 static const UChar expected1
[] = { 0x0031, 0x4e8c,};
1283 static const int32_t offsets1
[] = { 0x0000, 0x0001};
1285 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1286 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1287 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1288 log_err("utf8->u with stop did not match.\n");;
1290 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1292 static const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1293 static const UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1294 static const int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003};
1296 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1297 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1298 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1299 log_err("scsu->u with stop did not match.\n");;
/*
 * TestSub - checks converter output when the SUBSTITUTE error callbacks
 * (UCNV_FROM_U_CALLBACK_SUBSTITUTE / UCNV_TO_U_CALLBACK_SUBSTITUTE) are
 * installed.
 *
 * Each case hands an input sequence, the expected output sequence, a
 * converter name, the callback and an expected-offsets table to
 * testConvertFromUnicode()/testConvertToUnicode() (or their WithContext
 * variants); when the helper returns false the case is reported via log_err().
 *
 * inputsize  - stored in the file-level gInBufferSize before any case runs.
 * outputsize - stored in the file-level gOutBufferSize before any case runs.
 *              (presumably the helpers use both as chunk sizes - TODO confirm
 *              against the helper implementations)
 */
1304 static void TestSub(int32_t inputsize
, int32_t outputsize
)
/* Shared input text, expected bytes and offset tables for ibm-949/943/930. */
1306 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1307 static const UChar sampleText2
[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1309 static const uint8_t expsubIBM_949
[] =
1310 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1312 static const uint8_t expsubIBM_943
[] = {
1313 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1315 static const uint8_t expsubIBM_930
[] = {
1316 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1318 static const UChar IBM_949subtoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1319 static const UChar IBM_943subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1320 static const UChar IBM_930subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1322 static const int32_t toIBM949Offssub
[] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1323 static const int32_t toIBM943Offssub
[] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1324 static const int32_t toIBM930Offssub
[] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1326 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5, 7 };
1327 static const int32_t fromIBM943Offs
[] = { 0, 2, 4, 6 };
1328 static const int32_t fromIBM930Offs
[] = { 1, 3, 5, 7 };
/* Publish the requested buffer sizes through the file-level globals. */
1330 gInBufferSize
= inputsize
;
1331 gOutBufferSize
= outputsize
;
/* fromUnicode with the substitute callback: ibm-949, ibm-943, ibm-930. */
1334 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1335 expsubIBM_949
, sizeof(expsubIBM_949
), "ibm-949",
1336 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM949Offssub
, NULL
, 0 ))
1337 log_err("u-> ibm-949 with subst did not match.\n");
1338 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1339 expsubIBM_943
, sizeof(expsubIBM_943
), "ibm-943",
1340 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM943Offssub
, NULL
, 0))
1341 log_err("u-> ibm-943 with subst did not match.\n");
1342 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1343 expsubIBM_930
, sizeof(expsubIBM_930
), "ibm-930",
1344 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM930Offssub
, NULL
, 0 ))
1345 log_err("u-> ibm-930 with subst did not match.\n");
1347 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1349 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1350 static const uint8_t toIBM943
[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1351 static const int32_t offset
[]= {0, 1, 1, 3, 3, 4};
1355 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1356 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1357 0xf4, 0xfe, 0xf4, 0xfe,
1360 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1363 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1364 static const uint8_t to_euc_tw
[]={
1365 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1366 0xfd, 0xfe, 0xfd, 0xfe,
1367 0x61, 0xe6, 0xca, 0x8a,
1370 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1372 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1373 toIBM943
, sizeof(toIBM943
), "ibm-943",
1374 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset
, NULL
, 0 ))
1375 log_err("u-> ibm-943 with substitute did not match.\n");
1377 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
1378 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
1379 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, fromEUC_JPOffs
, NULL
, 0 ))
1380 log_err("u-> euc-jp with substitute did not match.\n");
1382 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
1383 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
1384 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1385 log_err("u-> euc-tw with substitute did not match.\n");
1388 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1390 UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1392 const uint8_t to_SCSU
[]={
1399 int32_t from_SCSUOffs
[] ={
1405 const uint8_t to_SCSU_1
[]={
1409 int32_t from_SCSUOffs_1
[] ={
1413 if(!testConvertFromUnicode(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1414 to_SCSU
, sizeof(to_SCSU
), "SCSU",
1415 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs
, NULL
, 0 ))
1416 log_err("u-> SCSU with substitute did not match.\n");
1418 if(!testConvertFromUnicodeWithContext(SCSU_inputText
, sizeof(SCSU_inputText
)/sizeof(SCSU_inputText
[0]),
1419 to_SCSU_1
, sizeof(to_SCSU_1
), "SCSU",
1420 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs_1
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
1421 log_err("u-> SCSU with substitute did not match.\n");
1424 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1426 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1427 static const uint8_t expectedUTF8
[]= { 0xe2, 0x82, 0xac,
1428 0xf0, 0x90, 0x90, 0x81,
1429 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1430 0xef, 0xbf, 0xbf, 0x61,
1433 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1434 if(!testConvertFromUnicode(testinput
, sizeof(testinput
)/sizeof(testinput
[0]),
1435 expectedUTF8
, sizeof(expectedUTF8
), "utf8",
1436 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0 )) {
1437 log_err("u-> utf8 with substitute did not match.\n");
1441 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1443 static const UChar in
[]={ 0x0041, 0xfeff };
1445 static const uint8_t out
[]={
1456 static const int32_t offsets
[]={
1460 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1461 out
, sizeof(out
), "UTF-16",
1462 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1464 log_err("u->UTF-16 with substitute did not match.\n");
1468 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1470 static const UChar in
[]={ 0x0041, 0xfeff };
1472 static const uint8_t out
[]={
1474 0x00, 0x00, 0xfe, 0xff,
1475 0x00, 0x00, 0x00, 0x41,
1476 0x00, 0x00, 0xfe, 0xff
1478 0xff, 0xfe, 0x00, 0x00,
1479 0x41, 0x00, 0x00, 0x00,
1480 0xff, 0xfe, 0x00, 0x00
1483 static const int32_t offsets
[]={
1484 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1487 if(!testConvertFromUnicode(in
, ARRAY_LENGTH(in
),
1488 out
, sizeof(out
), "UTF-32",
1489 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1491 log_err("u->UTF-32 with substitute did not match.\n");
/* toUnicode with the substitute callback: ibm-949, ibm-943, ibm-930. */
1496 if(!testConvertToUnicode(expsubIBM_949
, sizeof(expsubIBM_949
),
1497 IBM_949subtoUnicode
, sizeof(IBM_949subtoUnicode
)/sizeof(IBM_949subtoUnicode
[0]),"ibm-949",
1498 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM949Offs
, NULL
, 0 ))
1499 log_err("ibm-949->u with substitute did not match.\n");
1500 if(!testConvertToUnicode(expsubIBM_943
, sizeof(expsubIBM_943
),
1501 IBM_943subtoUnicode
, sizeof(IBM_943subtoUnicode
)/sizeof(IBM_943subtoUnicode
[0]),"ibm-943",
1502 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offs
, NULL
, 0 ))
1503 log_err("ibm-943->u with substitute did not match.\n");
1504 if(!testConvertToUnicode(expsubIBM_930
, sizeof(expsubIBM_930
),
1505 IBM_930subtoUnicode
, sizeof(IBM_930subtoUnicode
)/sizeof(IBM_930subtoUnicode
[0]),"ibm-930",
1506 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM930Offs
, NULL
, 0 ))
1507 log_err("ibm-930->u with substitute did not match.\n");
1509 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1512 const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1513 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1515 UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0xfffd, 0x03b4
1517 int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 3, 5};
1521 const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1522 0x8f, 0xda, 0xa1, /*unassigned*/
1525 UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1526 int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 6, 9, 11 };
1529 const uint8_t sampleTxt_euc_tw
[]={
1530 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1531 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1534 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1535 int32_t from_euc_twOffs
[] ={ 0, 1, 3, 7, 11, 13};
1538 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, sizeof(sampleTxtEBCIDIC_STATEFUL
),
1539 EBCIDIC_STATEFUL_toUnicode
, sizeof(EBCIDIC_STATEFUL_toUnicode
)/sizeof(EBCIDIC_STATEFUL_toUnicode
[0]),"ibm-930",
1540 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1541 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1544 if(!testConvertToUnicode(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1545 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ))
1547 log_err("euc-jp->u with substitute did not match.\n");
1550 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
1551 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1553 log_err("euc-tw->u with substitute did not match.\n");
1556 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp
, sizeof(sampleTxt_euc_jp
),
1557 euc_jptoUnicode
, sizeof(euc_jptoUnicode
)/sizeof(euc_jptoUnicode
[0]),"euc-jp",
1558 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ,"i", U_ILLEGAL_CHAR_FOUND
))
1559 log_err("euc-jp->u with substitute did not match.\n");
1564 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1566 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1568 UChar expected1
[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1569 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1571 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1572 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"utf8",
1573 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1574 log_err("utf8->u with substitute did not match.\n");
1576 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1578 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1579 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1580 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1582 if(!testConvertToUnicode(sampleText1
, sizeof(sampleText1
),
1583 expected1
, sizeof(expected1
)/sizeof(expected1
[0]),"SCSU",
1584 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1585 log_err("scsu->u with substitute did not match.\n");
1588 log_verbose("Testing ibm-930 subchar/subchar1\n");
1590 static const UChar u1
[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1591 static const uint8_t s1
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1592 static const int32_t offsets1
[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1594 static const UChar u2
[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1595 static const uint8_t s2
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1596 static const int32_t offsets2
[]={ 1, 3, 5, 7, 10 };
1598 if(!testConvertFromUnicode(u1
, ARRAY_LENGTH(u1
), s1
, ARRAY_LENGTH(s1
), "ibm-930",
1599 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1601 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1604 if(!testConvertToUnicode(s2
, ARRAY_LENGTH(s2
), u2
, ARRAY_LENGTH(u2
), "ibm-930",
1605 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1607 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1611 log_verbose("Testing GB 18030 with substitute callbacks\n");
1613 static const UChar u2
[]={
1614 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1615 static const uint8_t gb2
[]={
1616 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1617 static const int32_t offsets2
[]={
1618 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1620 if(!testConvertToUnicode(gb2
, ARRAY_LENGTH(gb2
), u2
, ARRAY_LENGTH(u2
), "gb18030",
1621 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1623 log_err("gb18030->u with substitute did not match.\n");
1627 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1629 static const uint8_t utf7
[]={
1630 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1631 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1633 static const UChar unicode
[]={
1634 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd
1636 static const int32_t offsets
[]={
1637 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23
1640 if(!testConvertToUnicode(utf7
, ARRAY_LENGTH(utf7
), unicode
, ARRAY_LENGTH(unicode
), "UTF-7",
1641 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1643 log_err("UTF-7->u with substitute did not match.\n");
1647 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1649 static const uint8_t
1650 in1
[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1651 in2
[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1652 in3
[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1655 out1
[]={ 0x4e00, 0xfeff },
1656 out2
[]={ 0x004e, 0xfffe },
1657 out3
[]={ 0xfefd, 0x4e00, 0xfeff };
1659 static const int32_t
1660 offsets1
[]={ 2, 4 },
1661 offsets2
[]={ 2, 4 },
1662 offsets3
[]={ 0, 2, 4 };
1664 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-16",
1665 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1667 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1670 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-16",
1671 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1673 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1676 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-16",
1677 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1679 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1683 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1685 static const uint8_t
1686 in1
[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1687 in2
[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1688 in3
[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1689 in4
[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1692 out1
[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
1693 out2
[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
1694 out3
[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1695 out4
[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1697 static const int32_t
1698 offsets1
[]={ 4, 4, 8 },
1699 offsets2
[]={ 4, 4, 8 },
1700 offsets3
[]={ 0, 4, 4, 8, 12 },
1701 offsets4
[]={ 0, 0, 4, 8 };
1703 if(!testConvertToUnicode(in1
, ARRAY_LENGTH(in1
), out1
, ARRAY_LENGTH(out1
), "UTF-32",
1704 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1706 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1709 if(!testConvertToUnicode(in2
, ARRAY_LENGTH(in2
), out2
, ARRAY_LENGTH(out2
), "UTF-32",
1710 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1712 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1715 if(!testConvertToUnicode(in3
, ARRAY_LENGTH(in3
), out3
, ARRAY_LENGTH(out3
), "UTF-32",
1716 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1718 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1721 if(!testConvertToUnicode(in4
, ARRAY_LENGTH(in4
), out4
, ARRAY_LENGTH(out4
), "UTF-32",
1722 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets4
, NULL
, 0)
1724 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1729 static void TestSubWithValue(int32_t inputsize
, int32_t outputsize
)
1731 UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1732 UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1734 const uint8_t expsubwvalIBM_949
[]= {
1735 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1736 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1738 const uint8_t expsubwvalIBM_943
[]= {
1739 0x9f, 0xaf, 0x9f, 0xb1,
1740 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1742 const uint8_t expsubwvalIBM_930
[] = {
1743 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1745 int32_t toIBM949Offs
[] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1746 int32_t toIBM943Offs
[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1747 int32_t toIBM930Offs
[] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1749 gInBufferSize
= inputsize
;
1750 gOutBufferSize
= outputsize
;
1753 if(!testConvertFromUnicode(sampleText
, sizeof(sampleText
)/sizeof(sampleText
[0]),
1754 expsubwvalIBM_949
, sizeof(expsubwvalIBM_949
), "ibm-949",
1755 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM949Offs
, NULL
, 0 ))
1756 log_err("u-> ibm-949 with subst with value did not match.\n");
1758 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1759 expsubwvalIBM_943
, sizeof(expsubwvalIBM_943
), "ibm-943",
1760 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM943Offs
, NULL
, 0 ))
1761 log_err("u-> ibm-943 with sub with value did not match.\n");
1763 if(!testConvertFromUnicode(sampleText2
, sizeof(sampleText2
)/sizeof(sampleText2
[0]),
1764 expsubwvalIBM_930
, sizeof(expsubwvalIBM_930
), "ibm-930",
1765 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM930Offs
, NULL
, 0 ))
1766 log_err("u-> ibm-930 with subst with value did not match.\n");
1769 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1771 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1772 static const uint8_t toIBM943
[]= { 0x61,
1773 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1774 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1775 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1777 static const int32_t offset
[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1781 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1782 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1783 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1784 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1785 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1788 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,
1796 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1797 static const uint8_t to_euc_tw
[]={
1798 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1799 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1800 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1801 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1802 0x61, 0xe6, 0xca, 0x8a,
1804 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,
1805 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1809 static const UChar iso_2022_jp_inputText1
[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1810 static const uint8_t to_iso_2022_jp1
[]={
1811 0x1b, 0x24, 0x42, 0x21, 0x21,
1812 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1813 0x1b, 0x24, 0x42, 0x21, 0x22,
1814 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1818 static const int32_t from_iso_2022_jpOffs1
[] ={
1826 static const UChar iso_2022_jp_inputText2
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1827 static const uint8_t to_iso_2022_jp2
[]={
1828 0x1b, 0x24, 0x42, 0x21, 0x21,
1829 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1830 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1831 0x1b, 0x24, 0x42, 0x21, 0x22,
1832 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1833 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1836 static const int32_t from_iso_2022_jpOffs2
[] ={
1847 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1848 static const uint8_t to_iso_2022_cn
[]={
1850 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1853 static const int32_t from_iso_2022_cnOffs
[] ={
1859 static const UChar iso_2022_cn_inputText4
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1861 static const uint8_t to_iso_2022_cn4
[]={
1862 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1863 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1864 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1866 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1867 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1870 static const int32_t from_iso_2022_cnOffs4
[] ={
1882 static const UChar iso_2022_kr_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1883 static const uint8_t to_iso_2022_kr2
[]={
1884 0x1b, 0x24, 0x29, 0x43,
1887 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1888 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1891 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1892 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1895 static const int32_t from_iso_2022_krOffs2
[] ={
1908 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1909 static const uint8_t to_iso_2022_kr
[]={
1910 0x1b, 0x24, 0x29, 0x43,
1913 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1916 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1921 static const int32_t from_iso_2022_krOffs
[] ={
1932 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1934 static const uint8_t to_hz
[]={
1936 0x7e, 0x7b, 0x26, 0x30,
1937 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1938 0x7e, 0x7b, 0x26, 0x30,
1942 static const int32_t from_hzOffs
[] ={
1950 static const UChar hz_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1951 static const uint8_t to_hz2
[]={
1953 0x7e, 0x7b, 0x26, 0x30,
1954 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1955 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1956 0x7e, 0x7b, 0x26, 0x30,
1958 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1959 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1962 static const int32_t from_hzOffs2
[] ={
1975 static const UChar iscii_inputText
[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
1976 static const uint8_t to_iscii
[]={
1979 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1982 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1987 static const int32_t from_isciiOffs
[] ={
1997 if(!testConvertFromUnicode(inputTest
, sizeof(inputTest
)/sizeof(inputTest
[0]),
1998 toIBM943
, sizeof(toIBM943
), "ibm-943",
1999 UCNV_FROM_U_CALLBACK_ESCAPE
, offset
, NULL
, 0 ))
2000 log_err("u-> ibm-943 with subst with value did not match.\n");
2002 if(!testConvertFromUnicode(euc_jp_inputText
, sizeof(euc_jp_inputText
)/sizeof(euc_jp_inputText
[0]),
2003 to_euc_jp
, sizeof(to_euc_jp
), "euc-jp",
2004 UCNV_FROM_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0 ))
2005 log_err("u-> euc-jp with subst with value did not match.\n");
2007 if(!testConvertFromUnicode(euc_tw_inputText
, sizeof(euc_tw_inputText
)/sizeof(euc_tw_inputText
[0]),
2008 to_euc_tw
, sizeof(to_euc_tw
), "euc-tw",
2009 UCNV_FROM_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0 ))
2010 log_err("u-> euc-tw with subst with value did not match.\n");
2012 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2013 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2014 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2015 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2017 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, sizeof(iso_2022_jp_inputText1
)/sizeof(iso_2022_jp_inputText1
[0]),
2018 to_iso_2022_jp1
, sizeof(to_iso_2022_jp1
), "iso-2022-jp",
2019 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2020 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2022 if(!testConvertFromUnicode(iso_2022_jp_inputText2
, sizeof(iso_2022_jp_inputText2
)/sizeof(iso_2022_jp_inputText2
[0]),
2023 to_iso_2022_jp2
, sizeof(to_iso_2022_jp2
), "iso-2022-jp",
2024 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs2
, NULL
, 0 ))
2025 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2029 static const UChar iso_2022_jp_inputText3
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2030 static const uint8_t to_iso_2022_jp3_v2
[]={
2031 0x1b, 0x24, 0x42, 0x21, 0x21,
2032 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2034 0x1b, 0x24, 0x42, 0x21, 0x22,
2035 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2038 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2041 static const int32_t from_iso_2022_jpOffs3_v2
[] ={
2043 1,1,1,1,1,1,1,1,1,1,1,1,
2046 4,4,4,4,4,4,4,4,4,4,4,4,
2052 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3
, sizeof(iso_2022_jp_inputText3
)/sizeof(iso_2022_jp_inputText3
[0]),
2053 to_iso_2022_jp3_v2
, sizeof(to_iso_2022_jp3_v2
), "iso-2022-jp",
2054 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs3_v2
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2055 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2058 static const UChar iso_2022_cn_inputText5
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2059 static const uint8_t to_iso_2022_cn5_v2
[]={
2060 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2061 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2062 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2064 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2065 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2067 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2069 static const int32_t from_iso_2022_cnOffs5_v2
[] ={
2079 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5
, sizeof(iso_2022_cn_inputText5
)/sizeof(iso_2022_cn_inputText5
[0]),
2080 to_iso_2022_cn5_v2
, sizeof(to_iso_2022_cn5_v2
), "iso-2022-cn",
2081 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs5_v2
, NULL
, 0,UCNV_ESCAPE_JAVA
,U_ZERO_ERROR
))
2082 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2086 static const UChar iso_2022_cn_inputText6
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2087 static const uint8_t to_iso_2022_cn6_v2
[]={
2088 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2089 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2091 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2093 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2095 static const int32_t from_iso_2022_cnOffs6_v2
[] ={
2096 0, 0, 0, 0, 0, 0, 0,
2097 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2099 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2101 7, 7, 7, 7, 7, 7, 7, 7,
2103 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6
, sizeof(iso_2022_cn_inputText6
)/sizeof(iso_2022_cn_inputText6
[0]),
2104 to_iso_2022_cn6_v2
, sizeof(to_iso_2022_cn6_v2
), "iso-2022-cn",
2105 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs6_v2
, NULL
, 0,UCNV_ESCAPE_UNICODE
,U_ZERO_ERROR
))
2106 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2110 static const UChar iso_2022_cn_inputText7
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2111 static const uint8_t to_iso_2022_cn7_v2
[]={
2112 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2113 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2115 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2116 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2118 static const int32_t from_iso_2022_cnOffs7_v2
[] ={
2119 0, 0, 0, 0, 0, 0, 0,
2120 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2122 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2126 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7
, sizeof(iso_2022_cn_inputText7
)/sizeof(iso_2022_cn_inputText7
[0]),
2127 to_iso_2022_cn7_v2
, sizeof(to_iso_2022_cn7_v2
), "iso-2022-cn",
2128 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs7_v2
, NULL
, 0,"K" ,U_ZERO_ERROR
))
2129 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2133 static const uint8_t to_iso_2022_cn4_v3
[]={
2134 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2135 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2137 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2142 static const int32_t from_iso_2022_cnOffs4_v3
[] ={
2144 1,1,1,1,1,1,1,1,1,1,1,
2147 4,4,4,4,4,4,4,4,4,4,4,
2152 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2153 to_iso_2022_cn4_v3
, sizeof(to_iso_2022_cn4_v3
), "iso-2022-cn",
2154 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4_v3
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2156 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2159 if(!testConvertFromUnicode(iso_2022_cn_inputText
, sizeof(iso_2022_cn_inputText
)/sizeof(iso_2022_cn_inputText
[0]),
2160 to_iso_2022_cn
, sizeof(to_iso_2022_cn
), "iso-2022-cn",
2161 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0 ))
2162 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2164 if(!testConvertFromUnicode(iso_2022_cn_inputText4
, sizeof(iso_2022_cn_inputText4
)/sizeof(iso_2022_cn_inputText4
[0]),
2165 to_iso_2022_cn4
, sizeof(to_iso_2022_cn4
), "iso-2022-cn",
2166 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4
, NULL
, 0 ))
2167 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2168 if(!testConvertFromUnicode(iso_2022_kr_inputText
, sizeof(iso_2022_kr_inputText
)/sizeof(iso_2022_kr_inputText
[0]),
2169 to_iso_2022_kr
, sizeof(to_iso_2022_kr
), "iso-2022-kr",
2170 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0 ))
2171 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2172 if(!testConvertFromUnicode(iso_2022_kr_inputText2
, sizeof(iso_2022_kr_inputText2
)/sizeof(iso_2022_kr_inputText2
[0]),
2173 to_iso_2022_kr2
, sizeof(to_iso_2022_kr2
), "iso-2022-kr",
2174 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs2
, NULL
, 0 ))
2175 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2176 if(!testConvertFromUnicode(hz_inputText
, sizeof(hz_inputText
)/sizeof(hz_inputText
[0]),
2177 to_hz
, sizeof(to_hz
), "HZ",
2178 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0 ))
2179 log_err("u-> hz with subst with value did not match.\n");
2180 if(!testConvertFromUnicode(hz_inputText2
, sizeof(hz_inputText2
)/sizeof(hz_inputText2
[0]),
2181 to_hz2
, sizeof(to_hz2
), "HZ",
2182 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs2
, NULL
, 0 ))
2183 log_err("u-> hz with subst with value did not match.\n");
2185 if(!testConvertFromUnicode(iscii_inputText
, sizeof(iscii_inputText
)/sizeof(iscii_inputText
[0]),
2186 to_iscii
, sizeof(to_iscii
), "ISCII,version=0",
2187 UCNV_FROM_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0 ))
2188 log_err("u-> iscii with subst with value did not match.\n");
2192 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2195 static const uint8_t sampleTxtToU
[]= { 0x00, 0x9f, 0xaf,
2196 0x81, 0xad, /*unassigned*/
2198 static const UChar IBM_943toUnicode
[] = { 0x0000, 0x6D63,
2199 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2201 static const int32_t fromIBM943Offs
[] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2204 static const uint8_t sampleTxt_EUC_JP
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2205 0x8f, 0xda, 0xa1, /*unassigned*/
2208 static const UChar EUC_JPtoUnicode
[]={ 0x0061, 0x4edd, 0x5bec,
2209 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2211 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 3,
2212 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2217 static const uint8_t sampleTxt_euc_tw
[]={
2218 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2219 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2222 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2,
2223 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2225 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3,
2226 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2230 static const uint8_t sampleTxt_iso_2022_jp
[]={
2231 0x1b, 0x28, 0x42, 0x41,
2232 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
2233 0x1b, 0x28, 0x42, 0x42,
2236 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2237 static const int32_t from_iso_2022_jpOffs
[] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2240 static const uint8_t sampleTxt_iso_2022_cn
[]={
2242 0x1B, 0x24, 0x29, 0x47,
2243 0x0E, 0x40, 0x6c, /*unassigned*/
2247 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2248 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2251 static const uint8_t sampleTxt_iso_2022_kr
[]={
2252 0x1b, 0x24, 0x29, 0x43,
2260 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2261 static const int32_t from_iso_2022_krOffs
[] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2264 static const uint8_t sampleTxt_hz
[]={
2266 0x7e, 0x7b, 0x26, 0x30,
2267 0x7f, 0x1E, /*unassigned*/
2270 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2273 static const UChar hztoUnicode
[]={
2276 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2279 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2282 static const int32_t from_hzOffs
[] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2286 static const uint8_t sampleTxt_iscii
[]={
2289 0xEB, /*unassigned*/
2292 0xEC, /*unassigned*/
2295 static const UChar isciitoUnicode
[]={
2298 0x25, 0x58, 0x45, 0x42,
2301 0x25, 0x58, 0x45, 0x43,
2304 static const int32_t from_isciiOffs
[] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2308 static const uint8_t sampleTxtUTF8
[]={
2310 0xC2, 0x7E, /* truncated char */
2312 0xE0, 0xB5, 0x7E, /* truncated char */
2315 static const UChar UTF8ToUnicode
[]={
2316 0x0020, 0x0064, 0x0050,
2317 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2319 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2322 static const int32_t fromUTF8
[] = {
2326 6, 6, 6, 6, 6, 6, 6, 6, 8,
2329 static const UChar UTF8ToUnicodeXML_DEC
[]={
2330 0x0020, 0x0064, 0x0050,
2331 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2333 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2336 static const int32_t fromUTF8XML_DEC
[] = {
2338 3, 3, 3, 3, 3, 3, 4,
2340 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2345 if(!testConvertToUnicode(sampleTxtToU
, sizeof(sampleTxtToU
),
2346 IBM_943toUnicode
, sizeof(IBM_943toUnicode
)/sizeof(IBM_943toUnicode
[0]),"ibm-943",
2347 UCNV_TO_U_CALLBACK_ESCAPE
, fromIBM943Offs
, NULL
, 0 ))
2348 log_err("ibm-943->u with substitute with value did not match.\n");
2350 if(!testConvertToUnicode(sampleTxt_EUC_JP
, sizeof(sampleTxt_EUC_JP
),
2351 EUC_JPtoUnicode
, sizeof(EUC_JPtoUnicode
)/sizeof(EUC_JPtoUnicode
[0]),"euc-jp",
2352 UCNV_TO_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0))
2353 log_err("euc-jp->u with substitute with value did not match.\n");
2355 if(!testConvertToUnicode(sampleTxt_euc_tw
, sizeof(sampleTxt_euc_tw
),
2356 euc_twtoUnicode
, sizeof(euc_twtoUnicode
)/sizeof(euc_twtoUnicode
[0]),"euc-tw",
2357 UCNV_TO_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0))
2358 log_err("euc-tw->u with substitute with value did not match.\n");
2360 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2361 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2362 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0))
2363 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2365 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2366 iso_2022_jptoUnicode
, sizeof(iso_2022_jptoUnicode
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2367 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0,"K",U_ZERO_ERROR
))
2368 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2370 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2372 static const UChar iso_2022_jptoUnicodeDec
[]={
2374 0x0026, 0x0023, 0x0034, 0x0032, 0x003b,
2375 0x0026, 0x0023, 0x0036, 0x0038, 0x003b,
2377 static const int32_t from_iso_2022_jpOffsDec
[] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2378 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2379 iso_2022_jptoUnicodeDec
, sizeof(iso_2022_jptoUnicodeDec
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2380 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsDec
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2381 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2384 static const UChar iso_2022_jptoUnicodeHex
[]={
2386 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2387 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2389 static const int32_t from_iso_2022_jpOffsHex
[] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2390 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2391 iso_2022_jptoUnicodeHex
, sizeof(iso_2022_jptoUnicodeHex
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2392 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsHex
, NULL
, 0,UCNV_ESCAPE_XML_HEX
,U_ZERO_ERROR
))
2393 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2396 static const UChar iso_2022_jptoUnicodeC
[]={
2398 0x005C, 0x0078, 0x0032, 0x0041,
2399 0x005C, 0x0078, 0x0034, 0x0034,
2401 int32_t from_iso_2022_jpOffsC
[] ={ 3,7,7,7,7,7,7,7,7,12 };
2402 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, sizeof(sampleTxt_iso_2022_jp
),
2403 iso_2022_jptoUnicodeC
, sizeof(iso_2022_jptoUnicodeC
)/sizeof(iso_2022_jptoUnicode
[0]),"iso-2022-jp",
2404 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsC
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2405 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2408 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, sizeof(sampleTxt_iso_2022_cn
),
2409 iso_2022_cntoUnicode
, sizeof(iso_2022_cntoUnicode
)/sizeof(iso_2022_cntoUnicode
[0]),"iso-2022-cn",
2410 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0))
2411 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2413 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, sizeof(sampleTxt_iso_2022_kr
),
2414 iso_2022_krtoUnicode
, sizeof(iso_2022_krtoUnicode
)/sizeof(iso_2022_krtoUnicode
[0]),"iso-2022-kr",
2415 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0))
2416 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2418 if(!testConvertToUnicode(sampleTxt_hz
, sizeof(sampleTxt_hz
),
2419 hztoUnicode
, sizeof(hztoUnicode
)/sizeof(hztoUnicode
[0]),"HZ",
2420 UCNV_TO_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0))
2421 log_err("hz->u with substitute with value did not match.\n");
2423 if(!testConvertToUnicode(sampleTxt_iscii
, sizeof(sampleTxt_iscii
),
2424 isciitoUnicode
, sizeof(isciitoUnicode
)/sizeof(isciitoUnicode
[0]),"ISCII,version=0",
2425 UCNV_TO_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0))
2426 log_err("ISCII ->u with substitute with value did not match.\n");
2427 if(!testConvertToUnicode(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2428 UTF8ToUnicode
, sizeof(UTF8ToUnicode
)/sizeof(UTF8ToUnicode
[0]),"UTF-8",
2429 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8
, NULL
, 0))
2430 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2431 if(!testConvertToUnicodeWithContext(sampleTxtUTF8
, sizeof(sampleTxtUTF8
),
2432 UTF8ToUnicodeXML_DEC
, sizeof(UTF8ToUnicodeXML_DEC
)/sizeof(UTF8ToUnicodeXML_DEC
[0]),"UTF-8",
2433 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8XML_DEC
, NULL
, 0, UCNV_ESCAPE_XML_DEC
, U_ZERO_ERROR
))
2434 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2438 static void TestLegalAndOthers(int32_t inputsize
, int32_t outputsize
)
2440 static const UChar legalText
[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2441 static const uint8_t templegal949
[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2442 static const int32_t to949legal
[] = {0, 1, 1, 2, 2, 3, 3};
2445 static const uint8_t text943
[] = {
2446 0x82, 0xa9, 0x82, 0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2447 static const UChar toUnicode943sub
[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
2448 static const UChar toUnicode943skip
[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
2449 static const UChar toUnicode943stop
[]= { 0x304b};
2451 static const int32_t fromIBM943Offssub
[] = {0, 2, 4, 5, 7};
2452 static const int32_t fromIBM943Offsskip
[] = { 0, 4, 5, 7};
2453 static const int32_t fromIBM943Offsstop
[] = { 0};
2455 gInBufferSize
= inputsize
;
2456 gOutBufferSize
= outputsize
;
2457 /*checking with a legal value*/
2458 if(!testConvertFromUnicode(legalText
, sizeof(legalText
)/sizeof(legalText
[0]),
2459 templegal949
, sizeof(templegal949
), "ibm-949",
2460 UCNV_FROM_U_CALLBACK_SKIP
, to949legal
, NULL
, 0 ))
2461 log_err("u-> ibm-949 with skip did not match.\n");
2463 /*checking illegal value for ibm-943 with substitute*/
2464 if(!testConvertToUnicode(text943
, sizeof(text943
),
2465 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2466 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2467 log_err("ibm-943->u with subst did not match.\n");
2468 /*checking illegal value for ibm-943 with skip */
2469 if(!testConvertToUnicode(text943
, sizeof(text943
),
2470 toUnicode943skip
, sizeof(toUnicode943skip
)/sizeof(toUnicode943skip
[0]),"ibm-943",
2471 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offsskip
, NULL
, 0 ))
2472 log_err("ibm-943->u with skip did not match.\n");
2474 /*checking illegal value for ibm-943 with stop */
2475 if(!testConvertToUnicode(text943
, sizeof(text943
),
2476 toUnicode943stop
, sizeof(toUnicode943stop
)/sizeof(toUnicode943stop
[0]),"ibm-943",
2477 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offsstop
, NULL
, 0 ))
2478 log_err("ibm-943->u with stop did not match.\n");
2482 static void TestSingleByte(int32_t inputsize
, int32_t outputsize
)
2484 static const uint8_t sampleText
[] = {
2485 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2486 0xff, /*0x82, 0xa9,*/ 0x32, 0x33};
2487 static const UChar toUnicode943sub
[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033};
2488 static const int32_t fromIBM943Offssub
[] = {0, 2, 3, 4, 5, 7, 8};
2489 /*checking illegal value for ibm-943 with substitute*/
2490 gInBufferSize
= inputsize
;
2491 gOutBufferSize
= outputsize
;
2493 if(!testConvertToUnicode(sampleText
, sizeof(sampleText
),
2494 toUnicode943sub
, sizeof(toUnicode943sub
)/sizeof(toUnicode943sub
[0]),"ibm-943",
2495 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2496 log_err("ibm-943->u with subst did not match.\n");
2499 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize
, int32_t outputsize
)
2502 static const UChar ebcdic_inputTest
[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2503 static const uint8_t toIBM930
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2504 static const int32_t offset_930
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2505 /* s SO doubl SI sng s SO fe fe SI s */
2507 /*EBCDIC_STATEFUL with subChar=3f*/
2508 static const uint8_t toIBM930_subvaried
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2509 static const int32_t offset_930_subvaried
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2510 static const char mySubChar
[]={ 0x3f};
2512 gInBufferSize
= inputsize
;
2513 gOutBufferSize
= outputsize
;
2515 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2516 toIBM930
, sizeof(toIBM930
), "ibm-930",
2517 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930
, NULL
, 0 ))
2518 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2520 if(!testConvertFromUnicode(ebcdic_inputTest
, sizeof(ebcdic_inputTest
)/sizeof(ebcdic_inputTest
[0]),
2521 toIBM930_subvaried
, sizeof(toIBM930_subvaried
), "ibm-930",
2522 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930_subvaried
, mySubChar
, 1 ))
2523 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
/*
 * Converts sourceLen UChars from `source` to the charset named by `codepage`
 * and compares the produced bytes (and, when enabled, the produced offsets)
 * against `expect` / `expectOffsets`.
 *
 *   source, sourceLen   Unicode input and its length in UChars.
 *   expect, expectLen   expected output bytes and their count.
 *   codepage            converter name handed to ucnv_open().
 *   callback            from-Unicode error callback installed on the converter
 *                       (installed with a NULL context).
 *   expectOffsets       expected offsets, one per output byte; may be NULL.
 *   mySubChar, len      when mySubChar is non-NULL it is installed with
 *                       ucnv_setSubstChars(conv, mySubChar, len, ...).
 *
 * The conversion is driven in a loop that repeats while the status is
 * U_BUFFER_OVERFLOW_ERROR or while it succeeded and input remains, with the
 * per-call windows bounded by gInBufferSize / gOutBufferSize.
 * Offsets are only requested and compared when gOutBufferSize equals the
 * size of the local output buffer and gInBufferSize equals NEW_MAX_BUFFER.
 *
 * NOTE(review): some lines of this function are not visible in this listing
 * (the embedded line numbers skip), including several local declarations and
 * the return statements, so the exact return values are not documented here.
 */
2528 UBool
testConvertFromUnicode(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
2529 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
2530 const char *mySubChar
, int8_t len
)
2534 UErrorCode status
= U_ZERO_ERROR
;
2535 UConverter
*conv
= 0;
2536 uint8_t junkout
[NEW_MAX_BUFFER
]; /* FIX */
2537 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2543 int32_t realBufferSize
;
2544 uint8_t *realBufferEnd
;
2545 const UChar
*realSourceEnd
;
2546 const UChar
*sourceLimit
;
2547 UBool checkOffsets
= TRUE
;
2550 char offset_str
[9999];
2552 UConverterFromUCallback oldAction
= NULL
;
2553 const void* oldContext
= NULL
;
2556 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2558 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2560 setNuConvTestName(codepage
, "FROM");
2562 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
2565 conv
= ucnv_open(codepage
, &status
);
2566 if(U_FAILURE(status
))
2568 log_data_err("Couldn't open converter %s\n",codepage
);
2572 log_verbose("Converter opened..\n");
2574 /*----setting the callback routine----*/
2575 ucnv_setFromUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2576 if (U_FAILURE(status
))
2578 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2580 /*------------------------*/
2581 /*setting the subChar*/
2582 if(mySubChar
!= NULL
){
2583 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2584 if (U_FAILURE(status
)) {
/* NOTE(review): this message mentions the callback, but the call that failed here is ucnv_setSubstChars. */
2585 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2594 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2595 realBufferEnd
= junkout
+ realBufferSize
;
2596 realSourceEnd
= source
+ sourceLen
;
/* Offsets are verified only for a single whole-buffer conversion; any smaller window size disables the check. */
2598 if ( gOutBufferSize
!= realBufferSize
)
2599 checkOffsets
= FALSE
;
2601 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2602 checkOffsets
= FALSE
;
/* Clamp this iteration's output and input windows to the real buffer ends. */
2606 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
2607 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
2609 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
2611 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given a pointer argument here and in the log_verbose call below; the conversion specifier does not match the argument type. */
2613 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
2616 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
2619 status
= U_ZERO_ERROR
;
2621 ucnv_fromUnicode (conv
,
2626 checkOffsets
? offs
: NULL
,
2627 doFlush
, /* flush if we're at the end of the input data */
2629 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
/* After an invalid/illegal-character stop, cross-check ucnv_getInvalidUChars against the UChars just before src. */
2632 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2633 UChar errChars
[50]; /* should be sufficient */
2635 UErrorCode err
= U_ZERO_ERROR
;
2636 const UChar
* limit
= NULL
;
2637 const UChar
* start
= NULL
;
2638 ucnv_getInvalidUChars(conv
,errChars
, &errLen
, &err
);
2640 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err
));
2642 /* src points to limit of invalid chars */
2644 /* length of in invalid chars should be equal to returned length*/
2645 start
= src
- errLen
;
2646 if(u_strncmp(errChars
,start
,errLen
)!=0){
2647 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2650 /* allow failure codes for the stop callback */
2651 if(U_FAILURE(status
) &&
2652 (callback
!= UCNV_FROM_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
)))
2654 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2658 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2659 sourceLen
, targ
-junkout
);
/* Build printable dumps of the output bytes and of the low byte of each offset. */
2665 for(p
= junkout
;p
<targ
;p
++)
2667 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
2668 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
2672 printSeq(expect
, expectLen
);
2675 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string itself rather than through "%s". */
2676 log_verbose(offset_str
);
/* The produced byte count must equal expectLen. */
2683 if(expectLen
!= targ
-junkout
)
2685 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2686 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2687 printSeqErr(junkout
, targ
-junkout
);
2688 printSeqErr(expect
, expectLen
);
/* Compare one int32_t offset per produced output byte. */
2692 if (checkOffsets
&& (expectOffsets
!= 0) )
2694 log_verbose("comparing %d offsets..\n", targ
-junkout
);
2695 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
2696 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2697 log_err("Got Output : ");
2698 printSeqErr(junkout
, targ
-junkout
);
2699 log_err("Got Offsets: ");
2700 for(p
=junkout
;p
<targ
;p
++)
2701 log_err("%d,", junokout
[p
-junkout
]);
2703 log_err("Expected Offsets: ");
2704 for(i
=0; i
<(targ
-junkout
); i
++)
2705 log_err("%d,", expectOffsets
[i
]);
/* Final byte-for-byte comparison of the output against the expectation. */
2711 if(!memcmp(junkout
, expect
, expectLen
))
2713 log_verbose("String matches! %s\n", gNuConvTestName
);
2718 log_err("String does not match. %s\n", gNuConvTestName
);
2719 log_err("source: ");
2720 printUSeqErr(source
, sourceLen
);
2722 printSeqErr(junkout
, expectLen
);
2723 log_err("Expected: ");
2724 printSeqErr(expect
, expectLen
);
/*
 * Converts sourcelen bytes from `source`, interpreted in the charset named
 * by `codepage`, to Unicode and compares the produced UChars (and, when
 * enabled, the produced offsets) against `expect` / `expectOffsets`.
 *
 *   source, sourcelen   input bytes and their count.
 *   expect, expectlen   expected UChars and their count.
 *   codepage            converter name handed to ucnv_open().
 *   callback            to-Unicode error callback installed on the converter
 *                       (installed with a NULL context).
 *   expectOffsets       expected offsets, one per output UChar; may be NULL.
 *   mySubChar, len      when mySubChar is non-NULL it is installed with
 *                       ucnv_setSubstChars(conv, mySubChar, len, ...).
 *
 * The conversion is driven in a loop that repeats while the status is
 * U_BUFFER_OVERFLOW_ERROR or while it succeeded and input remains, with the
 * per-call windows bounded by gInBufferSize / gOutBufferSize.
 * Offsets are only requested and compared when gOutBufferSize equals the
 * size of the local output buffer and gInBufferSize equals NEW_MAX_BUFFER.
 *
 * NOTE(review): some lines of this function are not visible in this listing
 * (the embedded line numbers skip), including several local declarations and
 * the return statements, so the exact return values are not documented here.
 */
2729 UBool
testConvertToUnicode( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
2730 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
2731 const char *mySubChar
, int8_t len
)
2733 UErrorCode status
= U_ZERO_ERROR
;
2734 UConverter
*conv
= 0;
2735 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
2736 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2738 const uint8_t *realSourceEnd
;
2739 const uint8_t *srcLimit
;
2744 UBool checkOffsets
= TRUE
;
2746 char offset_str
[9999];
2748 UConverterToUCallback oldAction
= NULL
;
2749 const void* oldContext
= NULL
;
2751 int32_t realBufferSize
;
2752 UChar
*realBufferEnd
;
/* Pre-fill the output buffer with the marker value 0xFFFE. */
2755 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2756 junkout
[i
] = 0xFFFE;
2758 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2761 setNuConvTestName(codepage
, "TO");
2763 log_verbose("\n========= %s\n", gNuConvTestName
);
2765 conv
= ucnv_open(codepage
, &status
);
2766 if(U_FAILURE(status
))
2768 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
2772 log_verbose("Converter opened..\n");
2778 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2779 realBufferEnd
= junkout
+ realBufferSize
;
2780 realSourceEnd
= src
+ sourcelen
;
2781 /*----setting the callback routine----*/
2782 ucnv_setToUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2783 if (U_FAILURE(status
))
2785 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2787 /*-------------------------------------*/
2788 /*setting the subChar*/
2789 if(mySubChar
!= NULL
){
2790 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2791 if (U_FAILURE(status
)) {
/* NOTE(review): this message mentions the callback, but the call that failed here is ucnv_setSubstChars. */
2792 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Offsets are verified only for a single whole-buffer conversion; any smaller window size disables the check. */
2798 if ( gOutBufferSize
!= realBufferSize
)
2799 checkOffsets
= FALSE
;
2801 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2802 checkOffsets
= FALSE
;
/* Clamp this iteration's output and input windows to the real buffer ends. */
2806 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
2807 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
2809 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given a pointer argument here and in the log_verbose call below; the conversion specifier does not match the argument type. */
2811 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
2814 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
2818 status
= U_ZERO_ERROR
;
2820 ucnv_toUnicode (conv
,
2823 (const char **)&src
,
2824 (const char *)srcLimit
,
2825 checkOffsets
? offs
: NULL
,
2826 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
2828 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
/* After an invalid/illegal-character stop, cross-check ucnv_getInvalidChars against the bytes just before src. */
2830 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2831 char errChars
[50]; /* should be sufficient */
2833 UErrorCode err
= U_ZERO_ERROR
;
2834 const uint8_t* limit
= NULL
;
2835 const uint8_t* start
= NULL
;
2836 ucnv_getInvalidChars(conv
,errChars
, &errLen
, &err
);
2838 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err
));
2840 /* src points to limit of invalid chars */
2842 /* length of in invalid chars should be equal to returned length*/
2843 start
= src
- errLen
;
2844 if(uprv_strncmp(errChars
,(char*)start
,errLen
)!=0){
2845 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2848 /* allow failure codes for the stop callback */
2849 if(U_FAILURE(status
) &&
2850 (callback
!= UCNV_TO_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
&& status
!= U_TRUNCATED_CHAR_FOUND
)))
2852 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2856 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2857 sourcelen
, targ
-junkout
);
/* Build printable dumps of the output UChars and of the low 16 bits of each offset. */
2864 for(p
= junkout
;p
<targ
;p
++)
2866 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
2867 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
2871 printUSeq(expect
, expectlen
);
2874 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string itself rather than through "%s". */
2875 log_verbose(offset_str
);
2881 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Compare one int32_t offset per produced UChar. */
2883 if (checkOffsets
&& (expectOffsets
!= 0))
2885 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
2887 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2888 log_err("Got offsets: ");
2889 for(p
=junkout
;p
<targ
;p
++)
2890 log_err(" %2d,", junokout
[p
-junkout
]);
2892 log_err("Expected offsets: ");
2893 for(i
=0; i
<(targ
-junkout
); i
++)
2894 log_err(" %2d,", expectOffsets
[i
]);
2896 log_err("Got output: ");
2897 for(i
=0; i
<(targ
-junkout
); i
++)
2898 log_err("0x%04x,", junkout
[i
]);
2900 log_err("From source: ");
2901 for(i
=0; i
<(src
-source
); i
++)
2902 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* NOTE(review): the byte count expectlen*2 assumes a UChar occupies two bytes - confirm, or use sizeof(UChar). */
2907 if(!memcmp(junkout
, expect
, expectlen
*2))
2909 log_verbose("Matches!\n");
2914 log_err("String does not match. %s\n", gNuConvTestName
);
2915 log_verbose("String does not match. %s\n", gNuConvTestName
);
2917 printUSeqErr(junkout
, expectlen
);
2918 log_err("Expected: ");
2919 printUSeqErr(expect
, expectlen
);
/*
 * Variant of the from-Unicode conversion check that also passes a callback
 * context and tolerates one specific failure code.
 *
 *   source, sourceLen   Unicode input and its length in UChars.
 *   expect, expectLen   expected output bytes and their count.
 *   codepage            converter name handed to ucnv_open().
 *   callback, context   from-Unicode error callback and the context pointer
 *                       installed with it via ucnv_setFromUCallBack().
 *   expectOffsets       expected offsets, one per output byte; may be NULL.
 *   mySubChar, len      when mySubChar is non-NULL it is installed with
 *                       ucnv_setSubstChars(conv, mySubChar, len, ...).
 *   expectedError       a failing status equal to this value is not reported
 *                       as a problem.
 *
 * Returns TRUE when the converter cannot be opened, because that failure has
 * already been logged with log_data_err().
 * Offsets are only requested and compared when gOutBufferSize equals the
 * size of the local output buffer and gInBufferSize equals NEW_MAX_BUFFER.
 *
 * NOTE(review): some lines of this function are not visible in this listing
 * (the embedded line numbers skip), including several local declarations and
 * the remaining return statements, so the other return values are not
 * documented here.
 */
2925 UBool
testConvertFromUnicodeWithContext(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
2926 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
2927 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
2931 UErrorCode status
= U_ZERO_ERROR
;
2932 UConverter
*conv
= 0;
2933 uint8_t junkout
[NEW_MAX_BUFFER
]; /* FIX */
2934 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2940 int32_t realBufferSize
;
2941 uint8_t *realBufferEnd
;
2942 const UChar
*realSourceEnd
;
2943 const UChar
*sourceLimit
;
2944 UBool checkOffsets
= TRUE
;
2947 char offset_str
[9999];
2949 UConverterFromUCallback oldAction
= NULL
;
2950 const void* oldContext
= NULL
;
2953 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2955 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2957 setNuConvTestName(codepage
, "FROM");
2959 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
2962 conv
= ucnv_open(codepage
, &status
);
2963 if(U_FAILURE(status
))
2965 log_data_err("Couldn't open converter %s\n",codepage
);
2966 return TRUE
; /* Because the err has already been logged. */
2969 log_verbose("Converter opened..\n");
2971 /*----setting the callback routine----*/
2972 ucnv_setFromUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
2973 if (U_FAILURE(status
))
2975 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2977 /*------------------------*/
2978 /*setting the subChar*/
2979 if(mySubChar
!= NULL
){
2980 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2981 if (U_FAILURE(status
)) {
2982 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
2991 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
2992 realBufferEnd
= junkout
+ realBufferSize
;
2993 realSourceEnd
= source
+ sourceLen
;
/* Offsets are verified only for a single whole-buffer conversion; any smaller window size disables the check. */
2995 if ( gOutBufferSize
!= realBufferSize
)
2996 checkOffsets
= FALSE
;
2998 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2999 checkOffsets
= FALSE
;
/* Clamp this iteration's output and input windows to the real buffer ends. */
3003 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
3004 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
3006 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
3008 if(targ
== realBufferEnd
)
/* NOTE(review): %08lx is given a pointer argument here and in the log_verbose call below; the conversion specifier does not match the argument type. */
3010 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
3013 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
3016 status
= U_ZERO_ERROR
;
3018 ucnv_fromUnicode (conv
,
3023 checkOffsets
? offs
: NULL
,
3024 doFlush
, /* flush if we're at the end of the input data */
3026 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
3028 /* allow failure codes for the stop callback */
3029 if(U_FAILURE(status
) && status
!= expectedError
)
3031 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3035 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3036 sourceLen
, targ
-junkout
);
/* Build printable dumps of the output bytes and of the low byte of each offset. */
3042 for(p
= junkout
;p
<targ
;p
++)
3044 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
3045 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
3049 printSeq(expect
, expectLen
);
3052 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string itself rather than through "%s". */
3053 log_verbose(offset_str
);
/* The produced byte count must equal expectLen. */
3060 if(expectLen
!= targ
-junkout
)
3062 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3063 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3064 printSeqErr(junkout
, targ
-junkout
);
3065 printSeqErr(expect
, expectLen
);
/* Compare one int32_t offset per produced output byte. */
3069 if (checkOffsets
&& (expectOffsets
!= 0) )
3071 log_verbose("comparing %d offsets..\n", targ
-junkout
);
3072 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
3073 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3074 log_err("Got Output : ");
3075 printSeqErr(junkout
, targ
-junkout
);
3076 log_err("Got Offsets: ");
3077 for(p
=junkout
;p
<targ
;p
++)
3078 log_err("%d,", junokout
[p
-junkout
]);
3080 log_err("Expected Offsets: ");
3081 for(i
=0; i
<(targ
-junkout
); i
++)
3082 log_err("%d,", expectOffsets
[i
]);
/* Final byte-for-byte comparison of the output against the expectation. */
3088 if(!memcmp(junkout
, expect
, expectLen
))
3090 log_verbose("String matches! %s\n", gNuConvTestName
);
3095 log_err("String does not match. %s\n", gNuConvTestName
);
3096 log_err("source: ");
3097 printUSeqErr(source
, sourceLen
);
3099 printSeqErr(junkout
, expectLen
);
3100 log_err("Expected: ");
3101 printSeqErr(expect
, expectLen
);
3105 UBool
testConvertToUnicodeWithContext( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
3106 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
3107 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3109 UErrorCode status
= U_ZERO_ERROR
;
3110 UConverter
*conv
= 0;
3111 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
3112 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3114 const uint8_t *realSourceEnd
;
3115 const uint8_t *srcLimit
;
3120 UBool checkOffsets
= TRUE
;
3122 char offset_str
[9999];
3124 UConverterToUCallback oldAction
= NULL
;
3125 const void* oldContext
= NULL
;
3127 int32_t realBufferSize
;
3128 UChar
*realBufferEnd
;
3131 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3132 junkout
[i
] = 0xFFFE;
3134 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3137 setNuConvTestName(codepage
, "TO");
3139 log_verbose("\n========= %s\n", gNuConvTestName
);
3141 conv
= ucnv_open(codepage
, &status
);
3142 if(U_FAILURE(status
))
3144 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
3148 log_verbose("Converter opened..\n");
3154 realBufferSize
= (sizeof(junkout
)/sizeof(junkout
[0]));
3155 realBufferEnd
= junkout
+ realBufferSize
;
3156 realSourceEnd
= src
+ sourcelen
;
3157 /*----setting the callback routine----*/
3158 ucnv_setToUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3159 if (U_FAILURE(status
))
3161 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3163 /*-------------------------------------*/
3164 /*setting the subChar*/
3165 if(mySubChar
!= NULL
){
3166 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3167 if (U_FAILURE(status
)) {
3168 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3174 if ( gOutBufferSize
!= realBufferSize
)
3175 checkOffsets
= FALSE
;
3177 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3178 checkOffsets
= FALSE
;
3182 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
3183 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
3185 if(targ
== realBufferEnd
)
3187 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
3190 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
3194 status
= U_ZERO_ERROR
;
3196 ucnv_toUnicode (conv
,
3199 (const char **)&src
,
3200 (const char *)srcLimit
,
3201 checkOffsets
? offs
: NULL
,
3202 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
3204 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
3206 /* allow failure codes for the stop callback */
3207 if(U_FAILURE(status
) && status
!=expectedError
)
3209 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3213 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3214 sourcelen
, targ
-junkout
);
3221 for(p
= junkout
;p
<targ
;p
++)
3223 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
3224 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
3228 printUSeq(expect
, expectlen
);
3231 log_verbose("\nOffsets:");
3232 log_verbose(offset_str
);
3238 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
3240 if (checkOffsets
&& (expectOffsets
!= 0))
3242 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
3244 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3245 log_err("Got offsets: ");
3246 for(p
=junkout
;p
<targ
;p
++)
3247 log_err(" %2d,", junokout
[p
-junkout
]);
3249 log_err("Expected offsets: ");
3250 for(i
=0; i
<(targ
-junkout
); i
++)
3251 log_err(" %2d,", expectOffsets
[i
]);
3253 log_err("Got output: ");
3254 for(i
=0; i
<(targ
-junkout
); i
++)
3255 log_err("0x%04x,", junkout
[i
]);
3257 log_err("From source: ");
3258 for(i
=0; i
<(src
-source
); i
++)
3259 log_err(" 0x%02x,", (unsigned char)source
[i
]);
3264 if(!memcmp(junkout
, expect
, expectlen
*2))
3266 log_verbose("Matches!\n");
3271 log_err("String does not match. %s\n", gNuConvTestName
);
3272 log_verbose("String does not match. %s\n", gNuConvTestName
);
3274 printUSeqErr(junkout
, expectlen
);
3275 log_err("Expected: ");
3276 printUSeqErr(expect
, expectlen
);