1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
9 ********************************************************************************
12 * Modification History:
14 * Madhu Katragadda 7/21/1999 Testing error callback routines
15 ********************************************************************************
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
27 #include "unicode/utypes.h"
28 #include "unicode/ustring.h"
30 #include "unicode/ucnv_cb.h"
31 #include "unicode/utf16.h"
/*
 * Large chunk size used by the TestXxxCallBack drivers when they call the
 * per-callback tests with (inputsize, outputsize) pairs; the other value
 * they pass is 1.
 */
#define NEW_MAX_BUFFER 999

/*
 * Minimum of x and y.
 * Every use of an argument and the whole expansion are parenthesized so
 * that operands with lower precedence than '<' (for example `a | b` or
 * `c ? d : e`) are compared and returned as a unit.
 * NOTE: still a macro - each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Chunk sizes for the current test run; the test functions assign them
 * from their inputsize/outputsize parameters. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;

/* Scratch buffer that setNuConvTestName() formats the test label into. */
static char gNuConvTestName[1024];
/*
 * printSeq: dumps a byte sequence through log_verbose.
 *   a   - bytes to print
 *   len - element count (presumably the loop bound; the loop header and the
 *         declaration of i are not visible in this view - TODO confirm)
 * Each element is formatted as "0x%02X, " and the index i is post-incremented
 * inside the call.
 */
41 static void printSeq(const uint8_t* a
, int len
)
46 log_verbose("0x%02X, ", a
[i
++]);
/*
 * printUSeq: dumps a UChar sequence through log_verbose.
 *   a   - UChar units to print
 *   len - element count (presumably the loop bound; the loop header is not
 *         visible in this view - TODO confirm)
 * Each unit is formatted as " 0x%04x, " (note the leading blank, which the
 * byte variant printSeq does not have).
 */
50 static void printUSeq(const UChar
* a
, int len
)
55 log_verbose(" 0x%04x, ", a
[i
++]);
/*
 * printSeqErr: dumps a byte sequence to stderr.
 *   a   - bytes to print
 *   len - element count (presumably the loop bound; the loop header is not
 *         visible in this view - TODO confirm)
 * Writes directly with fprintf(stderr, ...) rather than through log_verbose.
 */
59 static void printSeqErr(const uint8_t* a
, int len
)
/* one element per call, formatted " 0x%02x, "; i advances inside the call */
64 fprintf(stderr
, " 0x%02x, ", a
[i
++]);
/* closes the printed list */
65 fprintf(stderr
, "}\n");
/*
 * printUSeqErr: dumps a UChar sequence to stderr.
 *   a   - UChar units to print
 *   len - element count (presumably the loop bound; the loop header is not
 *         visible in this view - TODO confirm)
 * Writes directly with fprintf(stderr, ...) rather than through log_verbose.
 */
68 static void printUSeqErr(const UChar
* a
, int len
)
/* one unit per call, formatted "0x%04x, "; i advances inside the call */
73 fprintf(stderr
, "0x%04x, ", a
[i
++]);
/* closes the printed list */
74 fprintf(stderr
,"}\n");
/*
 * setNuConvTestName: formats a label for the current test into the global
 * gNuConvTestName buffer.
 *   codepage  - converter name (presumably fills the first %s - TODO confirm)
 *   direction - direction word (presumably fills the second %s - TODO confirm)
 * The format string also expects two %d values named InputBufSiz and
 * OutputBufSiz; the argument list is cut off in this view, so which
 * expressions supply them cannot be confirmed from here.
 * NOTE(review): sprintf does not bound the write; a very long codepage or
 * direction string could overrun gNuConvTestName. Consider snprintf with
 * sizeof(gNuConvTestName).
 */
77 static void setNuConvTestName(const char *codepage
, const char *direction
)
79 sprintf(gNuConvTestName
, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
/* Forward declaration of a file-local test that is registered below under
 * "tsconv/nccbtst/TestCallBackFailure". */
87 static void TestCallBackFailure(void);
/* Prototype of the registration entry point; it has external linkage
 * (no static), unlike the individual tests. */
89 void addTestConvertErrorCallBack(TestNode
** root
);
/*
 * addTestConvertErrorCallBack: registers this file's tests on the test tree.
 *   root - test tree handle passed through to every addTest call
 * Each addTest call pairs a test function with its path under
 * "tsconv/nccbtst/".
 */
91 void addTestConvertErrorCallBack(TestNode
** root
)
93 addTest(root
, &TestSkipCallBack
, "tsconv/nccbtst/TestSkipCallBack");
94 addTest(root
, &TestStopCallBack
, "tsconv/nccbtst/TestStopCallBack");
95 addTest(root
, &TestSubCallBack
, "tsconv/nccbtst/TestSubCallBack");
96 addTest(root
, &TestSubWithValueCallBack
, "tsconv/nccbtst/TestSubWithValueCallBack");
/* The next registrations sit behind the legacy-conversion switch. The
 * matching #endif is not visible in this view, so exactly which calls it
 * covers cannot be confirmed from here. */
98 #if !UCONFIG_NO_LEGACY_CONVERSION
99 addTest(root
, &TestLegalAndOtherCallBack
, "tsconv/nccbtst/TestLegalAndOtherCallBack");
100 addTest(root
, &TestSingleByteCallBack
, "tsconv/nccbtst/TestSingleByteCallBack");
103 addTest(root
, &TestCallBackFailure
, "tsconv/nccbtst/TestCallBackFailure");
/*
 * TestSkipCallBack: runs TestSkip over several chunk-size combinations.
 * The arguments are (inputsize, outputsize), as declared by TestSkip.
 * Visible combinations: both large, input of 1, output of 1.
 */
106 static void TestSkipCallBack()
108 TestSkip(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
109 TestSkip(1,NEW_MAX_BUFFER
);
111 TestSkip(NEW_MAX_BUFFER
, 1);
/*
 * TestStopCallBack: runs TestStop over several chunk-size combinations.
 * The arguments are (inputsize, outputsize), as declared by TestStop.
 * Visible combinations: both large, input of 1, output of 1.
 */
114 static void TestStopCallBack()
116 TestStop(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
117 TestStop(1,NEW_MAX_BUFFER
);
119 TestStop(NEW_MAX_BUFFER
, 1);
/*
 * TestSubCallBack: runs TestSub, and under the legacy-conversion switch
 * TestEBCDIC_STATEFUL_Sub, over several chunk-size combinations.
 * Argument order for TestSub is presumably (inputsize, outputsize) like the
 * sibling tests; its definition is not visible in this view.
 */
122 static void TestSubCallBack()
124 TestSub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
125 TestSub(1,NEW_MAX_BUFFER
);
127 TestSub(NEW_MAX_BUFFER
, 1);
/* The stateful-EBCDIC variant gets all four 1 / NEW_MAX_BUFFER pairings. */
129 #if !UCONFIG_NO_LEGACY_CONVERSION
130 TestEBCDIC_STATEFUL_Sub(1, 1);
131 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER
);
132 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, 1);
133 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
/*
 * TestSubWithValueCallBack: runs TestSubWithValue with all four pairings of
 * 1 and NEW_MAX_BUFFER as its two size arguments.
 */
137 static void TestSubWithValueCallBack()
139 TestSubWithValue(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
140 TestSubWithValue(1,NEW_MAX_BUFFER
);
141 TestSubWithValue(1,1);
142 TestSubWithValue(NEW_MAX_BUFFER
, 1);
/* Compiled only when legacy conversion is enabled; the matching #endif is
 * not visible in this view. */
145 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * TestLegalAndOtherCallBack: runs TestLegalAndOthers with all four pairings
 * of 1 and NEW_MAX_BUFFER as its two size arguments.
 */
146 static void TestLegalAndOtherCallBack()
148 TestLegalAndOthers(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
149 TestLegalAndOthers(1,NEW_MAX_BUFFER
);
150 TestLegalAndOthers(1,1);
151 TestLegalAndOthers(NEW_MAX_BUFFER
, 1);
/*
 * TestSingleByteCallBack: runs TestSingleByte over several chunk-size
 * combinations. Visible combinations: both large, first argument 1,
 * second argument 1.
 */
154 static void TestSingleByteCallBack()
156 TestSingleByte(NEW_MAX_BUFFER
, NEW_MAX_BUFFER
);
157 TestSingleByte(1,NEW_MAX_BUFFER
);
159 TestSingleByte(NEW_MAX_BUFFER
, 1);
/*
 * TestSkip: conversion checks using the SKIP callbacks
 * (UCNV_FROM_U_CALLBACK_SKIP and UCNV_TO_U_CALLBACK_SKIP) for one pair of
 * chunk sizes.
 *   inputsize  - stored into gInBufferSize
 *   outputsize - stored into gOutBufferSize
 * Every check has the same shape: a helper (testConvertFromUnicode,
 * testConvertToUnicode or a ...WithContext variant) is called with an input
 * table, an expected-output table, a converter name, the callback and an
 * offsets table; a false result is reported through log_err.
 * Many lines of the body (braces, #endif lines, call tails) are missing in
 * this view.
 */
163 static void TestSkip(int32_t inputsize
, int32_t outputsize
)
/* Expected bytes for the legacy fromUnicode checks. The same tables are
 * fed back in as input by the ibm-949/943/930 -> u checks further down. */
165 static const uint8_t expskipIBM_949
[]= {
166 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
168 static const uint8_t expskipIBM_943
[] = {
169 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
171 static const uint8_t expskipIBM_930
[] = {
172 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
/* Publish the requested chunk sizes through the file-level globals. */
174 gInBufferSize
= inputsize
;
175 gOutBufferSize
= outputsize
;
178 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
180 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Inputs: sampleText has 5 units but only 7 expected bytes for ibm-949, and
 * sampleText2 has 4 units but only 6 expected bytes for ibm-943. The offset
 * tables never mention source index 3 (ibm-949) or 2 (ibm-943), so those
 * units are the ones expected to be skipped. */
182 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
183 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
185 static const int32_t toIBM949Offsskip
[] = { 0, 1, 1, 2, 2, 4, 4 };
186 static const int32_t toIBM943Offsskip
[] = { 0, 0, 1, 1, 3, 3 };
188 if(!testConvertFromUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
189 expskipIBM_949
, UPRV_LENGTHOF(expskipIBM_949
), "ibm-949",
190 UCNV_FROM_U_CALLBACK_SKIP
, toIBM949Offsskip
, NULL
, 0 ))
191 log_err("u-> ibm-949 with skip did not match.\n");
192 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
193 expskipIBM_943
, UPRV_LENGTHOF(expskipIBM_943
), "ibm-943",
194 UCNV_FROM_U_CALLBACK_SKIP
, toIBM943Offsskip
, NULL
, 0 ))
195 log_err("u-> ibm-943 with skip did not match.\n");
/* Fixture for the ibm-930 fallback check described by the comment below.
 * The converter name and the tail of the call are missing in this view. */
199 static const UChar fromU
[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
200 static const uint8_t fromUBytes
[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
201 static const int32_t fromUOffsets
[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
203 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
204 if(!testConvertFromUnicode(fromU
, UPRV_LENGTHOF(fromU
),
205 fromUBytes
, UPRV_LENGTHOF(fromUBytes
),
207 UCNV_FROM_U_CALLBACK_SKIP
, fromUOffsets
,
210 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
/*
 * TestSkip, fromUnicode with US-ASCII and Latin-1 style targets.
 * Both inputs contain 0x4e00 and the pair 0xd800, 0xdfff. The expected
 * output keeps only 0x61, 0x31, 0x39 (plus 0xa0 for Latin-1), and the
 * offsets point at source indices 0, 3, 6 (plus 1 for Latin-1).
 * The converter-name argument and the tail of each call are missing in this
 * view; the names in the log_err messages are the only visible hint of
 * which converter each call targets.
 */
216 static const UChar usasciiFromU
[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
217 static const uint8_t usasciiFromUBytes
[] = { 0x61, 0x31, 0x39 };
218 static const int32_t usasciiFromUOffsets
[] = { 0, 3, 6 };
220 static const UChar latin1FromU
[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221 static const uint8_t latin1FromUBytes
[] = { 0x61, 0xa0, 0x31, 0x39 };
222 static const int32_t latin1FromUOffsets
[] = { 0, 1, 3, 6 };
225 if(!testConvertFromUnicode(usasciiFromU
, UPRV_LENGTHOF(usasciiFromU
),
226 usasciiFromUBytes
, UPRV_LENGTHOF(usasciiFromUBytes
),
228 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
231 log_err("u->US-ASCII with skip did not match.\n");
/* Same fixture again, only when legacy conversion is enabled. */
234 #if !UCONFIG_NO_LEGACY_CONVERSION
235 /* SBCS NLTC codepage 367 for US-ASCII */
236 if(!testConvertFromUnicode(usasciiFromU
, UPRV_LENGTHOF(usasciiFromU
),
237 usasciiFromUBytes
, UPRV_LENGTHOF(usasciiFromUBytes
),
239 UCNV_FROM_U_CALLBACK_SKIP
, usasciiFromUOffsets
,
242 log_err("u->ibm-367 with skip did not match.\n");
247 if(!testConvertFromUnicode(latin1FromU
, UPRV_LENGTHOF(latin1FromU
),
248 latin1FromUBytes
, UPRV_LENGTHOF(latin1FromUBytes
),
250 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
253 log_err("u->LATIN_1 with skip did not match.\n");
/* The Latin-1 fixture is reused for the windows-1252 check. */
256 #if !UCONFIG_NO_LEGACY_CONVERSION
258 if(!testConvertFromUnicode(latin1FromU
, UPRV_LENGTHOF(latin1FromU
),
259 latin1FromUBytes
, UPRV_LENGTHOF(latin1FromUBytes
),
261 UCNV_FROM_U_CALLBACK_SKIP
, latin1FromUOffsets
,
264 log_err("u->windows-1252 with skip did not match.\n");
/*
 * TestSkip, fixture tables for the multi-byte and stateful fromUnicode
 * checks. Each converter gets an input table, an expected-bytes table and an
 * offsets table. The inline comments mark the units that are unassigned or
 * illegal for the target. Several initializers are cut short in this view
 * (their remaining rows and closing braces are missing).
 */
269 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
270 static const uint8_t toIBM943
[]= { 0x61, 0x61 };
271 static const int32_t offset
[]= {0, 4};
/* EUC-JP */
274 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
275 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
278 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
/* EUC-TW */
281 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
282 static const uint8_t to_euc_tw
[]={
283 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
284 0x61, 0xe6, 0xca, 0x8a,
286 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
/* ISO-2022-JP: plain skip, then a second input that also has an illegal
 * unit for the UCNV_SKIP_STOP_ON_ILLEGAL check. */
289 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
290 static const uint8_t to_iso_2022_jp
[]={
295 static const int32_t from_iso_2022_jpOffs
[] ={0,2};
298 UChar
const iso_2022_jp_inputText2
[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
299 static const uint8_t to_iso_2022_jp2
[]={
304 static const int32_t from_iso_2022_jpOffs2
[] ={0,2};
/* ISO-2022-CN: same two-variant layout. */
307 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
308 static const uint8_t to_iso_2022_cn
[]={
311 static const int32_t from_iso_2022_cnOffs
[] ={
316 static const UChar iso_2022_cn_inputText1
[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
317 static const uint8_t to_iso_2022_cn1
[]={
321 static const int32_t from_iso_2022_cnOffs1
[] ={ 0, 2 };
/* ISO-2022-KR: both expected-bytes tables start with 0x1b 0x24 0x29 0x43. */
324 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
325 static const uint8_t to_iso_2022_kr
[]={
326 0x1b, 0x24, 0x29, 0x43,
332 static const int32_t from_iso_2022_krOffs
[] ={
341 static const UChar iso_2022_kr_inputText1
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
342 static const uint8_t to_iso_2022_kr1
[]={
343 0x1b, 0x24, 0x29, 0x43,
349 static const int32_t from_iso_2022_krOffs1
[] ={
/* HZ */
357 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
359 static const uint8_t to_hz
[]={
361 0x7e, 0x7b, 0x26, 0x30,
366 static const int32_t from_hzOffs
[] ={
373 static const UChar hz_inputText1
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
375 static const uint8_t to_hz1
[]={
377 0x7e, 0x7b, 0x26, 0x30,
382 static const int32_t from_hzOffs1
[] ={
/* SCSU: the input has a single illegal unit. */
391 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
393 static const uint8_t to_SCSU
[]={
399 static const int32_t from_SCSUOffs
[] ={
/* ISCII fixtures are declared only when legacy conversion is enabled. */
405 #if !UCONFIG_NO_LEGACY_CONVERSION
407 static const UChar iscii_inputText
[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
408 static const uint8_t to_iscii
[]={
412 static const int32_t from_isciiOffs
[] ={
417 static const UChar iscii_inputText1
[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
418 static const uint8_t to_iscii1
[]={
423 static const int32_t from_isciiOffs1
[] ={0,2};
/*
 * TestSkip, fromUnicode checks for the multi-byte and stateful converters.
 * Plain checks call testConvertFromUnicode with UCNV_FROM_U_CALLBACK_SKIP.
 * The "...WithContext" checks additionally pass UCNV_SKIP_STOP_ON_ILLEGAL
 * and U_ILLEGAL_CHAR_FOUND (presumably the callback context and the
 * expected error code - TODO confirm against the helper's signature).
 */
425 if(!testConvertFromUnicode(inputTest
, UPRV_LENGTHOF(inputTest
),
426 toIBM943
, UPRV_LENGTHOF(toIBM943
), "ibm-943",
427 UCNV_FROM_U_CALLBACK_SKIP
, offset
, NULL
, 0 ))
428 log_err("u-> ibm-943 with skip did not match.\n");
430 if(!testConvertFromUnicode(euc_jp_inputText
, UPRV_LENGTHOF(euc_jp_inputText
),
431 to_euc_jp
, UPRV_LENGTHOF(to_euc_jp
), "IBM-eucJP",
432 UCNV_FROM_U_CALLBACK_SKIP
, fromEUC_JPOffs
, NULL
, 0 ))
433 log_err("u-> euc-jp with skip did not match.\n");
435 if(!testConvertFromUnicode(euc_tw_inputText
, UPRV_LENGTHOF(euc_tw_inputText
),
436 to_euc_tw
, UPRV_LENGTHOF(to_euc_tw
), "euc-tw",
437 UCNV_FROM_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0 ))
438 log_err("u-> euc-tw with skip did not match.\n");
/* ISO-2022-JP: plain skip, then skip with stop-on-illegal. */
441 if(!testConvertFromUnicode(iso_2022_jp_inputText
, UPRV_LENGTHOF(iso_2022_jp_inputText
),
442 to_iso_2022_jp
, UPRV_LENGTHOF(to_iso_2022_jp
), "iso-2022-jp",
443 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0 ))
444 log_err("u-> iso-2022-jp with skip did not match.\n");
447 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2
, UPRV_LENGTHOF(iso_2022_jp_inputText2
),
448 to_iso_2022_jp2
, UPRV_LENGTHOF(to_iso_2022_jp2
), "iso-2022-jp",
449 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_jpOffs2
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
450 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/* ISO-2022-CN: plain skip, then skip with stop-on-illegal. */
453 if(!testConvertFromUnicode(iso_2022_cn_inputText
, UPRV_LENGTHOF(iso_2022_cn_inputText
),
454 to_iso_2022_cn
, UPRV_LENGTHOF(to_iso_2022_cn
), "iso-2022-cn",
455 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0 ))
456 log_err("u-> iso-2022-cn with skip did not match.\n");
458 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1
, UPRV_LENGTHOF(iso_2022_cn_inputText1
),
459 to_iso_2022_cn1
, UPRV_LENGTHOF(to_iso_2022_cn1
), "iso-2022-cn",
460 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_cnOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
461 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/* ISO-2022-KR: plain skip, then skip with stop-on-illegal. */
464 if(!testConvertFromUnicode(iso_2022_kr_inputText
, UPRV_LENGTHOF(iso_2022_kr_inputText
),
465 to_iso_2022_kr
, UPRV_LENGTHOF(to_iso_2022_kr
), "iso-2022-kr",
466 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0 ))
467 log_err("u-> iso-2022-kr with skip did not match.\n");
469 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1
, UPRV_LENGTHOF(iso_2022_kr_inputText1
),
470 to_iso_2022_kr1
, UPRV_LENGTHOF(to_iso_2022_kr1
), "iso-2022-kr",
471 UCNV_FROM_U_CALLBACK_SKIP
, from_iso_2022_krOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
472 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/* HZ: the converter name is spelled "HZ" in the first call and "hz" in the
 * second. */
475 if(!testConvertFromUnicode(hz_inputText
, UPRV_LENGTHOF(hz_inputText
),
476 to_hz
, UPRV_LENGTHOF(to_hz
), "HZ",
477 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0 ))
478 log_err("u-> HZ with skip did not match.\n");
480 if(!testConvertFromUnicodeWithContext(hz_inputText1
, UPRV_LENGTHOF(hz_inputText1
),
481 to_hz1
, UPRV_LENGTHOF(to_hz1
), "hz",
482 UCNV_FROM_U_CALLBACK_SKIP
, from_hzOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
483 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/* SCSU */
487 if(!testConvertFromUnicode(SCSU_inputText
, UPRV_LENGTHOF(SCSU_inputText
),
488 to_SCSU
, UPRV_LENGTHOF(to_SCSU
), "SCSU",
489 UCNV_FROM_U_CALLBACK_SKIP
, from_SCSUOffs
, NULL
, 0 ))
490 log_err("u-> SCSU with skip did not match.\n");
/* ISCII checks run only when legacy conversion is enabled. */
492 #if !UCONFIG_NO_LEGACY_CONVERSION
494 if(!testConvertFromUnicode(iscii_inputText
, UPRV_LENGTHOF(iscii_inputText
),
495 to_iscii
, UPRV_LENGTHOF(to_iscii
), "ISCII,version=0",
496 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0 ))
497 log_err("u-> iscii with skip did not match.\n");
499 if(!testConvertFromUnicodeWithContext(iscii_inputText1
, UPRV_LENGTHOF(iscii_inputText1
),
500 to_iscii1
, UPRV_LENGTHOF(to_iscii1
), "ISCII,version=0",
501 UCNV_FROM_U_CALLBACK_SKIP
, from_isciiOffs1
, NULL
, 0,UCNV_SKIP_STOP_ON_ILLEGAL
,U_ILLEGAL_CHAR_FOUND
))
502 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
/*
 * TestSkip, fromUnicode checks for BOCU-1 and CESU-8.
 * The tables named sampleText hold encoded bytes and the tables named
 * expected hold UChars, so in these from-Unicode calls "expected" is the
 * input and "sampleText" is the expected output. Large parts of the tables
 * and the converter-name arguments are missing in this view.
 * NOTE(review): both log_verbose messages name UCNV_TO_U_CALLBACK_SKIP while
 * the calls pass UCNV_FROM_U_CALLBACK_SKIP.
 */
506 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
508 static const uint8_t sampleText
[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
509 0xFB, 0xEE, 0x28, /* from source offset 0 */
527 0xF9, 0x28, /* from 16 */
536 0xFA, 0x83, /* from 24 */
545 0xF9, 0xA2, /* from 32 */
547 0xFE, 0x16, 0x3A, 0x8C,
/* Row comments give the index of the first unit in every second row. */
556 static const UChar expected
[]={
557 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
558 0x0063, 0x0061, 0x000D, 0x000A,
560 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
561 0x0930, 0x0020, 0x0918, 0x0909,
563 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
564 0x4000, 0x4E00, 0x7777, 0x0020,
566 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
567 0x0020, 0xD7A3, 0xDC00, 0xD800,
569 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
570 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
572 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
/* Values repeat where one source unit yields several output bytes
 * (presumably one entry per output byte - TODO confirm). */
575 static const int32_t offsets
[]={
576 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
577 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
578 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
579 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
580 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
584 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
585 if(!testConvertFromUnicode(expected
, UPRV_LENGTHOF(expected
),
586 sampleText
, UPRV_LENGTHOF(sampleText
),
588 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
590 log_err("u->BOCU-1 with skip did not match.\n");
594 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
/* The expected/offsets tables for this CESU-8 section are not visible in
 * this view. */
596 const uint8_t sampleText
[]={
598 0xc4, 0xb5, /* U+0135 */
599 0xed, 0x80, 0xa0, /* Hangul U+d020 */
600 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
601 0xee, 0x80, 0x80, /* PUA U+e000 */
602 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
604 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
605 0xd0, 0x80 /* U+0400 */
630 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
632 /* without offsets */
633 if(!testConvertFromUnicode(expected
, UPRV_LENGTHOF(expected
),
634 sampleText
, UPRV_LENGTHOF(sampleText
),
636 UCNV_FROM_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
638 log_err("u->CESU-8 with skip did not match.\n");
/* Same conversion again, this time passing the offsets table. */
642 if(!testConvertFromUnicode(expected
, UPRV_LENGTHOF(expected
),
643 sampleText
, UPRV_LENGTHOF(sampleText
),
645 UCNV_FROM_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
647 log_err("u->CESU-8 with skip did not match.\n");
/*
 * TestSkip, toUnicode checks: legacy ibm-949/943/930, then US-ASCII and
 * Latin-1 style sources. The expskipIBM_* byte tables declared at the top
 * of the function are the input here.
 */
652 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
654 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Expected UChars; each table is one unit shorter than the sampleText /
 * sampleText2 inputs used for the opposite direction. */
657 static const UChar IBM_949skiptoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
658 static const UChar IBM_943skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
659 static const UChar IBM_930skiptoUnicode
[]= { 0x6D63, 0x6D64, 0x6D66 };
/* The ibm-930 offsets start at 1, not 0; expskipIBM_930 begins with the
 * byte 0x0e. */
661 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5};
662 static const int32_t fromIBM943Offs
[] = { 0, 2, 4};
663 static const int32_t fromIBM930Offs
[] = { 1, 3, 5};
665 if(!testConvertToUnicode(expskipIBM_949
, UPRV_LENGTHOF(expskipIBM_949
),
666 IBM_949skiptoUnicode
, UPRV_LENGTHOF(IBM_949skiptoUnicode
),"ibm-949",
667 UCNV_TO_U_CALLBACK_SKIP
, fromIBM949Offs
, NULL
, 0 ))
668 log_err("ibm-949->u with skip did not match.\n");
669 if(!testConvertToUnicode(expskipIBM_943
, UPRV_LENGTHOF(expskipIBM_943
),
670 IBM_943skiptoUnicode
, UPRV_LENGTHOF(IBM_943skiptoUnicode
),"ibm-943",
671 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offs
, NULL
, 0 ))
672 log_err("ibm-943->u with skip did not match.\n");
675 if(!testConvertToUnicode(expskipIBM_930
, UPRV_LENGTHOF(expskipIBM_930
),
676 IBM_930skiptoUnicode
, UPRV_LENGTHOF(IBM_930skiptoUnicode
),"ibm-930",
677 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0 ))
678 log_err("ibm-930->u with skip did not match.\n");
/* Same ibm-930 data through the WithContext helper, passing "i" and
 * U_ILLEGAL_CHAR_FOUND as the two extra arguments.
 * NOTE(review): the failure message is identical to the previous check, so
 * the two cannot be told apart in the log. */
681 if(!testConvertToUnicodeWithContext(expskipIBM_930
, UPRV_LENGTHOF(expskipIBM_930
),
682 IBM_930skiptoUnicode
, UPRV_LENGTHOF(IBM_930skiptoUnicode
),"ibm-930",
683 UCNV_TO_U_CALLBACK_SKIP
, fromIBM930Offs
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
684 log_err("ibm-930->u with skip did not match.\n");
/* US-ASCII input: 0x80 has no entry in the expected output, and the
 * offsets jump from 0 to 2. */
689 static const uint8_t usasciiToUBytes
[] = { 0x61, 0x80, 0x31 };
690 static const UChar usasciiToU
[] = { 0x61, 0x31 };
691 static const int32_t usasciiToUOffsets
[] = { 0, 2 };
/* Latin-1 input: all three bytes have an expected UChar, nothing is
 * skipped. */
693 static const uint8_t latin1ToUBytes
[] = { 0x61, 0xa0, 0x31 };
694 static const UChar latin1ToU
[] = { 0x61, 0xa0, 0x31 };
695 static const int32_t latin1ToUOffsets
[] = { 0, 1, 2 };
/* The converter-name argument and the tail of each call below are missing
 * in this view. */
698 if(!testConvertToUnicode(usasciiToUBytes
, UPRV_LENGTHOF(usasciiToUBytes
),
699 usasciiToU
, UPRV_LENGTHOF(usasciiToU
),
701 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
704 log_err("US-ASCII->u with skip did not match.\n");
707 #if !UCONFIG_NO_LEGACY_CONVERSION
708 /* SBCS NLTC codepage 367 for US-ASCII */
709 if(!testConvertToUnicode(usasciiToUBytes
, UPRV_LENGTHOF(usasciiToUBytes
),
710 usasciiToU
, UPRV_LENGTHOF(usasciiToU
),
712 UCNV_TO_U_CALLBACK_SKIP
, usasciiToUOffsets
,
715 log_err("ibm-367->u with skip did not match.\n");
720 if(!testConvertToUnicode(latin1ToUBytes
, UPRV_LENGTHOF(latin1ToUBytes
),
721 latin1ToU
, UPRV_LENGTHOF(latin1ToU
),
723 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
726 log_err("LATIN_1->u with skip did not match.\n");
729 #if !UCONFIG_NO_LEGACY_CONVERSION
731 if(!testConvertToUnicode(latin1ToUBytes
, UPRV_LENGTHOF(latin1ToUBytes
),
732 latin1ToU
, UPRV_LENGTHOF(latin1ToU
),
734 UCNV_TO_U_CALLBACK_SKIP
, latin1ToUOffsets
,
737 log_err("windows-1252->u with skip did not match.\n");
/*
 * TestSkip, toUnicode checks for the stateful and multi-byte legacy
 * converters (ibm-930, IBM-eucJP, euc-tw, iso-2022-jp/cn/kr, HZ, ISCII,
 * LMBCS-1). The fixture tables come first and the calls follow. The
 * unassigned input sequences are marked inline. Several initializers are
 * cut short in this view.
 */
742 #if !UCONFIG_NO_LEGACY_CONVERSION
744 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
745 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
747 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0x03b4
749 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 5};
/* EUC-JP */
753 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
754 0x8f, 0xda, 0xa1, /*unassigned*/
757 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
758 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 9};
/* EUC-TW */
761 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
762 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
765 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
766 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3, 11, 13};
/* ISO-2022-JP */
768 static const uint8_t sampleTxt_iso_2022_jp
[]={
770 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
771 0x1b, 0x28, 0x42, 0x42,
774 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x42 };
775 static const int32_t from_iso_2022_jpOffs
[] ={ 0,9 };
/* ISO-2022-CN */
778 static const uint8_t sampleTxt_iso_2022_cn
[]={
780 0x1B, 0x24, 0x29, 0x47,
781 0x0E, 0x40, 0x6f, /*unassigned*/
786 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x42 };
787 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 11 };
/* ISO-2022-KR: the first expected offset is 4, and the input starts with
 * the four bytes 0x1b 0x24 0x29 0x43. */
790 static const uint8_t sampleTxt_iso_2022_kr
[]={
791 0x1b, 0x24, 0x29, 0x43,
799 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x03A0,0x51, 0x42,0x43};
800 static const int32_t from_iso_2022_krOffs
[] ={ 4, 9, 12, 13 , 14 };
/* HZ */
803 static const uint8_t sampleTxt_hz
[]={
805 0x7e, 0x7b, 0x26, 0x30,
806 0x7f, 0x1E, /*unassigned*/
809 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
812 static const UChar hztoUnicode
[]={
819 static const int32_t from_hzOffs
[] ={0,3,7,11,18, };
/* ISCII: both data tables are missing in this view. */
822 static const uint8_t sampleTxt_iscii
[]={
832 static const UChar isciitoUnicode
[]={
841 static const int32_t from_isciiOffs
[] ={0,1,3,4,5,7 };
/* LMBCS */
844 static const uint8_t sampleTxtLMBCS
[]={ 0x12, 0xc9, 0x50,
845 0x12, 0x92, 0xa0, /*unassigned*/
848 static const UChar LMBCSToUnicode
[]={ 0x4e2e, 0xe5c4};
849 static const int32_t fromLMBCS
[] = {0, 6};
/* Calls. The ibm-930 data is run twice: once plainly and once through the
 * WithContext helper with "i" and U_ILLEGAL_CHAR_FOUND.
 * NOTE(review): both ibm-930 checks share the same failure message. */
851 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL
),
852 EBCIDIC_STATEFUL_toUnicode
, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode
),"ibm-930",
853 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
854 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
856 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL
, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL
),
857 EBCIDIC_STATEFUL_toUnicode
, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode
),"ibm-930",
858 UCNV_TO_U_CALLBACK_SKIP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
859 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
861 if(!testConvertToUnicode(sampleTxt_euc_jp
, UPRV_LENGTHOF(sampleTxt_euc_jp
),
862 euc_jptoUnicode
, UPRV_LENGTHOF(euc_jptoUnicode
),"IBM-eucJP",
863 UCNV_TO_U_CALLBACK_SKIP
, from_euc_jpOffs
, NULL
, 0))
864 log_err("euc-jp->u with skip did not match.\n");
868 if(!testConvertToUnicode(sampleTxt_euc_tw
, UPRV_LENGTHOF(sampleTxt_euc_tw
),
869 euc_twtoUnicode
, UPRV_LENGTHOF(euc_twtoUnicode
),"euc-tw",
870 UCNV_TO_U_CALLBACK_SKIP
, from_euc_twOffs
, NULL
, 0))
871 log_err("euc-tw->u with skip did not match.\n");
874 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
875 iso_2022_jptoUnicode
, UPRV_LENGTHOF(iso_2022_jptoUnicode
),"iso-2022-jp",
876 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_jpOffs
, NULL
, 0))
877 log_err("iso-2022-jp->u with skip did not match.\n");
879 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, UPRV_LENGTHOF(sampleTxt_iso_2022_cn
),
880 iso_2022_cntoUnicode
, UPRV_LENGTHOF(iso_2022_cntoUnicode
),"iso-2022-cn",
881 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_cnOffs
, NULL
, 0))
882 log_err("iso-2022-cn->u with skip did not match.\n");
884 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, UPRV_LENGTHOF(sampleTxt_iso_2022_kr
),
885 iso_2022_krtoUnicode
, UPRV_LENGTHOF(iso_2022_krtoUnicode
),"iso-2022-kr",
886 UCNV_TO_U_CALLBACK_SKIP
, from_iso_2022_krOffs
, NULL
, 0))
887 log_err("iso-2022-kr->u with skip did not match.\n");
889 if(!testConvertToUnicode(sampleTxt_hz
, UPRV_LENGTHOF(sampleTxt_hz
),
890 hztoUnicode
, UPRV_LENGTHOF(hztoUnicode
),"HZ",
891 UCNV_TO_U_CALLBACK_SKIP
, from_hzOffs
, NULL
, 0))
892 log_err("HZ->u with skip did not match.\n");
894 if(!testConvertToUnicode(sampleTxt_iscii
, UPRV_LENGTHOF(sampleTxt_iscii
),
895 isciitoUnicode
, UPRV_LENGTHOF(isciitoUnicode
),"ISCII,version=0",
896 UCNV_TO_U_CALLBACK_SKIP
, from_isciiOffs
, NULL
, 0))
897 log_err("iscii->u with skip did not match.\n");
899 if(!testConvertToUnicode(sampleTxtLMBCS
, UPRV_LENGTHOF(sampleTxtLMBCS
),
900 LMBCSToUnicode
, UPRV_LENGTHOF(LMBCSToUnicode
),"LMBCS-1",
901 UCNV_TO_U_CALLBACK_SKIP
, fromLMBCS
, NULL
, 0))
902 log_err("LMBCS->u with skip did not match.\n");
/*
 * TestSkip, toUnicode checks for the Unicode encodings: UTF-8, SCSU, BOCU-1
 * and CESU-8. All of them go through testConvertToUnicode with
 * UCNV_TO_U_CALLBACK_SKIP. Parts of several tables, and the declarations of
 * the BOCU-1 and CESU-8 expected/offsets arrays, are missing in this view.
 */
907 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
/* UTF-8: three expected UChars at source offsets 0, 1 and 6; the rest of
 * the input table is cut off here. */
909 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
911 UChar expected1
[] = { 0x0031, 0x4e8c, 0x0061};
912 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0006};
914 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
915 expected1
, UPRV_LENGTHOF(expected1
),"utf8",
916 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
/* NOTE(review): stray second ';' after the log_err call below. */
917 log_err("utf8->u with skip did not match.\n");;
920 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
/* SCSU: six input bytes, six expected UChars, offsets 0..5 - this fixture
 * expects one output unit per input byte and nothing skipped. */
922 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
923 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
924 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
926 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
927 expected1
, UPRV_LENGTHOF(expected1
),"SCSU",
928 UCNV_TO_U_CALLBACK_SKIP
, offsets1
, NULL
, 0 ))
929 log_err("scsu->u with skip did not match.\n");
932 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
/* BOCU-1 input with deliberately wrong trail bytes; the trailing comments
 * give byte offsets into this table. */
934 const uint8_t sampleText
[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
935 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
936 0x24, 0x1E, 0x52, /* 3 */
939 0x40, 0x07, /* 8 - wrong trail byte */
942 0xD0, 0x20, /* 12 - wrong trail byte */
963 0xFB, 0x16, 0x87, /* 42 */
970 0xFC, 0x10, 0x3E, /* 56 */
971 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
973 0xFC, 0x03, 0xAC, /* 64 */
974 0xFF, /* 67 - FF just resets the state without encoding anything */
/* Presumably the rows of the expected UChar table; its declaration line is
 * not visible in this view. */
981 0xFEFF, 0x0061, 0x0062, 0x0020,
982 0x0063, 0x0061, 0x000D, 0x000A,
983 0x0020, 0x0000, 0x00DF, 0x00E6,
984 0x0930, 0x0020, 0x0918, 0x0909,
985 0x3086, 0x304D, 0x0020, 0x3053,
986 0x4000, 0x4E00, 0x7777, 0x0020,
987 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
988 0x0020, 0xD7A3, 0xDC00, 0xD800,
989 0xD800, 0xDC00, 0xD845, 0xDDDD,
990 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
991 0xDFFF, 0x0001, 0x0E40, 0x0020,
/* Presumably the rows of the offsets table (declaration not visible). The
 * inline comments mark input offsets that are skipped and repeated values
 * for trail units. */
995 0, 3, 6, 7, /* skip 8, */
996 10, 11, /* skip 12, */
998 20, 21, 23, 24, 25, 26, 28, 29,
999 30, 31, 33, 35, 37, 38,
1001 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1002 63, 64, /* trail */ 64, /* reset only 67, */
1007 if(!testConvertToUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1008 expected
, UPRV_LENGTHOF(expected
), "BOCU-1",
1009 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1011 log_err("BOCU-1->u with skip did not match.\n");
1015 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
/* CESU-8 input; the number at the start of each trailing comment is the
 * byte offset of that sequence in the table. */
1017 const uint8_t sampleText
[]={
1019 0xc0, 0x80, /* 1 non-shortest form */
1020 0xc4, 0xb5, /* 3 U+0135 */
1021 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1022 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1023 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1024 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1025 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1027 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1028 0xed, 0xa0, /* 28 incomplete sequence */
1029 0xd0, 0x80 /* 30 U+0400 */
1059 /* without offsets */
1060 if(!testConvertToUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1061 expected
, UPRV_LENGTHOF(expected
), "CESU-8",
1062 UCNV_TO_U_CALLBACK_SKIP
, NULL
, NULL
, 0)
1064 log_err("CESU-8->u with skip did not match.\n");
/* Same conversion again, this time passing the offsets table. */
1068 if(!testConvertToUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1069 expected
, UPRV_LENGTHOF(expected
), "CESU-8",
1070 UCNV_TO_U_CALLBACK_SKIP
, offsets
, NULL
, 0)
1072 log_err("CESU-8->u with skip did not match.\n");
1077 static void TestStop(int32_t inputsize
, int32_t outputsize
)
1079 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1080 static const UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1082 static const uint8_t expstopIBM_949
[]= {
1083 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1085 static const uint8_t expstopIBM_943
[] = {
1086 0x9f, 0xaf, 0x9f, 0xb1};
1088 static const uint8_t expstopIBM_930
[] = {
1089 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1091 static const UChar IBM_949stoptoUnicode
[]= {0x0000, 0xAC00, 0xAC01};
1092 static const UChar IBM_943stoptoUnicode
[]= { 0x6D63, 0x6D64};
1093 static const UChar IBM_930stoptoUnicode
[]= { 0x6D63, 0x6D64};
1096 static const int32_t toIBM949Offsstop
[] = { 0, 1, 1, 2, 2};
1097 static const int32_t toIBM943Offsstop
[] = { 0, 0, 1, 1};
1098 static const int32_t toIBM930Offsstop
[] = { 0, 0, 0, 1, 1};
1100 static const int32_t fromIBM949Offs
[] = { 0, 1, 3};
1101 static const int32_t fromIBM943Offs
[] = { 0, 2};
1102 static const int32_t fromIBM930Offs
[] = { 1, 3};
1104 gInBufferSize
= inputsize
;
1105 gOutBufferSize
= outputsize
;
1109 #if !UCONFIG_NO_LEGACY_CONVERSION
1110 if(!testConvertFromUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1111 expstopIBM_949
, UPRV_LENGTHOF(expstopIBM_949
), "ibm-949",
1112 UCNV_FROM_U_CALLBACK_STOP
, toIBM949Offsstop
, NULL
, 0 ))
1113 log_err("u-> ibm-949 with stop did not match.\n");
1114 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1115 expstopIBM_943
, UPRV_LENGTHOF(expstopIBM_943
), "ibm-943",
1116 UCNV_FROM_U_CALLBACK_STOP
, toIBM943Offsstop
, NULL
, 0))
1117 log_err("u-> ibm-943 with stop did not match.\n");
1118 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1119 expstopIBM_930
, UPRV_LENGTHOF(expstopIBM_930
), "ibm-930",
1120 UCNV_FROM_U_CALLBACK_STOP
, toIBM930Offsstop
, NULL
, 0 ))
1121 log_err("u-> ibm-930 with stop did not match.\n");
1123 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1125 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1126 static const uint8_t toIBM943
[]= { 0x61,};
1127 static const int32_t offset
[]= {0,} ;
1130 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1131 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1132 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,};
1135 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1136 static const uint8_t to_euc_tw
[]={
1137 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1138 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,};
1141 static const UChar iso_2022_jp_inputText
[]={0x0041, 0x00E9, 0x0042, };
1142 static const uint8_t to_iso_2022_jp
[]={
1146 static const int32_t from_iso_2022_jpOffs
[] ={0,};
1149 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1150 static const uint8_t to_iso_2022_cn
[]={
1154 static const int32_t from_iso_2022_cnOffs
[] ={
1160 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1161 static const uint8_t to_iso_2022_kr
[]={
1162 0x1b, 0x24, 0x29, 0x43,
1166 static const int32_t from_iso_2022_krOffs
[] ={
1173 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1175 static const uint8_t to_hz
[]={
1177 0x7e, 0x7b, 0x26, 0x30,
1180 static const int32_t from_hzOffs
[] ={
1186 static const UChar iscii_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1187 static const uint8_t to_iscii
[]={
1190 static const int32_t from_isciiOffs
[] ={
1194 if(!testConvertFromUnicode(inputTest
, UPRV_LENGTHOF(inputTest
),
1195 toIBM943
, UPRV_LENGTHOF(toIBM943
), "ibm-943",
1196 UCNV_FROM_U_CALLBACK_STOP
, offset
, NULL
, 0 ))
1197 log_err("u-> ibm-943 with stop did not match.\n");
1199 if(!testConvertFromUnicode(euc_jp_inputText
, UPRV_LENGTHOF(euc_jp_inputText
),
1200 to_euc_jp
, UPRV_LENGTHOF(to_euc_jp
), "IBM-eucJP",
1201 UCNV_FROM_U_CALLBACK_STOP
, fromEUC_JPOffs
, NULL
, 0 ))
1202 log_err("u-> euc-jp with stop did not match.\n");
1204 if(!testConvertFromUnicode(euc_tw_inputText
, UPRV_LENGTHOF(euc_tw_inputText
),
1205 to_euc_tw
, UPRV_LENGTHOF(to_euc_tw
), "euc-tw",
1206 UCNV_FROM_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1207 log_err("u-> euc-tw with stop did not match.\n");
1209 if(!testConvertFromUnicode(iso_2022_jp_inputText
, UPRV_LENGTHOF(iso_2022_jp_inputText
),
1210 to_iso_2022_jp
, UPRV_LENGTHOF(to_iso_2022_jp
), "iso-2022-jp",
1211 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1212 log_err("u-> iso-2022-jp with stop did not match.\n");
1214 if(!testConvertFromUnicode(iso_2022_jp_inputText
, UPRV_LENGTHOF(iso_2022_jp_inputText
),
1215 to_iso_2022_jp
, UPRV_LENGTHOF(to_iso_2022_jp
), "iso-2022-jp",
1216 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_jpOffs
, NULL
, 0 ))
1217 log_err("u-> iso-2022-jp with stop did not match.\n");
1219 if(!testConvertFromUnicode(iso_2022_cn_inputText
, UPRV_LENGTHOF(iso_2022_cn_inputText
),
1220 to_iso_2022_cn
, UPRV_LENGTHOF(to_iso_2022_cn
), "iso-2022-cn",
1221 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_cnOffs
, NULL
, 0 ))
1222 log_err("u-> iso-2022-cn with stop did not match.\n");
1224 if(!testConvertFromUnicode(iso_2022_kr_inputText
, UPRV_LENGTHOF(iso_2022_kr_inputText
),
1225 to_iso_2022_kr
, UPRV_LENGTHOF(to_iso_2022_kr
), "iso-2022-kr",
1226 UCNV_FROM_U_CALLBACK_STOP
, from_iso_2022_krOffs
, NULL
, 0 ))
1227 log_err("u-> iso-2022-kr with stop did not match.\n");
1229 if(!testConvertFromUnicode(hz_inputText
, UPRV_LENGTHOF(hz_inputText
),
1230 to_hz
, UPRV_LENGTHOF(to_hz
), "HZ",
1231 UCNV_FROM_U_CALLBACK_STOP
, from_hzOffs
, NULL
, 0 ))
1232 log_err("u-> HZ with stop did not match.\n");\
1234 if(!testConvertFromUnicode(iscii_inputText
, UPRV_LENGTHOF(iscii_inputText
),
1235 to_iscii
, UPRV_LENGTHOF(to_iscii
), "ISCII,version=0",
1236 UCNV_FROM_U_CALLBACK_STOP
, from_isciiOffs
, NULL
, 0 ))
1237 log_err("u-> iscii with stop did not match.\n");
1243 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1245 static const UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1247 static const uint8_t to_SCSU
[]={
1251 int32_t from_SCSUOffs
[] ={
1255 if(!testConvertFromUnicode(SCSU_inputText
, UPRV_LENGTHOF(SCSU_inputText
),
1256 to_SCSU
, UPRV_LENGTHOF(to_SCSU
), "SCSU",
1257 UCNV_FROM_U_CALLBACK_STOP
, from_SCSUOffs
, NULL
, 0 ))
1258 log_err("u-> SCSU with skip did not match.\n");
1264 #if !UCONFIG_NO_LEGACY_CONVERSION
1265 if(!testConvertToUnicode(expstopIBM_949
, UPRV_LENGTHOF(expstopIBM_949
),
1266 IBM_949stoptoUnicode
, UPRV_LENGTHOF(IBM_949stoptoUnicode
),"ibm-949",
1267 UCNV_TO_U_CALLBACK_STOP
, fromIBM949Offs
, NULL
, 0 ))
1268 log_err("ibm-949->u with stop did not match.\n");
1269 if(!testConvertToUnicode(expstopIBM_943
, UPRV_LENGTHOF(expstopIBM_943
),
1270 IBM_943stoptoUnicode
, UPRV_LENGTHOF(IBM_943stoptoUnicode
),"ibm-943",
1271 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offs
, NULL
, 0 ))
1272 log_err("ibm-943->u with stop did not match.\n");
1273 if(!testConvertToUnicode(expstopIBM_930
, UPRV_LENGTHOF(expstopIBM_930
),
1274 IBM_930stoptoUnicode
, UPRV_LENGTHOF(IBM_930stoptoUnicode
),"ibm-930",
1275 UCNV_TO_U_CALLBACK_STOP
, fromIBM930Offs
, NULL
, 0 ))
1276 log_err("ibm-930->u with stop did not match.\n");
1278 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1281 static const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1282 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1284 static const UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63 };
1285 static const int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1};
1289 static const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1290 0x8f, 0xda, 0xa1, /*unassigned*/
1293 static const UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec};
1294 static const int32_t from_euc_jpOffs
[] ={ 0, 1, 3};
1297 static const uint8_t sampleTxt_euc_tw
[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1298 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1301 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2};
1302 int32_t from_euc_twOffs
[] ={ 0, 1, 3};
1306 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL
),
1307 EBCIDIC_STATEFUL_toUnicode
, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode
),"ibm-930",
1308 UCNV_TO_U_CALLBACK_STOP
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1309 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1311 if(!testConvertToUnicode(sampleTxt_euc_jp
, UPRV_LENGTHOF(sampleTxt_euc_jp
),
1312 euc_jptoUnicode
, UPRV_LENGTHOF(euc_jptoUnicode
),"IBM-eucJP",
1313 UCNV_TO_U_CALLBACK_STOP
, from_euc_jpOffs
, NULL
, 0))
1314 log_err("euc-jp->u with stop did not match.\n");
1316 if(!testConvertToUnicode(sampleTxt_euc_tw
, UPRV_LENGTHOF(sampleTxt_euc_tw
),
1317 euc_twtoUnicode
, UPRV_LENGTHOF(euc_twtoUnicode
),"euc-tw",
1318 UCNV_TO_U_CALLBACK_STOP
, from_euc_twOffs
, NULL
, 0 ))
1319 log_err("euc-tw->u with stop did not match.\n");
1323 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1325 static const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1327 static const UChar expected1
[] = { 0x0031, 0x4e8c,};
1328 static const int32_t offsets1
[] = { 0x0000, 0x0001};
1330 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
1331 expected1
, UPRV_LENGTHOF(expected1
),"utf8",
1332 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1333 log_err("utf8->u with stop did not match.\n");;
1335 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1337 static const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1338 static const UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1339 static const int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003};
1341 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
1342 expected1
, UPRV_LENGTHOF(expected1
),"SCSU",
1343 UCNV_TO_U_CALLBACK_STOP
, offsets1
, NULL
, 0 ))
1344 log_err("scsu->u with stop did not match.\n");;
/*
 * TestSub: table-driven checks of the SUBSTITUTE error callbacks.
 *
 * inputsize / outputsize are copied into the file-level gInBufferSize and
 * gOutBufferSize (presumably consumed by the testConvert* helpers -- confirm
 * against their definitions).
 *
 * Every check hands one input array, the expected output array and the expected
 * offsets array to testConvertFromUnicode() / testConvertToUnicode() (or the
 * ...WithContext variants) together with UCNV_FROM_U_CALLBACK_SUBSTITUTE or
 * UCNV_TO_U_CALLBACK_SUBSTITUTE. A zero (false) result from the helper is
 * reported through log_err() with a message naming the converter and direction.
 */
1349 static void TestSub(int32_t inputsize
, int32_t outputsize
)
/* Shared fixtures: Unicode inputs, expected code-page bytes for the fromUnicode
 * direction, expected text for the toUnicode direction, and the matching offsets. */
1351 static const UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1352 static const UChar sampleText2
[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1354 static const uint8_t expsubIBM_949
[] =
1355 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1357 static const uint8_t expsubIBM_943
[] = {
1358 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1360 static const uint8_t expsubIBM_930
[] = {
1361 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1363 static const UChar IBM_949subtoUnicode
[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1364 static const UChar IBM_943subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1365 static const UChar IBM_930subtoUnicode
[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367 static const int32_t toIBM949Offssub
[] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1368 static const int32_t toIBM943Offssub
[] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1369 static const int32_t toIBM930Offssub
[] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1371 static const int32_t fromIBM949Offs
[] = { 0, 1, 3, 5, 7 };
1372 static const int32_t fromIBM943Offs
[] = { 0, 2, 4, 6 };
1373 static const int32_t fromIBM930Offs
[] = { 1, 3, 5, 7 };
/* Publish the requested buffer sizes before any conversion helper is called. */
1375 gInBufferSize
= inputsize
;
1376 gOutBufferSize
= outputsize
;
/* fromUnicode, SUBSTITUTE callback: ibm-949, ibm-943 and ibm-930 against the
 * expsub* byte tables and to*Offssub offsets declared above. */
1380 #if !UCONFIG_NO_LEGACY_CONVERSION
1381 if(!testConvertFromUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1382 expsubIBM_949
, UPRV_LENGTHOF(expsubIBM_949
), "ibm-949",
1383 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM949Offssub
, NULL
, 0 ))
1384 log_err("u-> ibm-949 with subst did not match.\n");
1385 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1386 expsubIBM_943
, UPRV_LENGTHOF(expsubIBM_943
), "ibm-943",
1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM943Offssub
, NULL
, 0))
1388 log_err("u-> ibm-943 with subst did not match.\n");
1389 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1390 expsubIBM_930
, UPRV_LENGTHOF(expsubIBM_930
), "ibm-930",
1391 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, toIBM930Offssub
, NULL
, 0 ))
1392 log_err("u-> ibm-930 with subst did not match.\n");
/* Surrogate inputs: each input holds a surrogate pair (0xd801 0xdc01) followed by
 * a lone 0xd801. Per the ibm-943 tables, both are expected to come out as
 * 0xfc 0xfc (offsets 1,1 and 3,3). */
1394 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1396 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1397 static const uint8_t toIBM943
[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1398 static const int32_t offset
[]= {0, 1, 1, 3, 3, 4};
1402 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1403 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1404 0xf4, 0xfe, 0xf4, 0xfe,
1407 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1410 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1411 static const uint8_t to_euc_tw
[]={
1412 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1413 0xfd, 0xfe, 0xfd, 0xfe,
1414 0x61, 0xe6, 0xca, 0x8a,
1417 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1419 if(!testConvertFromUnicode(inputTest
, UPRV_LENGTHOF(inputTest
),
1420 toIBM943
, UPRV_LENGTHOF(toIBM943
), "ibm-943",
1421 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset
, NULL
, 0 ))
1422 log_err("u-> ibm-943 with substitute did not match.\n");
1424 if(!testConvertFromUnicode(euc_jp_inputText
, UPRV_LENGTHOF(euc_jp_inputText
),
1425 to_euc_jp
, UPRV_LENGTHOF(to_euc_jp
), "IBM-eucJP",
1426 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, fromEUC_JPOffs
, NULL
, 0 ))
1427 log_err("u-> euc-jp with substitute did not match.\n");
1429 if(!testConvertFromUnicode(euc_tw_inputText
, UPRV_LENGTHOF(euc_tw_inputText
),
1430 to_euc_tw
, UPRV_LENGTHOF(to_euc_tw
), "euc-tw",
1431 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1432 log_err("u-> euc-tw with substitute did not match.\n");
/* SCSU fromUnicode: the same input (with a lone 0xd801) is checked twice, once
 * through the plain helper and once through the WithContext helper, which is
 * given the context "i" and U_ILLEGAL_CHAR_FOUND. */
1436 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1438 UChar SCSU_inputText
[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1440 const uint8_t to_SCSU
[]={
1447 int32_t from_SCSUOffs
[] ={
1453 const uint8_t to_SCSU_1
[]={
1457 int32_t from_SCSUOffs_1
[] ={
1461 if(!testConvertFromUnicode(SCSU_inputText
, UPRV_LENGTHOF(SCSU_inputText
),
1462 to_SCSU
, UPRV_LENGTHOF(to_SCSU
), "SCSU",
1463 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs
, NULL
, 0 ))
1464 log_err("u-> SCSU with substitute did not match.\n");
1466 if(!testConvertFromUnicodeWithContext(SCSU_inputText
, UPRV_LENGTHOF(SCSU_inputText
),
1467 to_SCSU_1
, UPRV_LENGTHOF(to_SCSU_1
), "SCSU",
1468 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, from_SCSUOffs_1
, NULL
, 0,"i",U_ILLEGAL_CHAR_FOUND
))
1469 log_err("u-> SCSU with substitute did not match.\n");
/* UTF-8 fromUnicode: the unpaired surrogates at input indices 3 and 4 are each
 * expected as 0xef 0xbf 0xbd; the valid pair at index 1 becomes a 4-byte sequence. */
1472 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1474 static const UChar testinput
[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1475 static const uint8_t expectedUTF8
[]= { 0xe2, 0x82, 0xac,
1476 0xf0, 0x90, 0x90, 0x81,
1477 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1478 0xef, 0xbf, 0xbf, 0x61,
1481 static const int32_t offsets
[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1482 if(!testConvertFromUnicode(testinput
, UPRV_LENGTHOF(testinput
),
1483 expectedUTF8
, UPRV_LENGTHOF(expectedUTF8
), "utf8",
1484 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0 )) {
1485 log_err("u-> utf8 with substitute did not match.\n");
/* UTF-16 fromUnicode: input ends in 0xfeff. */
1489 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1491 static const UChar in
[]={ 0x0041, 0xfeff };
1493 static const uint8_t out
[]={
1504 static const int32_t offsets
[]={
1508 if(!testConvertFromUnicode(in
, UPRV_LENGTHOF(in
),
1509 out
, UPRV_LENGTHOF(out
), "UTF-16",
1510 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1512 log_err("u->UTF-16 with substitute did not match.\n");
/* UTF-32 fromUnicode: the first four expected output bytes carry offset -1, i.e.
 * they are not attributed to any input unit. Two byte orders of the expected
 * output are listed -- presumably selected by an endianness #if; confirm. */
1516 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1518 static const UChar in
[]={ 0x0041, 0xfeff };
1520 static const uint8_t out
[]={
1522 0x00, 0x00, 0xfe, 0xff,
1523 0x00, 0x00, 0x00, 0x41,
1524 0x00, 0x00, 0xfe, 0xff
1526 0xff, 0xfe, 0x00, 0x00,
1527 0x41, 0x00, 0x00, 0x00,
1528 0xff, 0xfe, 0x00, 0x00
1531 static const int32_t offsets
[]={
1532 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1535 if(!testConvertFromUnicode(in
, UPRV_LENGTHOF(in
),
1536 out
, UPRV_LENGTHOF(out
), "UTF-32",
1537 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1539 log_err("u->UTF-32 with substitute did not match.\n");
/* toUnicode, SUBSTITUTE callback: the expsub* byte tables are fed back in and
 * are expected to yield the *subtoUnicode text, with 0xfffd where a substitution
 * sequence was written. */
1545 #if !UCONFIG_NO_LEGACY_CONVERSION
1546 if(!testConvertToUnicode(expsubIBM_949
, UPRV_LENGTHOF(expsubIBM_949
),
1547 IBM_949subtoUnicode
, UPRV_LENGTHOF(IBM_949subtoUnicode
),"ibm-949",
1548 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM949Offs
, NULL
, 0 ))
1549 log_err("ibm-949->u with substitute did not match.\n");
1550 if(!testConvertToUnicode(expsubIBM_943
, UPRV_LENGTHOF(expsubIBM_943
),
1551 IBM_943subtoUnicode
, UPRV_LENGTHOF(IBM_943subtoUnicode
),"ibm-943",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offs
, NULL
, 0 ))
1553 log_err("ibm-943->u with substitute did not match.\n");
1554 if(!testConvertToUnicode(expsubIBM_930
, UPRV_LENGTHOF(expsubIBM_930
),
1555 IBM_930subtoUnicode
, UPRV_LENGTHOF(IBM_930subtoUnicode
),"ibm-930",
1556 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM930Offs
, NULL
, 0 ))
1557 log_err("ibm-930->u with substitute did not match.\n");
/* toUnicode with byte sequences marked unassigned in the tables: ibm-930, EUC-JP
 * and EUC-TW, each expecting 0xfffd at the corresponding position. */
1559 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1562 const uint8_t sampleTxtEBCIDIC_STATEFUL
[] ={
1563 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1565 UChar EBCIDIC_STATEFUL_toUnicode
[] ={ 0x6d63, 0xfffd, 0x03b4
1567 int32_t from_EBCIDIC_STATEFULOffsets
[]={ 1, 3, 5};
1571 const uint8_t sampleTxt_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1572 0x8f, 0xda, 0xa1, /*unassigned*/
1575 UChar euc_jptoUnicode
[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1576 int32_t from_euc_jpOffs
[] ={ 0, 1, 3, 6, 9, 11 };
1579 const uint8_t sampleTxt_euc_tw
[]={
1580 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1581 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1584 UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1585 int32_t from_euc_twOffs
[] ={ 0, 1, 3, 7, 11, 13};
1588 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL
, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL
),
1589 EBCIDIC_STATEFUL_toUnicode
, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode
),"ibm-930",
1590 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_EBCIDIC_STATEFULOffsets
, NULL
, 0 ))
1591 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1594 if(!testConvertToUnicode(sampleTxt_euc_jp
, UPRV_LENGTHOF(sampleTxt_euc_jp
),
1595 euc_jptoUnicode
, UPRV_LENGTHOF(euc_jptoUnicode
),"IBM-eucJP",
1596 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ))
1597 log_err("euc-jp->u with substitute did not match.\n");
1600 if(!testConvertToUnicode(sampleTxt_euc_tw
, UPRV_LENGTHOF(sampleTxt_euc_tw
),
1601 euc_twtoUnicode
, UPRV_LENGTHOF(euc_twtoUnicode
),"euc-tw",
1602 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_twOffs
, NULL
, 0 ))
1603 log_err("euc-tw->u with substitute did not match.\n");
/* Same EUC-JP data again through the WithContext helper ("i", U_ILLEGAL_CHAR_FOUND). */
1606 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp
, UPRV_LENGTHOF(sampleTxt_euc_jp
),
1607 euc_jptoUnicode
, UPRV_LENGTHOF(euc_jptoUnicode
),"IBM-eucJP",
1608 UCNV_TO_U_CALLBACK_SUBSTITUTE
, from_euc_jpOffs
, NULL
, 0 ,"i", U_ILLEGAL_CHAR_FOUND
))
1609 log_err("euc-jp->u with substitute did not match.\n");
/* UTF-8 toUnicode: two 0xfffd are expected, at input offsets 4 and 5. */
1613 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1615 const uint8_t sampleText1
[] = { 0x31, 0xe4, 0xba, 0x8c,
1617 UChar expected1
[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1618 int32_t offsets1
[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1620 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
1621 expected1
, UPRV_LENGTHOF(expected1
),"utf8",
1622 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1623 log_err("utf8->u with substitute did not match.\n");
/* SCSU toUnicode: each trailing 0x0c byte is expected to produce 0xfffd
 * (offsets 4 and 5). The failure message names the SUBSTITUTE callback so a
 * failure here is not mistaken for one in the STOP-callback test. */
1625 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1627 const uint8_t sampleText1
[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1628 UChar expected1
[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1629 int32_t offsets1
[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1631 if(!testConvertToUnicode(sampleText1
, UPRV_LENGTHOF(sampleText1
),
1632 expected1
, UPRV_LENGTHOF(expected1
),"SCSU",
1633 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0 ))
1634 log_err("scsu->u with substitute did not match.\n");
/* ibm-930 subchar/subchar1. Per the tables: fromUnicode expects 0x6d65 as
 * 0xfe 0xfe and 0xdf as the single byte 0x3f; toUnicode expects 0xfc 0xfc as
 * 0xfffd and the single byte 0x57 as 0x1a. */
1637 #if !UCONFIG_NO_LEGACY_CONVERSION
1638 log_verbose("Testing ibm-930 subchar/subchar1\n");
1640 static const UChar u1
[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1641 static const uint8_t s1
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1642 static const int32_t offsets1
[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1644 static const UChar u2
[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1645 static const uint8_t s2
[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1646 static const int32_t offsets2
[]={ 1, 3, 5, 7, 10 };
1648 if(!testConvertFromUnicode(u1
, UPRV_LENGTHOF(u1
), s1
, UPRV_LENGTHOF(s1
), "ibm-930",
1649 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1651 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1654 if(!testConvertToUnicode(s2
, UPRV_LENGTHOF(s2
), u2
, UPRV_LENGTHOF(u2
), "ibm-930",
1655 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1657 log_err("ibm-930->u subchar/subchar1 did not match.\n");
/* GB 18030 toUnicode: one-, two- and four-byte sequences; the offsets table
 * repeats an offset (20, 28) where a single sequence yields a surrogate pair. */
1661 log_verbose("Testing GB 18030 with substitute callbacks\n");
1663 static const UChar u2
[]={
1664 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1665 static const uint8_t gb2
[]={
1666 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1667 static const int32_t offsets2
[]={
1668 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1670 if(!testConvertToUnicode(gb2
, UPRV_LENGTHOF(gb2
), u2
, UPRV_LENGTHOF(u2
), "gb18030",
1671 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1673 log_err("gb18030->u with substitute did not match.\n");
/* UTF-7 toUnicode: the input is the concatenation spelled out in the comment
 * below; the expected text holds 0xfffd wherever a substitution is expected. */
1678 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1680 static const uint8_t utf7
[]={
1681 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1682 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1684 static const UChar unicode
[]={
1685 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1687 static const int32_t offsets
[]={
1688 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1691 if(!testConvertToUnicode(utf7
, UPRV_LENGTHOF(utf7
), unicode
, UPRV_LENGTHOF(unicode
), "UTF-7",
1692 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets
, NULL
, 0)
1694 log_err("UTF-7->u with substitute did not match.\n");
/* UTF-16 toUnicode, three inputs differing only in their first two bytes:
 * in1 starts fe ff, in2 starts ff fe, in3 starts fe fd. For in1/in2 the expected
 * offsets begin at 2 (leading two bytes yield no output); for in3 they begin at 0. */
1698 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1700 static const uint8_t
1701 in1
[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1702 in2
[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1703 in3
[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1706 out1
[]={ 0x4e00, 0xfeff },
1707 out2
[]={ 0x004e, 0xfffe },
1708 out3
[]={ 0xfefd, 0x4e00, 0xfeff };
1710 static const int32_t
1711 offsets1
[]={ 2, 4 },
1712 offsets2
[]={ 2, 4 },
1713 offsets3
[]={ 0, 2, 4 };
1715 if(!testConvertToUnicode(in1
, UPRV_LENGTHOF(in1
), out1
, UPRV_LENGTHOF(out1
), "UTF-16",
1716 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1718 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1721 if(!testConvertToUnicode(in2
, UPRV_LENGTHOF(in2
), out2
, UPRV_LENGTHOF(out2
), "UTF-16",
1722 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1724 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1727 if(!testConvertToUnicode(in3
, UPRV_LENGTHOF(in3
), out3
, UPRV_LENGTHOF(out3
), "UTF-16",
1728 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1730 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
/* UTF-32 toUnicode, four inputs. in1/in2 expect offsets starting at 4; in3 and
 * in4 expect offsets starting at 0. in3 ends with the 32-bit units 0x0000d840 and
 * 0x0000dc01 and in4 contains 0x00111200; the tables expect 0xfffd for each. */
1734 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1736 static const uint8_t
1737 in1
[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1738 in2
[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1739 in3
[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1740 in4
[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1743 out1
[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1744 out2
[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1745 out3
[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1746 out4
[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1748 static const int32_t
1749 offsets1
[]={ 4, 4, 8 },
1750 offsets2
[]={ 4, 4, 8 },
1751 offsets3
[]={ 0, 4, 4, 8, 12 },
1752 offsets4
[]={ 0, 0, 4, 8 };
1754 if(!testConvertToUnicode(in1
, UPRV_LENGTHOF(in1
), out1
, UPRV_LENGTHOF(out1
), "UTF-32",
1755 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets1
, NULL
, 0)
1757 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1760 if(!testConvertToUnicode(in2
, UPRV_LENGTHOF(in2
), out2
, UPRV_LENGTHOF(out2
), "UTF-32",
1761 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets2
, NULL
, 0)
1763 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1766 if(!testConvertToUnicode(in3
, UPRV_LENGTHOF(in3
), out3
, UPRV_LENGTHOF(out3
), "UTF-32",
1767 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets3
, NULL
, 0)
1769 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1772 if(!testConvertToUnicode(in4
, UPRV_LENGTHOF(in4
), out4
, UPRV_LENGTHOF(out4
), "UTF-32",
1773 UCNV_TO_U_CALLBACK_SUBSTITUTE
, offsets4
, NULL
, 0)
1775 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1780 static void TestSubWithValue(int32_t inputsize
, int32_t outputsize
)
1782 UChar sampleText
[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1783 UChar sampleText2
[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1785 const uint8_t expsubwvalIBM_949
[]= {
1786 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1787 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1789 const uint8_t expsubwvalIBM_943
[]= {
1790 0x9f, 0xaf, 0x9f, 0xb1,
1791 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1793 const uint8_t expsubwvalIBM_930
[] = {
1794 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1796 int32_t toIBM949Offs
[] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1797 int32_t toIBM943Offs
[] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1798 int32_t toIBM930Offs
[] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1800 gInBufferSize
= inputsize
;
1801 gOutBufferSize
= outputsize
;
1805 #if !UCONFIG_NO_LEGACY_CONVERSION
1806 if(!testConvertFromUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
1807 expsubwvalIBM_949
, UPRV_LENGTHOF(expsubwvalIBM_949
), "ibm-949",
1808 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM949Offs
, NULL
, 0 ))
1809 log_err("u-> ibm-949 with subst with value did not match.\n");
1811 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1812 expsubwvalIBM_943
, UPRV_LENGTHOF(expsubwvalIBM_943
), "ibm-943",
1813 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM943Offs
, NULL
, 0 ))
1814 log_err("u-> ibm-943 with sub with value did not match.\n");
1816 if(!testConvertFromUnicode(sampleText2
, UPRV_LENGTHOF(sampleText2
),
1817 expsubwvalIBM_930
, UPRV_LENGTHOF(expsubwvalIBM_930
), "ibm-930",
1818 UCNV_FROM_U_CALLBACK_ESCAPE
, toIBM930Offs
, NULL
, 0 ))
1819 log_err("u-> ibm-930 with subst with value did not match.\n");
1822 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1824 static const UChar inputTest
[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1825 static const uint8_t toIBM943
[]= { 0x61,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1827 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1828 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1830 static const int32_t offset
[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1834 static const UChar euc_jp_inputText
[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1835 static const uint8_t to_euc_jp
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1837 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1838 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 1, 2, 2, 2,
1849 static const UChar euc_tw_inputText
[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1850 static const uint8_t to_euc_tw
[]={
1851 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1854 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855 0x61, 0xe6, 0xca, 0x8a,
1857 static const int32_t from_euc_twOffs
[] ={ 0, 1, 1, 2, 2, 2, 2,
1858 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1862 static const UChar iso_2022_jp_inputText1
[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1863 static const uint8_t to_iso_2022_jp1
[]={
1864 0x1b, 0x24, 0x42, 0x21, 0x21,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1866 0x1b, 0x24, 0x42, 0x21, 0x22,
1867 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1871 static const int32_t from_iso_2022_jpOffs1
[] ={
1879 static const UChar iso_2022_jp_inputText2
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1880 static const uint8_t to_iso_2022_jp2
[]={
1881 0x1b, 0x24, 0x42, 0x21, 0x21,
1882 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1883 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1884 0x1b, 0x24, 0x42, 0x21, 0x22,
1885 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1886 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1889 static const int32_t from_iso_2022_jpOffs2
[] ={
1900 static const UChar iso_2022_cn_inputText
[]={ 0x0041, 0x3712, 0x0042, };
1901 static const uint8_t to_iso_2022_cn
[]={
1903 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1906 static const int32_t from_iso_2022_cnOffs
[] ={
1912 static const UChar iso_2022_cn_inputText4
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1914 static const uint8_t to_iso_2022_cn4
[]={
1915 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1916 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1917 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1919 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1920 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1923 static const int32_t from_iso_2022_cnOffs4
[] ={
1935 static const UChar iso_2022_kr_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1936 static const uint8_t to_iso_2022_kr2
[]={
1937 0x1b, 0x24, 0x29, 0x43,
1940 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1941 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1944 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1945 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1948 static const int32_t from_iso_2022_krOffs2
[] ={
1961 static const UChar iso_2022_kr_inputText
[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1962 static const uint8_t to_iso_2022_kr
[]={
1963 0x1b, 0x24, 0x29, 0x43,
1966 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1969 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1974 static const int32_t from_iso_2022_krOffs
[] ={
1985 static const UChar hz_inputText
[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1987 static const uint8_t to_hz
[]={
1989 0x7e, 0x7b, 0x26, 0x30,
1990 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1991 0x7e, 0x7b, 0x26, 0x30,
1995 static const int32_t from_hzOffs
[] ={
2003 static const UChar hz_inputText2
[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2004 static const uint8_t to_hz2
[]={
2006 0x7e, 0x7b, 0x26, 0x30,
2007 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2008 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2009 0x7e, 0x7b, 0x26, 0x30,
2011 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2012 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2015 static const int32_t from_hzOffs2
[] ={
2028 static const UChar iscii_inputText
[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2029 static const uint8_t to_iscii
[]={
2032 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2035 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2040 static const int32_t from_isciiOffs
[] ={
2050 if(!testConvertFromUnicode(inputTest
, UPRV_LENGTHOF(inputTest
),
2051 toIBM943
, UPRV_LENGTHOF(toIBM943
), "ibm-943",
2052 UCNV_FROM_U_CALLBACK_ESCAPE
, offset
, NULL
, 0 ))
2053 log_err("u-> ibm-943 with subst with value did not match.\n");
2055 if(!testConvertFromUnicode(euc_jp_inputText
, UPRV_LENGTHOF(euc_jp_inputText
),
2056 to_euc_jp
, UPRV_LENGTHOF(to_euc_jp
), "IBM-eucJP",
2057 UCNV_FROM_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0 ))
2058 log_err("u-> euc-jp with subst with value did not match.\n");
2060 if(!testConvertFromUnicode(euc_tw_inputText
, UPRV_LENGTHOF(euc_tw_inputText
),
2061 to_euc_tw
, UPRV_LENGTHOF(to_euc_tw
), "euc-tw",
2062 UCNV_FROM_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0 ))
2063 log_err("u-> euc-tw with subst with value did not match.\n");
2065 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, UPRV_LENGTHOF(iso_2022_jp_inputText1
),
2066 to_iso_2022_jp1
, UPRV_LENGTHOF(to_iso_2022_jp1
), "iso-2022-jp",
2067 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2068 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2070 if(!testConvertFromUnicode(iso_2022_jp_inputText1
, UPRV_LENGTHOF(iso_2022_jp_inputText1
),
2071 to_iso_2022_jp1
, UPRV_LENGTHOF(to_iso_2022_jp1
), "iso-2022-jp",
2072 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs1
, NULL
, 0 ))
2073 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2075 if(!testConvertFromUnicode(iso_2022_jp_inputText2
, UPRV_LENGTHOF(iso_2022_jp_inputText2
),
2076 to_iso_2022_jp2
, UPRV_LENGTHOF(to_iso_2022_jp2
), "iso-2022-jp",
2077 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs2
, NULL
, 0 ))
2078 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2082 static const UChar iso_2022_jp_inputText3
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2083 static const uint8_t to_iso_2022_jp3_v2
[]={
2084 0x1b, 0x24, 0x42, 0x21, 0x21,
2085 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2087 0x1b, 0x24, 0x42, 0x21, 0x22,
2088 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2091 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2094 static const int32_t from_iso_2022_jpOffs3_v2
[] ={
2096 1,1,1,1,1,1,1,1,1,1,1,1,
2099 4,4,4,4,4,4,4,4,4,4,4,4,
2105 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3
, UPRV_LENGTHOF(iso_2022_jp_inputText3
),
2106 to_iso_2022_jp3_v2
, UPRV_LENGTHOF(to_iso_2022_jp3_v2
), "iso-2022-jp",
2107 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs3_v2
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2108 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2111 static const UChar iso_2022_cn_inputText5
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2112 static const uint8_t to_iso_2022_cn5_v2
[]={
2113 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2114 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2115 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2117 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2118 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2120 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2122 static const int32_t from_iso_2022_cnOffs5_v2
[] ={
2132 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5
, UPRV_LENGTHOF(iso_2022_cn_inputText5
),
2133 to_iso_2022_cn5_v2
, UPRV_LENGTHOF(to_iso_2022_cn5_v2
), "iso-2022-cn",
2134 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs5_v2
, NULL
, 0,UCNV_ESCAPE_JAVA
,U_ZERO_ERROR
))
2135 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2139 static const UChar iso_2022_cn_inputText6
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2140 static const uint8_t to_iso_2022_cn6_v2
[]={
2141 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2144 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2146 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2148 static const int32_t from_iso_2022_cnOffs6_v2
[] ={
2149 0, 0, 0, 0, 0, 0, 0,
2150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2154 7, 7, 7, 7, 7, 7, 7, 7,
2156 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6
, UPRV_LENGTHOF(iso_2022_cn_inputText6
),
2157 to_iso_2022_cn6_v2
, UPRV_LENGTHOF(to_iso_2022_cn6_v2
), "iso-2022-cn",
2158 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs6_v2
, NULL
, 0,UCNV_ESCAPE_UNICODE
,U_ZERO_ERROR
))
2159 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2163 static const UChar iso_2022_cn_inputText7
[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2164 static const uint8_t to_iso_2022_cn7_v2
[]={
2165 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2168 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2169 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2171 static const int32_t from_iso_2022_cnOffs7_v2
[] ={
2172 0, 0, 0, 0, 0, 0, 0,
2173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2175 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2179 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7
, UPRV_LENGTHOF(iso_2022_cn_inputText7
),
2180 to_iso_2022_cn7_v2
, UPRV_LENGTHOF(to_iso_2022_cn7_v2
), "iso-2022-cn",
2181 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs7_v2
, NULL
, 0,"K" ,U_ZERO_ERROR
))
2182 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2186 static const UChar iso_2022_cn_inputText8
[]={
2194 static const uint8_t to_iso_2022_cn8_v2
[]={
2195 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2198 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2199 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2201 0x5c, 0x39, 0x30, 0x32, 0x20
2203 static const int32_t from_iso_2022_cnOffs8_v2
[] ={
2204 0, 0, 0, 0, 0, 0, 0,
2205 1, 1, 1, 1, 1, 1, 1, 1,
2207 4, 4, 4, 4, 4, 4, 4, 4,
2208 6, 6, 6, 6, 6, 6, 6, 6,
2212 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8
, UPRV_LENGTHOF(iso_2022_cn_inputText8
),
2213 to_iso_2022_cn8_v2
, UPRV_LENGTHOF(to_iso_2022_cn8_v2
), "iso-2022-cn",
2214 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs8_v2
, NULL
, 0,UCNV_ESCAPE_CSS2
,U_ZERO_ERROR
))
2215 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2219 static const uint8_t to_iso_2022_cn4_v3
[]={
2220 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2223 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2228 static const int32_t from_iso_2022_cnOffs4_v3
[] ={
2230 1,1,1,1,1,1,1,1,1,1,1,
2233 4,4,4,4,4,4,4,4,4,4,4,
2238 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4
, UPRV_LENGTHOF(iso_2022_cn_inputText4
),
2239 to_iso_2022_cn4_v3
, UPRV_LENGTHOF(to_iso_2022_cn4_v3
), "iso-2022-cn",
2240 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4_v3
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2242 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2245 if(!testConvertFromUnicode(iso_2022_cn_inputText
, UPRV_LENGTHOF(iso_2022_cn_inputText
),
2246 to_iso_2022_cn
, UPRV_LENGTHOF(to_iso_2022_cn
), "iso-2022-cn",
2247 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0 ))
2248 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2250 if(!testConvertFromUnicode(iso_2022_cn_inputText4
, UPRV_LENGTHOF(iso_2022_cn_inputText4
),
2251 to_iso_2022_cn4
, UPRV_LENGTHOF(to_iso_2022_cn4
), "iso-2022-cn",
2252 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs4
, NULL
, 0 ))
2253 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2254 if(!testConvertFromUnicode(iso_2022_kr_inputText
, UPRV_LENGTHOF(iso_2022_kr_inputText
),
2255 to_iso_2022_kr
, UPRV_LENGTHOF(to_iso_2022_kr
), "iso-2022-kr",
2256 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0 ))
2257 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2258 if(!testConvertFromUnicode(iso_2022_kr_inputText2
, UPRV_LENGTHOF(iso_2022_kr_inputText2
),
2259 to_iso_2022_kr2
, UPRV_LENGTHOF(to_iso_2022_kr2
), "iso-2022-kr",
2260 UCNV_FROM_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs2
, NULL
, 0 ))
2261 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2262 if(!testConvertFromUnicode(hz_inputText
, UPRV_LENGTHOF(hz_inputText
),
2263 to_hz
, UPRV_LENGTHOF(to_hz
), "HZ",
2264 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0 ))
2265 log_err("u-> hz with subst with value did not match.\n");
2266 if(!testConvertFromUnicode(hz_inputText2
, UPRV_LENGTHOF(hz_inputText2
),
2267 to_hz2
, UPRV_LENGTHOF(to_hz2
), "HZ",
2268 UCNV_FROM_U_CALLBACK_ESCAPE
, from_hzOffs2
, NULL
, 0 ))
2269 log_err("u-> hz with subst with value did not match.\n");
2271 if(!testConvertFromUnicode(iscii_inputText
, UPRV_LENGTHOF(iscii_inputText
),
2272 to_iscii
, UPRV_LENGTHOF(to_iscii
), "ISCII,version=0",
2273 UCNV_FROM_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0 ))
2274 log_err("u-> iscii with subst with value did not match.\n");
2278 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2281 #if !UCONFIG_NO_LEGACY_CONVERSION
2282 static const uint8_t sampleTxtToU
[]= { 0x00, 0x9f, 0xaf,
2283 0x81, 0xad, /*unassigned*/
2285 static const UChar IBM_943toUnicode
[] = { 0x0000, 0x6D63,
2286 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2288 static const int32_t fromIBM943Offs
[] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2291 static const uint8_t sampleTxt_EUC_JP
[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2292 0x8f, 0xda, 0xa1, /*unassigned*/
2295 static const UChar EUC_JPtoUnicode
[]={ 0x0061, 0x4edd, 0x5bec,
2296 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2298 static const int32_t fromEUC_JPOffs
[] ={ 0, 1, 3,
2299 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2304 static const uint8_t sampleTxt_euc_tw
[]={
2305 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2306 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2309 static const UChar euc_twtoUnicode
[]={ 0x0061, 0x2295, 0x5BF2,
2310 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2312 static const int32_t from_euc_twOffs
[] ={ 0, 1, 3,
2313 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2317 static const uint8_t sampleTxt_iso_2022_jp
[]={
2318 0x1b, 0x28, 0x42, 0x41,
2319 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2320 0x1b, 0x28, 0x42, 0x42,
2323 /* A % X 3 A % X 1 A B */
2324 static const UChar iso_2022_jptoUnicode
[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2325 static const int32_t from_iso_2022_jpOffs
[] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2328 static const uint8_t sampleTxt_iso_2022_cn
[]={
2330 0x1B, 0x24, 0x29, 0x47,
2331 0x0E, 0x40, 0x6c, /*unassigned*/
2335 static const UChar iso_2022_cntoUnicode
[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2336 static const int32_t from_iso_2022_cnOffs
[] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2339 static const uint8_t sampleTxt_iso_2022_kr
[]={
2340 0x1b, 0x24, 0x29, 0x43,
2348 static const UChar iso_2022_krtoUnicode
[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2349 static const int32_t from_iso_2022_krOffs
[] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2352 static const uint8_t sampleTxt_hz
[]={
2354 0x7e, 0x7b, 0x26, 0x30,
2355 0x7f, 0x1E, /*unassigned*/
2358 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2361 static const UChar hztoUnicode
[]={
2364 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2367 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370 static const int32_t from_hzOffs
[] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2374 static const uint8_t sampleTxt_iscii
[]={
2377 0xEB, /*unassigned*/
2380 0xEC, /*unassigned*/
2383 static const UChar isciitoUnicode
[]={
2386 0x25, 0x58, 0x45, 0x42,
2389 0x25, 0x58, 0x45, 0x43,
2392 static const int32_t from_isciiOffs
[] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2396 static const uint8_t sampleTxtUTF8
[]={
2398 0xC2, 0x7E, /* truncated char */
2400 0xE0, 0xB5, 0x7E, /* truncated char */
2403 static const UChar UTF8ToUnicode
[]={
2404 0x0020, 0x0064, 0x0050,
2405 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2407 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2410 static const int32_t fromUTF8
[] = {
2414 6, 6, 6, 6, 6, 6, 6, 6, 8,
2417 static const UChar UTF8ToUnicodeXML_DEC
[]={
2418 0x0020, 0x0064, 0x0050,
2419 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2421 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2424 static const int32_t fromUTF8XML_DEC
[] = {
2426 3, 3, 3, 3, 3, 3, 4,
2428 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2433 #if !UCONFIG_NO_LEGACY_CONVERSION
2434 if(!testConvertToUnicode(sampleTxtToU
, UPRV_LENGTHOF(sampleTxtToU
),
2435 IBM_943toUnicode
, UPRV_LENGTHOF(IBM_943toUnicode
),"ibm-943",
2436 UCNV_TO_U_CALLBACK_ESCAPE
, fromIBM943Offs
, NULL
, 0 ))
2437 log_err("ibm-943->u with substitute with value did not match.\n");
2439 if(!testConvertToUnicode(sampleTxt_EUC_JP
, UPRV_LENGTHOF(sampleTxt_EUC_JP
),
2440 EUC_JPtoUnicode
, UPRV_LENGTHOF(EUC_JPtoUnicode
),"IBM-eucJP",
2441 UCNV_TO_U_CALLBACK_ESCAPE
, fromEUC_JPOffs
, NULL
, 0))
2442 log_err("euc-jp->u with substitute with value did not match.\n");
2444 if(!testConvertToUnicode(sampleTxt_euc_tw
, UPRV_LENGTHOF(sampleTxt_euc_tw
),
2445 euc_twtoUnicode
, UPRV_LENGTHOF(euc_twtoUnicode
),"euc-tw",
2446 UCNV_TO_U_CALLBACK_ESCAPE
, from_euc_twOffs
, NULL
, 0))
2447 log_err("euc-tw->u with substitute with value did not match.\n");
2449 if(!testConvertToUnicode(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
2450 iso_2022_jptoUnicode
, UPRV_LENGTHOF(iso_2022_jptoUnicode
),"iso-2022-jp",
2451 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0))
2452 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2454 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
2455 iso_2022_jptoUnicode
, UPRV_LENGTHOF(iso_2022_jptoUnicode
),"iso-2022-jp",
2456 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffs
, NULL
, 0,"K",U_ZERO_ERROR
))
2457 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2459 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2461 static const UChar iso_2022_jptoUnicodeDec
[]={
2464 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2465 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2467 static const int32_t from_iso_2022_jpOffsDec
[] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2468 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
2469 iso_2022_jptoUnicodeDec
, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec
),"iso-2022-jp",
2470 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsDec
, NULL
, 0,UCNV_ESCAPE_XML_DEC
,U_ZERO_ERROR
))
2471 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2474 static const UChar iso_2022_jptoUnicodeHex
[]={
2477 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2478 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2480 static const int32_t from_iso_2022_jpOffsHex
[] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2481 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
2482 iso_2022_jptoUnicodeHex
, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex
),"iso-2022-jp",
2483 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsHex
, NULL
, 0,UCNV_ESCAPE_XML_HEX
,U_ZERO_ERROR
))
2484 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2487 static const UChar iso_2022_jptoUnicodeC
[]={
2489 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2490 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2492 int32_t from_iso_2022_jpOffsC
[] ={ 3,7,7,7,7,7,7,7,7,12 };
2493 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp
, UPRV_LENGTHOF(sampleTxt_iso_2022_jp
),
2494 iso_2022_jptoUnicodeC
, UPRV_LENGTHOF(iso_2022_jptoUnicodeC
),"iso-2022-jp",
2495 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_jpOffsC
, NULL
, 0,UCNV_ESCAPE_C
,U_ZERO_ERROR
))
2496 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2499 if(!testConvertToUnicode(sampleTxt_iso_2022_cn
, UPRV_LENGTHOF(sampleTxt_iso_2022_cn
),
2500 iso_2022_cntoUnicode
, UPRV_LENGTHOF(iso_2022_cntoUnicode
),"iso-2022-cn",
2501 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_cnOffs
, NULL
, 0))
2502 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2504 if(!testConvertToUnicode(sampleTxt_iso_2022_kr
, UPRV_LENGTHOF(sampleTxt_iso_2022_kr
),
2505 iso_2022_krtoUnicode
, UPRV_LENGTHOF(iso_2022_krtoUnicode
),"iso-2022-kr",
2506 UCNV_TO_U_CALLBACK_ESCAPE
, from_iso_2022_krOffs
, NULL
, 0))
2507 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2509 if(!testConvertToUnicode(sampleTxt_hz
, UPRV_LENGTHOF(sampleTxt_hz
),
2510 hztoUnicode
, UPRV_LENGTHOF(hztoUnicode
),"HZ",
2511 UCNV_TO_U_CALLBACK_ESCAPE
, from_hzOffs
, NULL
, 0))
2512 log_err("hz->u with substitute with value did not match.\n");
2514 if(!testConvertToUnicode(sampleTxt_iscii
, UPRV_LENGTHOF(sampleTxt_iscii
),
2515 isciitoUnicode
, UPRV_LENGTHOF(isciitoUnicode
),"ISCII,version=0",
2516 UCNV_TO_U_CALLBACK_ESCAPE
, from_isciiOffs
, NULL
, 0))
2517 log_err("ISCII ->u with substitute with value did not match.\n");
2520 if(!testConvertToUnicode(sampleTxtUTF8
, UPRV_LENGTHOF(sampleTxtUTF8
),
2521 UTF8ToUnicode
, UPRV_LENGTHOF(UTF8ToUnicode
),"UTF-8",
2522 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8
, NULL
, 0))
2523 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2524 if(!testConvertToUnicodeWithContext(sampleTxtUTF8
, UPRV_LENGTHOF(sampleTxtUTF8
),
2525 UTF8ToUnicodeXML_DEC
, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC
),"UTF-8",
2526 UCNV_TO_U_CALLBACK_ESCAPE
, fromUTF8XML_DEC
, NULL
, 0, UCNV_ESCAPE_XML_DEC
, U_ZERO_ERROR
))
2527 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2531 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Exercises one from-Unicode conversion of legal text and three to-Unicode
 * conversions of the same ibm-943 byte sequence, each with a different
 * error callback (substitute, skip, stop).
 *
 * inputsize  - stored into the global gInBufferSize before the conversions
 * outputsize - stored into the global gOutBufferSize before the conversions
 *
 * Each conversion that does not report a match is logged through log_err().
 */
2532 static void TestLegalAndOthers(int32_t inputsize
, int32_t outputsize
)
2534 static const UChar legalText
[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2535 static const uint8_t templegal949
[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2536 static const int32_t to949legal
[] = {0, 1, 1, 2, 2, 3, 3};
/* One shared ibm-943 input; the three expectation tables below differ per callback. */
2539 static const uint8_t text943
[] = {
2540 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
/* The substitute expectation carries a 0x1a at index 1 that the skip expectation omits. */
2541 static const UChar toUnicode943sub
[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2542 static const UChar toUnicode943skip
[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
/* The stop expectation holds only the first converted UChar. */
2543 static const UChar toUnicode943stop
[]= { 0x304b};
2545 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 7 };
2546 static const int32_t fromIBM943Offsskip
[] = { 0, 3, 4, 5, 7 };
2547 static const int32_t fromIBM943Offsstop
[] = { 0};
/* Publish the requested chunk sizes through the file-level globals. */
2549 gInBufferSize
= inputsize
;
2550 gOutBufferSize
= outputsize
;
2551 /*checking with a legal value*/
2552 if(!testConvertFromUnicode(legalText
, UPRV_LENGTHOF(legalText
),
2553 templegal949
, UPRV_LENGTHOF(templegal949
), "ibm-949",
2554 UCNV_FROM_U_CALLBACK_SKIP
, to949legal
, NULL
, 0 ))
2555 log_err("u-> ibm-949 with skip did not match.\n");
2557 /*checking illegal value for ibm-943 with substitute*/
2558 if(!testConvertToUnicode(text943
, UPRV_LENGTHOF(text943
),
2559 toUnicode943sub
, UPRV_LENGTHOF(toUnicode943sub
),"ibm-943",
2560 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2561 log_err("ibm-943->u with subst did not match.\n");
2562 /*checking illegal value for ibm-943 with skip */
2563 if(!testConvertToUnicode(text943
, UPRV_LENGTHOF(text943
),
2564 toUnicode943skip
, UPRV_LENGTHOF(toUnicode943skip
),"ibm-943",
2565 UCNV_TO_U_CALLBACK_SKIP
, fromIBM943Offsskip
, NULL
, 0 ))
2566 log_err("ibm-943->u with skip did not match.\n");
2568 /*checking illegal value for ibm-943 with stop */
2569 if(!testConvertToUnicode(text943
, UPRV_LENGTHOF(text943
),
2570 toUnicode943stop
, UPRV_LENGTHOF(toUnicode943stop
),"ibm-943",
2571 UCNV_TO_U_CALLBACK_STOP
, fromIBM943Offsstop
, NULL
, 0 ))
2572 log_err("ibm-943->u with stop did not match.\n");
/*
 * Converts a short ibm-943 byte sequence to Unicode with the substitute
 * callback and compares against an expectation containing two 0x1a UChars.
 *
 * inputsize  - stored into the global gInBufferSize before the conversion
 * outputsize - stored into the global gOutBufferSize before the conversion
 *
 * NOTE(review): the tail of the sampleText initializer is not visible in this
 * excerpt, so the full input cannot be described from here.
 */
2576 static void TestSingleByte(int32_t inputsize
, int32_t outputsize
)
2578 static const uint8_t sampleText
[] = {
2579 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2581 static const UChar toUnicode943sub
[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2582 static const int32_t fromIBM943Offssub
[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2583 /*checking illegal value for ibm-943 with substitute*/
2584 gInBufferSize
= inputsize
;
2585 gOutBufferSize
= outputsize
;
/* A false result from the helper is reported as a mismatch. */
2587 if(!testConvertToUnicode(sampleText
, UPRV_LENGTHOF(sampleText
),
2588 toUnicode943sub
, UPRV_LENGTHOF(toUnicode943sub
),"ibm-943",
2589 UCNV_TO_U_CALLBACK_SUBSTITUTE
, fromIBM943Offssub
, NULL
, 0 ))
2590 log_err("ibm-943->u with subst did not match.\n");
/*
 * Converts the same UTF-16 input to ibm-930 twice with the substitute
 * callback: once without custom substitution bytes (NULL, 0) and once with
 * the single byte 0x3f passed as the substitution character.
 *
 * inputsize  - stored into the global gInBufferSize before the conversions
 * outputsize - stored into the global gOutBufferSize before the conversions
 *
 * Each conversion that does not report a match is logged through log_err().
 */
2593 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize
, int32_t outputsize
)
2596 static const UChar ebcdic_inputTest
[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
/* Expectation without custom substitution bytes: input index 4 maps to 0x0e 0xfe 0xfe 0x0f. */
2597 static const uint8_t toIBM930
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2598 static const int32_t offset_930
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2599 /* s SO doubl SI sng s SO fe fe SI s */
2601 /*EBCDIC_STATEFUL with subChar=3f*/
/* Expectation with mySubChar: input index 4 maps to the single byte 0x3f. */
2602 static const uint8_t toIBM930_subvaried
[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2603 static const int32_t offset_930_subvaried
[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2604 static const char mySubChar
[]={ 0x3f};
2606 gInBufferSize
= inputsize
;
2607 gOutBufferSize
= outputsize
;
2609 if(!testConvertFromUnicode(ebcdic_inputTest
, UPRV_LENGTHOF(ebcdic_inputTest
),
2610 toIBM930
, UPRV_LENGTHOF(toIBM930
), "ibm-930",
2611 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930
, NULL
, 0 ))
2612 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2614 if(!testConvertFromUnicode(ebcdic_inputTest
, UPRV_LENGTHOF(ebcdic_inputTest
),
2615 toIBM930_subvaried
, UPRV_LENGTHOF(toIBM930_subvaried
), "ibm-930",
2616 UCNV_FROM_U_CALLBACK_SUBSTITUTE
, offset_930_subvaried
, mySubChar
, 1 ))
2617 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
/*
 * Converts a UTF-16 buffer to `codepage` by calling ucnv_fromUnicode() in a
 * loop, with each call bounded by gInBufferSize / gOutBufferSize, and then
 * compares the produced bytes (and, when enabled, the offsets) against the
 * expected tables.
 *
 * source/sourceLen - UChars to convert and their count
 * expect/expectLen - expected output bytes and their count
 * codepage         - converter name handed to ucnv_open()
 * callback         - from-Unicode error callback, installed with a NULL context
 * expectOffsets    - expected offsets; compared only when non-null and both
 *                    global buffer sizes equal the full local buffer size
 * mySubChar/len    - optional substitution bytes for ucnv_setSubstChars();
 *                    skipped when mySubChar is NULL
 *
 * Callers negate the result and log a mismatch when it is false.
 * NOTE(review): the return statements fall on lines not visible in this
 * excerpt - confirm the exact return value on each path.
 */
2621 UBool
testConvertFromUnicode(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
2622 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
2623 const char *mySubChar
, int8_t len
)
2627 UErrorCode status
= U_ZERO_ERROR
;
2628 UConverter
*conv
= 0;
2629 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
2630 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2636 int32_t realBufferSize
;
2637 char *realBufferEnd
;
2638 const UChar
*realSourceEnd
;
2639 const UChar
*sourceLimit
;
2640 UBool checkOffsets
= TRUE
;
2643 char offset_str
[9999];
2645 UConverterFromUCallback oldAction
= NULL
;
2646 const void* oldContext
= NULL
;
/* Pre-fill the output buffer with a known byte value. */
2649 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2650 junkout
[i
] = (char)0xF0;
2651 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2653 setNuConvTestName(codepage
, "FROM");
2655 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
2658 conv
= ucnv_open(codepage
, &status
);
2659 if(U_FAILURE(status
))
2661 log_data_err("Couldn't open converter %s\n",codepage
);
2665 log_verbose("Converter opened..\n");
2667 /*----setting the callback routine----*/
2668 ucnv_setFromUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2669 if (U_FAILURE(status
))
2671 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2673 /*------------------------*/
2674 /*setting the subChar*/
2675 if(mySubChar
!= NULL
){
2676 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2677 if (U_FAILURE(status
)) {
/* Message names the operation that actually failed (substitution chars, not the callback). */
2678 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
2687 realBufferSize
= UPRV_LENGTHOF(junkout
);
2688 realBufferEnd
= junkout
+ realBufferSize
;
2689 realSourceEnd
= source
+ sourceLen
;
/* Offset checking is switched off unless both global buffer sizes match the full-size buffers. */
2691 if ( gOutBufferSize
!= realBufferSize
)
2692 checkOffsets
= FALSE
;
2694 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2695 checkOffsets
= FALSE
;
/* Clamp this call's target and source windows to the real buffer ends. */
2699 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
2700 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
2702 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
2704 if(targ
== realBufferEnd
)
2706 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
2709 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
2712 status
= U_ZERO_ERROR
;
2714 ucnv_fromUnicode (conv
,
2719 checkOffsets
? offs
: NULL
,
2720 doFlush
, /* flush if we're at the end of the input data */
2722 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
/* On an invalid/illegal character error, cross-check ucnv_getInvalidUChars() against the UChars just before src. */
2725 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2726 UChar errChars
[50]; /* should be sufficient */
2728 UErrorCode err
= U_ZERO_ERROR
;
2729 const UChar
* start
= NULL
;
2730 ucnv_getInvalidUChars(conv
,errChars
, &errLen
, &err
);
2732 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err
));
2734 /* length of in invalid chars should be equal to returned length*/
2735 start
= src
- errLen
;
2736 if(u_strncmp(errChars
,start
,errLen
)!=0){
2737 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2740 /* allow failure codes for the stop callback */
2741 if(U_FAILURE(status
) &&
2742 (callback
!= UCNV_FROM_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
)))
2744 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2748 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2749 sourceLen
, targ
-junkout
);
2750 if(getTestOption(VERBOSITY_OPTION
))
2755 for(p
= junkout
;p
<targ
;p
++)
2757 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
2758 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
2762 printSeq(expect
, expectLen
);
2765 log_verbose("\nOffsets:");
/* Print the assembled text as data; a runtime buffer must not be used as the format string. */
2766 log_verbose("%s", offset_str
);
/* Report a length mismatch and dump both byte sequences. */
2773 if(expectLen
!= targ
-junkout
)
2775 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2776 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
2777 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2778 printSeqErr(expect
, expectLen
);
2782 if (checkOffsets
&& (expectOffsets
!= 0) )
2784 log_verbose("comparing %d offsets..\n", targ
-junkout
);
2785 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
2786 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2787 log_err("Got Output : ");
2788 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
2789 log_err("Got Offsets: ");
2790 for(p
=junkout
;p
<targ
;p
++)
2791 log_err("%d,", junokout
[p
-junkout
]);
2793 log_err("Expected Offsets: ");
2794 for(i
=0; i
<(targ
-junkout
); i
++)
2795 log_err("%d,", expectOffsets
[i
]);
/* Final byte-for-byte comparison of the output against the expectation. */
2801 if(!memcmp(junkout
, expect
, expectLen
))
2803 log_verbose("String matches! %s\n", gNuConvTestName
);
2808 log_err("String does not match. %s\n", gNuConvTestName
);
2809 log_err("source: ");
2810 printUSeqErr(source
, sourceLen
);
2812 printSeqErr((const uint8_t *)junkout
, expectLen
);
2813 log_err("Expected: ");
2814 printSeqErr(expect
, expectLen
);
/*
 * Converts a byte buffer from `codepage` to UTF-16 by calling
 * ucnv_toUnicode() in a loop, with each call bounded by gInBufferSize /
 * gOutBufferSize, and then compares the produced UChars (and, when enabled,
 * the offsets) against the expected tables.
 *
 * source/sourcelen - bytes to convert and their count
 * expect/expectlen - expected UChars and their count
 * codepage         - converter name handed to ucnv_open()
 * callback         - to-Unicode error callback, installed with a NULL context
 * expectOffsets    - expected offsets; compared only when non-null and both
 *                    global buffer sizes equal the full local buffer size
 * mySubChar/len    - optional substitution bytes for ucnv_setSubstChars();
 *                    skipped when mySubChar is NULL
 *
 * Callers negate the result and log a mismatch when it is false.
 * NOTE(review): the return statements fall on lines not visible in this
 * excerpt - confirm the exact return value on each path.
 */
2819 UBool
testConvertToUnicode( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
2820 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
2821 const char *mySubChar
, int8_t len
)
2823 UErrorCode status
= U_ZERO_ERROR
;
2824 UConverter
*conv
= 0;
2825 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
2826 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
2828 const char *realSourceEnd
;
2829 const char *srcLimit
;
2834 UBool checkOffsets
= TRUE
;
2836 char offset_str
[9999];
2838 UConverterToUCallback oldAction
= NULL
;
2839 const void* oldContext
= NULL
;
2841 int32_t realBufferSize
;
2842 UChar
*realBufferEnd
;
/* Pre-fill the output buffer with a known UChar value. */
2845 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2846 junkout
[i
] = 0xFFFE;
2848 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
2851 setNuConvTestName(codepage
, "TO");
2853 log_verbose("\n========= %s\n", gNuConvTestName
);
2855 conv
= ucnv_open(codepage
, &status
);
2856 if(U_FAILURE(status
))
2858 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
2862 log_verbose("Converter opened..\n");
2864 src
= (const char *)source
;
2868 realBufferSize
= UPRV_LENGTHOF(junkout
);
2869 realBufferEnd
= junkout
+ realBufferSize
;
2870 realSourceEnd
= src
+ sourcelen
;
2871 /*----setting the callback routine----*/
2872 ucnv_setToUCallBack (conv
, callback
, NULL
, &oldAction
, &oldContext
, &status
);
2873 if (U_FAILURE(status
))
2875 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
2877 /*-------------------------------------*/
2878 /*setting the subChar*/
2879 if(mySubChar
!= NULL
){
2880 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
2881 if (U_FAILURE(status
)) {
/* Message names the operation that actually failed (substitution chars, not the callback). */
2882 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
/* Offset checking is switched off unless both global buffer sizes match the full-size buffers. */
2888 if ( gOutBufferSize
!= realBufferSize
)
2889 checkOffsets
= FALSE
;
2891 if( gInBufferSize
!= NEW_MAX_BUFFER
)
2892 checkOffsets
= FALSE
;
/* Clamp this call's target and source windows to the real buffer ends. */
2896 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
2897 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
2899 if(targ
== realBufferEnd
)
2901 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
2904 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
2908 status
= U_ZERO_ERROR
;
2910 ucnv_toUnicode (conv
,
2913 (const char **)&src
,
2914 (const char *)srcLimit
,
2915 checkOffsets
? offs
: NULL
,
2916 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
2918 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
/* On an invalid/illegal character error, cross-check ucnv_getInvalidChars() against the bytes just before src. */
2920 if(status
==U_INVALID_CHAR_FOUND
|| status
== U_ILLEGAL_CHAR_FOUND
){
2921 char errChars
[50]; /* should be sufficient */
2923 UErrorCode err
= U_ZERO_ERROR
;
2924 const char* start
= NULL
;
2925 ucnv_getInvalidChars(conv
,errChars
, &errLen
, &err
);
2927 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err
));
2929 /* length of in invalid chars should be equal to returned length*/
2930 start
= src
- errLen
;
2931 if(uprv_strncmp(errChars
,start
,errLen
)!=0){
2932 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv
,&err
));
2935 /* allow failure codes for the stop callback */
2936 if(U_FAILURE(status
) &&
2937 (callback
!= UCNV_TO_U_CALLBACK_STOP
|| (status
!= U_INVALID_CHAR_FOUND
&& status
!= U_ILLEGAL_CHAR_FOUND
&& status
!= U_TRUNCATED_CHAR_FOUND
)))
2939 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
2943 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2944 sourcelen
, targ
-junkout
);
2945 if(getTestOption(VERBOSITY_OPTION
))
2951 for(p
= junkout
;p
<targ
;p
++)
2953 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
2954 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
2958 printUSeq(expect
, expectlen
);
2961 log_verbose("\nOffsets:");
/* Print the assembled text as data; a runtime buffer must not be used as the format string. */
2962 log_verbose("%s", offset_str
);
2968 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
2970 if (checkOffsets
&& (expectOffsets
!= 0))
2972 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
2974 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
2975 log_err("Got offsets: ");
2976 for(p
=junkout
;p
<targ
;p
++)
2977 log_err(" %2d,", junokout
[p
-junkout
]);
2979 log_err("Expected offsets: ");
2980 for(i
=0; i
<(targ
-junkout
); i
++)
2981 log_err(" %2d,", expectOffsets
[i
]);
2983 log_err("Got output: ");
2984 for(i
=0; i
<(targ
-junkout
); i
++)
2985 log_err("0x%04x,", junkout
[i
]);
2987 log_err("From source: ");
2988 for(i
=0; i
<(src
-(const char *)source
); i
++)
2989 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* Final comparison of the output against the expectation, two bytes per UChar. */
2994 if(!memcmp(junkout
, expect
, expectlen
*2))
2996 log_verbose("Matches!\n");
3001 log_err("String does not match. %s\n", gNuConvTestName
);
3002 log_verbose("String does not match. %s\n", gNuConvTestName
);
3004 printUSeqErr(junkout
, expectlen
);
3005 log_err("Expected: ");
3006 printUSeqErr(expect
, expectlen
);
3012 UBool
testConvertFromUnicodeWithContext(const UChar
*source
, int sourceLen
, const uint8_t *expect
, int expectLen
,
3013 const char *codepage
, UConverterFromUCallback callback
, const int32_t *expectOffsets
,
3014 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3018 UErrorCode status
= U_ZERO_ERROR
;
3019 UConverter
*conv
= 0;
3020 char junkout
[NEW_MAX_BUFFER
]; /* FIX */
3021 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3027 int32_t realBufferSize
;
3028 char *realBufferEnd
;
3029 const UChar
*realSourceEnd
;
3030 const UChar
*sourceLimit
;
3031 UBool checkOffsets
= TRUE
;
3034 char offset_str
[9999];
3036 UConverterFromUCallback oldAction
= NULL
;
3037 const void* oldContext
= NULL
;
3040 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3041 junkout
[i
] = (char)0xF0;
3042 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3044 setNuConvTestName(codepage
, "FROM");
3046 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage
, gInBufferSize
,
3049 conv
= ucnv_open(codepage
, &status
);
3050 if(U_FAILURE(status
))
3052 log_data_err("Couldn't open converter %s\n",codepage
);
3053 return TRUE
; /* Because the err has already been logged. */
3056 log_verbose("Converter opened..\n");
3058 /*----setting the callback routine----*/
3059 ucnv_setFromUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3060 if (U_FAILURE(status
))
3062 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3064 /*------------------------*/
3065 /*setting the subChar*/
3066 if(mySubChar
!= NULL
){
3067 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3068 if (U_FAILURE(status
)) {
3069 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status
));
3078 realBufferSize
= UPRV_LENGTHOF(junkout
);
3079 realBufferEnd
= junkout
+ realBufferSize
;
3080 realSourceEnd
= source
+ sourceLen
;
3082 if ( gOutBufferSize
!= realBufferSize
)
3083 checkOffsets
= FALSE
;
3085 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3086 checkOffsets
= FALSE
;
3090 end
= nct_min(targ
+ gOutBufferSize
, realBufferEnd
);
3091 sourceLimit
= nct_min(src
+ gInBufferSize
, realSourceEnd
);
3093 doFlush
= (UBool
)(sourceLimit
== realSourceEnd
);
3095 if(targ
== realBufferEnd
)
3097 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ
, gNuConvTestName
);
3100 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src
,sourceLimit
, targ
,end
, doFlush
?"TRUE":"FALSE");
3103 status
= U_ZERO_ERROR
;
3105 ucnv_fromUnicode (conv
,
3110 checkOffsets
? offs
: NULL
,
3111 doFlush
, /* flush if we're at the end of the input data */
3113 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (sourceLimit
< realSourceEnd
)) );
3115 /* allow failure codes for the stop callback */
3116 if(U_FAILURE(status
) && status
!= expectedError
)
3118 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3122 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3123 sourceLen
, targ
-junkout
);
3124 if(getTestOption(VERBOSITY_OPTION
))
3129 for(p
= junkout
;p
<targ
;p
++)
3131 sprintf(junk
+ strlen(junk
), "0x%02x, ", (0xFF) & (unsigned int)*p
);
3132 sprintf(offset_str
+ strlen(offset_str
), "0x%02x, ", (0xFF) & (unsigned int)junokout
[p
-junkout
]);
3136 printSeq(expect
, expectLen
);
3139 log_verbose("\nOffsets:");
3140 log_verbose(offset_str
);
3147 if(expectLen
!= targ
-junkout
)
3149 log_err("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3150 log_verbose("Expected %d chars out, got %d %s\n", expectLen
, targ
-junkout
, gNuConvTestName
);
3151 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3152 printSeqErr(expect
, expectLen
);
3156 if (checkOffsets
&& (expectOffsets
!= 0) )
3158 log_verbose("comparing %d offsets..\n", targ
-junkout
);
3159 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t) )){
3160 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3161 log_err("Got Output : ");
3162 printSeqErr((const uint8_t *)junkout
, (int32_t)(targ
-junkout
));
3163 log_err("Got Offsets: ");
3164 for(p
=junkout
;p
<targ
;p
++)
3165 log_err("%d,", junokout
[p
-junkout
]);
3167 log_err("Expected Offsets: ");
3168 for(i
=0; i
<(targ
-junkout
); i
++)
3169 log_err("%d,", expectOffsets
[i
]);
3175 if(!memcmp(junkout
, expect
, expectLen
))
3177 log_verbose("String matches! %s\n", gNuConvTestName
);
3182 log_err("String does not match. %s\n", gNuConvTestName
);
3183 log_err("source: ");
3184 printUSeqErr(source
, sourceLen
);
3186 printSeqErr((const uint8_t *)junkout
, expectLen
);
3187 log_err("Expected: ");
3188 printSeqErr(expect
, expectLen
);
/*
 * testConvertToUnicodeWithContext
 *
 * Opens a converter for `codepage`, installs `callback` (with `context`) as
 * its to-Unicode error callback, converts the `sourcelen` bytes at `source`
 * with ucnv_toUnicode() in chunks bounded by gInBufferSize/gOutBufferSize,
 * and compares the produced UChars against `expect` and, when offsets are
 * being checked, the produced offsets against `expectOffsets`.
 *
 * Parameters:
 *   source, sourcelen  - input bytes and their count.
 *   expect, expectlen  - expected UChar output and its length in UChars.
 *   codepage           - converter name passed to ucnv_open().
 *   callback, context  - passed to ucnv_setToUCallBack().
 *   expectOffsets      - expected offsets; skipped when 0/NULL.
 *   mySubChar, len     - when mySubChar is non-NULL, passed to
 *                        ucnv_setSubstChars().
 *   expectedError      - a failing status equal to this value is not
 *                        reported as a conversion problem.
 *
 * NOTE(review): the return statements of this function are not visible in
 * this copy of the source; confirm the exact TRUE/FALSE contract before
 * relying on it.
 */
3192 UBool
testConvertToUnicodeWithContext( const uint8_t *source
, int sourcelen
, const UChar
*expect
, int expectlen
,
3193 const char *codepage
, UConverterToUCallback callback
, const int32_t *expectOffsets
,
3194 const char *mySubChar
, int8_t len
, const void* context
, UErrorCode expectedError
)
3196 UErrorCode status
= U_ZERO_ERROR
;
3197 UConverter
*conv
= 0;
/* junkout receives the converted UChars; junokout is the int32_t array that
 * is later compared against expectOffsets. */
3198 UChar junkout
[NEW_MAX_BUFFER
]; /* FIX */
3199 int32_t junokout
[NEW_MAX_BUFFER
]; /* FIX */
3201 const char *realSourceEnd
;
3202 const char *srcLimit
;
3207 UBool checkOffsets
= TRUE
;
3209 char offset_str
[9999];
/* Receive the previously installed callback/context from ucnv_setToUCallBack(). */
3211 UConverterToUCallback oldAction
= NULL
;
3212 const void* oldContext
= NULL
;
3214 int32_t realBufferSize
;
3215 UChar
*realBufferEnd
;
/* Pre-fill the output buffer with 0xFFFE. */
3218 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
3219 junkout
[i
] = 0xFFFE;
/* NOTE(review): the body of this second fill loop is not visible here;
 * presumably it initializes junokout -- confirm. */
3221 for(i
=0;i
<NEW_MAX_BUFFER
;i
++)
/* Record the test name in gNuConvTestName for use in later log messages. */
3224 setNuConvTestName(codepage
, "TO");
3226 log_verbose("\n========= %s\n", gNuConvTestName
);
3228 conv
= ucnv_open(codepage
, &status
);
3229 if(U_FAILURE(status
))
3231 log_data_err("Couldn't open converter %s\n",gNuConvTestName
);
3235 log_verbose("Converter opened..\n");
3237 src
= (const char *)source
;
/* Compute the hard limits of the output buffer and of the input data. */
3241 realBufferSize
= UPRV_LENGTHOF(junkout
);
3242 realBufferEnd
= junkout
+ realBufferSize
;
3243 realSourceEnd
= src
+ sourcelen
;
3244 /*----setting the callback routine----*/
3245 ucnv_setToUCallBack (conv
, callback
, context
, &oldAction
, &oldContext
, &status
);
3246 if (U_FAILURE(status
))
3248 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
3250 /*-------------------------------------*/
3251 /*setting the subChar*/
3252 if(mySubChar
!= NULL
){
3253 ucnv_setSubstChars(conv
, mySubChar
, len
, &status
);
3254 if (U_FAILURE(status
)) {
/* NOTE(review): this branch reports a ucnv_setSubstChars() failure, but the
 * message text talks about setting the callback function -- looks like a
 * copy/paste leftover; confirm. */
3255 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status
));
/* Offsets are only checked when the output chunk size equals the whole
 * output buffer and the input chunk size equals NEW_MAX_BUFFER.
 * NOTE(review): presumably because offsets from chunked calls are not
 * comparable to expectOffsets -- confirm. */
3261 if ( gOutBufferSize
!= realBufferSize
)
3262 checkOffsets
= FALSE
;
3264 if( gInBufferSize
!= NEW_MAX_BUFFER
)
3265 checkOffsets
= FALSE
;
/* Clamp this chunk's output end and input limit to the real buffer/source ends. */
3269 end
= nct_min( targ
+ gOutBufferSize
, realBufferEnd
);
3270 srcLimit
= nct_min(realSourceEnd
, src
+ gInBufferSize
);
/* No room left in the real output buffer: report it. */
3272 if(targ
== realBufferEnd
)
/* NOTE(review): targ is a pointer but is printed with %08lx here and in the
 * log_verbose call below -- confirm this is acceptable on all targets. */
3274 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ
,gNuConvTestName
);
3277 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ
,end
);
/* Reset the status before each conversion call. */
3281 status
= U_ZERO_ERROR
;
3283 ucnv_toUnicode (conv
,
3286 (const char **)&src
,
3287 (const char *)srcLimit
,
3288 checkOffsets
? offs
: NULL
,
3289 (UBool
)(srcLimit
== realSourceEnd
), /* flush if we're at the end of the source data */
/* Keep converting while the output chunk overflowed, or while the call
 * succeeded but source data remains beyond srcLimit. */
3291 } while ( (status
== U_BUFFER_OVERFLOW_ERROR
) || (U_SUCCESS(status
) && (srcLimit
< realSourceEnd
)) ); /* while we just need another buffer */
3293 /* allow failure codes for the stop callback */
3294 if(U_FAILURE(status
) && status
!=expectedError
)
3296 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status
), gNuConvTestName
);
3300 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3301 sourcelen
, targ
-junkout
);
/* In verbose mode, build hex dumps of the output UChars (junk) and of the
 * corresponding junokout entries (offset_str), then log them. */
3302 if(getTestOption(VERBOSITY_OPTION
))
3308 for(p
= junkout
;p
<targ
;p
++)
3310 sprintf(junk
+ strlen(junk
), "0x%04x, ", (0xFFFF) & (unsigned int)*p
);
3311 sprintf(offset_str
+ strlen(offset_str
), "0x%04x, ", (0xFFFF) & (unsigned int)junokout
[p
-junkout
]);
3315 printUSeq(expect
, expectlen
);
3318 log_verbose("\nOffsets:");
/* NOTE(review): offset_str is passed as the format string; it is built above
 * from "0x%04x, " output only, so it should contain no '%' -- confirm. */
3319 log_verbose(offset_str
);
3325 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen
,expectlen
*2);
/* Compare one int32_t offset per produced UChar against expectOffsets; on
 * mismatch dump the offsets obtained, the offsets expected, the output and
 * the source bytes consumed so far. */
3327 if (checkOffsets
&& (expectOffsets
!= 0))
3329 if(memcmp(junokout
,expectOffsets
,(targ
-junkout
) * sizeof(int32_t)))
3331 log_err("did not get the expected offsets while %s \n", gNuConvTestName
);
3332 log_err("Got offsets: ");
3333 for(p
=junkout
;p
<targ
;p
++)
3334 log_err(" %2d,", junokout
[p
-junkout
]);
3336 log_err("Expected offsets: ");
3337 for(i
=0; i
<(targ
-junkout
); i
++)
3338 log_err(" %2d,", expectOffsets
[i
]);
3340 log_err("Got output: ");
3341 for(i
=0; i
<(targ
-junkout
); i
++)
3342 log_err("0x%04x,", junkout
[i
]);
3344 log_err("From source: ");
3345 for(i
=0; i
<(src
-(const char *)source
); i
++)
3346 log_err(" 0x%02x,", (unsigned char)source
[i
]);
/* Compare the output with the expectation over expectlen*2 bytes (the factor
 * 2 assumes 2-byte UChars).
 * NOTE(review): only the first expectlen UChars are compared; no check of the
 * produced length (targ-junkout) against expectlen is visible here -- confirm
 * whether extra output should count as a failure. */
3351 if(!memcmp(junkout
, expect
, expectlen
*2))
3353 log_verbose("Matches!\n");
3358 log_err("String does not match. %s\n", gNuConvTestName
);
3359 log_verbose("String does not match. %s\n", gNuConvTestName
);
3361 printUSeqErr(junkout
, expectlen
);
3362 log_err("Expected: ");
3363 printUSeqErr(expect
, expectlen
);
/*
 * TestCallBackFailure
 *
 * Calls each of the callback write helpers (ucnv_cbFromUWriteBytes,
 * ucnv_cbFromUWriteUChars, ucnv_cbFromUWriteSub, ucnv_cbToUWriteUChars) with
 * NULL / -1 arguments while `status` already holds U_USELESS_COLLATOR_ERROR,
 * and logs an error if a helper changes `status`.
 */
3369 static void TestCallBackFailure(void) {
/* Start from a non-zero status so the calls below see a pre-set UErrorCode;
 * it is deliberately never reset between calls. */
3370 UErrorCode status
= U_USELESS_COLLATOR_ERROR
;
3371 ucnv_cbFromUWriteBytes(NULL
, NULL
, -1, -1, &status
);
/* The helper must leave the incoming status untouched. */
3372 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3373 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3375 ucnv_cbFromUWriteUChars(NULL
, NULL
, NULL
, -1, &status
);
3376 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3377 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3379 ucnv_cbFromUWriteSub(NULL
, -1, &status
);
3380 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3381 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3383 ucnv_cbToUWriteUChars(NULL
, NULL
, -1, -1, &status
);
3384 if (status
!= U_USELESS_COLLATOR_ERROR
) {
3385 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");