]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/nccbtst.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nccbtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
51004dcb 3 * Copyright (c) 1997-2013, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
73c04bcf 7********************************************************************************
b75a7d8f
A
8* File NCCBTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda 7/21/1999 Testing error callback routines
73c04bcf 13********************************************************************************
b75a7d8f
A
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "cstring.h"
20#include "unicode/uloc.h"
21#include "unicode/ucnv.h"
22#include "unicode/ucnv_err.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "nccbtst.h"
73c04bcf 27#include "unicode/ucnv_cb.h"
4388f060
A
28#include "unicode/utf16.h"
29
b75a7d8f
A
/* Large buffer size passed to the Test* drivers alongside the 1-unit size. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every use of an argument is parenthesized so that expressions containing
 * low-precedence operators (e.g. nct_min(a | b, c)) group as intended.
 * Note: the selected argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
/* Element count of a real array (must not be used on a pointer). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the test run in progress; reported by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable label of the current test, filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
38
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed,
 * comma-separated list of two-digit upper-case hex values.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
47
48static void printUSeq(const UChar* a, int len)
49{
50 int i=0;
51 log_verbose("{");
52 while (i<len)
53 log_verbose(" 0x%04x, ", a[i++]);
54 log_verbose("}\n");
55}
56
/*
 * Same idea as printSeq(), but writes directly to stderr:
 * the first len bytes of a as two-digit lower-case hex values in braces.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
65
66static void printUSeqErr(const UChar* a, int len)
67{
68 int i=0;
69 fprintf(stderr, "{");
70 while (i<len)
71 fprintf(stderr, "0x%04x, ", a[i++]);
72 fprintf(stderr,"}\n");
73}
74
75static void setNuConvTestName(const char *codepage, const char *direction)
76{
77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
78 codepage,
79 direction,
374ca955
A
80 (int)gInBufferSize,
81 (int)gOutBufferSize);
b75a7d8f
A
82}
83
84
73c04bcf
A
85static void TestCallBackFailure(void);
86
b75a7d8f
A
87void addTestConvertErrorCallBack(TestNode** root);
88
89void addTestConvertErrorCallBack(TestNode** root)
90{
91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
94 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
73c04bcf
A
95
96#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
97 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
98 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
73c04bcf
A
99#endif
100
101 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
b75a7d8f
A
102}
103
104static void TestSkipCallBack()
105{
106 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
107 TestSkip(1,NEW_MAX_BUFFER);
108 TestSkip(1,1);
109 TestSkip(NEW_MAX_BUFFER, 1);
110}
111
112static void TestStopCallBack()
113{
114 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
115 TestStop(1,NEW_MAX_BUFFER);
116 TestStop(1,1);
117 TestStop(NEW_MAX_BUFFER, 1);
118}
119
120static void TestSubCallBack()
121{
122 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
123 TestSub(1,NEW_MAX_BUFFER);
124 TestSub(1,1);
125 TestSub(NEW_MAX_BUFFER, 1);
73c04bcf
A
126
127#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
128 TestEBCDIC_STATEFUL_Sub(1, 1);
129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
73c04bcf 132#endif
b75a7d8f
A
133}
134
135static void TestSubWithValueCallBack()
136{
137 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
138 TestSubWithValue(1,NEW_MAX_BUFFER);
139 TestSubWithValue(1,1);
140 TestSubWithValue(NEW_MAX_BUFFER, 1);
141}
142
73c04bcf 143#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
144static void TestLegalAndOtherCallBack()
145{
146 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
147 TestLegalAndOthers(1,NEW_MAX_BUFFER);
148 TestLegalAndOthers(1,1);
149 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
150}
151
152static void TestSingleByteCallBack()
153{
154 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
155 TestSingleByte(1,NEW_MAX_BUFFER);
156 TestSingleByte(1,1);
157 TestSingleByte(NEW_MAX_BUFFER, 1);
158}
73c04bcf 159#endif
b75a7d8f
A
160
161static void TestSkip(int32_t inputsize, int32_t outputsize)
162{
163 static const uint8_t expskipIBM_949[]= {
164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
165
166 static const uint8_t expskipIBM_943[] = {
167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
168
169 static const uint8_t expskipIBM_930[] = {
170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
171
172 gInBufferSize = inputsize;
173 gOutBufferSize = outputsize;
174
175 /*From Unicode*/
176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
177
73c04bcf 178#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
179 {
180 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
181 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
182
183 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
184 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
b75a7d8f
A
185
186 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
187 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
188 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
189 log_err("u-> ibm-949 with skip did not match.\n");
190 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
191 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
192 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
193 log_err("u-> ibm-943 with skip did not match.\n");
b75a7d8f
A
194 }
195
196 {
197 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
198 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
199 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
200
201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
202 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
203 fromUBytes, sizeof(fromUBytes),
204 "ibm-930",
205 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
206 NULL, 0)
207 ) {
208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
209 }
210 }
73c04bcf 211#endif
b75a7d8f
A
212
213 {
214 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
215 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
216 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
217
218 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
220 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
221
222 /* US-ASCII */
223 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
224 usasciiFromUBytes, sizeof(usasciiFromUBytes),
225 "US-ASCII",
226 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
227 NULL, 0)
228 ) {
229 log_err("u->US-ASCII with skip did not match.\n");
230 }
231
73c04bcf 232#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
233 /* SBCS NLTC codepage 367 for US-ASCII */
234 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
235 usasciiFromUBytes, sizeof(usasciiFromUBytes),
236 "ibm-367",
237 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
238 NULL, 0)
239 ) {
240 log_err("u->ibm-367 with skip did not match.\n");
241 }
73c04bcf 242#endif
b75a7d8f
A
243
244 /* ISO-Latin-1 */
245 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
246 latin1FromUBytes, sizeof(latin1FromUBytes),
247 "LATIN_1",
248 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
249 NULL, 0)
250 ) {
251 log_err("u->LATIN_1 with skip did not match.\n");
252 }
253
73c04bcf 254#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
255 /* windows-1252 */
256 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
257 latin1FromUBytes, sizeof(latin1FromUBytes),
258 "windows-1252",
259 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
260 NULL, 0)
261 ) {
262 log_err("u->windows-1252 with skip did not match.\n");
263 }
264 }
265
266 {
267 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
268 static const uint8_t toIBM943[]= { 0x61, 0x61 };
269 static const int32_t offset[]= {0, 4};
270
271 /* EUC_JP*/
272 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
273 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
274 0x61, 0x8e, 0xe0,
275 };
276 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
277
278 /*EUC_TW*/
279 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
280 static const uint8_t to_euc_tw[]={
281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
282 0x61, 0xe6, 0xca, 0x8a,
283 };
284 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
285
286 /*ISO-2022-JP*/
287 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
288 static const uint8_t to_iso_2022_jp[]={
289 0x41,
290 0x42,
291
292 };
293 static const int32_t from_iso_2022_jpOffs [] ={0,2};
294
b75a7d8f
A
295 /*ISO-2022-JP*/
296 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
297 static const uint8_t to_iso_2022_jp2[]={
298 0x41,
299 0x43,
300
301 };
302 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
303
304 /*ISO-2022-cn*/
305 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
306 static const uint8_t to_iso_2022_cn[]={
374ca955 307 0x41, 0x42
b75a7d8f
A
308 };
309 static const int32_t from_iso_2022_cnOffs [] ={
374ca955 310 0, 2
b75a7d8f
A
311 };
312
313 /*ISO-2022-CN*/
314 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
315 static const uint8_t to_iso_2022_cn1[]={
374ca955 316 0x41, 0x43
b75a7d8f
A
317
318 };
374ca955 319 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
b75a7d8f
A
320
321 /*ISO-2022-kr*/
322 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
323 static const uint8_t to_iso_2022_kr[]={
324 0x1b, 0x24, 0x29, 0x43,
325 0x41,
326 0x0e, 0x25, 0x50,
327 0x25, 0x50,
328 0x0f, 0x42,
329 };
330 static const int32_t from_iso_2022_krOffs [] ={
331 -1,-1,-1,-1,
332 0,
333 1,1,1,
334 3,3,
335 4,4
336 };
337
338 /*ISO-2022-kr*/
339 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
340 static const uint8_t to_iso_2022_kr1[]={
341 0x1b, 0x24, 0x29, 0x43,
342 0x41,
343 0x0e, 0x25, 0x50,
344 0x25, 0x50,
345
346 };
347 static const int32_t from_iso_2022_krOffs1 [] ={
348 -1,-1,-1,-1,
349 0,
350 1,1,1,
351 3,3,
352
353 };
354 /* HZ encoding */
355 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
356
357 static const uint8_t to_hz[]={
358 0x7e, 0x7d, 0x41,
359 0x7e, 0x7b, 0x26, 0x30,
360 0x26, 0x30,
361 0x7e, 0x7d, 0x42,
362
363 };
364 static const int32_t from_hzOffs [] ={
365 0,0,0,
366 1,1,1,1,
367 3,3,
368 4,4,4,4
369 };
370
371 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
372
373 static const uint8_t to_hz1[]={
374 0x7e, 0x7d, 0x41,
375 0x7e, 0x7b, 0x26, 0x30,
376 0x26, 0x30,
377
378
379 };
380 static const int32_t from_hzOffs1 [] ={
381 0,0,0,
382 1,1,1,1,
383 3,3,
384
385 };
386
73c04bcf 387#endif
b75a7d8f
A
388
389 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
390
391 static const uint8_t to_SCSU[]={
392 0x41,
393 0x42
394
395
396 };
397 static const int32_t from_SCSUOffs [] ={
398 0,
399 2,
400
401 };
73c04bcf
A
402
403#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
404 /* ISCII */
405 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
406 static const uint8_t to_iscii[]={
407 0x41,
408 0x42,
409 };
410 static const int32_t from_isciiOffs [] ={
411 0,2,
412
413 };
414 /*ISCII*/
415 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
416 static const uint8_t to_iscii1[]={
417 0x44,
418 0x43,
419
420 };
421 static const int32_t from_isciiOffs1 [] ={0,2};
422
423 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
424 toIBM943, sizeof(toIBM943), "ibm-943",
425 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
426 log_err("u-> ibm-943 with skip did not match.\n");
427
428 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
51004dcb 429 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
b75a7d8f
A
430 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
431 log_err("u-> euc-jp with skip did not match.\n");
432
433 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
434 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
435 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
436 log_err("u-> euc-tw with skip did not match.\n");
437
438 /*iso_2022_jp*/
439 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
440 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
441 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
442 log_err("u-> iso-2022-jp with skip did not match.\n");
443
b75a7d8f
A
444 /* with context */
445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
446 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
449
450 /*iso_2022_cn*/
451 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
452 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
454 log_err("u-> iso-2022-cn with skip did not match.\n");
455 /*with context*/
456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
457 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
458 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
460
461 /*iso_2022_kr*/
462 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
463 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
465 log_err("u-> iso-2022-kr with skip did not match.\n");
466 /*with context*/
467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
468 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
469 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
471
472 /*hz*/
473 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
474 to_hz, sizeof(to_hz), "HZ",
475 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
476 log_err("u-> HZ with skip did not match.\n");
477 /*with context*/
478 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
479 to_hz1, sizeof(to_hz1), "hz",
480 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
73c04bcf
A
481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
482#endif
b75a7d8f
A
483
484 /*SCSU*/
485 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
486 to_SCSU, sizeof(to_SCSU), "SCSU",
487 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
488 log_err("u-> SCSU with skip did not match.\n");
489
73c04bcf 490#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
491 /*ISCII*/
492 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
493 to_iscii, sizeof(to_iscii), "ISCII,version=0",
494 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
495 log_err("u-> iscii with skip did not match.\n");
496 /*with context*/
497 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
498 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
499 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
73c04bcf 501#endif
b75a7d8f
A
502 }
503
504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
505 {
506 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
507 0xFB, 0xEE, 0x28, /* from source offset 0 */
508 0x24, 0x1E, 0x52,
509 0xB2,
510 0x20,
511 0xB3,
512 0xB1,
513 0x0D,
514 0x0A,
515
516 0x20, /* from 8 */
517 0x00,
518 0xD0, 0x6C,
519 0xB6,
520 0xD8, 0xA5,
521 0x20,
522 0x68,
523 0x59,
524
525 0xF9, 0x28, /* from 16 */
526 0x6D,
527 0x20,
528 0x73,
529 0xE0, 0x2D,
530 0xDE, 0x43,
531 0xD0, 0x33,
532 0x20,
533
534 0xFA, 0x83, /* from 24 */
535 0x25, 0x01,
536 0xFB, 0x16, 0x87,
537 0x4B, 0x16,
538 0x20,
539 0xE6, 0xBD,
540 0xEB, 0x5B,
541 0x4B, 0xCC,
542
543 0xF9, 0xA2, /* from 32 */
544 0xFC, 0x10, 0x3E,
545 0xFE, 0x16, 0x3A, 0x8C,
546 0x20,
547 0xFC, 0x03, 0xAC,
548
549 0x01, /* from 41 */
550 0xDE, 0x83,
551 0x20,
552 0x09
553 };
554 static const UChar expected[]={
555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
556 0x0063, 0x0061, 0x000D, 0x000A,
557
558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
559 0x0930, 0x0020, 0x0918, 0x0909,
560
561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
562 0x4000, 0x4E00, 0x7777, 0x0020,
563
564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
565 0x0020, 0xD7A3, 0xDC00, 0xD800,
566
567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
569
570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
571 0x0009
572 };
573 static const int32_t offsets[]={
574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
579 41, 42, 42, 43, 44
580 };
581
582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
583 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
584 sampleText, sizeof(sampleText),
585 "BOCU-1",
586 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
587 ) {
588 log_err("u->BOCU-1 with skip did not match.\n");
589 }
590 }
591
592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
593 {
594 const uint8_t sampleText[]={
595 0x61, /* 'a' */
596 0xc4, 0xb5, /* U+0135 */
597 0xed, 0x80, 0xa0, /* Hangul U+d020 */
598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
599 0xee, 0x80, 0x80, /* PUA U+e000 */
600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
601 0x62, /* 'b' */
602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
603 0xd0, 0x80 /* U+0400 */
604 };
605 UChar expected[]={
606 0x0061,
607 0x0135,
608 0xd020,
609 0xd801, 0xdc01,
610 0xe000,
611 0xdc01,
612 0x0062,
613 0xd801,
614 0x0400
615 };
616 int32_t offsets[]={
617 0,
618 1, 1,
619 2, 2, 2,
620 3, 3, 3, 4, 4, 4,
621 5, 5, 5,
622 6, 6, 6,
623 7,
624 8, 8, 8,
625 9, 9
626 };
627
628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
629
630 /* without offsets */
631 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
632 sampleText, sizeof(sampleText),
633 "CESU-8",
634 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
635 ) {
636 log_err("u->CESU-8 with skip did not match.\n");
637 }
638
639 /* with offsets */
640 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
641 sampleText, sizeof(sampleText),
642 "CESU-8",
643 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
644 ) {
645 log_err("u->CESU-8 with skip did not match.\n");
646 }
647 }
648
649 /*to Unicode*/
650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
651
73c04bcf 652#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
653 {
654
655 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
656 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
657 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
658
659 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
660 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
661 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
662
663 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
664 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
665 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
666 log_err("ibm-949->u with skip did not match.\n");
667 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
668 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
669 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
670 log_err("ibm-943->u with skip did not match.\n");
671
672
673 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
674 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
675 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
676 log_err("ibm-930->u with skip did not match.\n");
677
678
679 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
680 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
682 log_err("ibm-930->u with skip did not match.\n");
683 }
73c04bcf 684#endif
b75a7d8f
A
685
686 {
687 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
688 static const UChar usasciiToU[] = { 0x61, 0x31 };
689 static const int32_t usasciiToUOffsets[] = { 0, 2 };
690
691 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
692 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
693 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
694
695 /* US-ASCII */
696 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
697 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
698 "US-ASCII",
699 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
700 NULL, 0)
701 ) {
702 log_err("US-ASCII->u with skip did not match.\n");
703 }
704
73c04bcf 705#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
706 /* SBCS NLTC codepage 367 for US-ASCII */
707 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
708 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
709 "ibm-367",
710 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
711 NULL, 0)
712 ) {
713 log_err("ibm-367->u with skip did not match.\n");
714 }
73c04bcf 715#endif
b75a7d8f
A
716
717 /* ISO-Latin-1 */
718 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
719 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
720 "LATIN_1",
721 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
722 NULL, 0)
723 ) {
724 log_err("LATIN_1->u with skip did not match.\n");
725 }
726
73c04bcf 727#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
728 /* windows-1252 */
729 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
730 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
731 "windows-1252",
732 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
733 NULL, 0)
734 ) {
735 log_err("windows-1252->u with skip did not match.\n");
736 }
73c04bcf 737#endif
b75a7d8f
A
738 }
739
73c04bcf 740#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
741 {
742 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
744 };
745 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
746 };
747 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
748
749
750 /* euc-jp*/
751 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
752 0x8f, 0xda, 0xa1, /*unassigned*/
753 0x8e, 0xe0,
754 };
755 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
756 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
757
758 /*EUC_TW*/
759 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
761 0xe6, 0xca, 0x8a,
762 };
763 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
764 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
765 /*iso-2022-jp*/
766 static const uint8_t sampleTxt_iso_2022_jp[]={
767 0x41,
51004dcb
A
768 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
769 0x1b, 0x28, 0x42, 0x42,
b75a7d8f
A
770
771 };
772 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
773 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
774
775 /*iso-2022-cn*/
776 static const uint8_t sampleTxt_iso_2022_cn[]={
777 0x0f, 0x41, 0x44,
778 0x1B, 0x24, 0x29, 0x47,
779 0x0E, 0x40, 0x6f, /*unassigned*/
780 0x0f, 0x42,
781
782 };
783
784 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
785 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
786
787 /*iso-2022-kr*/
788 static const uint8_t sampleTxt_iso_2022_kr[]={
789 0x1b, 0x24, 0x29, 0x43,
790 0x41,
791 0x0E, 0x7f, 0x1E,
792 0x0e, 0x25, 0x50,
793 0x0f, 0x51,
794 0x42, 0x43,
795
796 };
797 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
798 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
799
800 /*hz*/
801 static const uint8_t sampleTxt_hz[]={
802 0x41,
803 0x7e, 0x7b, 0x26, 0x30,
804 0x7f, 0x1E, /*unassigned*/
805 0x26, 0x30,
806 0x7e, 0x7d, 0x42,
807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
808 0x7e, 0x7d, 0x42,
809 };
810 static const UChar hztoUnicode[]={
811 0x41,
812 0x03a0,
813 0x03A0,
814 0x42,
815 0x42,};
816
817 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
818
819 /*ISCII*/
820 static const uint8_t sampleTxt_iscii[]={
821 0x41,
822 0xa1,
823 0xEB, /*unassigned*/
824 0x26,
825 0x30,
826 0xa2,
827 0xEC, /*unassigned*/
828 0x42,
829 };
830 static const UChar isciitoUnicode[]={
831 0x41,
832 0x0901,
833 0x26,
834 0x30,
835 0x0902,
836 0x42,
837 };
838
839 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
840
841 /*LMBCS*/
842 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
843 0x12, 0x92, 0xa0, /*unassigned*/
844 0x12, 0x92, 0xA1,
845 };
846 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
847 static const int32_t fromLMBCS[] = {0, 6};
848
849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
850 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
851 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
853
854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
855 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
856 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
858
859 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
51004dcb 860 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
b75a7d8f
A
861 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
862 log_err("euc-jp->u with skip did not match.\n");
863
864
865
866 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
867 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
868 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
869 log_err("euc-tw->u with skip did not match.\n");
870
871
872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
873 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
874 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
875 log_err("iso-2022-jp->u with skip did not match.\n");
876
877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
878 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
879 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
880 log_err("iso-2022-cn->u with skip did not match.\n");
881
882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
883 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
884 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
885 log_err("iso-2022-kr->u with skip did not match.\n");
886
887 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
888 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
889 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
890 log_err("HZ->u with skip did not match.\n");
891
892 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
893 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
894 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
895 log_err("iscii->u with skip did not match.\n");
896
897 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
898 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
899 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
900 log_err("LMBCS->u with skip did not match.\n");
901
902 }
73c04bcf
A
903#endif
904
b75a7d8f
A
905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
906 {
907 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
908 0xe0, 0x80, 0x61,};
909 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
910 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
911
912 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
913 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
914 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
915 log_err("utf8->u with skip did not match.\n");;
916 }
917
918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
919 {
920 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
921 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
922 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
923
924 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
925 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
926 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
927 log_err("scsu->u with skip did not match.\n");
928 }
929
930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
931 {
932 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
934 0x24, 0x1E, 0x52, /* 3 */
935 0xB2, /* 6 */
936 0x20, /* 7 */
937 0x40, 0x07, /* 8 - wrong trail byte */
938 0xB3, /* 10 */
939 0xB1, /* 11 */
940 0xD0, 0x20, /* 12 - wrong trail byte */
941 0x0D, /* 14 */
942 0x0A, /* 15 */
943 0x20, /* 16 */
944 0x00, /* 17 */
945 0xD0, 0x6C, /* 18 */
946 0xB6, /* 20 */
947 0xD8, 0xA5, /* 21 */
948 0x20, /* 23 */
949 0x68, /* 24 */
950 0x59, /* 25 */
951 0xF9, 0x28, /* 26 */
952 0x6D, /* 28 */
953 0x20, /* 29 */
954 0x73, /* 30 */
955 0xE0, 0x2D, /* 31 */
956 0xDE, 0x43, /* 33 */
957 0xD0, 0x33, /* 35 */
958 0x20, /* 37 */
959 0xFA, 0x83, /* 38 */
960 0x25, 0x01, /* 40 */
961 0xFB, 0x16, 0x87, /* 42 */
962 0x4B, 0x16, /* 45 */
963 0x20, /* 47 */
964 0xE6, 0xBD, /* 48 */
965 0xEB, 0x5B, /* 50 */
966 0x4B, 0xCC, /* 52 */
967 0xF9, 0xA2, /* 54 */
968 0xFC, 0x10, 0x3E, /* 56 */
969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
970 0x20, /* 63 */
971 0xFC, 0x03, 0xAC, /* 64 */
972 0xFF, /* 67 - FF just resets the state without encoding anything */
973 0x01, /* 68 */
974 0xDE, 0x83, /* 69 */
975 0x20, /* 71 */
976 0x09 /* 72 */
977 };
978 UChar expected[]={
979 0xFEFF, 0x0061, 0x0062, 0x0020,
980 0x0063, 0x0061, 0x000D, 0x000A,
981 0x0020, 0x0000, 0x00DF, 0x00E6,
982 0x0930, 0x0020, 0x0918, 0x0909,
983 0x3086, 0x304D, 0x0020, 0x3053,
984 0x4000, 0x4E00, 0x7777, 0x0020,
985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
986 0x0020, 0xD7A3, 0xDC00, 0xD800,
987 0xD800, 0xDC00, 0xD845, 0xDDDD,
988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
989 0xDFFF, 0x0001, 0x0E40, 0x0020,
990 0x0009
991 };
992 int32_t offsets[]={
993 0, 3, 6, 7, /* skip 8, */
994 10, 11, /* skip 12, */
995 14, 15, 16, 17, 18,
996 20, 21, 23, 24, 25, 26, 28, 29,
997 30, 31, 33, 35, 37, 38,
998 40, 42, 45, 47, 48,
999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1000 63, 64, /* trail */ 64, /* reset only 67, */
1001 68, 69,
1002 71, 72
1003 };
1004
1005 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1006 expected, ARRAY_LENGTH(expected), "BOCU-1",
1007 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1008 ) {
1009 log_err("BOCU-1->u with skip did not match.\n");
1010 }
1011 }
1012
1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1014 {
1015 const uint8_t sampleText[]={
1016 0x61, /* 0 'a' */
1017 0xc0, 0x80, /* 1 non-shortest form */
1018 0xc4, 0xb5, /* 3 U+0135 */
1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1024 0x62, /* 24 'b' */
1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1026 0xed, 0xa0, /* 28 incomplete sequence */
1027 0xd0, 0x80 /* 30 U+0400 */
1028 };
1029 UChar expected[]={
1030 0x0061,
1031 /* skip */
1032 0x0135,
1033 0xd020,
1034 0xd801, 0xdc01,
1035 0xe000,
1036 0xdc01,
1037 /* skip */
1038 0x0062,
1039 0xd801,
1040 0x0400
1041 };
1042 int32_t offsets[]={
1043 0,
1044 /* skip 1, */
1045 3,
1046 5,
1047 8, 11,
1048 14,
1049 17,
1050 /* skip 20, 20, */
1051 24,
1052 25,
1053 /* skip 28 */
1054 30
1055 };
1056
1057 /* without offsets */
1058 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1059 expected, ARRAY_LENGTH(expected), "CESU-8",
1060 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1061 ) {
1062 log_err("CESU-8->u with skip did not match.\n");
1063 }
1064
1065 /* with offsets */
1066 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1067 expected, ARRAY_LENGTH(expected), "CESU-8",
1068 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1069 ) {
1070 log_err("CESU-8->u with skip did not match.\n");
1071 }
1072 }
1073}
1074
1075static void TestStop(int32_t inputsize, int32_t outputsize)
1076{
1077 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1078 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1079
1080 static const uint8_t expstopIBM_949[]= {
1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1082
1083 static const uint8_t expstopIBM_943[] = {
1084 0x9f, 0xaf, 0x9f, 0xb1};
1085
1086 static const uint8_t expstopIBM_930[] = {
1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1088
1089 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1090 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1091 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1092
1093
1094 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1095 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1096 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1097
1098 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1099 static const int32_t fromIBM943Offs [] = { 0, 2};
1100 static const int32_t fromIBM930Offs [] = { 1, 3};
1101
1102 gInBufferSize = inputsize;
1103 gOutBufferSize = outputsize;
73c04bcf 1104
b75a7d8f 1105 /*From Unicode*/
73c04bcf
A
1106
1107#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1108 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1109 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1110 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1111 log_err("u-> ibm-949 with stop did not match.\n");
1112 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1113 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1114 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1115 log_err("u-> ibm-943 with stop did not match.\n");
1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1117 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1118 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1119 log_err("u-> ibm-930 with stop did not match.\n");
1120
1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1122 {
1123 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1124 static const uint8_t toIBM943[]= { 0x61,};
1125 static const int32_t offset[]= {0,} ;
1126
1127 /*EUC_JP*/
1128 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1129 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1130 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1131
1132 /*EUC_TW*/
1133 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1134 static const uint8_t to_euc_tw[]={
1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1136 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1137
1138 /*ISO-2022-JP*/
1139 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1140 static const uint8_t to_iso_2022_jp[]={
1141 0x41,
1142
1143 };
1144 static const int32_t from_iso_2022_jpOffs [] ={0,};
1145
1146 /*ISO-2022-cn*/
1147 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1148 static const uint8_t to_iso_2022_cn[]={
374ca955 1149 0x41,
b75a7d8f
A
1150
1151 };
1152 static const int32_t from_iso_2022_cnOffs [] ={
1153 0,0,
1154 2,2,
1155 };
1156
1157 /*ISO-2022-kr*/
1158 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1159 static const uint8_t to_iso_2022_kr[]={
1160 0x1b, 0x24, 0x29, 0x43,
1161 0x41,
1162 0x0e, 0x25, 0x50,
1163 };
1164 static const int32_t from_iso_2022_krOffs [] ={
1165 -1,-1,-1,-1,
1166 0,
1167 1,1,1,
1168 };
1169
1170 /* HZ encoding */
1171 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1172
1173 static const uint8_t to_hz[]={
1174 0x7e, 0x7d, 0x41,
1175 0x7e, 0x7b, 0x26, 0x30,
1176
1177 };
1178 static const int32_t from_hzOffs [] ={
1179 0, 0,0,
1180 1,1,1,1,
1181 };
1182
1183 /*ISCII*/
1184 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1185 static const uint8_t to_iscii[]={
1186 0x41,
1187 };
1188 static const int32_t from_isciiOffs [] ={
1189 0,
1190 };
1191
1192 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1193 toIBM943, sizeof(toIBM943), "ibm-943",
1194 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1195 log_err("u-> ibm-943 with stop did not match.\n");
1196
1197 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
51004dcb 1198 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
b75a7d8f
A
1199 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1200 log_err("u-> euc-jp with stop did not match.\n");
1201
1202 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1203 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1204 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1205 log_err("u-> euc-tw with stop did not match.\n");
1206
1207 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1208 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1209 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1210 log_err("u-> iso-2022-jp with stop did not match.\n");
1211
1212 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1213 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1214 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1215 log_err("u-> iso-2022-jp with stop did not match.\n");
1216
1217 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1218 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1219 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1220 log_err("u-> iso-2022-cn with stop did not match.\n");
1221
1222 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1223 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1224 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1225 log_err("u-> iso-2022-kr with stop did not match.\n");
1226
1227 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1228 to_hz, sizeof(to_hz), "HZ",
1229 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1230 log_err("u-> HZ with stop did not match.\n");\
1231
1232 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1233 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1234 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1235 log_err("u-> iscii with stop did not match.\n");
1236
1237
1238 }
73c04bcf
A
1239#endif
1240
b75a7d8f
A
1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1242 {
1243 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1244
1245 static const uint8_t to_SCSU[]={
1246 0x41,
1247
1248 };
1249 int32_t from_SCSUOffs [] ={
1250 0,
1251
1252 };
1253 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1254 to_SCSU, sizeof(to_SCSU), "SCSU",
1255 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1256 log_err("u-> SCSU with skip did not match.\n");
1257
1258 }
73c04bcf 1259
b75a7d8f 1260 /*to Unicode*/
73c04bcf
A
1261
1262#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1263 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1264 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1265 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1266 log_err("ibm-949->u with stop did not match.\n");
1267 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1268 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1269 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1270 log_err("ibm-943->u with stop did not match.\n");
1271 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1272 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1273 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1274 log_err("ibm-930->u with stop did not match.\n");
1275
1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1277 {
1278
1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1281 };
1282 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1283 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1284
1285
1286 /*EUC-JP*/
1287 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1288 0x8f, 0xda, 0xa1, /*unassigned*/
1289 0x8e, 0xe0,
1290 };
1291 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1292 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1293
1294 /*EUC_TW*/
1295 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1297 0xe6, 0xca, 0x8a,
1298 };
1299 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1300 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1301
1302
1303
1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1305 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1306 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1308
1309 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
51004dcb 1310 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
b75a7d8f
A
1311 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1312 log_err("euc-jp->u with stop did not match.\n");
1313
1314 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1315 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1316 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1317 log_err("euc-tw->u with stop did not match.\n");
1318 }
73c04bcf
A
1319#endif
1320
b75a7d8f
A
1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1322 {
1323 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1324 0xe0, 0x80, 0x61,};
1325 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1326 static const int32_t offsets1[] = { 0x0000, 0x0001};
1327
1328 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1329 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1330 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1331 log_err("utf8->u with stop did not match.\n");;
1332 }
1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1334 {
1335 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1336 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1337 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1338
1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1342 log_err("scsu->u with stop did not match.\n");;
1343 }
1344
1345}
1346
1347static void TestSub(int32_t inputsize, int32_t outputsize)
1348{
1349 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1350 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1351
1352 static const uint8_t expsubIBM_949[] =
1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1354
1355 static const uint8_t expsubIBM_943[] = {
1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1357
1358 static const uint8_t expsubIBM_930[] = {
1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1360
1361 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1362 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1363 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1364
1365 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1366 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1367 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1368
1369 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1370 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1371 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1372
1373 gInBufferSize = inputsize;
1374 gOutBufferSize = outputsize;
1375
1376 /*from unicode*/
73c04bcf
A
1377
1378#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1379 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1380 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1382 log_err("u-> ibm-949 with subst did not match.\n");
1383 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1384 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1386 log_err("u-> ibm-943 with subst did not match.\n");
1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1388 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1390 log_err("u-> ibm-930 with subst did not match.\n");
1391
1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1393 {
1394 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1395 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1396 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1397
1398
1399 /* EUC_JP*/
1400 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1401 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1402 0xf4, 0xfe, 0xf4, 0xfe,
1403 0x61, 0x8e, 0xe0,
1404 };
1405 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1406
1407 /*EUC_TW*/
1408 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1409 static const uint8_t to_euc_tw[]={
1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1411 0xfd, 0xfe, 0xfd, 0xfe,
1412 0x61, 0xe6, 0xca, 0x8a,
1413 };
1414
b75a7d8f
A
1415 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1416
1417 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1418 toIBM943, sizeof(toIBM943), "ibm-943",
1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1420 log_err("u-> ibm-943 with substitute did not match.\n");
1421
1422 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
51004dcb 1423 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
b75a7d8f
A
1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1425 log_err("u-> euc-jp with substitute did not match.\n");
1426
1427 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1428 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1430 log_err("u-> euc-tw with substitute did not match.\n");
b75a7d8f 1431 }
73c04bcf 1432#endif
b75a7d8f
A
1433
1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1435 {
1436 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1437
1438 const uint8_t to_SCSU[]={
1439 0x41,
1440 0x0e, 0xff,0xfd,
1441 0x42
1442
1443
1444 };
1445 int32_t from_SCSUOffs [] ={
1446 0,
1447 1,1,1,
1448 2,
1449
1450 };
1451 const uint8_t to_SCSU_1[]={
1452 0x41,
1453
1454 };
1455 int32_t from_SCSUOffs_1 [] ={
1456 0,
1457
1458 };
1459 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1460 to_SCSU, sizeof(to_SCSU), "SCSU",
1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1462 log_err("u-> SCSU with substitute did not match.\n");
1463
1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1465 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1467 log_err("u-> SCSU with substitute did not match.\n");
1468 }
1469
1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1471 {
1472 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1473 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1474 0xf0, 0x90, 0x90, 0x81,
1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1476 0xef, 0xbf, 0xbf, 0x61,
1477
1478 };
1479 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1480 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1481 expectedUTF8, sizeof(expectedUTF8), "utf8",
1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1483 log_err("u-> utf8 with stop did not match.\n");
1484 }
1485 }
1486
1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1488 {
1489 static const UChar in[]={ 0x0041, 0xfeff };
1490
1491 static const uint8_t out[]={
1492#if U_IS_BIG_ENDIAN
1493 0xfe, 0xff,
1494 0x00, 0x41,
1495 0xfe, 0xff
1496#else
1497 0xff, 0xfe,
1498 0x41, 0x00,
1499 0xff, 0xfe
1500#endif
1501 };
1502 static const int32_t offsets[]={
1503 -1, -1, 0, 0, 1, 1
1504 };
1505
1506 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1507 out, sizeof(out), "UTF-16",
1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1509 ) {
1510 log_err("u->UTF-16 with substitute did not match.\n");
1511 }
1512 }
1513
1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1515 {
1516 static const UChar in[]={ 0x0041, 0xfeff };
1517
1518 static const uint8_t out[]={
1519#if U_IS_BIG_ENDIAN
1520 0x00, 0x00, 0xfe, 0xff,
1521 0x00, 0x00, 0x00, 0x41,
1522 0x00, 0x00, 0xfe, 0xff
1523#else
1524 0xff, 0xfe, 0x00, 0x00,
1525 0x41, 0x00, 0x00, 0x00,
1526 0xff, 0xfe, 0x00, 0x00
1527#endif
1528 };
1529 static const int32_t offsets[]={
1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1531 };
1532
1533 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1534 out, sizeof(out), "UTF-32",
1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1536 ) {
1537 log_err("u->UTF-32 with substitute did not match.\n");
1538 }
1539 }
1540
1541 /*to unicode*/
73c04bcf
A
1542
1543#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1544 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1545 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1547 log_err("ibm-949->u with substitute did not match.\n");
1548 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1549 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1551 log_err("ibm-943->u with substitute did not match.\n");
1552 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1553 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1555 log_err("ibm-930->u with substitute did not match.\n");
1556
1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1558 {
1559
1560 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1562 };
1563 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1564 };
1565 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1566
1567
1568 /* EUC_JP*/
1569 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1570 0x8f, 0xda, 0xa1, /*unassigned*/
1571 0x8e, 0xe0, 0x8a
1572 };
1573 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1574 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1575
1576 /*EUC_TW*/
1577 const uint8_t sampleTxt_euc_tw[]={
1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1580 0xe6, 0xca, 0x8a,
1581 };
1582 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1583 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1584
1585
1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1587 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1588 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1590
1591
1592 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
51004dcb 1593 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
b75a7d8f
A
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1595 log_err("euc-jp->u with substitute did not match.\n");
1596
1597
1598 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1599 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1601 log_err("euc-tw->u with substitute did not match.\n");
1602
1603
1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
51004dcb 1605 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
b75a7d8f
A
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1607 log_err("euc-jp->u with substitute did not match.\n");
b75a7d8f 1608 }
73c04bcf
A
1609#endif
1610
b75a7d8f
A
1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1612 {
1613 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1614 0xe0, 0x80, 0x61,};
1615 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1616 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1617
1618 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1619 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1620 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1621 log_err("utf8->u with substitute did not match.\n");;
1622 }
1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1624 {
1625 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1626 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1627 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1628
1629 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1630 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1631 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1632 log_err("scsu->u with stop did not match.\n");;
1633 }
1634
73c04bcf 1635#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1636 log_verbose("Testing ibm-930 subchar/subchar1\n");
1637 {
1638 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1639 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1640 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1641
1642 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1643 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1644 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1645
1646 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1648 ) {
1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1650 }
1651
1652 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1653 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1654 ) {
1655 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1656 }
1657 }
1658
1659 log_verbose("Testing GB 18030 with substitute callbacks\n");
1660 {
b75a7d8f
A
1661 static const UChar u2[]={
1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1663 static const uint8_t gb2[]={
1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1665 static const int32_t offsets2[]={
1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1667
b75a7d8f
A
1668 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1670 ) {
1671 log_err("gb18030->u with substitute did not match.\n");
1672 }
1673 }
73c04bcf 1674#endif
b75a7d8f
A
1675
1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1677 {
1678 static const uint8_t utf7[]={
729e4ab9
A
1679 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
b75a7d8f
A
1681 };
1682 static const UChar unicode[]={
729e4ab9 1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
b75a7d8f
A
1684 };
1685 static const int32_t offsets[]={
729e4ab9 1686 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
b75a7d8f
A
1687 };
1688
1689 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1690 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1691 ) {
1692 log_err("UTF-7->u with substitute did not match.\n");
1693 }
1694 }
1695
b75a7d8f
A
1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1697 {
1698 static const uint8_t
1699 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1700 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1701 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1702
1703 static const UChar
1704 out1[]={ 0x4e00, 0xfeff },
1705 out2[]={ 0x004e, 0xfffe },
1706 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1707
1708 static const int32_t
1709 offsets1[]={ 2, 4 },
1710 offsets2[]={ 2, 4 },
1711 offsets3[]={ 0, 2, 4 };
1712
1713 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1714 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1715 ) {
1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1717 }
1718
1719 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1721 ) {
1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1723 }
1724
1725 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1727 ) {
1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1729 }
1730 }
1731
1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1733 {
1734 static const uint8_t
1735 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1736 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1737 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1738 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1739
1740 static const UChar
4388f060
A
1741 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1742 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1743 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1744 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
b75a7d8f
A
1745
1746 static const int32_t
1747 offsets1[]={ 4, 4, 8 },
1748 offsets2[]={ 4, 4, 8 },
1749 offsets3[]={ 0, 4, 4, 8, 12 },
1750 offsets4[]={ 0, 0, 4, 8 };
1751
1752 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1753 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1754 ) {
1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1756 }
1757
1758 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1760 ) {
1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1762 }
1763
1764 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1766 ) {
1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1768 }
1769
1770 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1772 ) {
1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1774 }
1775 }
1776}
1777
1778static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1779{
1780 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1781 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1782
1783 const uint8_t expsubwvalIBM_949[]= {
1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1786
1787 const uint8_t expsubwvalIBM_943[]= {
1788 0x9f, 0xaf, 0x9f, 0xb1,
1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1790
1791 const uint8_t expsubwvalIBM_930[] = {
1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1793
1794 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1795 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1796 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1797
1798 gInBufferSize = inputsize;
1799 gOutBufferSize = outputsize;
1800
1801 /*from Unicode*/
73c04bcf
A
1802
1803#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1804 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1805 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1806 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1807 log_err("u-> ibm-949 with subst with value did not match.\n");
1808
1809 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1810 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1811 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1812 log_err("u-> ibm-943 with sub with value did not match.\n");
1813
1814 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1815 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1816 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1817 log_err("u-> ibm-930 with subst with value did not match.\n");
1818
1819
1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1821 {
1822 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1823 static const uint8_t toIBM943[]= { 0x61,
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1827 0x61 };
1828 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1829
1830
1831 /* EUC_JP*/
1832 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1833 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1837 0x61, 0x8e, 0xe0,
1838 };
1839 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1840 3, 3, 3, 3, 3, 3,
1841 3, 3, 3, 3, 3, 3,
1842 5, 5, 5, 5, 5, 5,
1843 6, 7, 7,
1844 };
1845
1846 /*EUC_TW*/
1847 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1848 static const uint8_t to_euc_tw[]={
1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x61, 0xe6, 0xca, 0x8a,
1854 };
1855 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1857 6, 7, 7, 8,
1858 };
1859 /*ISO-2022-JP*/
b75a7d8f
A
1860 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1861 static const uint8_t to_iso_2022_jp1[]={
1862 0x1b, 0x24, 0x42, 0x21, 0x21,
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1864 0x1b, 0x24, 0x42, 0x21, 0x22,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1866 0x42,
1867 };
1868
1869 static const int32_t from_iso_2022_jpOffs1 [] ={
1870 0,0,0,0,0,
1871 1,1,1,1,1,1,1,1,1,
1872 2,2,2,2,2,
1873 3,3,3,3,3,3,3,3,3,
1874 4,
1875 };
1876 /* surrogate pair*/
1877 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1878 static const uint8_t to_iso_2022_jp2[]={
1879 0x1b, 0x24, 0x42, 0x21, 0x21,
1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1882 0x1b, 0x24, 0x42, 0x21, 0x22,
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1885 0x42,
1886 };
1887 static const int32_t from_iso_2022_jpOffs2 [] ={
1888 0,0,0,0,0,
1889 1,1,1,1,1,1,1,1,1,
1890 1,1,1,1,1,1,
1891 3,3,3,3,3,
1892 4,4,4,4,4,4,4,4,4,
1893 4,4,4,4,4,4,
1894 6,
1895 };
1896
1897 /*ISO-2022-cn*/
1898 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1899 static const uint8_t to_iso_2022_cn[]={
374ca955
A
1900 0x41,
1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
b75a7d8f
A
1902 0x42,
1903 };
1904 static const int32_t from_iso_2022_cnOffs [] ={
374ca955
A
1905 0,
1906 1,1,1,1,1,1,
b75a7d8f
A
1907 2,
1908 };
b75a7d8f
A
1909
1910 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1911
1912 static const uint8_t to_iso_2022_cn4[]={
1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 1916 0x0e, 0x21, 0x22,
b75a7d8f
A
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1919 0x42,
1920 };
1921 static const int32_t from_iso_2022_cnOffs4 [] ={
1922 0,0,0,0,0,0,0,
1923 1,1,1,1,1,1,1,
1924 1,1,1,1,1,1,
374ca955 1925 3,3,3,
b75a7d8f
A
1926 4,4,4,4,4,4,4,
1927 4,4,4,4,4,4,
1928 6
1929
1930 };
1931
1932 /*ISO-2022-kr*/
1933 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1934 static const uint8_t to_iso_2022_kr2[]={
1935 0x1b, 0x24, 0x29, 0x43,
1936 0x41,
1937 0x0e, 0x25, 0x50,
1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1940 0x0e, 0x25, 0x50,
1941 0x0f, 0x42,
1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1944 0x43
1945 };
1946 static const int32_t from_iso_2022_krOffs2 [] ={
1947 -1,-1,-1,-1,
1948 0,
1949 1,1,1,
1950 2,2,2,2,2,2,2,
1951 2,2,2,2,2,2,
1952 4,4,4,
1953 5,5,
1954 6,6,6,6,6,6,
1955 6,6,6,6,6,6,
1956 8,
1957 };
1958
1959 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1960 static const uint8_t to_iso_2022_kr[]={
1961 0x1b, 0x24, 0x29, 0x43,
1962 0x41,
1963 0x0e, 0x25, 0x50,
1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1965 0x0e, 0x25, 0x50,
1966 0x0f, 0x42,
1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1968 0x43
1969 };
1970
1971
1972 static const int32_t from_iso_2022_krOffs [] ={
1973 -1,-1,-1,-1,
1974 0,
1975 1,1,1,
1976 2,2,2,2,2,2,2,
1977 3,3,3,
1978 4,4,
1979 5,5,5,5,5,5,
1980 6,
1981 };
1982 /* HZ encoding */
1983 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1984
1985 static const uint8_t to_hz[]={
1986 0x7e, 0x7d, 0x41,
1987 0x7e, 0x7b, 0x26, 0x30,
1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1989 0x7e, 0x7b, 0x26, 0x30,
1990 0x7e, 0x7d, 0x42,
1991
1992 };
1993 static const int32_t from_hzOffs [] ={
1994 0,0,0,
1995 1,1,1,1,
1996 2,2,2,2,2,2,2,2,
1997 3,3,3,3,
1998 4,4,4
1999 };
2000
2001 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2002 static const uint8_t to_hz2[]={
2003 0x7e, 0x7d, 0x41,
2004 0x7e, 0x7b, 0x26, 0x30,
2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2007 0x7e, 0x7b, 0x26, 0x30,
2008 0x7e, 0x7d, 0x42,
2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2011 0x43
2012 };
2013 static const int32_t from_hzOffs2 [] ={
2014 0,0,0,
2015 1,1,1,1,
2016 2,2,2,2,2,2,2,2,
2017 2,2,2,2,2,2,
2018 4,4,4,4,
2019 5,5,5,
2020 6,6,6,6,6,6,
2021 6,6,6,6,6,6,
2022 8,
2023 };
2024
2025 /*ISCII*/
b75a7d8f
A
2026 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2027 static const uint8_t to_iscii[]={
2028 0x41,
2029 0xef, 0x42, 0xa1,
2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2031 0xa2,
2032 0x42,
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2034 0x43
2035 };
2036
2037
2038 static const int32_t from_isciiOffs [] ={
2039 0,
2040 1,1,1,
2041 2,2,2,2,2,2,
2042 3,
2043 4,
2044 5,5,5,5,5,5,
2045 6,
2046 };
2047
2048 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2049 toIBM943, sizeof(toIBM943), "ibm-943",
2050 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2051 log_err("u-> ibm-943 with subst with value did not match.\n");
2052
2053 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
51004dcb 2054 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
b75a7d8f
A
2055 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2056 log_err("u-> euc-jp with subst with value did not match.\n");
2057
2058 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2059 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2060 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2061 log_err("u-> euc-tw with subst with value did not match.\n");
2062
b75a7d8f
A
2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2064 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2065 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2066 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2067
2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2069 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2070 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2071 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2072
2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2074 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2075 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2076 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2077 /*ESCAPE OPTIONS*/
2078 {
2079 /* surrogate pair*/
2080 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2081 static const uint8_t to_iso_2022_jp3_v2[]={
2082 0x1b, 0x24, 0x42, 0x21, 0x21,
2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2084
2085 0x1b, 0x24, 0x42, 0x21, 0x22,
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2087
2088 0x42,
2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2090 };
2091
2092 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2093 0,0,0,0,0,
2094 1,1,1,1,1,1,1,1,1,1,1,1,
2095
2096 3,3,3,3,3,
2097 4,4,4,4,4,4,4,4,4,4,4,4,
2098
2099 6,
2100 7,7,7,7,7,7,7,7,7
2101 };
2102
2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2104 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2107 }
b75a7d8f
A
2108 {
2109 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2110 static const uint8_t to_iso_2022_cn5_v2[]={
2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
374ca955 2114 0x0e, 0x21, 0x22,
b75a7d8f
A
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2117 0x42,
374ca955 2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2119 };
2120 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2121 0,0,0,0,0,0,0,
2122 1,1,1,1,1,1,1,
2123 1,1,1,1,1,1,
374ca955 2124 3,3,3,
b75a7d8f
A
2125 4,4,4,4,4,4,4,
2126 4,4,4,4,4,4,
2127 6,
374ca955 2128 7,7,7,7,7,7
b75a7d8f
A
2129 };
2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2131 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2132 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2134
2135 }
2136 {
2137 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2138 static const uint8_t to_iso_2022_cn6_v2[]={
2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
374ca955 2141 0x0e, 0x21, 0x22,
b75a7d8f
A
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2143 0x42,
374ca955 2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
b75a7d8f
A
2145 };
2146 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2147 0, 0, 0, 0, 0, 0, 0,
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2149 3, 3, 3,
b75a7d8f
A
2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2151 6,
374ca955 2152 7, 7, 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2153 };
2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2155 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2156 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2158
2159 }
2160 {
2161 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2162 static const uint8_t to_iso_2022_cn7_v2[]={
2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2165 0x0e, 0x21, 0x22,
b75a7d8f 2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2168 };
2169 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2170 0, 0, 0, 0, 0, 0, 0,
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2172 3, 3, 3,
b75a7d8f
A
2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2174 6,
374ca955 2175 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2176 };
2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2178 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2179 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2180 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2181
46f4442e
A
2182 }
2183 {
2184 static const UChar iso_2022_cn_inputText8[]={
2185 0x3000,
2186 0xD84D, 0xDC56,
2187 0x3001,
2188 0xD84D, 0xDC56,
2189 0xDBFF, 0xDFFF,
2190 0x0042,
2191 0x0902};
2192 static const uint8_t to_iso_2022_cn8_v2[]={
2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2195 0x0e, 0x21, 0x22,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2198 0x42,
2199 0x5c, 0x39, 0x30, 0x32, 0x20
2200 };
2201 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2202 0, 0, 0, 0, 0, 0, 0,
2203 1, 1, 1, 1, 1, 1, 1, 1,
2204 3, 3, 3,
2205 4, 4, 4, 4, 4, 4, 4, 4,
2206 6, 6, 6, 6, 6, 6, 6, 6,
2207 8,
2208 9, 9, 9, 9, 9
2209 };
2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2211 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2212 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2214
b75a7d8f
A
2215 }
2216 {
2217 static const uint8_t to_iso_2022_cn4_v3[]={
2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
374ca955 2220 0x0e, 0x21, 0x22,
b75a7d8f
A
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2222 0x42
2223 };
2224
2225
2226 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2227 0,0,0,0,0,0,0,
2228 1,1,1,1,1,1,1,1,1,1,1,
2229
374ca955 2230 3,3,3,
b75a7d8f
A
2231 4,4,4,4,4,4,4,4,4,4,4,
2232
2233 6
2234
2235 };
2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2237 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2238 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2239 {
2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2241 }
2242 }
2243 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2244 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2246 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2247
b75a7d8f
A
2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2249 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2250 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2251 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2252 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2253 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2255 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2257 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2260 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2261 to_hz, sizeof(to_hz), "HZ",
2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2263 log_err("u-> hz with subst with value did not match.\n");
2264 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2265 to_hz2, sizeof(to_hz2), "HZ",
2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2267 log_err("u-> hz with subst with value did not match.\n");
2268
2269 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2270 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2271 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2272 log_err("u-> iscii with subst with value did not match.\n");
b75a7d8f 2273 }
73c04bcf 2274#endif
b75a7d8f
A
2275
2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2277 /*to Unicode*/
2278 {
73c04bcf 2279#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2280 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2281 0x81, 0xad, /*unassigned*/
2282 0x89, 0xd3 };
2283 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2285 0x7B87};
2286 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2287
2288 /* EUC_JP*/
2289 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2290 0x8f, 0xda, 0xa1, /*unassigned*/
2291 0x8e, 0xe0,
2292 };
2293 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2295 0x00a2 };
2296 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2298 9,
2299 };
2300
2301 /*EUC_TW*/
2302 static const uint8_t sampleTxt_euc_tw[]={
2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2305 0xe6, 0xca, 0x8a,
2306 };
2307 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2309 0x8706, 0x8a, };
2310 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2312 11, 13};
2313
2314 /*iso-2022-jp*/
2315 static const uint8_t sampleTxt_iso_2022_jp[]={
2316 0x1b, 0x28, 0x42, 0x41,
51004dcb 2317 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
b75a7d8f
A
2318 0x1b, 0x28, 0x42, 0x42,
2319
2320 };
51004dcb
A
2321 /* A % X 3 A % X 1 A B */
2322 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
b75a7d8f 2323 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
51004dcb 2324
b75a7d8f
A
2325 /*iso-2022-cn*/
2326 static const uint8_t sampleTxt_iso_2022_cn[]={
2327 0x0f, 0x41, 0x44,
2328 0x1B, 0x24, 0x29, 0x47,
2329 0x0E, 0x40, 0x6c, /*unassigned*/
2330 0x0f, 0x42,
2331
2332 };
2333 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2334 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2335
2336 /*iso-2022-kr*/
2337 static const uint8_t sampleTxt_iso_2022_kr[]={
2338 0x1b, 0x24, 0x29, 0x43,
2339 0x41,
2340 0x0E, 0x7f, 0x1E,
2341 0x0e, 0x25, 0x50,
2342 0x0f, 0x51,
2343 0x42, 0x43,
2344
2345 };
2346 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2347 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2348
2349 /*hz*/
2350 static const uint8_t sampleTxt_hz[]={
2351 0x41,
2352 0x7e, 0x7b, 0x26, 0x30,
2353 0x7f, 0x1E, /*unassigned*/
2354 0x26, 0x30,
2355 0x7e, 0x7d, 0x42,
2356 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2357 0x7e, 0x7d, 0x42,
2358 };
2359 static const UChar hztoUnicode[]={
2360 0x41,
2361 0x03a0,
2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2363 0x03A0,
2364 0x42,
2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2366 0x42,};
2367
2368 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2369
2370
2371 /*iscii*/
2372 static const uint8_t sampleTxt_iscii[]={
2373 0x41,
2374 0x30,
2375 0xEB, /*unassigned*/
2376 0xa3,
2377 0x42,
2378 0xEC, /*unassigned*/
2379 0x42,
2380 };
2381 static const UChar isciitoUnicode[]={
2382 0x41,
2383 0x30,
2384 0x25, 0x58, 0x45, 0x42,
2385 0x0903,
2386 0x42,
2387 0x25, 0x58, 0x45, 0x43,
2388 0x42,};
2389
2390 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
73c04bcf 2391#endif
b75a7d8f 2392
b75a7d8f
A
2393 /*UTF8*/
2394 static const uint8_t sampleTxtUTF8[]={
2395 0x20, 0x64, 0x50,
2396 0xC2, 0x7E, /* truncated char */
2397 0x20,
2398 0xE0, 0xB5, 0x7E, /* truncated char */
2399 0x40,
2400 };
2401 static const UChar UTF8ToUnicode[]={
2402 0x0020, 0x0064, 0x0050,
2403 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2404 0x0020,
2405 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2406 0x0040
2407 };
2408 static const int32_t fromUTF8[] = {
2409 0, 1, 2,
2410 3, 3, 3, 3, 4,
2411 5,
2412 6, 6, 6, 6, 6, 6, 6, 6, 8,
2413 9
2414 };
2415 static const UChar UTF8ToUnicodeXML_DEC[]={
2416 0x0020, 0x0064, 0x0050,
2417 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* &#194;~ */
2418 0x0020,
2419 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2420 0x0040
2421 };
2422 static const int32_t fromUTF8XML_DEC[] = {
2423 0, 1, 2,
2424 3, 3, 3, 3, 3, 3, 4,
2425 5,
2426 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2427 9
2428 };
2429
73c04bcf
A
2430
2431#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2432 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2433 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2434 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2435 log_err("ibm-943->u with substitute with value did not match.\n");
2436
2437 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
51004dcb 2438 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
b75a7d8f
A
2439 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2440 log_err("euc-jp->u with substitute with value did not match.\n");
2441
2442 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2443 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2444 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2445 log_err("euc-tw->u with substitute with value did not match.\n");
2446
2447 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2448 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2449 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2450 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2451
2452 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2453 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2454 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2455 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2456
2457 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2458 {
2459 static const UChar iso_2022_jptoUnicodeDec[]={
2460 0x0041,
51004dcb
A
2461 /* & # 5 8 ; */
2462 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2463 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
b75a7d8f
A
2464 0x0042 };
2465 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2466 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2467 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2468 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2469 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2470 }
2471 {
2472 static const UChar iso_2022_jptoUnicodeHex[]={
2473 0x0041,
51004dcb
A
2474 /* & # x 3 A ; */
2475 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2476 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
b75a7d8f
A
2477 0x0042 };
2478 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2479 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2480 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2481 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2482 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2483 }
2484 {
2485 static const UChar iso_2022_jptoUnicodeC[]={
2486 0x0041,
51004dcb
A
2487 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2488 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
b75a7d8f
A
2489 0x0042 };
2490 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2491 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2492 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2493 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2494 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2495 }
2496 }
2497 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2498 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2500 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2501
2502 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2503 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2504 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2505 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2506
2507 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2508 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2509 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2510 log_err("hz->u with substitute with value did not match.\n");
2511
2512 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2513 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2514 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2515 log_err("ISCII ->u with substitute with value did not match.\n");
73c04bcf
A
2516#endif
2517
b75a7d8f
A
2518 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2519 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2520 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2521 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2522 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2523 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2526 }
2527}
2528
73c04bcf 2529#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2530static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2531{
2532 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2533 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2534 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2535
2536
2537 static const uint8_t text943[] = {
fd0068a8
A
2538 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2539 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2540 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
b75a7d8f
A
2541 static const UChar toUnicode943stop[]= { 0x304b};
2542
fd0068a8
A
2543 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2544 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
b75a7d8f
A
2545 static const int32_t fromIBM943Offsstop[] = { 0};
2546
2547 gInBufferSize = inputsize;
2548 gOutBufferSize = outputsize;
2549 /*checking with a legal value*/
2550 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2551 templegal949, sizeof(templegal949), "ibm-949",
2552 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2553 log_err("u-> ibm-949 with skip did not match.\n");
2554
2555 /*checking illegal value for ibm-943 with substitute*/
2556 if(!testConvertToUnicode(text943, sizeof(text943),
2557 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2559 log_err("ibm-943->u with subst did not match.\n");
2560 /*checking illegal value for ibm-943 with skip */
2561 if(!testConvertToUnicode(text943, sizeof(text943),
2562 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2563 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2564 log_err("ibm-943->u with skip did not match.\n");
2565
2566 /*checking illegal value for ibm-943 with stop */
2567 if(!testConvertToUnicode(text943, sizeof(text943),
2568 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2569 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2570 log_err("ibm-943->u with stop did not match.\n");
2571
2572}
2573
2574static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2575{
2576 static const uint8_t sampleText[] = {
2577 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
fd0068a8
A
2578 0xff, 0x32, 0x33};
2579 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2580 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
b75a7d8f
A
2581 /*checking illegal value for ibm-943 with substitute*/
2582 gInBufferSize = inputsize;
2583 gOutBufferSize = outputsize;
2584
2585 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2586 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2587 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2588 log_err("ibm-943->u with subst did not match.\n");
2589}
2590
2591static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2592{
2593 /*EBCDIC_STATEFUL*/
2594 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2595 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2596 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2597/* s SO doubl SI sng s SO fe fe SI s */
2598
2599 /*EBCDIC_STATEFUL with subChar=3f*/
2600 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2601 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2602 static const char mySubChar[]={ 0x3f};
2603
2604 gInBufferSize = inputsize;
2605 gOutBufferSize = outputsize;
2606
2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2608 toIBM930, sizeof(toIBM930), "ibm-930",
2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2611
2612 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2613 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2614 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2615 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2616}
73c04bcf 2617#endif
b75a7d8f
A
2618
2619UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2620 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2621 const char *mySubChar, int8_t len)
2622{
2623
2624
2625 UErrorCode status = U_ZERO_ERROR;
2626 UConverter *conv = 0;
73c04bcf 2627 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
2628 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2629 const UChar *src;
73c04bcf
A
2630 char *end;
2631 char *targ;
b75a7d8f
A
2632 int32_t *offs;
2633 int i;
2634 int32_t realBufferSize;
73c04bcf 2635 char *realBufferEnd;
b75a7d8f
A
2636 const UChar *realSourceEnd;
2637 const UChar *sourceLimit;
2638 UBool checkOffsets = TRUE;
2639 UBool doFlush;
2640 char junk[9999];
2641 char offset_str[9999];
73c04bcf 2642 char *p;
b75a7d8f
A
2643 UConverterFromUCallback oldAction = NULL;
2644 const void* oldContext = NULL;
2645
2646
2647 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 2648 junkout[i] = (char)0xF0;
b75a7d8f
A
2649 for(i=0;i<NEW_MAX_BUFFER;i++)
2650 junokout[i] = 0xFF;
2651 setNuConvTestName(codepage, "FROM");
2652
2653 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2654 gOutBufferSize);
2655
2656 conv = ucnv_open(codepage, &status);
2657 if(U_FAILURE(status))
2658 {
2659 log_data_err("Couldn't open converter %s\n",codepage);
2660 return TRUE;
2661 }
2662
2663 log_verbose("Converter opened..\n");
2664
2665 /*----setting the callback routine----*/
2666 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2667 if (U_FAILURE(status))
2668 {
2669 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2670 }
2671 /*------------------------*/
2672 /*setting the subChar*/
2673 if(mySubChar != NULL){
2674 ucnv_setSubstChars(conv, mySubChar, len, &status);
2675 if (U_FAILURE(status)) {
2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2677 }
2678 }
2679 /*------------*/
2680
2681 src = source;
2682 targ = junkout;
2683 offs = junokout;
2684
2685 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2686 realBufferEnd = junkout + realBufferSize;
2687 realSourceEnd = source + sourceLen;
2688
2689 if ( gOutBufferSize != realBufferSize )
2690 checkOffsets = FALSE;
2691
2692 if( gInBufferSize != NEW_MAX_BUFFER )
2693 checkOffsets = FALSE;
2694
2695 do
2696 {
2697 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2698 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2699
2700 doFlush = (UBool)(sourceLimit == realSourceEnd);
2701
2702 if(targ == realBufferEnd)
2703 {
2704 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2705 return FALSE;
2706 }
2707 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2708
2709
2710 status = U_ZERO_ERROR;
2711
2712 ucnv_fromUnicode (conv,
2713 (char **)&targ,
2714 (const char *)end,
2715 &src,
2716 sourceLimit,
2717 checkOffsets ? offs : NULL,
2718 doFlush, /* flush if we're at the end of the input data */
2719 &status);
2720 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2721
2722
2723 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2724 UChar errChars[50]; /* should be sufficient */
2725 int8_t errLen = 50;
2726 UErrorCode err = U_ZERO_ERROR;
b75a7d8f
A
2727 const UChar* start= NULL;
2728 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2729 if(U_FAILURE(err)){
2730 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2731 }
b75a7d8f
A
2732 /* length of in invalid chars should be equal to returned length*/
2733 start = src - errLen;
2734 if(u_strncmp(errChars,start,errLen)!=0){
2735 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2736 }
2737 }
2738 /* allow failure codes for the stop callback */
2739 if(U_FAILURE(status) &&
2740 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2741 {
2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2743 return FALSE;
2744 }
2745
2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2747 sourceLen, targ-junkout);
729e4ab9 2748 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
2749 {
2750
2751 junk[0] = 0;
2752 offset_str[0] = 0;
2753 for(p = junkout;p<targ;p++)
2754 {
2755 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2756 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2757 }
2758
2759 log_verbose(junk);
2760 printSeq(expect, expectLen);
2761 if ( checkOffsets )
2762 {
2763 log_verbose("\nOffsets:");
2764 log_verbose(offset_str);
2765 }
2766 log_verbose("\n");
2767 }
2768 ucnv_close(conv);
2769
2770
2771 if(expectLen != targ-junkout)
2772 {
2773 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 2775 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2776 printSeqErr(expect, expectLen);
2777 return FALSE;
2778 }
2779
2780 if (checkOffsets && (expectOffsets != 0) )
2781 {
2782 log_verbose("comparing %d offsets..\n", targ-junkout);
2783 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2784 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2785 log_err("Got Output : ");
73c04bcf 2786 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2787 log_err("Got Offsets: ");
2788 for(p=junkout;p<targ;p++)
2789 log_err("%d,", junokout[p-junkout]);
2790 log_err("\n");
2791 log_err("Expected Offsets: ");
2792 for(i=0; i<(targ-junkout); i++)
2793 log_err("%d,", expectOffsets[i]);
2794 log_err("\n");
2795 return FALSE;
2796 }
2797 }
2798
2799 if(!memcmp(junkout, expect, expectLen))
2800 {
2801 log_verbose("String matches! %s\n", gNuConvTestName);
2802 return TRUE;
2803 }
2804 else
2805 {
2806 log_err("String does not match. %s\n", gNuConvTestName);
2807 log_err("source: ");
2808 printUSeqErr(source, sourceLen);
2809 log_err("Got: ");
73c04bcf 2810 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
2811 log_err("Expected: ");
2812 printSeqErr(expect, expectLen);
2813 return FALSE;
2814 }
2815}
2816
2817UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2818 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2819 const char *mySubChar, int8_t len)
2820{
2821 UErrorCode status = U_ZERO_ERROR;
2822 UConverter *conv = 0;
2823 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2824 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
2825 const char *src;
2826 const char *realSourceEnd;
2827 const char *srcLimit;
b75a7d8f
A
2828 UChar *targ;
2829 UChar *end;
2830 int32_t *offs;
2831 int i;
2832 UBool checkOffsets = TRUE;
2833 char junk[9999];
2834 char offset_str[9999];
2835 UChar *p;
2836 UConverterToUCallback oldAction = NULL;
2837 const void* oldContext = NULL;
2838
2839 int32_t realBufferSize;
2840 UChar *realBufferEnd;
2841
2842
2843 for(i=0;i<NEW_MAX_BUFFER;i++)
2844 junkout[i] = 0xFFFE;
2845
2846 for(i=0;i<NEW_MAX_BUFFER;i++)
2847 junokout[i] = -1;
2848
2849 setNuConvTestName(codepage, "TO");
2850
2851 log_verbose("\n========= %s\n", gNuConvTestName);
2852
2853 conv = ucnv_open(codepage, &status);
2854 if(U_FAILURE(status))
2855 {
2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2857 return TRUE;
2858 }
2859
2860 log_verbose("Converter opened..\n");
2861
73c04bcf 2862 src = (const char *)source;
b75a7d8f
A
2863 targ = junkout;
2864 offs = junokout;
2865
2866 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2867 realBufferEnd = junkout + realBufferSize;
2868 realSourceEnd = src + sourcelen;
2869 /*----setting the callback routine----*/
2870 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2871 if (U_FAILURE(status))
2872 {
2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2874 }
2875 /*-------------------------------------*/
2876 /*setting the subChar*/
2877 if(mySubChar != NULL){
2878 ucnv_setSubstChars(conv, mySubChar, len, &status);
2879 if (U_FAILURE(status)) {
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2881 }
2882 }
2883 /*------------*/
2884
2885
2886 if ( gOutBufferSize != realBufferSize )
2887 checkOffsets = FALSE;
2888
2889 if( gInBufferSize != NEW_MAX_BUFFER )
2890 checkOffsets = FALSE;
2891
2892 do
2893 {
2894 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2895 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2896
2897 if(targ == realBufferEnd)
2898 {
2899 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2900 return FALSE;
2901 }
2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2903
2904
2905
2906 status = U_ZERO_ERROR;
2907
2908 ucnv_toUnicode (conv,
2909 &targ,
2910 end,
2911 (const char **)&src,
2912 (const char *)srcLimit,
2913 checkOffsets ? offs : NULL,
2914 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2915 &status);
2916 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2917
2918 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2919 char errChars[50]; /* should be sufficient */
2920 int8_t errLen = 50;
2921 UErrorCode err = U_ZERO_ERROR;
73c04bcf 2922 const char* start= NULL;
b75a7d8f
A
2923 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2924 if(U_FAILURE(err)){
2925 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2926 }
b75a7d8f
A
2927 /* length of in invalid chars should be equal to returned length*/
2928 start = src - errLen;
73c04bcf 2929 if(uprv_strncmp(errChars,start,errLen)!=0){
b75a7d8f
A
2930 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2931 }
2932 }
2933 /* allow failure codes for the stop callback */
2934 if(U_FAILURE(status) &&
2935 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2936 {
2937 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2938 return FALSE;
2939 }
2940
2941 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2942 sourcelen, targ-junkout);
729e4ab9 2943 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
2944 {
2945
2946 junk[0] = 0;
2947 offset_str[0] = 0;
2948
2949 for(p = junkout;p<targ;p++)
2950 {
2951 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2952 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2953 }
2954
2955 log_verbose(junk);
2956 printUSeq(expect, expectlen);
2957 if ( checkOffsets )
2958 {
2959 log_verbose("\nOffsets:");
2960 log_verbose(offset_str);
2961 }
2962 log_verbose("\n");
2963 }
2964 ucnv_close(conv);
2965
2966 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2967
2968 if (checkOffsets && (expectOffsets != 0))
2969 {
2970 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2971 {
2972 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2973 log_err("Got offsets: ");
2974 for(p=junkout;p<targ;p++)
2975 log_err(" %2d,", junokout[p-junkout]);
2976 log_err("\n");
2977 log_err("Expected offsets: ");
2978 for(i=0; i<(targ-junkout); i++)
2979 log_err(" %2d,", expectOffsets[i]);
2980 log_err("\n");
2981 log_err("Got output: ");
2982 for(i=0; i<(targ-junkout); i++)
2983 log_err("0x%04x,", junkout[i]);
2984 log_err("\n");
2985 log_err("From source: ");
73c04bcf 2986 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
2987 log_err(" 0x%02x,", (unsigned char)source[i]);
2988 log_err("\n");
2989 }
2990 }
2991
2992 if(!memcmp(junkout, expect, expectlen*2))
2993 {
2994 log_verbose("Matches!\n");
2995 return TRUE;
2996 }
2997 else
2998 {
2999 log_err("String does not match. %s\n", gNuConvTestName);
3000 log_verbose("String does not match. %s\n", gNuConvTestName);
3001 log_err("Got: ");
3002 printUSeqErr(junkout, expectlen);
3003 log_err("Expected: ");
3004 printUSeqErr(expect, expectlen);
3005 log_err("\n");
3006 return FALSE;
3007 }
3008}
3009
3010UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3011 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3012 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3013{
3014
3015
3016 UErrorCode status = U_ZERO_ERROR;
3017 UConverter *conv = 0;
73c04bcf 3018 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
3019 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3020 const UChar *src;
73c04bcf
A
3021 char *end;
3022 char *targ;
b75a7d8f
A
3023 int32_t *offs;
3024 int i;
3025 int32_t realBufferSize;
73c04bcf 3026 char *realBufferEnd;
b75a7d8f
A
3027 const UChar *realSourceEnd;
3028 const UChar *sourceLimit;
3029 UBool checkOffsets = TRUE;
3030 UBool doFlush;
3031 char junk[9999];
3032 char offset_str[9999];
73c04bcf 3033 char *p;
b75a7d8f
A
3034 UConverterFromUCallback oldAction = NULL;
3035 const void* oldContext = NULL;
3036
3037
3038 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 3039 junkout[i] = (char)0xF0;
b75a7d8f
A
3040 for(i=0;i<NEW_MAX_BUFFER;i++)
3041 junokout[i] = 0xFF;
3042 setNuConvTestName(codepage, "FROM");
3043
3044 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3045 gOutBufferSize);
3046
3047 conv = ucnv_open(codepage, &status);
3048 if(U_FAILURE(status))
3049 {
3050 log_data_err("Couldn't open converter %s\n",codepage);
3051 return TRUE; /* Because the err has already been logged. */
3052 }
3053
3054 log_verbose("Converter opened..\n");
3055
3056 /*----setting the callback routine----*/
3057 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3058 if (U_FAILURE(status))
3059 {
3060 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3061 }
3062 /*------------------------*/
3063 /*setting the subChar*/
3064 if(mySubChar != NULL){
3065 ucnv_setSubstChars(conv, mySubChar, len, &status);
3066 if (U_FAILURE(status)) {
3067 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3068 }
3069 }
3070 /*------------*/
3071
3072 src = source;
3073 targ = junkout;
3074 offs = junokout;
3075
3076 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3077 realBufferEnd = junkout + realBufferSize;
3078 realSourceEnd = source + sourceLen;
3079
3080 if ( gOutBufferSize != realBufferSize )
3081 checkOffsets = FALSE;
3082
3083 if( gInBufferSize != NEW_MAX_BUFFER )
3084 checkOffsets = FALSE;
3085
3086 do
3087 {
3088 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3089 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3090
3091 doFlush = (UBool)(sourceLimit == realSourceEnd);
3092
3093 if(targ == realBufferEnd)
3094 {
3095 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3096 return FALSE;
3097 }
3098 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3099
3100
3101 status = U_ZERO_ERROR;
3102
3103 ucnv_fromUnicode (conv,
3104 (char **)&targ,
3105 (const char *)end,
3106 &src,
3107 sourceLimit,
3108 checkOffsets ? offs : NULL,
3109 doFlush, /* flush if we're at the end of the input data */
3110 &status);
3111 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3112
3113 /* allow failure codes for the stop callback */
3114 if(U_FAILURE(status) && status != expectedError)
3115 {
3116 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3117 return FALSE;
3118 }
3119
3120 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3121 sourceLen, targ-junkout);
729e4ab9 3122 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
3123 {
3124
3125 junk[0] = 0;
3126 offset_str[0] = 0;
3127 for(p = junkout;p<targ;p++)
3128 {
3129 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3130 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3131 }
3132
3133 log_verbose(junk);
3134 printSeq(expect, expectLen);
3135 if ( checkOffsets )
3136 {
3137 log_verbose("\nOffsets:");
3138 log_verbose(offset_str);
3139 }
3140 log_verbose("\n");
3141 }
3142 ucnv_close(conv);
3143
3144
3145 if(expectLen != targ-junkout)
3146 {
3147 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3148 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 3149 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3150 printSeqErr(expect, expectLen);
3151 return FALSE;
3152 }
3153
3154 if (checkOffsets && (expectOffsets != 0) )
3155 {
3156 log_verbose("comparing %d offsets..\n", targ-junkout);
3157 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3158 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3159 log_err("Got Output : ");
73c04bcf 3160 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3161 log_err("Got Offsets: ");
3162 for(p=junkout;p<targ;p++)
3163 log_err("%d,", junokout[p-junkout]);
3164 log_err("\n");
3165 log_err("Expected Offsets: ");
3166 for(i=0; i<(targ-junkout); i++)
3167 log_err("%d,", expectOffsets[i]);
3168 log_err("\n");
3169 return FALSE;
3170 }
3171 }
3172
3173 if(!memcmp(junkout, expect, expectLen))
3174 {
3175 log_verbose("String matches! %s\n", gNuConvTestName);
3176 return TRUE;
3177 }
3178 else
3179 {
3180 log_err("String does not match. %s\n", gNuConvTestName);
3181 log_err("source: ");
3182 printUSeqErr(source, sourceLen);
3183 log_err("Got: ");
73c04bcf 3184 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
3185 log_err("Expected: ");
3186 printSeqErr(expect, expectLen);
3187 return FALSE;
3188 }
3189}
3190UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3191 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3192 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3193{
3194 UErrorCode status = U_ZERO_ERROR;
3195 UConverter *conv = 0;
3196 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3197 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
3198 const char *src;
3199 const char *realSourceEnd;
3200 const char *srcLimit;
b75a7d8f
A
3201 UChar *targ;
3202 UChar *end;
3203 int32_t *offs;
3204 int i;
3205 UBool checkOffsets = TRUE;
3206 char junk[9999];
3207 char offset_str[9999];
3208 UChar *p;
3209 UConverterToUCallback oldAction = NULL;
3210 const void* oldContext = NULL;
3211
3212 int32_t realBufferSize;
3213 UChar *realBufferEnd;
3214
3215
3216 for(i=0;i<NEW_MAX_BUFFER;i++)
3217 junkout[i] = 0xFFFE;
3218
3219 for(i=0;i<NEW_MAX_BUFFER;i++)
3220 junokout[i] = -1;
3221
3222 setNuConvTestName(codepage, "TO");
3223
3224 log_verbose("\n========= %s\n", gNuConvTestName);
3225
3226 conv = ucnv_open(codepage, &status);
3227 if(U_FAILURE(status))
3228 {
3229 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3230 return TRUE;
3231 }
3232
3233 log_verbose("Converter opened..\n");
3234
73c04bcf 3235 src = (const char *)source;
b75a7d8f
A
3236 targ = junkout;
3237 offs = junokout;
3238
3239 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3240 realBufferEnd = junkout + realBufferSize;
3241 realSourceEnd = src + sourcelen;
3242 /*----setting the callback routine----*/
3243 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3244 if (U_FAILURE(status))
3245 {
3246 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3247 }
3248 /*-------------------------------------*/
3249 /*setting the subChar*/
3250 if(mySubChar != NULL){
3251 ucnv_setSubstChars(conv, mySubChar, len, &status);
3252 if (U_FAILURE(status)) {
3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3254 }
3255 }
3256 /*------------*/
3257
3258
3259 if ( gOutBufferSize != realBufferSize )
3260 checkOffsets = FALSE;
3261
3262 if( gInBufferSize != NEW_MAX_BUFFER )
3263 checkOffsets = FALSE;
3264
3265 do
3266 {
3267 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3268 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3269
3270 if(targ == realBufferEnd)
3271 {
3272 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3273 return FALSE;
3274 }
3275 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3276
3277
3278
3279 status = U_ZERO_ERROR;
3280
3281 ucnv_toUnicode (conv,
3282 &targ,
3283 end,
3284 (const char **)&src,
3285 (const char *)srcLimit,
3286 checkOffsets ? offs : NULL,
3287 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3288 &status);
3289 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3290
3291 /* allow failure codes for the stop callback */
3292 if(U_FAILURE(status) && status!=expectedError)
3293 {
3294 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3295 return FALSE;
3296 }
3297
3298 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3299 sourcelen, targ-junkout);
729e4ab9 3300 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
3301 {
3302
3303 junk[0] = 0;
3304 offset_str[0] = 0;
3305
3306 for(p = junkout;p<targ;p++)
3307 {
3308 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3309 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3310 }
3311
3312 log_verbose(junk);
3313 printUSeq(expect, expectlen);
3314 if ( checkOffsets )
3315 {
3316 log_verbose("\nOffsets:");
3317 log_verbose(offset_str);
3318 }
3319 log_verbose("\n");
3320 }
3321 ucnv_close(conv);
3322
3323 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3324
3325 if (checkOffsets && (expectOffsets != 0))
3326 {
3327 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3328 {
3329 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3330 log_err("Got offsets: ");
3331 for(p=junkout;p<targ;p++)
3332 log_err(" %2d,", junokout[p-junkout]);
3333 log_err("\n");
3334 log_err("Expected offsets: ");
3335 for(i=0; i<(targ-junkout); i++)
3336 log_err(" %2d,", expectOffsets[i]);
3337 log_err("\n");
3338 log_err("Got output: ");
3339 for(i=0; i<(targ-junkout); i++)
3340 log_err("0x%04x,", junkout[i]);
3341 log_err("\n");
3342 log_err("From source: ");
73c04bcf 3343 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
3344 log_err(" 0x%02x,", (unsigned char)source[i]);
3345 log_err("\n");
3346 }
3347 }
3348
3349 if(!memcmp(junkout, expect, expectlen*2))
3350 {
3351 log_verbose("Matches!\n");
3352 return TRUE;
3353 }
3354 else
3355 {
3356 log_err("String does not match. %s\n", gNuConvTestName);
3357 log_verbose("String does not match. %s\n", gNuConvTestName);
3358 log_err("Got: ");
3359 printUSeqErr(junkout, expectlen);
3360 log_err("Expected: ");
3361 printUSeqErr(expect, expectlen);
3362 log_err("\n");
3363 return FALSE;
3364 }
3365}
73c04bcf
A
3366
3367static void TestCallBackFailure(void) {
3368 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3369 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3370 if (status != U_USELESS_COLLATOR_ERROR) {
3371 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3372 }
3373 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3374 if (status != U_USELESS_COLLATOR_ERROR) {
3375 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3376 }
3377 ucnv_cbFromUWriteSub(NULL, -1, &status);
3378 if (status != U_USELESS_COLLATOR_ERROR) {
3379 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3380 }
3381 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3382 if (status != U_USELESS_COLLATOR_ERROR) {
3383 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3384 }
3385}