]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/nccbtst.c
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nccbtst.c
CommitLineData
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2011, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/*
********************************************************************************
* File NCCBTST.C
*
* Modification History:
*        Name                  Description
*    Madhu Katragadda          7/21/1999    Testing error callback routines
********************************************************************************
*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "cstring.h"
20#include "unicode/uloc.h"
21#include "unicode/ucnv.h"
22#include "unicode/ucnv_err.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "nccbtst.h"
73c04bcf 27#include "unicode/ucnv_cb.h"
4388f060
A
28#include "unicode/utf16.h"
29
b75a7d8f
A
/* "Large" chunk size the callback tests pass as input/output buffer size (the small one is 1). */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every argument use is parenthesized so that arguments containing
 * low-precedence operators (e.g. `a & 3`) expand correctly.
 * Each argument may still be evaluated twice: do not pass expressions
 * with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Chunk sizes of the current run; TestSkip() stores its inputsize/outputsize here. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;

/* Human-readable description of the current test, filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
38
/*
 * Writes the first `len` bytes of `a` through log_verbose() as a
 * brace-enclosed list of "0xNN, " items (upper-case hex, two digits),
 * preceded by a newline and followed by a newline.
 * Nothing but the braces is written when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
47
48static void printUSeq(const UChar* a, int len)
49{
50 int i=0;
51 log_verbose("{");
52 while (i<len)
53 log_verbose(" 0x%04x, ", a[i++]);
54 log_verbose("}\n");
55}
56
/*
 * Same listing as printSeq() but written straight to stderr:
 * a brace-enclosed list of " 0xnn, " items (lower-case hex, two digits),
 * followed by a newline.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
65
66static void printUSeqErr(const UChar* a, int len)
67{
68 int i=0;
69 fprintf(stderr, "{");
70 while (i<len)
71 fprintf(stderr, "0x%04x, ", a[i++]);
72 fprintf(stderr,"}\n");
73}
74
75static void setNuConvTestName(const char *codepage, const char *direction)
76{
77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
78 codepage,
79 direction,
374ca955
A
80 (int)gInBufferSize,
81 (int)gOutBufferSize);
b75a7d8f
A
82}
83
84
73c04bcf
A
85static void TestCallBackFailure(void);
86
b75a7d8f
A
87void addTestConvertErrorCallBack(TestNode** root);
88
89void addTestConvertErrorCallBack(TestNode** root)
90{
91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
94 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
73c04bcf
A
95
96#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
97 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
98 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
73c04bcf
A
99#endif
100
101 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
b75a7d8f
A
102}
103
104static void TestSkipCallBack()
105{
106 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
107 TestSkip(1,NEW_MAX_BUFFER);
108 TestSkip(1,1);
109 TestSkip(NEW_MAX_BUFFER, 1);
110}
111
112static void TestStopCallBack()
113{
114 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
115 TestStop(1,NEW_MAX_BUFFER);
116 TestStop(1,1);
117 TestStop(NEW_MAX_BUFFER, 1);
118}
119
120static void TestSubCallBack()
121{
122 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
123 TestSub(1,NEW_MAX_BUFFER);
124 TestSub(1,1);
125 TestSub(NEW_MAX_BUFFER, 1);
73c04bcf
A
126
127#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
128 TestEBCDIC_STATEFUL_Sub(1, 1);
129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
73c04bcf 132#endif
b75a7d8f
A
133}
134
135static void TestSubWithValueCallBack()
136{
137 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
138 TestSubWithValue(1,NEW_MAX_BUFFER);
139 TestSubWithValue(1,1);
140 TestSubWithValue(NEW_MAX_BUFFER, 1);
141}
142
73c04bcf 143#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
144static void TestLegalAndOtherCallBack()
145{
146 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
147 TestLegalAndOthers(1,NEW_MAX_BUFFER);
148 TestLegalAndOthers(1,1);
149 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
150}
151
152static void TestSingleByteCallBack()
153{
154 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
155 TestSingleByte(1,NEW_MAX_BUFFER);
156 TestSingleByte(1,1);
157 TestSingleByte(NEW_MAX_BUFFER, 1);
158}
73c04bcf 159#endif
b75a7d8f
A
160
161static void TestSkip(int32_t inputsize, int32_t outputsize)
162{
163 static const uint8_t expskipIBM_949[]= {
164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
165
166 static const uint8_t expskipIBM_943[] = {
167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
168
169 static const uint8_t expskipIBM_930[] = {
170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
171
172 gInBufferSize = inputsize;
173 gOutBufferSize = outputsize;
174
175 /*From Unicode*/
176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
177
73c04bcf 178#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
179 {
180 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
181 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
182
183 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
184 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
b75a7d8f
A
185
186 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
187 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
188 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
189 log_err("u-> ibm-949 with skip did not match.\n");
190 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
191 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
192 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
193 log_err("u-> ibm-943 with skip did not match.\n");
b75a7d8f
A
194 }
195
196 {
197 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
198 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
199 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
200
201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
202 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
203 fromUBytes, sizeof(fromUBytes),
204 "ibm-930",
205 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
206 NULL, 0)
207 ) {
208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
209 }
210 }
73c04bcf 211#endif
b75a7d8f
A
212
213 {
214 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
215 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
216 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
217
218 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
220 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
221
222 /* US-ASCII */
223 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
224 usasciiFromUBytes, sizeof(usasciiFromUBytes),
225 "US-ASCII",
226 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
227 NULL, 0)
228 ) {
229 log_err("u->US-ASCII with skip did not match.\n");
230 }
231
73c04bcf 232#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
233 /* SBCS NLTC codepage 367 for US-ASCII */
234 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
235 usasciiFromUBytes, sizeof(usasciiFromUBytes),
236 "ibm-367",
237 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
238 NULL, 0)
239 ) {
240 log_err("u->ibm-367 with skip did not match.\n");
241 }
73c04bcf 242#endif
b75a7d8f
A
243
244 /* ISO-Latin-1 */
245 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
246 latin1FromUBytes, sizeof(latin1FromUBytes),
247 "LATIN_1",
248 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
249 NULL, 0)
250 ) {
251 log_err("u->LATIN_1 with skip did not match.\n");
252 }
253
73c04bcf 254#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
255 /* windows-1252 */
256 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
257 latin1FromUBytes, sizeof(latin1FromUBytes),
258 "windows-1252",
259 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
260 NULL, 0)
261 ) {
262 log_err("u->windows-1252 with skip did not match.\n");
263 }
264 }
265
266 {
267 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
268 static const uint8_t toIBM943[]= { 0x61, 0x61 };
269 static const int32_t offset[]= {0, 4};
270
271 /* EUC_JP*/
272 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
273 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
274 0x61, 0x8e, 0xe0,
275 };
276 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
277
278 /*EUC_TW*/
279 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
280 static const uint8_t to_euc_tw[]={
281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
282 0x61, 0xe6, 0xca, 0x8a,
283 };
284 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
285
286 /*ISO-2022-JP*/
287 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
288 static const uint8_t to_iso_2022_jp[]={
289 0x41,
290 0x42,
291
292 };
293 static const int32_t from_iso_2022_jpOffs [] ={0,2};
294
b75a7d8f
A
295 /*ISO-2022-JP*/
296 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
297 static const uint8_t to_iso_2022_jp2[]={
298 0x41,
299 0x43,
300
301 };
302 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
303
304 /*ISO-2022-cn*/
305 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
306 static const uint8_t to_iso_2022_cn[]={
374ca955 307 0x41, 0x42
b75a7d8f
A
308 };
309 static const int32_t from_iso_2022_cnOffs [] ={
374ca955 310 0, 2
b75a7d8f
A
311 };
312
313 /*ISO-2022-CN*/
314 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
315 static const uint8_t to_iso_2022_cn1[]={
374ca955 316 0x41, 0x43
b75a7d8f
A
317
318 };
374ca955 319 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
b75a7d8f
A
320
321 /*ISO-2022-kr*/
322 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
323 static const uint8_t to_iso_2022_kr[]={
324 0x1b, 0x24, 0x29, 0x43,
325 0x41,
326 0x0e, 0x25, 0x50,
327 0x25, 0x50,
328 0x0f, 0x42,
329 };
330 static const int32_t from_iso_2022_krOffs [] ={
331 -1,-1,-1,-1,
332 0,
333 1,1,1,
334 3,3,
335 4,4
336 };
337
338 /*ISO-2022-kr*/
339 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
340 static const uint8_t to_iso_2022_kr1[]={
341 0x1b, 0x24, 0x29, 0x43,
342 0x41,
343 0x0e, 0x25, 0x50,
344 0x25, 0x50,
345
346 };
347 static const int32_t from_iso_2022_krOffs1 [] ={
348 -1,-1,-1,-1,
349 0,
350 1,1,1,
351 3,3,
352
353 };
354 /* HZ encoding */
355 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
356
357 static const uint8_t to_hz[]={
358 0x7e, 0x7d, 0x41,
359 0x7e, 0x7b, 0x26, 0x30,
360 0x26, 0x30,
361 0x7e, 0x7d, 0x42,
362
363 };
364 static const int32_t from_hzOffs [] ={
365 0,0,0,
366 1,1,1,1,
367 3,3,
368 4,4,4,4
369 };
370
371 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
372
373 static const uint8_t to_hz1[]={
374 0x7e, 0x7d, 0x41,
375 0x7e, 0x7b, 0x26, 0x30,
376 0x26, 0x30,
377
378
379 };
380 static const int32_t from_hzOffs1 [] ={
381 0,0,0,
382 1,1,1,1,
383 3,3,
384
385 };
386
73c04bcf 387#endif
b75a7d8f
A
388
389 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
390
391 static const uint8_t to_SCSU[]={
392 0x41,
393 0x42
394
395
396 };
397 static const int32_t from_SCSUOffs [] ={
398 0,
399 2,
400
401 };
73c04bcf
A
402
403#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
404 /* ISCII */
405 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
406 static const uint8_t to_iscii[]={
407 0x41,
408 0x42,
409 };
410 static const int32_t from_isciiOffs [] ={
411 0,2,
412
413 };
414 /*ISCII*/
415 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
416 static const uint8_t to_iscii1[]={
417 0x44,
418 0x43,
419
420 };
421 static const int32_t from_isciiOffs1 [] ={0,2};
422
423 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
424 toIBM943, sizeof(toIBM943), "ibm-943",
425 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
426 log_err("u-> ibm-943 with skip did not match.\n");
427
428 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
429 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
430 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
431 log_err("u-> euc-jp with skip did not match.\n");
432
433 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
434 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
435 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
436 log_err("u-> euc-tw with skip did not match.\n");
437
438 /*iso_2022_jp*/
439 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
440 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
441 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
442 log_err("u-> iso-2022-jp with skip did not match.\n");
443
b75a7d8f
A
444 /* with context */
445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
446 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
449
450 /*iso_2022_cn*/
451 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
452 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
454 log_err("u-> iso-2022-cn with skip did not match.\n");
455 /*with context*/
456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
457 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
458 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
460
461 /*iso_2022_kr*/
462 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
463 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
465 log_err("u-> iso-2022-kr with skip did not match.\n");
466 /*with context*/
467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
468 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
469 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
471
472 /*hz*/
473 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
474 to_hz, sizeof(to_hz), "HZ",
475 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
476 log_err("u-> HZ with skip did not match.\n");
477 /*with context*/
478 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
479 to_hz1, sizeof(to_hz1), "hz",
480 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
73c04bcf
A
481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
482#endif
b75a7d8f
A
483
484 /*SCSU*/
485 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
486 to_SCSU, sizeof(to_SCSU), "SCSU",
487 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
488 log_err("u-> SCSU with skip did not match.\n");
489
73c04bcf 490#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
491 /*ISCII*/
492 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
493 to_iscii, sizeof(to_iscii), "ISCII,version=0",
494 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
495 log_err("u-> iscii with skip did not match.\n");
496 /*with context*/
497 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
498 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
499 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
73c04bcf 501#endif
b75a7d8f
A
502 }
503
504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
505 {
506 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
507 0xFB, 0xEE, 0x28, /* from source offset 0 */
508 0x24, 0x1E, 0x52,
509 0xB2,
510 0x20,
511 0xB3,
512 0xB1,
513 0x0D,
514 0x0A,
515
516 0x20, /* from 8 */
517 0x00,
518 0xD0, 0x6C,
519 0xB6,
520 0xD8, 0xA5,
521 0x20,
522 0x68,
523 0x59,
524
525 0xF9, 0x28, /* from 16 */
526 0x6D,
527 0x20,
528 0x73,
529 0xE0, 0x2D,
530 0xDE, 0x43,
531 0xD0, 0x33,
532 0x20,
533
534 0xFA, 0x83, /* from 24 */
535 0x25, 0x01,
536 0xFB, 0x16, 0x87,
537 0x4B, 0x16,
538 0x20,
539 0xE6, 0xBD,
540 0xEB, 0x5B,
541 0x4B, 0xCC,
542
543 0xF9, 0xA2, /* from 32 */
544 0xFC, 0x10, 0x3E,
545 0xFE, 0x16, 0x3A, 0x8C,
546 0x20,
547 0xFC, 0x03, 0xAC,
548
549 0x01, /* from 41 */
550 0xDE, 0x83,
551 0x20,
552 0x09
553 };
554 static const UChar expected[]={
555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
556 0x0063, 0x0061, 0x000D, 0x000A,
557
558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
559 0x0930, 0x0020, 0x0918, 0x0909,
560
561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
562 0x4000, 0x4E00, 0x7777, 0x0020,
563
564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
565 0x0020, 0xD7A3, 0xDC00, 0xD800,
566
567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
569
570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
571 0x0009
572 };
573 static const int32_t offsets[]={
574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
579 41, 42, 42, 43, 44
580 };
581
582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
583 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
584 sampleText, sizeof(sampleText),
585 "BOCU-1",
586 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
587 ) {
588 log_err("u->BOCU-1 with skip did not match.\n");
589 }
590 }
591
592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
593 {
594 const uint8_t sampleText[]={
595 0x61, /* 'a' */
596 0xc4, 0xb5, /* U+0135 */
597 0xed, 0x80, 0xa0, /* Hangul U+d020 */
598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
599 0xee, 0x80, 0x80, /* PUA U+e000 */
600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
601 0x62, /* 'b' */
602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
603 0xd0, 0x80 /* U+0400 */
604 };
605 UChar expected[]={
606 0x0061,
607 0x0135,
608 0xd020,
609 0xd801, 0xdc01,
610 0xe000,
611 0xdc01,
612 0x0062,
613 0xd801,
614 0x0400
615 };
616 int32_t offsets[]={
617 0,
618 1, 1,
619 2, 2, 2,
620 3, 3, 3, 4, 4, 4,
621 5, 5, 5,
622 6, 6, 6,
623 7,
624 8, 8, 8,
625 9, 9
626 };
627
628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
629
630 /* without offsets */
631 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
632 sampleText, sizeof(sampleText),
633 "CESU-8",
634 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
635 ) {
636 log_err("u->CESU-8 with skip did not match.\n");
637 }
638
639 /* with offsets */
640 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
641 sampleText, sizeof(sampleText),
642 "CESU-8",
643 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
644 ) {
645 log_err("u->CESU-8 with skip did not match.\n");
646 }
647 }
648
649 /*to Unicode*/
650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
651
73c04bcf 652#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
653 {
654
655 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
656 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
657 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
658
659 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
660 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
661 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
662
663 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
664 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
665 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
666 log_err("ibm-949->u with skip did not match.\n");
667 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
668 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
669 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
670 log_err("ibm-943->u with skip did not match.\n");
671
672
673 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
674 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
675 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
676 log_err("ibm-930->u with skip did not match.\n");
677
678
679 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
680 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
682 log_err("ibm-930->u with skip did not match.\n");
683 }
73c04bcf 684#endif
b75a7d8f
A
685
686 {
687 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
688 static const UChar usasciiToU[] = { 0x61, 0x31 };
689 static const int32_t usasciiToUOffsets[] = { 0, 2 };
690
691 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
692 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
693 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
694
695 /* US-ASCII */
696 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
697 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
698 "US-ASCII",
699 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
700 NULL, 0)
701 ) {
702 log_err("US-ASCII->u with skip did not match.\n");
703 }
704
73c04bcf 705#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
706 /* SBCS NLTC codepage 367 for US-ASCII */
707 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
708 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
709 "ibm-367",
710 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
711 NULL, 0)
712 ) {
713 log_err("ibm-367->u with skip did not match.\n");
714 }
73c04bcf 715#endif
b75a7d8f
A
716
717 /* ISO-Latin-1 */
718 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
719 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
720 "LATIN_1",
721 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
722 NULL, 0)
723 ) {
724 log_err("LATIN_1->u with skip did not match.\n");
725 }
726
73c04bcf 727#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
728 /* windows-1252 */
729 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
730 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
731 "windows-1252",
732 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
733 NULL, 0)
734 ) {
735 log_err("windows-1252->u with skip did not match.\n");
736 }
73c04bcf 737#endif
b75a7d8f
A
738 }
739
73c04bcf 740#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
741 {
742 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
744 };
745 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
746 };
747 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
748
749
750 /* euc-jp*/
751 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
752 0x8f, 0xda, 0xa1, /*unassigned*/
753 0x8e, 0xe0,
754 };
755 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
756 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
757
758 /*EUC_TW*/
759 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
761 0xe6, 0xca, 0x8a,
762 };
763 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
764 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
765 /*iso-2022-jp*/
766 static const uint8_t sampleTxt_iso_2022_jp[]={
767 0x41,
768 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
769 0x1b, 0x28, 0x42, 0x42,
770
771 };
772 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
773 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
774
775 /*iso-2022-cn*/
776 static const uint8_t sampleTxt_iso_2022_cn[]={
777 0x0f, 0x41, 0x44,
778 0x1B, 0x24, 0x29, 0x47,
779 0x0E, 0x40, 0x6f, /*unassigned*/
780 0x0f, 0x42,
781
782 };
783
784 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
785 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
786
787 /*iso-2022-kr*/
788 static const uint8_t sampleTxt_iso_2022_kr[]={
789 0x1b, 0x24, 0x29, 0x43,
790 0x41,
791 0x0E, 0x7f, 0x1E,
792 0x0e, 0x25, 0x50,
793 0x0f, 0x51,
794 0x42, 0x43,
795
796 };
797 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
798 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
799
800 /*hz*/
801 static const uint8_t sampleTxt_hz[]={
802 0x41,
803 0x7e, 0x7b, 0x26, 0x30,
804 0x7f, 0x1E, /*unassigned*/
805 0x26, 0x30,
806 0x7e, 0x7d, 0x42,
807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
808 0x7e, 0x7d, 0x42,
809 };
810 static const UChar hztoUnicode[]={
811 0x41,
812 0x03a0,
813 0x03A0,
814 0x42,
815 0x42,};
816
817 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
818
819 /*ISCII*/
820 static const uint8_t sampleTxt_iscii[]={
821 0x41,
822 0xa1,
823 0xEB, /*unassigned*/
824 0x26,
825 0x30,
826 0xa2,
827 0xEC, /*unassigned*/
828 0x42,
829 };
830 static const UChar isciitoUnicode[]={
831 0x41,
832 0x0901,
833 0x26,
834 0x30,
835 0x0902,
836 0x42,
837 };
838
839 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
840
841 /*LMBCS*/
842 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
843 0x12, 0x92, 0xa0, /*unassigned*/
844 0x12, 0x92, 0xA1,
845 };
846 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
847 static const int32_t fromLMBCS[] = {0, 6};
848
849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
850 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
851 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
853
854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
855 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
856 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
858
859 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
860 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
861 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
862 log_err("euc-jp->u with skip did not match.\n");
863
864
865
866 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
867 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
868 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
869 log_err("euc-tw->u with skip did not match.\n");
870
871
872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
873 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
874 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
875 log_err("iso-2022-jp->u with skip did not match.\n");
876
877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
878 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
879 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
880 log_err("iso-2022-cn->u with skip did not match.\n");
881
882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
883 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
884 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
885 log_err("iso-2022-kr->u with skip did not match.\n");
886
887 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
888 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
889 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
890 log_err("HZ->u with skip did not match.\n");
891
892 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
893 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
894 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
895 log_err("iscii->u with skip did not match.\n");
896
897 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
898 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
899 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
900 log_err("LMBCS->u with skip did not match.\n");
901
902 }
73c04bcf
A
903#endif
904
b75a7d8f
A
905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
906 {
907 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
908 0xe0, 0x80, 0x61,};
909 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
910 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
911
912 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
913 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
914 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
915 log_err("utf8->u with skip did not match.\n");;
916 }
917
918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
919 {
920 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
921 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
922 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
923
924 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
925 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
926 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
927 log_err("scsu->u with skip did not match.\n");
928 }
929
930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
931 {
932 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
934 0x24, 0x1E, 0x52, /* 3 */
935 0xB2, /* 6 */
936 0x20, /* 7 */
937 0x40, 0x07, /* 8 - wrong trail byte */
938 0xB3, /* 10 */
939 0xB1, /* 11 */
940 0xD0, 0x20, /* 12 - wrong trail byte */
941 0x0D, /* 14 */
942 0x0A, /* 15 */
943 0x20, /* 16 */
944 0x00, /* 17 */
945 0xD0, 0x6C, /* 18 */
946 0xB6, /* 20 */
947 0xD8, 0xA5, /* 21 */
948 0x20, /* 23 */
949 0x68, /* 24 */
950 0x59, /* 25 */
951 0xF9, 0x28, /* 26 */
952 0x6D, /* 28 */
953 0x20, /* 29 */
954 0x73, /* 30 */
955 0xE0, 0x2D, /* 31 */
956 0xDE, 0x43, /* 33 */
957 0xD0, 0x33, /* 35 */
958 0x20, /* 37 */
959 0xFA, 0x83, /* 38 */
960 0x25, 0x01, /* 40 */
961 0xFB, 0x16, 0x87, /* 42 */
962 0x4B, 0x16, /* 45 */
963 0x20, /* 47 */
964 0xE6, 0xBD, /* 48 */
965 0xEB, 0x5B, /* 50 */
966 0x4B, 0xCC, /* 52 */
967 0xF9, 0xA2, /* 54 */
968 0xFC, 0x10, 0x3E, /* 56 */
969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
970 0x20, /* 63 */
971 0xFC, 0x03, 0xAC, /* 64 */
972 0xFF, /* 67 - FF just resets the state without encoding anything */
973 0x01, /* 68 */
974 0xDE, 0x83, /* 69 */
975 0x20, /* 71 */
976 0x09 /* 72 */
977 };
978 UChar expected[]={
979 0xFEFF, 0x0061, 0x0062, 0x0020,
980 0x0063, 0x0061, 0x000D, 0x000A,
981 0x0020, 0x0000, 0x00DF, 0x00E6,
982 0x0930, 0x0020, 0x0918, 0x0909,
983 0x3086, 0x304D, 0x0020, 0x3053,
984 0x4000, 0x4E00, 0x7777, 0x0020,
985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
986 0x0020, 0xD7A3, 0xDC00, 0xD800,
987 0xD800, 0xDC00, 0xD845, 0xDDDD,
988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
989 0xDFFF, 0x0001, 0x0E40, 0x0020,
990 0x0009
991 };
992 int32_t offsets[]={
993 0, 3, 6, 7, /* skip 8, */
994 10, 11, /* skip 12, */
995 14, 15, 16, 17, 18,
996 20, 21, 23, 24, 25, 26, 28, 29,
997 30, 31, 33, 35, 37, 38,
998 40, 42, 45, 47, 48,
999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1000 63, 64, /* trail */ 64, /* reset only 67, */
1001 68, 69,
1002 71, 72
1003 };
1004
1005 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1006 expected, ARRAY_LENGTH(expected), "BOCU-1",
1007 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1008 ) {
1009 log_err("BOCU-1->u with skip did not match.\n");
1010 }
1011 }
1012
1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1014 {
1015 const uint8_t sampleText[]={
1016 0x61, /* 0 'a' */
1017 0xc0, 0x80, /* 1 non-shortest form */
1018 0xc4, 0xb5, /* 3 U+0135 */
1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1024 0x62, /* 24 'b' */
1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1026 0xed, 0xa0, /* 28 incomplete sequence */
1027 0xd0, 0x80 /* 30 U+0400 */
1028 };
1029 UChar expected[]={
1030 0x0061,
1031 /* skip */
1032 0x0135,
1033 0xd020,
1034 0xd801, 0xdc01,
1035 0xe000,
1036 0xdc01,
1037 /* skip */
1038 0x0062,
1039 0xd801,
1040 0x0400
1041 };
1042 int32_t offsets[]={
1043 0,
1044 /* skip 1, */
1045 3,
1046 5,
1047 8, 11,
1048 14,
1049 17,
1050 /* skip 20, 20, */
1051 24,
1052 25,
1053 /* skip 28 */
1054 30
1055 };
1056
1057 /* without offsets */
1058 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1059 expected, ARRAY_LENGTH(expected), "CESU-8",
1060 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1061 ) {
1062 log_err("CESU-8->u with skip did not match.\n");
1063 }
1064
1065 /* with offsets */
1066 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1067 expected, ARRAY_LENGTH(expected), "CESU-8",
1068 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1069 ) {
1070 log_err("CESU-8->u with skip did not match.\n");
1071 }
1072 }
1073}
1074
1075static void TestStop(int32_t inputsize, int32_t outputsize)
1076{
1077 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1078 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1079
1080 static const uint8_t expstopIBM_949[]= {
1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1082
1083 static const uint8_t expstopIBM_943[] = {
1084 0x9f, 0xaf, 0x9f, 0xb1};
1085
1086 static const uint8_t expstopIBM_930[] = {
1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1088
1089 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1090 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1091 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1092
1093
1094 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1095 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1096 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1097
1098 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1099 static const int32_t fromIBM943Offs [] = { 0, 2};
1100 static const int32_t fromIBM930Offs [] = { 1, 3};
1101
1102 gInBufferSize = inputsize;
1103 gOutBufferSize = outputsize;
73c04bcf 1104
b75a7d8f 1105 /*From Unicode*/
73c04bcf
A
1106
1107#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1108 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1109 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1110 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1111 log_err("u-> ibm-949 with stop did not match.\n");
1112 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1113 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1114 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1115 log_err("u-> ibm-943 with stop did not match.\n");
1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1117 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1118 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1119 log_err("u-> ibm-930 with stop did not match.\n");
1120
1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1122 {
1123 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1124 static const uint8_t toIBM943[]= { 0x61,};
1125 static const int32_t offset[]= {0,} ;
1126
1127 /*EUC_JP*/
1128 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1129 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1130 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1131
1132 /*EUC_TW*/
1133 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1134 static const uint8_t to_euc_tw[]={
1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1136 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1137
1138 /*ISO-2022-JP*/
1139 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1140 static const uint8_t to_iso_2022_jp[]={
1141 0x41,
1142
1143 };
1144 static const int32_t from_iso_2022_jpOffs [] ={0,};
1145
1146 /*ISO-2022-cn*/
1147 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1148 static const uint8_t to_iso_2022_cn[]={
374ca955 1149 0x41,
b75a7d8f
A
1150
1151 };
1152 static const int32_t from_iso_2022_cnOffs [] ={
1153 0,0,
1154 2,2,
1155 };
1156
1157 /*ISO-2022-kr*/
1158 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1159 static const uint8_t to_iso_2022_kr[]={
1160 0x1b, 0x24, 0x29, 0x43,
1161 0x41,
1162 0x0e, 0x25, 0x50,
1163 };
1164 static const int32_t from_iso_2022_krOffs [] ={
1165 -1,-1,-1,-1,
1166 0,
1167 1,1,1,
1168 };
1169
1170 /* HZ encoding */
1171 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1172
1173 static const uint8_t to_hz[]={
1174 0x7e, 0x7d, 0x41,
1175 0x7e, 0x7b, 0x26, 0x30,
1176
1177 };
1178 static const int32_t from_hzOffs [] ={
1179 0, 0,0,
1180 1,1,1,1,
1181 };
1182
1183 /*ISCII*/
1184 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1185 static const uint8_t to_iscii[]={
1186 0x41,
1187 };
1188 static const int32_t from_isciiOffs [] ={
1189 0,
1190 };
1191
1192 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1193 toIBM943, sizeof(toIBM943), "ibm-943",
1194 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1195 log_err("u-> ibm-943 with stop did not match.\n");
1196
1197 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1198 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1199 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1200 log_err("u-> euc-jp with stop did not match.\n");
1201
1202 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1203 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1204 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1205 log_err("u-> euc-tw with stop did not match.\n");
1206
1207 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1208 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1209 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1210 log_err("u-> iso-2022-jp with stop did not match.\n");
1211
1212 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1213 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1214 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1215 log_err("u-> iso-2022-jp with stop did not match.\n");
1216
1217 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1218 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1219 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1220 log_err("u-> iso-2022-cn with stop did not match.\n");
1221
1222 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1223 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1224 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1225 log_err("u-> iso-2022-kr with stop did not match.\n");
1226
1227 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1228 to_hz, sizeof(to_hz), "HZ",
1229 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1230 log_err("u-> HZ with stop did not match.\n");\
1231
1232 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1233 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1234 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1235 log_err("u-> iscii with stop did not match.\n");
1236
1237
1238 }
73c04bcf
A
1239#endif
1240
b75a7d8f
A
1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1242 {
1243 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1244
1245 static const uint8_t to_SCSU[]={
1246 0x41,
1247
1248 };
1249 int32_t from_SCSUOffs [] ={
1250 0,
1251
1252 };
1253 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1254 to_SCSU, sizeof(to_SCSU), "SCSU",
1255 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1256 log_err("u-> SCSU with skip did not match.\n");
1257
1258 }
73c04bcf 1259
b75a7d8f 1260 /*to Unicode*/
73c04bcf
A
1261
1262#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1263 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1264 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1265 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1266 log_err("ibm-949->u with stop did not match.\n");
1267 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1268 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1269 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1270 log_err("ibm-943->u with stop did not match.\n");
1271 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1272 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1273 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1274 log_err("ibm-930->u with stop did not match.\n");
1275
1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1277 {
1278
1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1281 };
1282 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1283 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1284
1285
1286 /*EUC-JP*/
1287 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1288 0x8f, 0xda, 0xa1, /*unassigned*/
1289 0x8e, 0xe0,
1290 };
1291 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1292 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1293
1294 /*EUC_TW*/
1295 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1297 0xe6, 0xca, 0x8a,
1298 };
1299 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1300 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1301
1302
1303
1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1305 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1306 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1308
1309 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1310 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1311 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1312 log_err("euc-jp->u with stop did not match.\n");
1313
1314 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1315 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1316 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1317 log_err("euc-tw->u with stop did not match.\n");
1318 }
73c04bcf
A
1319#endif
1320
b75a7d8f
A
1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1322 {
1323 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1324 0xe0, 0x80, 0x61,};
1325 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1326 static const int32_t offsets1[] = { 0x0000, 0x0001};
1327
1328 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1329 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1330 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1331 log_err("utf8->u with stop did not match.\n");;
1332 }
1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1334 {
1335 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1336 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1337 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1338
1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1342 log_err("scsu->u with stop did not match.\n");;
1343 }
1344
1345}
1346
1347static void TestSub(int32_t inputsize, int32_t outputsize)
1348{
1349 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1350 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1351
1352 static const uint8_t expsubIBM_949[] =
1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1354
1355 static const uint8_t expsubIBM_943[] = {
1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1357
1358 static const uint8_t expsubIBM_930[] = {
1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1360
1361 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1362 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1363 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1364
1365 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1366 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1367 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1368
1369 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1370 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1371 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1372
1373 gInBufferSize = inputsize;
1374 gOutBufferSize = outputsize;
1375
1376 /*from unicode*/
73c04bcf
A
1377
1378#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1379 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1380 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1382 log_err("u-> ibm-949 with subst did not match.\n");
1383 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1384 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1386 log_err("u-> ibm-943 with subst did not match.\n");
1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1388 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1390 log_err("u-> ibm-930 with subst did not match.\n");
1391
1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1393 {
1394 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1395 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1396 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1397
1398
1399 /* EUC_JP*/
1400 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1401 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1402 0xf4, 0xfe, 0xf4, 0xfe,
1403 0x61, 0x8e, 0xe0,
1404 };
1405 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1406
1407 /*EUC_TW*/
1408 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1409 static const uint8_t to_euc_tw[]={
1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1411 0xfd, 0xfe, 0xfd, 0xfe,
1412 0x61, 0xe6, 0xca, 0x8a,
1413 };
1414
b75a7d8f
A
1415 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1416
1417 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1418 toIBM943, sizeof(toIBM943), "ibm-943",
1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1420 log_err("u-> ibm-943 with substitute did not match.\n");
1421
1422 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1423 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1425 log_err("u-> euc-jp with substitute did not match.\n");
1426
1427 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1428 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1430 log_err("u-> euc-tw with substitute did not match.\n");
b75a7d8f 1431 }
73c04bcf 1432#endif
b75a7d8f
A
1433
1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1435 {
1436 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1437
1438 const uint8_t to_SCSU[]={
1439 0x41,
1440 0x0e, 0xff,0xfd,
1441 0x42
1442
1443
1444 };
1445 int32_t from_SCSUOffs [] ={
1446 0,
1447 1,1,1,
1448 2,
1449
1450 };
1451 const uint8_t to_SCSU_1[]={
1452 0x41,
1453
1454 };
1455 int32_t from_SCSUOffs_1 [] ={
1456 0,
1457
1458 };
1459 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1460 to_SCSU, sizeof(to_SCSU), "SCSU",
1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1462 log_err("u-> SCSU with substitute did not match.\n");
1463
1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1465 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1467 log_err("u-> SCSU with substitute did not match.\n");
1468 }
1469
1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1471 {
1472 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1473 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1474 0xf0, 0x90, 0x90, 0x81,
1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1476 0xef, 0xbf, 0xbf, 0x61,
1477
1478 };
1479 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1480 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1481 expectedUTF8, sizeof(expectedUTF8), "utf8",
1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1483 log_err("u-> utf8 with stop did not match.\n");
1484 }
1485 }
1486
1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1488 {
1489 static const UChar in[]={ 0x0041, 0xfeff };
1490
1491 static const uint8_t out[]={
1492#if U_IS_BIG_ENDIAN
1493 0xfe, 0xff,
1494 0x00, 0x41,
1495 0xfe, 0xff
1496#else
1497 0xff, 0xfe,
1498 0x41, 0x00,
1499 0xff, 0xfe
1500#endif
1501 };
1502 static const int32_t offsets[]={
1503 -1, -1, 0, 0, 1, 1
1504 };
1505
1506 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1507 out, sizeof(out), "UTF-16",
1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1509 ) {
1510 log_err("u->UTF-16 with substitute did not match.\n");
1511 }
1512 }
1513
1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1515 {
1516 static const UChar in[]={ 0x0041, 0xfeff };
1517
1518 static const uint8_t out[]={
1519#if U_IS_BIG_ENDIAN
1520 0x00, 0x00, 0xfe, 0xff,
1521 0x00, 0x00, 0x00, 0x41,
1522 0x00, 0x00, 0xfe, 0xff
1523#else
1524 0xff, 0xfe, 0x00, 0x00,
1525 0x41, 0x00, 0x00, 0x00,
1526 0xff, 0xfe, 0x00, 0x00
1527#endif
1528 };
1529 static const int32_t offsets[]={
1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1531 };
1532
1533 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1534 out, sizeof(out), "UTF-32",
1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1536 ) {
1537 log_err("u->UTF-32 with substitute did not match.\n");
1538 }
1539 }
1540
1541 /*to unicode*/
73c04bcf
A
1542
1543#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1544 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1545 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1547 log_err("ibm-949->u with substitute did not match.\n");
1548 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1549 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1551 log_err("ibm-943->u with substitute did not match.\n");
1552 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1553 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1555 log_err("ibm-930->u with substitute did not match.\n");
1556
1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1558 {
1559
1560 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1562 };
1563 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1564 };
1565 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1566
1567
1568 /* EUC_JP*/
1569 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1570 0x8f, 0xda, 0xa1, /*unassigned*/
1571 0x8e, 0xe0, 0x8a
1572 };
1573 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1574 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1575
1576 /*EUC_TW*/
1577 const uint8_t sampleTxt_euc_tw[]={
1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1580 0xe6, 0xca, 0x8a,
1581 };
1582 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1583 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1584
1585
1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1587 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1588 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1590
1591
1592 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1593 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1595 log_err("euc-jp->u with substitute did not match.\n");
1596
1597
1598 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1599 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1601 log_err("euc-tw->u with substitute did not match.\n");
1602
1603
1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1605 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1607 log_err("euc-jp->u with substitute did not match.\n");
b75a7d8f 1608 }
73c04bcf
A
1609#endif
1610
b75a7d8f
A
1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1612 {
1613 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1614 0xe0, 0x80, 0x61,};
1615 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1616 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1617
1618 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1619 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1620 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1621 log_err("utf8->u with substitute did not match.\n");;
1622 }
1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1624 {
1625 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1626 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1627 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1628
1629 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1630 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1631 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1632 log_err("scsu->u with stop did not match.\n");;
1633 }
1634
73c04bcf 1635#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1636 log_verbose("Testing ibm-930 subchar/subchar1\n");
1637 {
1638 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1639 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1640 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1641
1642 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1643 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1644 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1645
1646 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1648 ) {
1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1650 }
1651
1652 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1653 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1654 ) {
1655 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1656 }
1657 }
1658
1659 log_verbose("Testing GB 18030 with substitute callbacks\n");
1660 {
b75a7d8f
A
1661 static const UChar u2[]={
1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1663 static const uint8_t gb2[]={
1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1665 static const int32_t offsets2[]={
1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1667
b75a7d8f
A
1668 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1670 ) {
1671 log_err("gb18030->u with substitute did not match.\n");
1672 }
1673 }
73c04bcf 1674#endif
b75a7d8f
A
1675
1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1677 {
1678 static const uint8_t utf7[]={
729e4ab9
A
1679 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
b75a7d8f
A
1681 };
1682 static const UChar unicode[]={
729e4ab9 1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
b75a7d8f
A
1684 };
1685 static const int32_t offsets[]={
729e4ab9 1686 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
b75a7d8f
A
1687 };
1688
1689 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1690 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1691 ) {
1692 log_err("UTF-7->u with substitute did not match.\n");
1693 }
1694 }
1695
b75a7d8f
A
1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1697 {
1698 static const uint8_t
1699 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1700 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1701 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1702
1703 static const UChar
1704 out1[]={ 0x4e00, 0xfeff },
1705 out2[]={ 0x004e, 0xfffe },
1706 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1707
1708 static const int32_t
1709 offsets1[]={ 2, 4 },
1710 offsets2[]={ 2, 4 },
1711 offsets3[]={ 0, 2, 4 };
1712
1713 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1714 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1715 ) {
1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1717 }
1718
1719 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1721 ) {
1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1723 }
1724
1725 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1727 ) {
1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1729 }
1730 }
1731
1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1733 {
1734 static const uint8_t
1735 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1736 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1737 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1738 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1739
1740 static const UChar
4388f060
A
1741 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1742 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1743 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1744 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
b75a7d8f
A
1745
1746 static const int32_t
1747 offsets1[]={ 4, 4, 8 },
1748 offsets2[]={ 4, 4, 8 },
1749 offsets3[]={ 0, 4, 4, 8, 12 },
1750 offsets4[]={ 0, 0, 4, 8 };
1751
1752 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1753 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1754 ) {
1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1756 }
1757
1758 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1760 ) {
1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1762 }
1763
1764 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1766 ) {
1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1768 }
1769
1770 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1772 ) {
1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1774 }
1775 }
1776}
1777
1778static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1779{
1780 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1781 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1782
1783 const uint8_t expsubwvalIBM_949[]= {
1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1786
1787 const uint8_t expsubwvalIBM_943[]= {
1788 0x9f, 0xaf, 0x9f, 0xb1,
1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1790
1791 const uint8_t expsubwvalIBM_930[] = {
1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1793
1794 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1795 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1796 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1797
1798 gInBufferSize = inputsize;
1799 gOutBufferSize = outputsize;
1800
1801 /*from Unicode*/
73c04bcf
A
1802
1803#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1804 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1805 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1806 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1807 log_err("u-> ibm-949 with subst with value did not match.\n");
1808
1809 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1810 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1811 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1812 log_err("u-> ibm-943 with sub with value did not match.\n");
1813
1814 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1815 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1816 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1817 log_err("u-> ibm-930 with subst with value did not match.\n");
1818
1819
1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1821 {
1822 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1823 static const uint8_t toIBM943[]= { 0x61,
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1827 0x61 };
1828 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1829
1830
1831 /* EUC_JP*/
1832 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1833 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1837 0x61, 0x8e, 0xe0,
1838 };
1839 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1840 3, 3, 3, 3, 3, 3,
1841 3, 3, 3, 3, 3, 3,
1842 5, 5, 5, 5, 5, 5,
1843 6, 7, 7,
1844 };
1845
1846 /*EUC_TW*/
1847 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1848 static const uint8_t to_euc_tw[]={
1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x61, 0xe6, 0xca, 0x8a,
1854 };
1855 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1857 6, 7, 7, 8,
1858 };
1859 /*ISO-2022-JP*/
b75a7d8f
A
1860 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1861 static const uint8_t to_iso_2022_jp1[]={
1862 0x1b, 0x24, 0x42, 0x21, 0x21,
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1864 0x1b, 0x24, 0x42, 0x21, 0x22,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1866 0x42,
1867 };
1868
1869 static const int32_t from_iso_2022_jpOffs1 [] ={
1870 0,0,0,0,0,
1871 1,1,1,1,1,1,1,1,1,
1872 2,2,2,2,2,
1873 3,3,3,3,3,3,3,3,3,
1874 4,
1875 };
1876 /* surrogate pair*/
1877 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1878 static const uint8_t to_iso_2022_jp2[]={
1879 0x1b, 0x24, 0x42, 0x21, 0x21,
1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1882 0x1b, 0x24, 0x42, 0x21, 0x22,
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1885 0x42,
1886 };
1887 static const int32_t from_iso_2022_jpOffs2 [] ={
1888 0,0,0,0,0,
1889 1,1,1,1,1,1,1,1,1,
1890 1,1,1,1,1,1,
1891 3,3,3,3,3,
1892 4,4,4,4,4,4,4,4,4,
1893 4,4,4,4,4,4,
1894 6,
1895 };
1896
1897 /*ISO-2022-cn*/
1898 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1899 static const uint8_t to_iso_2022_cn[]={
374ca955
A
1900 0x41,
1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
b75a7d8f
A
1902 0x42,
1903 };
1904 static const int32_t from_iso_2022_cnOffs [] ={
374ca955
A
1905 0,
1906 1,1,1,1,1,1,
b75a7d8f
A
1907 2,
1908 };
b75a7d8f
A
1909
1910 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1911
1912 static const uint8_t to_iso_2022_cn4[]={
1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 1916 0x0e, 0x21, 0x22,
b75a7d8f
A
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1919 0x42,
1920 };
1921 static const int32_t from_iso_2022_cnOffs4 [] ={
1922 0,0,0,0,0,0,0,
1923 1,1,1,1,1,1,1,
1924 1,1,1,1,1,1,
374ca955 1925 3,3,3,
b75a7d8f
A
1926 4,4,4,4,4,4,4,
1927 4,4,4,4,4,4,
1928 6
1929
1930 };
1931
1932 /*ISO-2022-kr*/
1933 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1934 static const uint8_t to_iso_2022_kr2[]={
1935 0x1b, 0x24, 0x29, 0x43,
1936 0x41,
1937 0x0e, 0x25, 0x50,
1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1940 0x0e, 0x25, 0x50,
1941 0x0f, 0x42,
1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1944 0x43
1945 };
1946 static const int32_t from_iso_2022_krOffs2 [] ={
1947 -1,-1,-1,-1,
1948 0,
1949 1,1,1,
1950 2,2,2,2,2,2,2,
1951 2,2,2,2,2,2,
1952 4,4,4,
1953 5,5,
1954 6,6,6,6,6,6,
1955 6,6,6,6,6,6,
1956 8,
1957 };
1958
1959 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1960 static const uint8_t to_iso_2022_kr[]={
1961 0x1b, 0x24, 0x29, 0x43,
1962 0x41,
1963 0x0e, 0x25, 0x50,
1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1965 0x0e, 0x25, 0x50,
1966 0x0f, 0x42,
1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1968 0x43
1969 };
1970
1971
1972 static const int32_t from_iso_2022_krOffs [] ={
1973 -1,-1,-1,-1,
1974 0,
1975 1,1,1,
1976 2,2,2,2,2,2,2,
1977 3,3,3,
1978 4,4,
1979 5,5,5,5,5,5,
1980 6,
1981 };
1982 /* HZ encoding */
1983 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1984
1985 static const uint8_t to_hz[]={
1986 0x7e, 0x7d, 0x41,
1987 0x7e, 0x7b, 0x26, 0x30,
1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1989 0x7e, 0x7b, 0x26, 0x30,
1990 0x7e, 0x7d, 0x42,
1991
1992 };
1993 static const int32_t from_hzOffs [] ={
1994 0,0,0,
1995 1,1,1,1,
1996 2,2,2,2,2,2,2,2,
1997 3,3,3,3,
1998 4,4,4
1999 };
2000
2001 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2002 static const uint8_t to_hz2[]={
2003 0x7e, 0x7d, 0x41,
2004 0x7e, 0x7b, 0x26, 0x30,
2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2007 0x7e, 0x7b, 0x26, 0x30,
2008 0x7e, 0x7d, 0x42,
2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2011 0x43
2012 };
2013 static const int32_t from_hzOffs2 [] ={
2014 0,0,0,
2015 1,1,1,1,
2016 2,2,2,2,2,2,2,2,
2017 2,2,2,2,2,2,
2018 4,4,4,4,
2019 5,5,5,
2020 6,6,6,6,6,6,
2021 6,6,6,6,6,6,
2022 8,
2023 };
2024
2025 /*ISCII*/
b75a7d8f
A
2026 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2027 static const uint8_t to_iscii[]={
2028 0x41,
2029 0xef, 0x42, 0xa1,
2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2031 0xa2,
2032 0x42,
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2034 0x43
2035 };
2036
2037
2038 static const int32_t from_isciiOffs [] ={
2039 0,
2040 1,1,1,
2041 2,2,2,2,2,2,
2042 3,
2043 4,
2044 5,5,5,5,5,5,
2045 6,
2046 };
2047
2048 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2049 toIBM943, sizeof(toIBM943), "ibm-943",
2050 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2051 log_err("u-> ibm-943 with subst with value did not match.\n");
2052
2053 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2054 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
2055 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2056 log_err("u-> euc-jp with subst with value did not match.\n");
2057
2058 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2059 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2060 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2061 log_err("u-> euc-tw with subst with value did not match.\n");
2062
b75a7d8f
A
2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2064 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2065 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2066 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2067
2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2069 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2070 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2071 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2072
2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2074 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2075 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2076 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2077 /*ESCAPE OPTIONS*/
2078 {
2079 /* surrogate pair*/
2080 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2081 static const uint8_t to_iso_2022_jp3_v2[]={
2082 0x1b, 0x24, 0x42, 0x21, 0x21,
2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2084
2085 0x1b, 0x24, 0x42, 0x21, 0x22,
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2087
2088 0x42,
2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2090 };
2091
2092 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2093 0,0,0,0,0,
2094 1,1,1,1,1,1,1,1,1,1,1,1,
2095
2096 3,3,3,3,3,
2097 4,4,4,4,4,4,4,4,4,4,4,4,
2098
2099 6,
2100 7,7,7,7,7,7,7,7,7
2101 };
2102
2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2104 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2107 }
b75a7d8f
A
2108 {
2109 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2110 static const uint8_t to_iso_2022_cn5_v2[]={
2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
374ca955 2114 0x0e, 0x21, 0x22,
b75a7d8f
A
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2117 0x42,
374ca955 2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2119 };
2120 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2121 0,0,0,0,0,0,0,
2122 1,1,1,1,1,1,1,
2123 1,1,1,1,1,1,
374ca955 2124 3,3,3,
b75a7d8f
A
2125 4,4,4,4,4,4,4,
2126 4,4,4,4,4,4,
2127 6,
374ca955 2128 7,7,7,7,7,7
b75a7d8f
A
2129 };
2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2131 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2132 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2134
2135 }
2136 {
2137 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2138 static const uint8_t to_iso_2022_cn6_v2[]={
2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
374ca955 2141 0x0e, 0x21, 0x22,
b75a7d8f
A
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2143 0x42,
374ca955 2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
b75a7d8f
A
2145 };
2146 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2147 0, 0, 0, 0, 0, 0, 0,
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2149 3, 3, 3,
b75a7d8f
A
2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2151 6,
374ca955 2152 7, 7, 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2153 };
2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2155 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2156 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2158
2159 }
2160 {
2161 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2162 static const uint8_t to_iso_2022_cn7_v2[]={
2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2165 0x0e, 0x21, 0x22,
b75a7d8f 2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2168 };
2169 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2170 0, 0, 0, 0, 0, 0, 0,
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2172 3, 3, 3,
b75a7d8f
A
2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2174 6,
374ca955 2175 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2176 };
2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2178 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2179 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2180 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2181
46f4442e
A
2182 }
2183 {
2184 static const UChar iso_2022_cn_inputText8[]={
2185 0x3000,
2186 0xD84D, 0xDC56,
2187 0x3001,
2188 0xD84D, 0xDC56,
2189 0xDBFF, 0xDFFF,
2190 0x0042,
2191 0x0902};
2192 static const uint8_t to_iso_2022_cn8_v2[]={
2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2195 0x0e, 0x21, 0x22,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2198 0x42,
2199 0x5c, 0x39, 0x30, 0x32, 0x20
2200 };
2201 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2202 0, 0, 0, 0, 0, 0, 0,
2203 1, 1, 1, 1, 1, 1, 1, 1,
2204 3, 3, 3,
2205 4, 4, 4, 4, 4, 4, 4, 4,
2206 6, 6, 6, 6, 6, 6, 6, 6,
2207 8,
2208 9, 9, 9, 9, 9
2209 };
2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2211 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2212 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2214
b75a7d8f
A
2215 }
2216 {
2217 static const uint8_t to_iso_2022_cn4_v3[]={
2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
374ca955 2220 0x0e, 0x21, 0x22,
b75a7d8f
A
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2222 0x42
2223 };
2224
2225
2226 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2227 0,0,0,0,0,0,0,
2228 1,1,1,1,1,1,1,1,1,1,1,
2229
374ca955 2230 3,3,3,
b75a7d8f
A
2231 4,4,4,4,4,4,4,4,4,4,4,
2232
2233 6
2234
2235 };
2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2237 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2238 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2239 {
2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2241 }
2242 }
2243 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2244 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2246 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2247
b75a7d8f
A
2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2249 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2250 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2251 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2252 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2253 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2255 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2257 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2260 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2261 to_hz, sizeof(to_hz), "HZ",
2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2263 log_err("u-> hz with subst with value did not match.\n");
2264 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2265 to_hz2, sizeof(to_hz2), "HZ",
2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2267 log_err("u-> hz with subst with value did not match.\n");
2268
2269 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2270 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2271 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2272 log_err("u-> iscii with subst with value did not match.\n");
b75a7d8f 2273 }
73c04bcf 2274#endif
b75a7d8f
A
2275
2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2277 /*to Unicode*/
2278 {
73c04bcf 2279#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2280 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2281 0x81, 0xad, /*unassigned*/
2282 0x89, 0xd3 };
2283 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2285 0x7B87};
2286 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2287
2288 /* EUC_JP*/
2289 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2290 0x8f, 0xda, 0xa1, /*unassigned*/
2291 0x8e, 0xe0,
2292 };
2293 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2295 0x00a2 };
2296 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2298 9,
2299 };
2300
2301 /*EUC_TW*/
2302 static const uint8_t sampleTxt_euc_tw[]={
2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2305 0xe6, 0xca, 0x8a,
2306 };
2307 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2309 0x8706, 0x8a, };
2310 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2312 11, 13};
2313
2314 /*iso-2022-jp*/
2315 static const uint8_t sampleTxt_iso_2022_jp[]={
2316 0x1b, 0x28, 0x42, 0x41,
2317 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
2318 0x1b, 0x28, 0x42, 0x42,
2319
2320 };
2321 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2322 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2323
2324 /*iso-2022-cn*/
2325 static const uint8_t sampleTxt_iso_2022_cn[]={
2326 0x0f, 0x41, 0x44,
2327 0x1B, 0x24, 0x29, 0x47,
2328 0x0E, 0x40, 0x6c, /*unassigned*/
2329 0x0f, 0x42,
2330
2331 };
2332 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2333 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2334
2335 /*iso-2022-kr*/
2336 static const uint8_t sampleTxt_iso_2022_kr[]={
2337 0x1b, 0x24, 0x29, 0x43,
2338 0x41,
2339 0x0E, 0x7f, 0x1E,
2340 0x0e, 0x25, 0x50,
2341 0x0f, 0x51,
2342 0x42, 0x43,
2343
2344 };
2345 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2346 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2347
2348 /*hz*/
2349 static const uint8_t sampleTxt_hz[]={
2350 0x41,
2351 0x7e, 0x7b, 0x26, 0x30,
2352 0x7f, 0x1E, /*unassigned*/
2353 0x26, 0x30,
2354 0x7e, 0x7d, 0x42,
2355 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2356 0x7e, 0x7d, 0x42,
2357 };
2358 static const UChar hztoUnicode[]={
2359 0x41,
2360 0x03a0,
2361 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2362 0x03A0,
2363 0x42,
2364 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2365 0x42,};
2366
2367 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2368
2369
2370 /*iscii*/
2371 static const uint8_t sampleTxt_iscii[]={
2372 0x41,
2373 0x30,
2374 0xEB, /*unassigned*/
2375 0xa3,
2376 0x42,
2377 0xEC, /*unassigned*/
2378 0x42,
2379 };
2380 static const UChar isciitoUnicode[]={
2381 0x41,
2382 0x30,
2383 0x25, 0x58, 0x45, 0x42,
2384 0x0903,
2385 0x42,
2386 0x25, 0x58, 0x45, 0x43,
2387 0x42,};
2388
2389 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
73c04bcf 2390#endif
b75a7d8f 2391
b75a7d8f
A
2392 /*UTF8*/
2393 static const uint8_t sampleTxtUTF8[]={
2394 0x20, 0x64, 0x50,
2395 0xC2, 0x7E, /* truncated char */
2396 0x20,
2397 0xE0, 0xB5, 0x7E, /* truncated char */
2398 0x40,
2399 };
2400 static const UChar UTF8ToUnicode[]={
2401 0x0020, 0x0064, 0x0050,
2402 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2403 0x0020,
2404 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2405 0x0040
2406 };
2407 static const int32_t fromUTF8[] = {
2408 0, 1, 2,
2409 3, 3, 3, 3, 4,
2410 5,
2411 6, 6, 6, 6, 6, 6, 6, 6, 8,
2412 9
2413 };
2414 static const UChar UTF8ToUnicodeXML_DEC[]={
2415 0x0020, 0x0064, 0x0050,
2416 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* &#194;~ */
2417 0x0020,
2418 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2419 0x0040
2420 };
2421 static const int32_t fromUTF8XML_DEC[] = {
2422 0, 1, 2,
2423 3, 3, 3, 3, 3, 3, 4,
2424 5,
2425 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2426 9
2427 };
2428
73c04bcf
A
2429
2430#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2431 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2432 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2433 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2434 log_err("ibm-943->u with substitute with value did not match.\n");
2435
2436 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2437 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
2438 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2439 log_err("euc-jp->u with substitute with value did not match.\n");
2440
2441 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2442 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2443 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2444 log_err("euc-tw->u with substitute with value did not match.\n");
2445
2446 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2447 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2448 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2449 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2450
2451 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2452 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2453 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2454 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2455
2456 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2457 {
2458 static const UChar iso_2022_jptoUnicodeDec[]={
2459 0x0041,
2460 0x0026, 0x0023, 0x0034, 0x0032, 0x003b,
2461 0x0026, 0x0023, 0x0036, 0x0038, 0x003b,
2462 0x0042 };
2463 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2464 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2465 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2466 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2467 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2468 }
2469 {
2470 static const UChar iso_2022_jptoUnicodeHex[]={
2471 0x0041,
2472 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2473 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2474 0x0042 };
2475 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2476 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2477 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2478 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2479 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2480 }
2481 {
2482 static const UChar iso_2022_jptoUnicodeC[]={
2483 0x0041,
2484 0x005C, 0x0078, 0x0032, 0x0041,
2485 0x005C, 0x0078, 0x0034, 0x0034,
2486 0x0042 };
2487 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2488 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2489 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2490 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2491 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2492 }
2493 }
2494 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2495 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2496 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2497 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2498
2499 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2500 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2501 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2502 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2503
2504 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2505 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2506 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2507 log_err("hz->u with substitute with value did not match.\n");
2508
2509 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2510 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2511 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2512 log_err("ISCII ->u with substitute with value did not match.\n");
73c04bcf
A
2513#endif
2514
b75a7d8f
A
2515 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2516 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2517 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2518 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2519 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2520 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2521 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2522 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2523 }
2524}
2525
73c04bcf 2526#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2527static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2528{
2529 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2530 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2531 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2532
2533
2534 static const uint8_t text943[] = {
fd0068a8
A
2535 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2536 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2537 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
b75a7d8f
A
2538 static const UChar toUnicode943stop[]= { 0x304b};
2539
fd0068a8
A
2540 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2541 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
b75a7d8f
A
2542 static const int32_t fromIBM943Offsstop[] = { 0};
2543
2544 gInBufferSize = inputsize;
2545 gOutBufferSize = outputsize;
2546 /*checking with a legal value*/
2547 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2548 templegal949, sizeof(templegal949), "ibm-949",
2549 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2550 log_err("u-> ibm-949 with skip did not match.\n");
2551
2552 /*checking illegal value for ibm-943 with substitute*/
2553 if(!testConvertToUnicode(text943, sizeof(text943),
2554 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2555 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2556 log_err("ibm-943->u with subst did not match.\n");
2557 /*checking illegal value for ibm-943 with skip */
2558 if(!testConvertToUnicode(text943, sizeof(text943),
2559 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2560 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2561 log_err("ibm-943->u with skip did not match.\n");
2562
2563 /*checking illegal value for ibm-943 with stop */
2564 if(!testConvertToUnicode(text943, sizeof(text943),
2565 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2566 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2567 log_err("ibm-943->u with stop did not match.\n");
2568
2569}
2570
2571static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2572{
2573 static const uint8_t sampleText[] = {
2574 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
fd0068a8
A
2575 0xff, 0x32, 0x33};
2576 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2577 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
b75a7d8f
A
2578 /*checking illegal value for ibm-943 with substitute*/
2579 gInBufferSize = inputsize;
2580 gOutBufferSize = outputsize;
2581
2582 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2583 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2584 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2585 log_err("ibm-943->u with subst did not match.\n");
2586}
2587
2588static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2589{
2590 /*EBCDIC_STATEFUL*/
2591 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2592 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2593 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2594/* s SO doubl SI sng s SO fe fe SI s */
2595
2596 /*EBCDIC_STATEFUL with subChar=3f*/
2597 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2598 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2599 static const char mySubChar[]={ 0x3f};
2600
2601 gInBufferSize = inputsize;
2602 gOutBufferSize = outputsize;
2603
2604 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2605 toIBM930, sizeof(toIBM930), "ibm-930",
2606 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2607 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2608
2609 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2610 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2611 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2612 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2613}
73c04bcf 2614#endif
b75a7d8f
A
2615
2616UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2617 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2618 const char *mySubChar, int8_t len)
2619{
2620
2621
2622 UErrorCode status = U_ZERO_ERROR;
2623 UConverter *conv = 0;
73c04bcf 2624 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
2625 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2626 const UChar *src;
73c04bcf
A
2627 char *end;
2628 char *targ;
b75a7d8f
A
2629 int32_t *offs;
2630 int i;
2631 int32_t realBufferSize;
73c04bcf 2632 char *realBufferEnd;
b75a7d8f
A
2633 const UChar *realSourceEnd;
2634 const UChar *sourceLimit;
2635 UBool checkOffsets = TRUE;
2636 UBool doFlush;
2637 char junk[9999];
2638 char offset_str[9999];
73c04bcf 2639 char *p;
b75a7d8f
A
2640 UConverterFromUCallback oldAction = NULL;
2641 const void* oldContext = NULL;
2642
2643
2644 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 2645 junkout[i] = (char)0xF0;
b75a7d8f
A
2646 for(i=0;i<NEW_MAX_BUFFER;i++)
2647 junokout[i] = 0xFF;
2648 setNuConvTestName(codepage, "FROM");
2649
2650 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2651 gOutBufferSize);
2652
2653 conv = ucnv_open(codepage, &status);
2654 if(U_FAILURE(status))
2655 {
2656 log_data_err("Couldn't open converter %s\n",codepage);
2657 return TRUE;
2658 }
2659
2660 log_verbose("Converter opened..\n");
2661
2662 /*----setting the callback routine----*/
2663 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2664 if (U_FAILURE(status))
2665 {
2666 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2667 }
2668 /*------------------------*/
2669 /*setting the subChar*/
2670 if(mySubChar != NULL){
2671 ucnv_setSubstChars(conv, mySubChar, len, &status);
2672 if (U_FAILURE(status)) {
2673 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2674 }
2675 }
2676 /*------------*/
2677
2678 src = source;
2679 targ = junkout;
2680 offs = junokout;
2681
2682 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2683 realBufferEnd = junkout + realBufferSize;
2684 realSourceEnd = source + sourceLen;
2685
2686 if ( gOutBufferSize != realBufferSize )
2687 checkOffsets = FALSE;
2688
2689 if( gInBufferSize != NEW_MAX_BUFFER )
2690 checkOffsets = FALSE;
2691
2692 do
2693 {
2694 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2695 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2696
2697 doFlush = (UBool)(sourceLimit == realSourceEnd);
2698
2699 if(targ == realBufferEnd)
2700 {
2701 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2702 return FALSE;
2703 }
2704 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2705
2706
2707 status = U_ZERO_ERROR;
2708
2709 ucnv_fromUnicode (conv,
2710 (char **)&targ,
2711 (const char *)end,
2712 &src,
2713 sourceLimit,
2714 checkOffsets ? offs : NULL,
2715 doFlush, /* flush if we're at the end of the input data */
2716 &status);
2717 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2718
2719
2720 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2721 UChar errChars[50]; /* should be sufficient */
2722 int8_t errLen = 50;
2723 UErrorCode err = U_ZERO_ERROR;
2724 const UChar* limit= NULL;
2725 const UChar* start= NULL;
2726 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2727 if(U_FAILURE(err)){
2728 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2729 }
2730 /* src points to limit of invalid chars */
2731 limit = src;
2732 /* length of in invalid chars should be equal to returned length*/
2733 start = src - errLen;
2734 if(u_strncmp(errChars,start,errLen)!=0){
2735 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2736 }
2737 }
2738 /* allow failure codes for the stop callback */
2739 if(U_FAILURE(status) &&
2740 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2741 {
2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2743 return FALSE;
2744 }
2745
2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2747 sourceLen, targ-junkout);
729e4ab9 2748 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
2749 {
2750
2751 junk[0] = 0;
2752 offset_str[0] = 0;
2753 for(p = junkout;p<targ;p++)
2754 {
2755 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2756 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2757 }
2758
2759 log_verbose(junk);
2760 printSeq(expect, expectLen);
2761 if ( checkOffsets )
2762 {
2763 log_verbose("\nOffsets:");
2764 log_verbose(offset_str);
2765 }
2766 log_verbose("\n");
2767 }
2768 ucnv_close(conv);
2769
2770
2771 if(expectLen != targ-junkout)
2772 {
2773 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 2775 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2776 printSeqErr(expect, expectLen);
2777 return FALSE;
2778 }
2779
2780 if (checkOffsets && (expectOffsets != 0) )
2781 {
2782 log_verbose("comparing %d offsets..\n", targ-junkout);
2783 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2784 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2785 log_err("Got Output : ");
73c04bcf 2786 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2787 log_err("Got Offsets: ");
2788 for(p=junkout;p<targ;p++)
2789 log_err("%d,", junokout[p-junkout]);
2790 log_err("\n");
2791 log_err("Expected Offsets: ");
2792 for(i=0; i<(targ-junkout); i++)
2793 log_err("%d,", expectOffsets[i]);
2794 log_err("\n");
2795 return FALSE;
2796 }
2797 }
2798
2799 if(!memcmp(junkout, expect, expectLen))
2800 {
2801 log_verbose("String matches! %s\n", gNuConvTestName);
2802 return TRUE;
2803 }
2804 else
2805 {
2806 log_err("String does not match. %s\n", gNuConvTestName);
2807 log_err("source: ");
2808 printUSeqErr(source, sourceLen);
2809 log_err("Got: ");
73c04bcf 2810 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
2811 log_err("Expected: ");
2812 printSeqErr(expect, expectLen);
2813 return FALSE;
2814 }
2815}
2816
2817UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2818 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2819 const char *mySubChar, int8_t len)
2820{
2821 UErrorCode status = U_ZERO_ERROR;
2822 UConverter *conv = 0;
2823 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2824 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
2825 const char *src;
2826 const char *realSourceEnd;
2827 const char *srcLimit;
b75a7d8f
A
2828 UChar *targ;
2829 UChar *end;
2830 int32_t *offs;
2831 int i;
2832 UBool checkOffsets = TRUE;
2833 char junk[9999];
2834 char offset_str[9999];
2835 UChar *p;
2836 UConverterToUCallback oldAction = NULL;
2837 const void* oldContext = NULL;
2838
2839 int32_t realBufferSize;
2840 UChar *realBufferEnd;
2841
2842
2843 for(i=0;i<NEW_MAX_BUFFER;i++)
2844 junkout[i] = 0xFFFE;
2845
2846 for(i=0;i<NEW_MAX_BUFFER;i++)
2847 junokout[i] = -1;
2848
2849 setNuConvTestName(codepage, "TO");
2850
2851 log_verbose("\n========= %s\n", gNuConvTestName);
2852
2853 conv = ucnv_open(codepage, &status);
2854 if(U_FAILURE(status))
2855 {
2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2857 return TRUE;
2858 }
2859
2860 log_verbose("Converter opened..\n");
2861
73c04bcf 2862 src = (const char *)source;
b75a7d8f
A
2863 targ = junkout;
2864 offs = junokout;
2865
2866 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2867 realBufferEnd = junkout + realBufferSize;
2868 realSourceEnd = src + sourcelen;
2869 /*----setting the callback routine----*/
2870 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2871 if (U_FAILURE(status))
2872 {
2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2874 }
2875 /*-------------------------------------*/
2876 /*setting the subChar*/
2877 if(mySubChar != NULL){
2878 ucnv_setSubstChars(conv, mySubChar, len, &status);
2879 if (U_FAILURE(status)) {
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2881 }
2882 }
2883 /*------------*/
2884
2885
2886 if ( gOutBufferSize != realBufferSize )
2887 checkOffsets = FALSE;
2888
2889 if( gInBufferSize != NEW_MAX_BUFFER )
2890 checkOffsets = FALSE;
2891
2892 do
2893 {
2894 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2895 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2896
2897 if(targ == realBufferEnd)
2898 {
2899 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2900 return FALSE;
2901 }
2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2903
2904
2905
2906 status = U_ZERO_ERROR;
2907
2908 ucnv_toUnicode (conv,
2909 &targ,
2910 end,
2911 (const char **)&src,
2912 (const char *)srcLimit,
2913 checkOffsets ? offs : NULL,
2914 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2915 &status);
2916 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2917
2918 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2919 char errChars[50]; /* should be sufficient */
2920 int8_t errLen = 50;
2921 UErrorCode err = U_ZERO_ERROR;
73c04bcf
A
2922 const char* limit= NULL;
2923 const char* start= NULL;
b75a7d8f
A
2924 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2925 if(U_FAILURE(err)){
2926 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2927 }
2928 /* src points to limit of invalid chars */
2929 limit = src;
2930 /* length of in invalid chars should be equal to returned length*/
2931 start = src - errLen;
73c04bcf 2932 if(uprv_strncmp(errChars,start,errLen)!=0){
b75a7d8f
A
2933 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2934 }
2935 }
2936 /* allow failure codes for the stop callback */
2937 if(U_FAILURE(status) &&
2938 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2939 {
2940 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2941 return FALSE;
2942 }
2943
2944 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2945 sourcelen, targ-junkout);
729e4ab9 2946 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
2947 {
2948
2949 junk[0] = 0;
2950 offset_str[0] = 0;
2951
2952 for(p = junkout;p<targ;p++)
2953 {
2954 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2955 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2956 }
2957
2958 log_verbose(junk);
2959 printUSeq(expect, expectlen);
2960 if ( checkOffsets )
2961 {
2962 log_verbose("\nOffsets:");
2963 log_verbose(offset_str);
2964 }
2965 log_verbose("\n");
2966 }
2967 ucnv_close(conv);
2968
2969 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2970
2971 if (checkOffsets && (expectOffsets != 0))
2972 {
2973 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2974 {
2975 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2976 log_err("Got offsets: ");
2977 for(p=junkout;p<targ;p++)
2978 log_err(" %2d,", junokout[p-junkout]);
2979 log_err("\n");
2980 log_err("Expected offsets: ");
2981 for(i=0; i<(targ-junkout); i++)
2982 log_err(" %2d,", expectOffsets[i]);
2983 log_err("\n");
2984 log_err("Got output: ");
2985 for(i=0; i<(targ-junkout); i++)
2986 log_err("0x%04x,", junkout[i]);
2987 log_err("\n");
2988 log_err("From source: ");
73c04bcf 2989 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
2990 log_err(" 0x%02x,", (unsigned char)source[i]);
2991 log_err("\n");
2992 }
2993 }
2994
2995 if(!memcmp(junkout, expect, expectlen*2))
2996 {
2997 log_verbose("Matches!\n");
2998 return TRUE;
2999 }
3000 else
3001 {
3002 log_err("String does not match. %s\n", gNuConvTestName);
3003 log_verbose("String does not match. %s\n", gNuConvTestName);
3004 log_err("Got: ");
3005 printUSeqErr(junkout, expectlen);
3006 log_err("Expected: ");
3007 printUSeqErr(expect, expectlen);
3008 log_err("\n");
3009 return FALSE;
3010 }
3011}
3012
3013UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3014 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3015 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3016{
3017
3018
3019 UErrorCode status = U_ZERO_ERROR;
3020 UConverter *conv = 0;
73c04bcf 3021 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
3022 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3023 const UChar *src;
73c04bcf
A
3024 char *end;
3025 char *targ;
b75a7d8f
A
3026 int32_t *offs;
3027 int i;
3028 int32_t realBufferSize;
73c04bcf 3029 char *realBufferEnd;
b75a7d8f
A
3030 const UChar *realSourceEnd;
3031 const UChar *sourceLimit;
3032 UBool checkOffsets = TRUE;
3033 UBool doFlush;
3034 char junk[9999];
3035 char offset_str[9999];
73c04bcf 3036 char *p;
b75a7d8f
A
3037 UConverterFromUCallback oldAction = NULL;
3038 const void* oldContext = NULL;
3039
3040
3041 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 3042 junkout[i] = (char)0xF0;
b75a7d8f
A
3043 for(i=0;i<NEW_MAX_BUFFER;i++)
3044 junokout[i] = 0xFF;
3045 setNuConvTestName(codepage, "FROM");
3046
3047 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3048 gOutBufferSize);
3049
3050 conv = ucnv_open(codepage, &status);
3051 if(U_FAILURE(status))
3052 {
3053 log_data_err("Couldn't open converter %s\n",codepage);
3054 return TRUE; /* Because the err has already been logged. */
3055 }
3056
3057 log_verbose("Converter opened..\n");
3058
3059 /*----setting the callback routine----*/
3060 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3061 if (U_FAILURE(status))
3062 {
3063 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3064 }
3065 /*------------------------*/
3066 /*setting the subChar*/
3067 if(mySubChar != NULL){
3068 ucnv_setSubstChars(conv, mySubChar, len, &status);
3069 if (U_FAILURE(status)) {
3070 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3071 }
3072 }
3073 /*------------*/
3074
3075 src = source;
3076 targ = junkout;
3077 offs = junokout;
3078
3079 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3080 realBufferEnd = junkout + realBufferSize;
3081 realSourceEnd = source + sourceLen;
3082
3083 if ( gOutBufferSize != realBufferSize )
3084 checkOffsets = FALSE;
3085
3086 if( gInBufferSize != NEW_MAX_BUFFER )
3087 checkOffsets = FALSE;
3088
3089 do
3090 {
3091 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3092 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3093
3094 doFlush = (UBool)(sourceLimit == realSourceEnd);
3095
3096 if(targ == realBufferEnd)
3097 {
3098 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3099 return FALSE;
3100 }
3101 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3102
3103
3104 status = U_ZERO_ERROR;
3105
3106 ucnv_fromUnicode (conv,
3107 (char **)&targ,
3108 (const char *)end,
3109 &src,
3110 sourceLimit,
3111 checkOffsets ? offs : NULL,
3112 doFlush, /* flush if we're at the end of the input data */
3113 &status);
3114 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3115
3116 /* allow failure codes for the stop callback */
3117 if(U_FAILURE(status) && status != expectedError)
3118 {
3119 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3120 return FALSE;
3121 }
3122
3123 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3124 sourceLen, targ-junkout);
729e4ab9 3125 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
3126 {
3127
3128 junk[0] = 0;
3129 offset_str[0] = 0;
3130 for(p = junkout;p<targ;p++)
3131 {
3132 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3133 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3134 }
3135
3136 log_verbose(junk);
3137 printSeq(expect, expectLen);
3138 if ( checkOffsets )
3139 {
3140 log_verbose("\nOffsets:");
3141 log_verbose(offset_str);
3142 }
3143 log_verbose("\n");
3144 }
3145 ucnv_close(conv);
3146
3147
3148 if(expectLen != targ-junkout)
3149 {
3150 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3151 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 3152 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3153 printSeqErr(expect, expectLen);
3154 return FALSE;
3155 }
3156
3157 if (checkOffsets && (expectOffsets != 0) )
3158 {
3159 log_verbose("comparing %d offsets..\n", targ-junkout);
3160 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3161 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3162 log_err("Got Output : ");
73c04bcf 3163 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3164 log_err("Got Offsets: ");
3165 for(p=junkout;p<targ;p++)
3166 log_err("%d,", junokout[p-junkout]);
3167 log_err("\n");
3168 log_err("Expected Offsets: ");
3169 for(i=0; i<(targ-junkout); i++)
3170 log_err("%d,", expectOffsets[i]);
3171 log_err("\n");
3172 return FALSE;
3173 }
3174 }
3175
3176 if(!memcmp(junkout, expect, expectLen))
3177 {
3178 log_verbose("String matches! %s\n", gNuConvTestName);
3179 return TRUE;
3180 }
3181 else
3182 {
3183 log_err("String does not match. %s\n", gNuConvTestName);
3184 log_err("source: ");
3185 printUSeqErr(source, sourceLen);
3186 log_err("Got: ");
73c04bcf 3187 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
3188 log_err("Expected: ");
3189 printSeqErr(expect, expectLen);
3190 return FALSE;
3191 }
3192}
3193UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3194 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3195 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3196{
3197 UErrorCode status = U_ZERO_ERROR;
3198 UConverter *conv = 0;
3199 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3200 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
3201 const char *src;
3202 const char *realSourceEnd;
3203 const char *srcLimit;
b75a7d8f
A
3204 UChar *targ;
3205 UChar *end;
3206 int32_t *offs;
3207 int i;
3208 UBool checkOffsets = TRUE;
3209 char junk[9999];
3210 char offset_str[9999];
3211 UChar *p;
3212 UConverterToUCallback oldAction = NULL;
3213 const void* oldContext = NULL;
3214
3215 int32_t realBufferSize;
3216 UChar *realBufferEnd;
3217
3218
3219 for(i=0;i<NEW_MAX_BUFFER;i++)
3220 junkout[i] = 0xFFFE;
3221
3222 for(i=0;i<NEW_MAX_BUFFER;i++)
3223 junokout[i] = -1;
3224
3225 setNuConvTestName(codepage, "TO");
3226
3227 log_verbose("\n========= %s\n", gNuConvTestName);
3228
3229 conv = ucnv_open(codepage, &status);
3230 if(U_FAILURE(status))
3231 {
3232 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3233 return TRUE;
3234 }
3235
3236 log_verbose("Converter opened..\n");
3237
73c04bcf 3238 src = (const char *)source;
b75a7d8f
A
3239 targ = junkout;
3240 offs = junokout;
3241
3242 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3243 realBufferEnd = junkout + realBufferSize;
3244 realSourceEnd = src + sourcelen;
3245 /*----setting the callback routine----*/
3246 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3247 if (U_FAILURE(status))
3248 {
3249 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3250 }
3251 /*-------------------------------------*/
3252 /*setting the subChar*/
3253 if(mySubChar != NULL){
3254 ucnv_setSubstChars(conv, mySubChar, len, &status);
3255 if (U_FAILURE(status)) {
3256 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3257 }
3258 }
3259 /*------------*/
3260
3261
3262 if ( gOutBufferSize != realBufferSize )
3263 checkOffsets = FALSE;
3264
3265 if( gInBufferSize != NEW_MAX_BUFFER )
3266 checkOffsets = FALSE;
3267
3268 do
3269 {
3270 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3271 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3272
3273 if(targ == realBufferEnd)
3274 {
3275 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3276 return FALSE;
3277 }
3278 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3279
3280
3281
3282 status = U_ZERO_ERROR;
3283
3284 ucnv_toUnicode (conv,
3285 &targ,
3286 end,
3287 (const char **)&src,
3288 (const char *)srcLimit,
3289 checkOffsets ? offs : NULL,
3290 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3291 &status);
3292 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3293
3294 /* allow failure codes for the stop callback */
3295 if(U_FAILURE(status) && status!=expectedError)
3296 {
3297 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3298 return FALSE;
3299 }
3300
3301 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3302 sourcelen, targ-junkout);
729e4ab9 3303 if(getTestOption(VERBOSITY_OPTION))
b75a7d8f
A
3304 {
3305
3306 junk[0] = 0;
3307 offset_str[0] = 0;
3308
3309 for(p = junkout;p<targ;p++)
3310 {
3311 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3312 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3313 }
3314
3315 log_verbose(junk);
3316 printUSeq(expect, expectlen);
3317 if ( checkOffsets )
3318 {
3319 log_verbose("\nOffsets:");
3320 log_verbose(offset_str);
3321 }
3322 log_verbose("\n");
3323 }
3324 ucnv_close(conv);
3325
3326 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3327
3328 if (checkOffsets && (expectOffsets != 0))
3329 {
3330 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3331 {
3332 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3333 log_err("Got offsets: ");
3334 for(p=junkout;p<targ;p++)
3335 log_err(" %2d,", junokout[p-junkout]);
3336 log_err("\n");
3337 log_err("Expected offsets: ");
3338 for(i=0; i<(targ-junkout); i++)
3339 log_err(" %2d,", expectOffsets[i]);
3340 log_err("\n");
3341 log_err("Got output: ");
3342 for(i=0; i<(targ-junkout); i++)
3343 log_err("0x%04x,", junkout[i]);
3344 log_err("\n");
3345 log_err("From source: ");
73c04bcf 3346 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
3347 log_err(" 0x%02x,", (unsigned char)source[i]);
3348 log_err("\n");
3349 }
3350 }
3351
3352 if(!memcmp(junkout, expect, expectlen*2))
3353 {
3354 log_verbose("Matches!\n");
3355 return TRUE;
3356 }
3357 else
3358 {
3359 log_err("String does not match. %s\n", gNuConvTestName);
3360 log_verbose("String does not match. %s\n", gNuConvTestName);
3361 log_err("Got: ");
3362 printUSeqErr(junkout, expectlen);
3363 log_err("Expected: ");
3364 printUSeqErr(expect, expectlen);
3365 log_err("\n");
3366 return FALSE;
3367 }
3368}
73c04bcf
A
3369
3370static void TestCallBackFailure(void) {
3371 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3372 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3373 if (status != U_USELESS_COLLATOR_ERROR) {
3374 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3375 }
3376 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3377 if (status != U_USELESS_COLLATOR_ERROR) {
3378 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3379 }
3380 ucnv_cbFromUWriteSub(NULL, -1, &status);
3381 if (status != U_USELESS_COLLATOR_ERROR) {
3382 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3383 }
3384 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3385 if (status != U_USELESS_COLLATOR_ERROR) {
3386 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3387 }
3388}