]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/nccbtst.c
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nccbtst.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
46f4442e 3 * Copyright (c) 1997-2008, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
73c04bcf 7********************************************************************************
b75a7d8f
A
8* File NCCBTST.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda 7/21/1999 Testing error callback routines
73c04bcf 13********************************************************************************
b75a7d8f
A
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "cstring.h"
20#include "unicode/uloc.h"
21#include "unicode/ucnv.h"
22#include "unicode/ucnv_err.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "nccbtst.h"
73c04bcf 27#include "unicode/ucnv_cb.h"
b75a7d8f
A
28#define NEW_MAX_BUFFER 999
29
/*
 * nct_min(x, y): yields the smaller of x and y.
 * Every use of an argument is parenthesized so that operands containing
 * lower-precedence operators (e.g. nct_min(a|b, c)) group as intended.
 * Note: the selected argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
/* ARRAY_LENGTH(array): element count of a true array (not of a pointer). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))
32
33static int32_t gInBufferSize = 0;
34static int32_t gOutBufferSize = 0;
35static char gNuConvTestName[1024];
36
/*
 * Writes the first len bytes of a through log_verbose as a brace-enclosed,
 * comma-separated list of two-digit upper-case hex values, preceded by a
 * newline and followed by one.  Nothing is listed when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
45
46static void printUSeq(const UChar* a, int len)
47{
48 int i=0;
49 log_verbose("{");
50 while (i<len)
51 log_verbose(" 0x%04x, ", a[i++]);
52 log_verbose("}\n");
53}
54
/*
 * Same listing as a byte dump, but written straight to stderr:
 * a brace-enclosed list of the first len bytes of a, each as a two-digit
 * lower-case hex value.  Nothing is listed when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
63
64static void printUSeqErr(const UChar* a, int len)
65{
66 int i=0;
67 fprintf(stderr, "{");
68 while (i<len)
69 fprintf(stderr, "0x%04x, ", a[i++]);
70 fprintf(stderr,"}\n");
71}
72
73static void setNuConvTestName(const char *codepage, const char *direction)
74{
75 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
76 codepage,
77 direction,
374ca955
A
78 (int)gInBufferSize,
79 (int)gOutBufferSize);
b75a7d8f
A
80}
81
82
73c04bcf
A
83static void TestCallBackFailure(void);
84
b75a7d8f
A
85void addTestConvertErrorCallBack(TestNode** root);
86
87void addTestConvertErrorCallBack(TestNode** root)
88{
89 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
90 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
91 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
92 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
73c04bcf
A
93
94#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
95 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
96 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
73c04bcf
A
97#endif
98
99 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
b75a7d8f
A
100}
101
102static void TestSkipCallBack()
103{
104 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
105 TestSkip(1,NEW_MAX_BUFFER);
106 TestSkip(1,1);
107 TestSkip(NEW_MAX_BUFFER, 1);
108}
109
110static void TestStopCallBack()
111{
112 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
113 TestStop(1,NEW_MAX_BUFFER);
114 TestStop(1,1);
115 TestStop(NEW_MAX_BUFFER, 1);
116}
117
118static void TestSubCallBack()
119{
120 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
121 TestSub(1,NEW_MAX_BUFFER);
122 TestSub(1,1);
123 TestSub(NEW_MAX_BUFFER, 1);
73c04bcf
A
124
125#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
126 TestEBCDIC_STATEFUL_Sub(1, 1);
127 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
128 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
129 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
73c04bcf 130#endif
b75a7d8f
A
131}
132
133static void TestSubWithValueCallBack()
134{
135 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
136 TestSubWithValue(1,NEW_MAX_BUFFER);
137 TestSubWithValue(1,1);
138 TestSubWithValue(NEW_MAX_BUFFER, 1);
139}
140
73c04bcf 141#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
142static void TestLegalAndOtherCallBack()
143{
144 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
145 TestLegalAndOthers(1,NEW_MAX_BUFFER);
146 TestLegalAndOthers(1,1);
147 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
148}
149
150static void TestSingleByteCallBack()
151{
152 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
153 TestSingleByte(1,NEW_MAX_BUFFER);
154 TestSingleByte(1,1);
155 TestSingleByte(NEW_MAX_BUFFER, 1);
156}
73c04bcf 157#endif
b75a7d8f
A
158
159static void TestSkip(int32_t inputsize, int32_t outputsize)
160{
161 static const uint8_t expskipIBM_949[]= {
162 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
163
164 static const uint8_t expskipIBM_943[] = {
165 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
166
167 static const uint8_t expskipIBM_930[] = {
168 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
169
170 gInBufferSize = inputsize;
171 gOutBufferSize = outputsize;
172
173 /*From Unicode*/
174 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
175
73c04bcf 176#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
177 {
178 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
179 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
180
181 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
182 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
b75a7d8f
A
183
184 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
185 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
186 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
187 log_err("u-> ibm-949 with skip did not match.\n");
188 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
189 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
190 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
191 log_err("u-> ibm-943 with skip did not match.\n");
b75a7d8f
A
192 }
193
194 {
195 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
196 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
197 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
198
199 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
200 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
201 fromUBytes, sizeof(fromUBytes),
202 "ibm-930",
203 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
204 NULL, 0)
205 ) {
206 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
207 }
208 }
73c04bcf 209#endif
b75a7d8f
A
210
211 {
212 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
213 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
214 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
215
216 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
217 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
218 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
219
220 /* US-ASCII */
221 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
222 usasciiFromUBytes, sizeof(usasciiFromUBytes),
223 "US-ASCII",
224 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
225 NULL, 0)
226 ) {
227 log_err("u->US-ASCII with skip did not match.\n");
228 }
229
73c04bcf 230#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
231 /* SBCS NLTC codepage 367 for US-ASCII */
232 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
233 usasciiFromUBytes, sizeof(usasciiFromUBytes),
234 "ibm-367",
235 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
236 NULL, 0)
237 ) {
238 log_err("u->ibm-367 with skip did not match.\n");
239 }
73c04bcf 240#endif
b75a7d8f
A
241
242 /* ISO-Latin-1 */
243 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
244 latin1FromUBytes, sizeof(latin1FromUBytes),
245 "LATIN_1",
246 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
247 NULL, 0)
248 ) {
249 log_err("u->LATIN_1 with skip did not match.\n");
250 }
251
73c04bcf 252#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
253 /* windows-1252 */
254 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
255 latin1FromUBytes, sizeof(latin1FromUBytes),
256 "windows-1252",
257 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
258 NULL, 0)
259 ) {
260 log_err("u->windows-1252 with skip did not match.\n");
261 }
262 }
263
264 {
265 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
266 static const uint8_t toIBM943[]= { 0x61, 0x61 };
267 static const int32_t offset[]= {0, 4};
268
269 /* EUC_JP*/
270 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
271 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
272 0x61, 0x8e, 0xe0,
273 };
274 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
275
276 /*EUC_TW*/
277 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
278 static const uint8_t to_euc_tw[]={
279 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
280 0x61, 0xe6, 0xca, 0x8a,
281 };
282 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
283
284 /*ISO-2022-JP*/
285 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
286 static const uint8_t to_iso_2022_jp[]={
287 0x41,
288 0x42,
289
290 };
291 static const int32_t from_iso_2022_jpOffs [] ={0,2};
292
b75a7d8f
A
293 /*ISO-2022-JP*/
294 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
295 static const uint8_t to_iso_2022_jp2[]={
296 0x41,
297 0x43,
298
299 };
300 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
301
302 /*ISO-2022-cn*/
303 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
304 static const uint8_t to_iso_2022_cn[]={
374ca955 305 0x41, 0x42
b75a7d8f
A
306 };
307 static const int32_t from_iso_2022_cnOffs [] ={
374ca955 308 0, 2
b75a7d8f
A
309 };
310
311 /*ISO-2022-CN*/
312 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
313 static const uint8_t to_iso_2022_cn1[]={
374ca955 314 0x41, 0x43
b75a7d8f
A
315
316 };
374ca955 317 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
b75a7d8f
A
318
319 /*ISO-2022-kr*/
320 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
321 static const uint8_t to_iso_2022_kr[]={
322 0x1b, 0x24, 0x29, 0x43,
323 0x41,
324 0x0e, 0x25, 0x50,
325 0x25, 0x50,
326 0x0f, 0x42,
327 };
328 static const int32_t from_iso_2022_krOffs [] ={
329 -1,-1,-1,-1,
330 0,
331 1,1,1,
332 3,3,
333 4,4
334 };
335
336 /*ISO-2022-kr*/
337 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
338 static const uint8_t to_iso_2022_kr1[]={
339 0x1b, 0x24, 0x29, 0x43,
340 0x41,
341 0x0e, 0x25, 0x50,
342 0x25, 0x50,
343
344 };
345 static const int32_t from_iso_2022_krOffs1 [] ={
346 -1,-1,-1,-1,
347 0,
348 1,1,1,
349 3,3,
350
351 };
352 /* HZ encoding */
353 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
354
355 static const uint8_t to_hz[]={
356 0x7e, 0x7d, 0x41,
357 0x7e, 0x7b, 0x26, 0x30,
358 0x26, 0x30,
359 0x7e, 0x7d, 0x42,
360
361 };
362 static const int32_t from_hzOffs [] ={
363 0,0,0,
364 1,1,1,1,
365 3,3,
366 4,4,4,4
367 };
368
369 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
370
371 static const uint8_t to_hz1[]={
372 0x7e, 0x7d, 0x41,
373 0x7e, 0x7b, 0x26, 0x30,
374 0x26, 0x30,
375
376
377 };
378 static const int32_t from_hzOffs1 [] ={
379 0,0,0,
380 1,1,1,1,
381 3,3,
382
383 };
384
73c04bcf 385#endif
b75a7d8f
A
386
387 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
388
389 static const uint8_t to_SCSU[]={
390 0x41,
391 0x42
392
393
394 };
395 static const int32_t from_SCSUOffs [] ={
396 0,
397 2,
398
399 };
73c04bcf
A
400
401#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
402 /* ISCII */
403 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
404 static const uint8_t to_iscii[]={
405 0x41,
406 0x42,
407 };
408 static const int32_t from_isciiOffs [] ={
409 0,2,
410
411 };
412 /*ISCII*/
413 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
414 static const uint8_t to_iscii1[]={
415 0x44,
416 0x43,
417
418 };
419 static const int32_t from_isciiOffs1 [] ={0,2};
420
421 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
422 toIBM943, sizeof(toIBM943), "ibm-943",
423 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
424 log_err("u-> ibm-943 with skip did not match.\n");
425
426 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
427 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
428 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
429 log_err("u-> euc-jp with skip did not match.\n");
430
431 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
432 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
433 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
434 log_err("u-> euc-tw with skip did not match.\n");
435
436 /*iso_2022_jp*/
437 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
438 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
439 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
440 log_err("u-> iso-2022-jp with skip did not match.\n");
441
b75a7d8f
A
442 /* with context */
443 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
444 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
446 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
447
448 /*iso_2022_cn*/
449 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
450 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
452 log_err("u-> iso-2022-cn with skip did not match.\n");
453 /*with context*/
454 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
455 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
456 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
457 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
458
459 /*iso_2022_kr*/
460 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
461 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
462 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
463 log_err("u-> iso-2022-kr with skip did not match.\n");
464 /*with context*/
465 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
466 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
467 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
468 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
469
470 /*hz*/
471 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
472 to_hz, sizeof(to_hz), "HZ",
473 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
474 log_err("u-> HZ with skip did not match.\n");
475 /*with context*/
476 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
477 to_hz1, sizeof(to_hz1), "hz",
478 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
73c04bcf
A
479 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
480#endif
b75a7d8f
A
481
482 /*SCSU*/
483 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
484 to_SCSU, sizeof(to_SCSU), "SCSU",
485 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
486 log_err("u-> SCSU with skip did not match.\n");
487
73c04bcf 488#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
489 /*ISCII*/
490 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
491 to_iscii, sizeof(to_iscii), "ISCII,version=0",
492 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
493 log_err("u-> iscii with skip did not match.\n");
494 /*with context*/
495 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
496 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
497 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
498 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
73c04bcf 499#endif
b75a7d8f
A
500 }
501
502 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
503 {
504 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
505 0xFB, 0xEE, 0x28, /* from source offset 0 */
506 0x24, 0x1E, 0x52,
507 0xB2,
508 0x20,
509 0xB3,
510 0xB1,
511 0x0D,
512 0x0A,
513
514 0x20, /* from 8 */
515 0x00,
516 0xD0, 0x6C,
517 0xB6,
518 0xD8, 0xA5,
519 0x20,
520 0x68,
521 0x59,
522
523 0xF9, 0x28, /* from 16 */
524 0x6D,
525 0x20,
526 0x73,
527 0xE0, 0x2D,
528 0xDE, 0x43,
529 0xD0, 0x33,
530 0x20,
531
532 0xFA, 0x83, /* from 24 */
533 0x25, 0x01,
534 0xFB, 0x16, 0x87,
535 0x4B, 0x16,
536 0x20,
537 0xE6, 0xBD,
538 0xEB, 0x5B,
539 0x4B, 0xCC,
540
541 0xF9, 0xA2, /* from 32 */
542 0xFC, 0x10, 0x3E,
543 0xFE, 0x16, 0x3A, 0x8C,
544 0x20,
545 0xFC, 0x03, 0xAC,
546
547 0x01, /* from 41 */
548 0xDE, 0x83,
549 0x20,
550 0x09
551 };
552 static const UChar expected[]={
553 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
554 0x0063, 0x0061, 0x000D, 0x000A,
555
556 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
557 0x0930, 0x0020, 0x0918, 0x0909,
558
559 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
560 0x4000, 0x4E00, 0x7777, 0x0020,
561
562 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
563 0x0020, 0xD7A3, 0xDC00, 0xD800,
564
565 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
566 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
567
568 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
569 0x0009
570 };
571 static const int32_t offsets[]={
572 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
573 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
574 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
575 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
576 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
577 41, 42, 42, 43, 44
578 };
579
580 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
581 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
582 sampleText, sizeof(sampleText),
583 "BOCU-1",
584 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
585 ) {
586 log_err("u->BOCU-1 with skip did not match.\n");
587 }
588 }
589
590 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
591 {
592 const uint8_t sampleText[]={
593 0x61, /* 'a' */
594 0xc4, 0xb5, /* U+0135 */
595 0xed, 0x80, 0xa0, /* Hangul U+d020 */
596 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
597 0xee, 0x80, 0x80, /* PUA U+e000 */
598 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
599 0x62, /* 'b' */
600 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
601 0xd0, 0x80 /* U+0400 */
602 };
603 UChar expected[]={
604 0x0061,
605 0x0135,
606 0xd020,
607 0xd801, 0xdc01,
608 0xe000,
609 0xdc01,
610 0x0062,
611 0xd801,
612 0x0400
613 };
614 int32_t offsets[]={
615 0,
616 1, 1,
617 2, 2, 2,
618 3, 3, 3, 4, 4, 4,
619 5, 5, 5,
620 6, 6, 6,
621 7,
622 8, 8, 8,
623 9, 9
624 };
625
626 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
627
628 /* without offsets */
629 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
630 sampleText, sizeof(sampleText),
631 "CESU-8",
632 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
633 ) {
634 log_err("u->CESU-8 with skip did not match.\n");
635 }
636
637 /* with offsets */
638 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
639 sampleText, sizeof(sampleText),
640 "CESU-8",
641 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
642 ) {
643 log_err("u->CESU-8 with skip did not match.\n");
644 }
645 }
646
647 /*to Unicode*/
648 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
649
73c04bcf 650#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
651 {
652
653 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
654 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
655 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
656
657 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
658 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
659 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
660
661 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
662 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
663 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
664 log_err("ibm-949->u with skip did not match.\n");
665 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
666 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
667 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
668 log_err("ibm-943->u with skip did not match.\n");
669
670
671 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
672 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
673 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
674 log_err("ibm-930->u with skip did not match.\n");
675
676
677 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
678 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
680 log_err("ibm-930->u with skip did not match.\n");
681 }
73c04bcf 682#endif
b75a7d8f
A
683
684 {
685 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
686 static const UChar usasciiToU[] = { 0x61, 0x31 };
687 static const int32_t usasciiToUOffsets[] = { 0, 2 };
688
689 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
690 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
691 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
692
693 /* US-ASCII */
694 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
695 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
696 "US-ASCII",
697 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
698 NULL, 0)
699 ) {
700 log_err("US-ASCII->u with skip did not match.\n");
701 }
702
73c04bcf 703#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
704 /* SBCS NLTC codepage 367 for US-ASCII */
705 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
706 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
707 "ibm-367",
708 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
709 NULL, 0)
710 ) {
711 log_err("ibm-367->u with skip did not match.\n");
712 }
73c04bcf 713#endif
b75a7d8f
A
714
715 /* ISO-Latin-1 */
716 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
717 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
718 "LATIN_1",
719 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
720 NULL, 0)
721 ) {
722 log_err("LATIN_1->u with skip did not match.\n");
723 }
724
73c04bcf 725#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
726 /* windows-1252 */
727 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
728 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
729 "windows-1252",
730 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
731 NULL, 0)
732 ) {
733 log_err("windows-1252->u with skip did not match.\n");
734 }
73c04bcf 735#endif
b75a7d8f
A
736 }
737
73c04bcf 738#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
739 {
740 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
741 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
742 };
743 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
744 };
745 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
746
747
748 /* euc-jp*/
749 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
750 0x8f, 0xda, 0xa1, /*unassigned*/
751 0x8e, 0xe0,
752 };
753 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
754 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
755
756 /*EUC_TW*/
757 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
758 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
759 0xe6, 0xca, 0x8a,
760 };
761 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
762 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
763 /*iso-2022-jp*/
764 static const uint8_t sampleTxt_iso_2022_jp[]={
765 0x41,
766 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
767 0x1b, 0x28, 0x42, 0x42,
768
769 };
770 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
771 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
772
773 /*iso-2022-cn*/
774 static const uint8_t sampleTxt_iso_2022_cn[]={
775 0x0f, 0x41, 0x44,
776 0x1B, 0x24, 0x29, 0x47,
777 0x0E, 0x40, 0x6f, /*unassigned*/
778 0x0f, 0x42,
779
780 };
781
782 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
783 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
784
785 /*iso-2022-kr*/
786 static const uint8_t sampleTxt_iso_2022_kr[]={
787 0x1b, 0x24, 0x29, 0x43,
788 0x41,
789 0x0E, 0x7f, 0x1E,
790 0x0e, 0x25, 0x50,
791 0x0f, 0x51,
792 0x42, 0x43,
793
794 };
795 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
796 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
797
798 /*hz*/
799 static const uint8_t sampleTxt_hz[]={
800 0x41,
801 0x7e, 0x7b, 0x26, 0x30,
802 0x7f, 0x1E, /*unassigned*/
803 0x26, 0x30,
804 0x7e, 0x7d, 0x42,
805 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
806 0x7e, 0x7d, 0x42,
807 };
808 static const UChar hztoUnicode[]={
809 0x41,
810 0x03a0,
811 0x03A0,
812 0x42,
813 0x42,};
814
815 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
816
817 /*ISCII*/
818 static const uint8_t sampleTxt_iscii[]={
819 0x41,
820 0xa1,
821 0xEB, /*unassigned*/
822 0x26,
823 0x30,
824 0xa2,
825 0xEC, /*unassigned*/
826 0x42,
827 };
828 static const UChar isciitoUnicode[]={
829 0x41,
830 0x0901,
831 0x26,
832 0x30,
833 0x0902,
834 0x42,
835 };
836
837 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
838
839 /*LMBCS*/
840 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
841 0x12, 0x92, 0xa0, /*unassigned*/
842 0x12, 0x92, 0xA1,
843 };
844 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
845 static const int32_t fromLMBCS[] = {0, 6};
846
847 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
848 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
849 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
850 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
851
852 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
853 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
854 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
855 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
856
857 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
858 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
859 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
860 log_err("euc-jp->u with skip did not match.\n");
861
862
863
864 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
865 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
866 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
867 log_err("euc-tw->u with skip did not match.\n");
868
869
870 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
871 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
872 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
873 log_err("iso-2022-jp->u with skip did not match.\n");
874
875 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
876 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
877 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
878 log_err("iso-2022-cn->u with skip did not match.\n");
879
880 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
881 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
882 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
883 log_err("iso-2022-kr->u with skip did not match.\n");
884
885 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
886 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
887 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
888 log_err("HZ->u with skip did not match.\n");
889
890 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
891 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
892 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
893 log_err("iscii->u with skip did not match.\n");
894
895 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
896 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
897 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
898 log_err("LMBCS->u with skip did not match.\n");
899
900 }
73c04bcf
A
901#endif
902
b75a7d8f
A
903 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
904 {
905 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
906 0xe0, 0x80, 0x61,};
907 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
908 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
909
910 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
911 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
912 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
913 log_err("utf8->u with skip did not match.\n");;
914 }
915
916 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
917 {
918 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
919 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
920 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
921
922 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
923 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
924 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
925 log_err("scsu->u with skip did not match.\n");
926 }
927
928 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
929 {
930 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
931 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
932 0x24, 0x1E, 0x52, /* 3 */
933 0xB2, /* 6 */
934 0x20, /* 7 */
935 0x40, 0x07, /* 8 - wrong trail byte */
936 0xB3, /* 10 */
937 0xB1, /* 11 */
938 0xD0, 0x20, /* 12 - wrong trail byte */
939 0x0D, /* 14 */
940 0x0A, /* 15 */
941 0x20, /* 16 */
942 0x00, /* 17 */
943 0xD0, 0x6C, /* 18 */
944 0xB6, /* 20 */
945 0xD8, 0xA5, /* 21 */
946 0x20, /* 23 */
947 0x68, /* 24 */
948 0x59, /* 25 */
949 0xF9, 0x28, /* 26 */
950 0x6D, /* 28 */
951 0x20, /* 29 */
952 0x73, /* 30 */
953 0xE0, 0x2D, /* 31 */
954 0xDE, 0x43, /* 33 */
955 0xD0, 0x33, /* 35 */
956 0x20, /* 37 */
957 0xFA, 0x83, /* 38 */
958 0x25, 0x01, /* 40 */
959 0xFB, 0x16, 0x87, /* 42 */
960 0x4B, 0x16, /* 45 */
961 0x20, /* 47 */
962 0xE6, 0xBD, /* 48 */
963 0xEB, 0x5B, /* 50 */
964 0x4B, 0xCC, /* 52 */
965 0xF9, 0xA2, /* 54 */
966 0xFC, 0x10, 0x3E, /* 56 */
967 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
968 0x20, /* 63 */
969 0xFC, 0x03, 0xAC, /* 64 */
970 0xFF, /* 67 - FF just resets the state without encoding anything */
971 0x01, /* 68 */
972 0xDE, 0x83, /* 69 */
973 0x20, /* 71 */
974 0x09 /* 72 */
975 };
976 UChar expected[]={
977 0xFEFF, 0x0061, 0x0062, 0x0020,
978 0x0063, 0x0061, 0x000D, 0x000A,
979 0x0020, 0x0000, 0x00DF, 0x00E6,
980 0x0930, 0x0020, 0x0918, 0x0909,
981 0x3086, 0x304D, 0x0020, 0x3053,
982 0x4000, 0x4E00, 0x7777, 0x0020,
983 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
984 0x0020, 0xD7A3, 0xDC00, 0xD800,
985 0xD800, 0xDC00, 0xD845, 0xDDDD,
986 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
987 0xDFFF, 0x0001, 0x0E40, 0x0020,
988 0x0009
989 };
990 int32_t offsets[]={
991 0, 3, 6, 7, /* skip 8, */
992 10, 11, /* skip 12, */
993 14, 15, 16, 17, 18,
994 20, 21, 23, 24, 25, 26, 28, 29,
995 30, 31, 33, 35, 37, 38,
996 40, 42, 45, 47, 48,
997 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
998 63, 64, /* trail */ 64, /* reset only 67, */
999 68, 69,
1000 71, 72
1001 };
1002
1003 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1004 expected, ARRAY_LENGTH(expected), "BOCU-1",
1005 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1006 ) {
1007 log_err("BOCU-1->u with skip did not match.\n");
1008 }
1009 }
1010
1011 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1012 {
1013 const uint8_t sampleText[]={
1014 0x61, /* 0 'a' */
1015 0xc0, 0x80, /* 1 non-shortest form */
1016 0xc4, 0xb5, /* 3 U+0135 */
1017 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1018 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1019 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1020 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1021 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1022 0x62, /* 24 'b' */
1023 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1024 0xed, 0xa0, /* 28 incomplete sequence */
1025 0xd0, 0x80 /* 30 U+0400 */
1026 };
1027 UChar expected[]={
1028 0x0061,
1029 /* skip */
1030 0x0135,
1031 0xd020,
1032 0xd801, 0xdc01,
1033 0xe000,
1034 0xdc01,
1035 /* skip */
1036 0x0062,
1037 0xd801,
1038 0x0400
1039 };
1040 int32_t offsets[]={
1041 0,
1042 /* skip 1, */
1043 3,
1044 5,
1045 8, 11,
1046 14,
1047 17,
1048 /* skip 20, 20, */
1049 24,
1050 25,
1051 /* skip 28 */
1052 30
1053 };
1054
1055 /* without offsets */
1056 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1057 expected, ARRAY_LENGTH(expected), "CESU-8",
1058 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1059 ) {
1060 log_err("CESU-8->u with skip did not match.\n");
1061 }
1062
1063 /* with offsets */
1064 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1065 expected, ARRAY_LENGTH(expected), "CESU-8",
1066 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1067 ) {
1068 log_err("CESU-8->u with skip did not match.\n");
1069 }
1070 }
1071}
1072
1073static void TestStop(int32_t inputsize, int32_t outputsize)
1074{
1075 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1076 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1077
1078 static const uint8_t expstopIBM_949[]= {
1079 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1080
1081 static const uint8_t expstopIBM_943[] = {
1082 0x9f, 0xaf, 0x9f, 0xb1};
1083
1084 static const uint8_t expstopIBM_930[] = {
1085 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1086
1087 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1088 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1089 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1090
1091
1092 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1093 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1094 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1095
1096 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1097 static const int32_t fromIBM943Offs [] = { 0, 2};
1098 static const int32_t fromIBM930Offs [] = { 1, 3};
1099
1100 gInBufferSize = inputsize;
1101 gOutBufferSize = outputsize;
73c04bcf 1102
b75a7d8f 1103 /*From Unicode*/
73c04bcf
A
1104
1105#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1106 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1107 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1108 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1109 log_err("u-> ibm-949 with stop did not match.\n");
1110 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1111 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1112 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1113 log_err("u-> ibm-943 with stop did not match.\n");
1114 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1115 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1116 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1117 log_err("u-> ibm-930 with stop did not match.\n");
1118
1119 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1120 {
1121 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1122 static const uint8_t toIBM943[]= { 0x61,};
1123 static const int32_t offset[]= {0,} ;
1124
1125 /*EUC_JP*/
1126 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1127 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1128 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1129
1130 /*EUC_TW*/
1131 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1132 static const uint8_t to_euc_tw[]={
1133 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1134 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1135
1136 /*ISO-2022-JP*/
1137 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1138 static const uint8_t to_iso_2022_jp[]={
1139 0x41,
1140
1141 };
1142 static const int32_t from_iso_2022_jpOffs [] ={0,};
1143
1144 /*ISO-2022-cn*/
1145 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1146 static const uint8_t to_iso_2022_cn[]={
374ca955 1147 0x41,
b75a7d8f
A
1148
1149 };
1150 static const int32_t from_iso_2022_cnOffs [] ={
1151 0,0,
1152 2,2,
1153 };
1154
1155 /*ISO-2022-kr*/
1156 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1157 static const uint8_t to_iso_2022_kr[]={
1158 0x1b, 0x24, 0x29, 0x43,
1159 0x41,
1160 0x0e, 0x25, 0x50,
1161 };
1162 static const int32_t from_iso_2022_krOffs [] ={
1163 -1,-1,-1,-1,
1164 0,
1165 1,1,1,
1166 };
1167
1168 /* HZ encoding */
1169 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1170
1171 static const uint8_t to_hz[]={
1172 0x7e, 0x7d, 0x41,
1173 0x7e, 0x7b, 0x26, 0x30,
1174
1175 };
1176 static const int32_t from_hzOffs [] ={
1177 0, 0,0,
1178 1,1,1,1,
1179 };
1180
1181 /*ISCII*/
1182 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1183 static const uint8_t to_iscii[]={
1184 0x41,
1185 };
1186 static const int32_t from_isciiOffs [] ={
1187 0,
1188 };
1189
1190 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1191 toIBM943, sizeof(toIBM943), "ibm-943",
1192 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1193 log_err("u-> ibm-943 with stop did not match.\n");
1194
1195 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1196 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1197 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1198 log_err("u-> euc-jp with stop did not match.\n");
1199
1200 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1201 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1202 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1203 log_err("u-> euc-tw with stop did not match.\n");
1204
1205 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1206 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1207 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1208 log_err("u-> iso-2022-jp with stop did not match.\n");
1209
1210 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1211 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1212 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1213 log_err("u-> iso-2022-jp with stop did not match.\n");
1214
1215 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1216 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1217 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1218 log_err("u-> iso-2022-cn with stop did not match.\n");
1219
1220 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1221 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1222 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1223 log_err("u-> iso-2022-kr with stop did not match.\n");
1224
1225 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1226 to_hz, sizeof(to_hz), "HZ",
1227 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1228 log_err("u-> HZ with stop did not match.\n");\
1229
1230 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1231 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1232 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1233 log_err("u-> iscii with stop did not match.\n");
1234
1235
1236 }
73c04bcf
A
1237#endif
1238
b75a7d8f
A
1239 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1240 {
1241 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1242
1243 static const uint8_t to_SCSU[]={
1244 0x41,
1245
1246 };
1247 int32_t from_SCSUOffs [] ={
1248 0,
1249
1250 };
1251 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1252 to_SCSU, sizeof(to_SCSU), "SCSU",
1253 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1254 log_err("u-> SCSU with skip did not match.\n");
1255
1256 }
73c04bcf 1257
b75a7d8f 1258 /*to Unicode*/
73c04bcf
A
1259
1260#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1261 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1262 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1263 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1264 log_err("ibm-949->u with stop did not match.\n");
1265 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1266 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1267 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1268 log_err("ibm-943->u with stop did not match.\n");
1269 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1270 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1271 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1272 log_err("ibm-930->u with stop did not match.\n");
1273
1274 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1275 {
1276
1277 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1278 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1279 };
1280 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1281 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1282
1283
1284 /*EUC-JP*/
1285 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1286 0x8f, 0xda, 0xa1, /*unassigned*/
1287 0x8e, 0xe0,
1288 };
1289 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1290 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1291
1292 /*EUC_TW*/
1293 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1294 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1295 0xe6, 0xca, 0x8a,
1296 };
1297 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1298 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1299
1300
1301
1302 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1303 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1304 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1305 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1306
1307 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1308 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1309 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1310 log_err("euc-jp->u with stop did not match.\n");
1311
1312 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1313 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1314 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1315 log_err("euc-tw->u with stop did not match.\n");
1316 }
73c04bcf
A
1317#endif
1318
b75a7d8f
A
1319 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1320 {
1321 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1322 0xe0, 0x80, 0x61,};
1323 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1324 static const int32_t offsets1[] = { 0x0000, 0x0001};
1325
1326 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1327 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1328 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1329 log_err("utf8->u with stop did not match.\n");;
1330 }
1331 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1332 {
1333 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1334 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1335 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1336
1337 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1338 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1339 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1340 log_err("scsu->u with stop did not match.\n");;
1341 }
1342
1343}
1344
1345static void TestSub(int32_t inputsize, int32_t outputsize)
1346{
1347 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1348 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1349
1350 static const uint8_t expsubIBM_949[] =
1351 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1352
1353 static const uint8_t expsubIBM_943[] = {
1354 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1355
1356 static const uint8_t expsubIBM_930[] = {
1357 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1358
1359 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1360 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1361 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1362
1363 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1364 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1365 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1366
1367 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1368 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1369 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1370
1371 gInBufferSize = inputsize;
1372 gOutBufferSize = outputsize;
1373
1374 /*from unicode*/
73c04bcf
A
1375
1376#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1377 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1378 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1379 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1380 log_err("u-> ibm-949 with subst did not match.\n");
1381 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1382 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1383 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1384 log_err("u-> ibm-943 with subst did not match.\n");
1385 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1386 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1388 log_err("u-> ibm-930 with subst did not match.\n");
1389
1390 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1391 {
1392 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1393 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1394 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1395
1396
1397 /* EUC_JP*/
1398 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1399 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1400 0xf4, 0xfe, 0xf4, 0xfe,
1401 0x61, 0x8e, 0xe0,
1402 };
1403 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1404
1405 /*EUC_TW*/
1406 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1407 static const uint8_t to_euc_tw[]={
1408 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1409 0xfd, 0xfe, 0xfd, 0xfe,
1410 0x61, 0xe6, 0xca, 0x8a,
1411 };
1412
b75a7d8f
A
1413 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1414
1415 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1416 toIBM943, sizeof(toIBM943), "ibm-943",
1417 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1418 log_err("u-> ibm-943 with substitute did not match.\n");
1419
1420 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1421 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1422 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1423 log_err("u-> euc-jp with substitute did not match.\n");
1424
1425 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1426 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1427 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1428 log_err("u-> euc-tw with substitute did not match.\n");
b75a7d8f 1429 }
73c04bcf 1430#endif
b75a7d8f
A
1431
1432 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1433 {
1434 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1435
1436 const uint8_t to_SCSU[]={
1437 0x41,
1438 0x0e, 0xff,0xfd,
1439 0x42
1440
1441
1442 };
1443 int32_t from_SCSUOffs [] ={
1444 0,
1445 1,1,1,
1446 2,
1447
1448 };
1449 const uint8_t to_SCSU_1[]={
1450 0x41,
1451
1452 };
1453 int32_t from_SCSUOffs_1 [] ={
1454 0,
1455
1456 };
1457 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1458 to_SCSU, sizeof(to_SCSU), "SCSU",
1459 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1460 log_err("u-> SCSU with substitute did not match.\n");
1461
1462 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1463 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1464 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1465 log_err("u-> SCSU with substitute did not match.\n");
1466 }
1467
1468 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1469 {
1470 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1471 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1472 0xf0, 0x90, 0x90, 0x81,
1473 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1474 0xef, 0xbf, 0xbf, 0x61,
1475
1476 };
1477 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1478 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1479 expectedUTF8, sizeof(expectedUTF8), "utf8",
1480 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1481 log_err("u-> utf8 with stop did not match.\n");
1482 }
1483 }
1484
1485 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1486 {
1487 static const UChar in[]={ 0x0041, 0xfeff };
1488
1489 static const uint8_t out[]={
1490#if U_IS_BIG_ENDIAN
1491 0xfe, 0xff,
1492 0x00, 0x41,
1493 0xfe, 0xff
1494#else
1495 0xff, 0xfe,
1496 0x41, 0x00,
1497 0xff, 0xfe
1498#endif
1499 };
1500 static const int32_t offsets[]={
1501 -1, -1, 0, 0, 1, 1
1502 };
1503
1504 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1505 out, sizeof(out), "UTF-16",
1506 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1507 ) {
1508 log_err("u->UTF-16 with substitute did not match.\n");
1509 }
1510 }
1511
1512 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1513 {
1514 static const UChar in[]={ 0x0041, 0xfeff };
1515
1516 static const uint8_t out[]={
1517#if U_IS_BIG_ENDIAN
1518 0x00, 0x00, 0xfe, 0xff,
1519 0x00, 0x00, 0x00, 0x41,
1520 0x00, 0x00, 0xfe, 0xff
1521#else
1522 0xff, 0xfe, 0x00, 0x00,
1523 0x41, 0x00, 0x00, 0x00,
1524 0xff, 0xfe, 0x00, 0x00
1525#endif
1526 };
1527 static const int32_t offsets[]={
1528 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1529 };
1530
1531 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1532 out, sizeof(out), "UTF-32",
1533 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1534 ) {
1535 log_err("u->UTF-32 with substitute did not match.\n");
1536 }
1537 }
1538
1539 /*to unicode*/
73c04bcf
A
1540
1541#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1542 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1543 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1544 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1545 log_err("ibm-949->u with substitute did not match.\n");
1546 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1547 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1548 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1549 log_err("ibm-943->u with substitute did not match.\n");
1550 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1551 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1553 log_err("ibm-930->u with substitute did not match.\n");
1554
1555 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1556 {
1557
1558 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1559 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1560 };
1561 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1562 };
1563 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1564
1565
1566 /* EUC_JP*/
1567 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1568 0x8f, 0xda, 0xa1, /*unassigned*/
1569 0x8e, 0xe0, 0x8a
1570 };
1571 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1572 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1573
1574 /*EUC_TW*/
1575 const uint8_t sampleTxt_euc_tw[]={
1576 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1577 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1578 0xe6, 0xca, 0x8a,
1579 };
1580 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1581 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1582
1583
1584 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1585 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1586 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1587 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1588
1589
1590 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1591 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1592 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1593 log_err("euc-jp->u with substitute did not match.\n");
1594
1595
1596 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1597 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1598 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1599 log_err("euc-tw->u with substitute did not match.\n");
1600
1601
1602 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1603 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1605 log_err("euc-jp->u with substitute did not match.\n");
b75a7d8f 1606 }
73c04bcf
A
1607#endif
1608
b75a7d8f
A
1609 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1610 {
1611 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1612 0xe0, 0x80, 0x61,};
1613 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1614 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1615
1616 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1617 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1618 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1619 log_err("utf8->u with substitute did not match.\n");;
1620 }
1621 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1622 {
1623 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1624 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1625 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1626
1627 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1628 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1629 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1630 log_err("scsu->u with stop did not match.\n");;
1631 }
1632
73c04bcf 1633#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1634 log_verbose("Testing ibm-930 subchar/subchar1\n");
1635 {
1636 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1637 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1638 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1639
1640 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1641 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1642 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1643
1644 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1645 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1646 ) {
1647 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1648 }
1649
1650 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1651 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1652 ) {
1653 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1654 }
1655 }
1656
1657 log_verbose("Testing GB 18030 with substitute callbacks\n");
1658 {
b75a7d8f
A
1659 static const UChar u2[]={
1660 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1661 static const uint8_t gb2[]={
1662 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1663 static const int32_t offsets2[]={
1664 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1665
b75a7d8f
A
1666 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1667 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1668 ) {
1669 log_err("gb18030->u with substitute did not match.\n");
1670 }
1671 }
73c04bcf 1672#endif
b75a7d8f
A
1673
1674 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1675 {
1676 static const uint8_t utf7[]={
1677 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1678 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1679 };
1680 static const UChar unicode[]={
1681 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd
1682 };
1683 static const int32_t offsets[]={
1684 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23
1685 };
1686
1687 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1688 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1689 ) {
1690 log_err("UTF-7->u with substitute did not match.\n");
1691 }
1692 }
1693
b75a7d8f
A
1694 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1695 {
1696 static const uint8_t
1697 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1698 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1699 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1700
1701 static const UChar
1702 out1[]={ 0x4e00, 0xfeff },
1703 out2[]={ 0x004e, 0xfffe },
1704 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1705
1706 static const int32_t
1707 offsets1[]={ 2, 4 },
1708 offsets2[]={ 2, 4 },
1709 offsets3[]={ 0, 2, 4 };
1710
1711 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1712 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1713 ) {
1714 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1715 }
1716
1717 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1718 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1719 ) {
1720 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1721 }
1722
1723 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1724 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1725 ) {
1726 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1727 }
1728 }
1729
1730 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1731 {
1732 static const uint8_t
1733 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1734 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1735 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1736 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1737
1738 static const UChar
1739 out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
1740 out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
374ca955 1741 out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
b75a7d8f
A
1742 out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1743
1744 static const int32_t
1745 offsets1[]={ 4, 4, 8 },
1746 offsets2[]={ 4, 4, 8 },
1747 offsets3[]={ 0, 4, 4, 8, 12 },
1748 offsets4[]={ 0, 0, 4, 8 };
1749
1750 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1751 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1752 ) {
1753 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1754 }
1755
1756 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1757 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1758 ) {
1759 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1760 }
1761
1762 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1763 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1764 ) {
1765 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1766 }
1767
1768 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1770 ) {
1771 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1772 }
1773 }
1774}
1775
1776static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1777{
1778 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1779 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1780
1781 const uint8_t expsubwvalIBM_949[]= {
1782 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1783 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1784
1785 const uint8_t expsubwvalIBM_943[]= {
1786 0x9f, 0xaf, 0x9f, 0xb1,
1787 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1788
1789 const uint8_t expsubwvalIBM_930[] = {
1790 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1791
1792 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1793 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1794 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1795
1796 gInBufferSize = inputsize;
1797 gOutBufferSize = outputsize;
1798
1799 /*from Unicode*/
73c04bcf
A
1800
1801#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
1802 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1803 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1804 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1805 log_err("u-> ibm-949 with subst with value did not match.\n");
1806
1807 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1808 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1809 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1810 log_err("u-> ibm-943 with sub with value did not match.\n");
1811
1812 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1813 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1814 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1815 log_err("u-> ibm-930 with subst with value did not match.\n");
1816
1817
1818 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1819 {
1820 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1821 static const uint8_t toIBM943[]= { 0x61,
1822 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1823 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1825 0x61 };
1826 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1827
1828
1829 /* EUC_JP*/
1830 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1831 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1832 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1833 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1835 0x61, 0x8e, 0xe0,
1836 };
1837 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1838 3, 3, 3, 3, 3, 3,
1839 3, 3, 3, 3, 3, 3,
1840 5, 5, 5, 5, 5, 5,
1841 6, 7, 7,
1842 };
1843
1844 /*EUC_TW*/
1845 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1846 static const uint8_t to_euc_tw[]={
1847 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1848 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1849 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851 0x61, 0xe6, 0xca, 0x8a,
1852 };
1853 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1854 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1855 6, 7, 7, 8,
1856 };
1857 /*ISO-2022-JP*/
b75a7d8f
A
1858 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1859 static const uint8_t to_iso_2022_jp1[]={
1860 0x1b, 0x24, 0x42, 0x21, 0x21,
1861 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1862 0x1b, 0x24, 0x42, 0x21, 0x22,
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1864 0x42,
1865 };
1866
1867 static const int32_t from_iso_2022_jpOffs1 [] ={
1868 0,0,0,0,0,
1869 1,1,1,1,1,1,1,1,1,
1870 2,2,2,2,2,
1871 3,3,3,3,3,3,3,3,3,
1872 4,
1873 };
1874 /* surrogate pair*/
1875 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1876 static const uint8_t to_iso_2022_jp2[]={
1877 0x1b, 0x24, 0x42, 0x21, 0x21,
1878 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1879 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1880 0x1b, 0x24, 0x42, 0x21, 0x22,
1881 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1882 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1883 0x42,
1884 };
1885 static const int32_t from_iso_2022_jpOffs2 [] ={
1886 0,0,0,0,0,
1887 1,1,1,1,1,1,1,1,1,
1888 1,1,1,1,1,1,
1889 3,3,3,3,3,
1890 4,4,4,4,4,4,4,4,4,
1891 4,4,4,4,4,4,
1892 6,
1893 };
1894
1895 /*ISO-2022-cn*/
1896 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1897 static const uint8_t to_iso_2022_cn[]={
374ca955
A
1898 0x41,
1899 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
b75a7d8f
A
1900 0x42,
1901 };
1902 static const int32_t from_iso_2022_cnOffs [] ={
374ca955
A
1903 0,
1904 1,1,1,1,1,1,
b75a7d8f
A
1905 2,
1906 };
b75a7d8f
A
1907
1908 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1909
1910 static const uint8_t to_iso_2022_cn4[]={
1911 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1912 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1913 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 1914 0x0e, 0x21, 0x22,
b75a7d8f
A
1915 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1916 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1917 0x42,
1918 };
1919 static const int32_t from_iso_2022_cnOffs4 [] ={
1920 0,0,0,0,0,0,0,
1921 1,1,1,1,1,1,1,
1922 1,1,1,1,1,1,
374ca955 1923 3,3,3,
b75a7d8f
A
1924 4,4,4,4,4,4,4,
1925 4,4,4,4,4,4,
1926 6
1927
1928 };
1929
1930 /*ISO-2022-kr*/
1931 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1932 static const uint8_t to_iso_2022_kr2[]={
1933 0x1b, 0x24, 0x29, 0x43,
1934 0x41,
1935 0x0e, 0x25, 0x50,
1936 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1937 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1938 0x0e, 0x25, 0x50,
1939 0x0f, 0x42,
1940 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1941 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1942 0x43
1943 };
1944 static const int32_t from_iso_2022_krOffs2 [] ={
1945 -1,-1,-1,-1,
1946 0,
1947 1,1,1,
1948 2,2,2,2,2,2,2,
1949 2,2,2,2,2,2,
1950 4,4,4,
1951 5,5,
1952 6,6,6,6,6,6,
1953 6,6,6,6,6,6,
1954 8,
1955 };
1956
1957 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1958 static const uint8_t to_iso_2022_kr[]={
1959 0x1b, 0x24, 0x29, 0x43,
1960 0x41,
1961 0x0e, 0x25, 0x50,
1962 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1963 0x0e, 0x25, 0x50,
1964 0x0f, 0x42,
1965 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1966 0x43
1967 };
1968
1969
1970 static const int32_t from_iso_2022_krOffs [] ={
1971 -1,-1,-1,-1,
1972 0,
1973 1,1,1,
1974 2,2,2,2,2,2,2,
1975 3,3,3,
1976 4,4,
1977 5,5,5,5,5,5,
1978 6,
1979 };
1980 /* HZ encoding */
1981 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1982
1983 static const uint8_t to_hz[]={
1984 0x7e, 0x7d, 0x41,
1985 0x7e, 0x7b, 0x26, 0x30,
1986 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1987 0x7e, 0x7b, 0x26, 0x30,
1988 0x7e, 0x7d, 0x42,
1989
1990 };
1991 static const int32_t from_hzOffs [] ={
1992 0,0,0,
1993 1,1,1,1,
1994 2,2,2,2,2,2,2,2,
1995 3,3,3,3,
1996 4,4,4
1997 };
1998
1999 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2000 static const uint8_t to_hz2[]={
2001 0x7e, 0x7d, 0x41,
2002 0x7e, 0x7b, 0x26, 0x30,
2003 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2004 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2005 0x7e, 0x7b, 0x26, 0x30,
2006 0x7e, 0x7d, 0x42,
2007 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2008 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2009 0x43
2010 };
2011 static const int32_t from_hzOffs2 [] ={
2012 0,0,0,
2013 1,1,1,1,
2014 2,2,2,2,2,2,2,2,
2015 2,2,2,2,2,2,
2016 4,4,4,4,
2017 5,5,5,
2018 6,6,6,6,6,6,
2019 6,6,6,6,6,6,
2020 8,
2021 };
2022
2023 /*ISCII*/
b75a7d8f
A
2024 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2025 static const uint8_t to_iscii[]={
2026 0x41,
2027 0xef, 0x42, 0xa1,
2028 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2029 0xa2,
2030 0x42,
2031 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2032 0x43
2033 };
2034
2035
2036 static const int32_t from_isciiOffs [] ={
2037 0,
2038 1,1,1,
2039 2,2,2,2,2,2,
2040 3,
2041 4,
2042 5,5,5,5,5,5,
2043 6,
2044 };
2045
2046 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2047 toIBM943, sizeof(toIBM943), "ibm-943",
2048 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2049 log_err("u-> ibm-943 with subst with value did not match.\n");
2050
2051 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2052 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
2053 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2054 log_err("u-> euc-jp with subst with value did not match.\n");
2055
2056 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2057 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2058 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2059 log_err("u-> euc-tw with subst with value did not match.\n");
2060
b75a7d8f
A
2061 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2062 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2063 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2064 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2065
2066 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2067 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2068 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2069 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2070
2071 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2072 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2073 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2074 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2075 /*ESCAPE OPTIONS*/
2076 {
2077 /* surrogate pair*/
2078 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2079 static const uint8_t to_iso_2022_jp3_v2[]={
2080 0x1b, 0x24, 0x42, 0x21, 0x21,
2081 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2082
2083 0x1b, 0x24, 0x42, 0x21, 0x22,
2084 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2085
2086 0x42,
2087 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2088 };
2089
2090 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2091 0,0,0,0,0,
2092 1,1,1,1,1,1,1,1,1,1,1,1,
2093
2094 3,3,3,3,3,
2095 4,4,4,4,4,4,4,4,4,4,4,4,
2096
2097 6,
2098 7,7,7,7,7,7,7,7,7
2099 };
2100
2101 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2102 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2103 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2104 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2105 }
b75a7d8f
A
2106 {
2107 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2108 static const uint8_t to_iso_2022_cn5_v2[]={
2109 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2110 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2111 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
374ca955 2112 0x0e, 0x21, 0x22,
b75a7d8f
A
2113 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2114 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2115 0x42,
374ca955 2116 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2117 };
2118 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2119 0,0,0,0,0,0,0,
2120 1,1,1,1,1,1,1,
2121 1,1,1,1,1,1,
374ca955 2122 3,3,3,
b75a7d8f
A
2123 4,4,4,4,4,4,4,
2124 4,4,4,4,4,4,
2125 6,
374ca955 2126 7,7,7,7,7,7
b75a7d8f
A
2127 };
2128 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2129 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2130 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2131 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2132
2133 }
2134 {
2135 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2136 static const uint8_t to_iso_2022_cn6_v2[]={
2137 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2138 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
374ca955 2139 0x0e, 0x21, 0x22,
b75a7d8f
A
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2141 0x42,
374ca955 2142 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
b75a7d8f
A
2143 };
2144 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2145 0, 0, 0, 0, 0, 0, 0,
2146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2147 3, 3, 3,
b75a7d8f
A
2148 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2149 6,
374ca955 2150 7, 7, 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2151 };
2152 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2153 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2154 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2155 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2156
2157 }
2158 {
2159 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2160 static const uint8_t to_iso_2022_cn7_v2[]={
2161 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2162 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2163 0x0e, 0x21, 0x22,
b75a7d8f 2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
374ca955 2165 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
b75a7d8f
A
2166 };
2167 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2168 0, 0, 0, 0, 0, 0, 0,
2169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374ca955 2170 3, 3, 3,
b75a7d8f
A
2171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2172 6,
374ca955 2173 7, 7, 7, 7, 7, 7,
b75a7d8f
A
2174 };
2175 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2176 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2177 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2178 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2179
46f4442e
A
2180 }
2181 {
2182 static const UChar iso_2022_cn_inputText8[]={
2183 0x3000,
2184 0xD84D, 0xDC56,
2185 0x3001,
2186 0xD84D, 0xDC56,
2187 0xDBFF, 0xDFFF,
2188 0x0042,
2189 0x0902};
2190 static const uint8_t to_iso_2022_cn8_v2[]={
2191 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2192 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2193 0x0e, 0x21, 0x22,
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2195 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2196 0x42,
2197 0x5c, 0x39, 0x30, 0x32, 0x20
2198 };
2199 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2200 0, 0, 0, 0, 0, 0, 0,
2201 1, 1, 1, 1, 1, 1, 1, 1,
2202 3, 3, 3,
2203 4, 4, 4, 4, 4, 4, 4, 4,
2204 6, 6, 6, 6, 6, 6, 6, 6,
2205 8,
2206 9, 9, 9, 9, 9
2207 };
2208 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2209 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2210 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2211 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2212
b75a7d8f
A
2213 }
2214 {
2215 static const uint8_t to_iso_2022_cn4_v3[]={
2216 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2217 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
374ca955 2218 0x0e, 0x21, 0x22,
b75a7d8f
A
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2220 0x42
2221 };
2222
2223
2224 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2225 0,0,0,0,0,0,0,
2226 1,1,1,1,1,1,1,1,1,1,1,
2227
374ca955 2228 3,3,3,
b75a7d8f
A
2229 4,4,4,4,4,4,4,4,4,4,4,
2230
2231 6
2232
2233 };
2234 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2235 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2236 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2237 {
2238 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2239 }
2240 }
2241 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2242 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2243 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2244 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2245
b75a7d8f
A
2246 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2247 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2248 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2249 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2250 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2251 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2252 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2253 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2254 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2255 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2256 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2257 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2258 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2259 to_hz, sizeof(to_hz), "HZ",
2260 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2261 log_err("u-> hz with subst with value did not match.\n");
2262 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2263 to_hz2, sizeof(to_hz2), "HZ",
2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2265 log_err("u-> hz with subst with value did not match.\n");
2266
2267 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2268 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2269 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2270 log_err("u-> iscii with subst with value did not match.\n");
b75a7d8f 2271 }
73c04bcf 2272#endif
b75a7d8f
A
2273
2274 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2275 /*to Unicode*/
2276 {
73c04bcf 2277#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2278 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2279 0x81, 0xad, /*unassigned*/
2280 0x89, 0xd3 };
2281 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2282 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2283 0x7B87};
2284 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2285
2286 /* EUC_JP*/
2287 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2288 0x8f, 0xda, 0xa1, /*unassigned*/
2289 0x8e, 0xe0,
2290 };
2291 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2292 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2293 0x00a2 };
2294 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2295 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2296 9,
2297 };
2298
2299 /*EUC_TW*/
2300 static const uint8_t sampleTxt_euc_tw[]={
2301 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2302 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2303 0xe6, 0xca, 0x8a,
2304 };
2305 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2306 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2307 0x8706, 0x8a, };
2308 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2309 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2310 11, 13};
2311
2312 /*iso-2022-jp*/
2313 static const uint8_t sampleTxt_iso_2022_jp[]={
2314 0x1b, 0x28, 0x42, 0x41,
2315 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
2316 0x1b, 0x28, 0x42, 0x42,
2317
2318 };
2319 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2320 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2321
2322 /*iso-2022-cn*/
2323 static const uint8_t sampleTxt_iso_2022_cn[]={
2324 0x0f, 0x41, 0x44,
2325 0x1B, 0x24, 0x29, 0x47,
2326 0x0E, 0x40, 0x6c, /*unassigned*/
2327 0x0f, 0x42,
2328
2329 };
2330 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2331 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2332
2333 /*iso-2022-kr*/
2334 static const uint8_t sampleTxt_iso_2022_kr[]={
2335 0x1b, 0x24, 0x29, 0x43,
2336 0x41,
2337 0x0E, 0x7f, 0x1E,
2338 0x0e, 0x25, 0x50,
2339 0x0f, 0x51,
2340 0x42, 0x43,
2341
2342 };
2343 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2344 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2345
2346 /*hz*/
2347 static const uint8_t sampleTxt_hz[]={
2348 0x41,
2349 0x7e, 0x7b, 0x26, 0x30,
2350 0x7f, 0x1E, /*unassigned*/
2351 0x26, 0x30,
2352 0x7e, 0x7d, 0x42,
2353 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2354 0x7e, 0x7d, 0x42,
2355 };
2356 static const UChar hztoUnicode[]={
2357 0x41,
2358 0x03a0,
2359 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2360 0x03A0,
2361 0x42,
2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2363 0x42,};
2364
2365 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2366
2367
2368 /*iscii*/
2369 static const uint8_t sampleTxt_iscii[]={
2370 0x41,
2371 0x30,
2372 0xEB, /*unassigned*/
2373 0xa3,
2374 0x42,
2375 0xEC, /*unassigned*/
2376 0x42,
2377 };
2378 static const UChar isciitoUnicode[]={
2379 0x41,
2380 0x30,
2381 0x25, 0x58, 0x45, 0x42,
2382 0x0903,
2383 0x42,
2384 0x25, 0x58, 0x45, 0x43,
2385 0x42,};
2386
2387 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
73c04bcf 2388#endif
b75a7d8f 2389
b75a7d8f
A
2390 /*UTF8*/
2391 static const uint8_t sampleTxtUTF8[]={
2392 0x20, 0x64, 0x50,
2393 0xC2, 0x7E, /* truncated char */
2394 0x20,
2395 0xE0, 0xB5, 0x7E, /* truncated char */
2396 0x40,
2397 };
2398 static const UChar UTF8ToUnicode[]={
2399 0x0020, 0x0064, 0x0050,
2400 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2401 0x0020,
2402 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2403 0x0040
2404 };
2405 static const int32_t fromUTF8[] = {
2406 0, 1, 2,
2407 3, 3, 3, 3, 4,
2408 5,
2409 6, 6, 6, 6, 6, 6, 6, 6, 8,
2410 9
2411 };
2412 static const UChar UTF8ToUnicodeXML_DEC[]={
2413 0x0020, 0x0064, 0x0050,
2414 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* &#194;~ */
2415 0x0020,
2416 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2417 0x0040
2418 };
2419 static const int32_t fromUTF8XML_DEC[] = {
2420 0, 1, 2,
2421 3, 3, 3, 3, 3, 3, 4,
2422 5,
2423 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2424 9
2425 };
2426
73c04bcf
A
2427
2428#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2429 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2430 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2431 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2432 log_err("ibm-943->u with substitute with value did not match.\n");
2433
2434 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2435 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
2436 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2437 log_err("euc-jp->u with substitute with value did not match.\n");
2438
2439 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2440 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2441 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2442 log_err("euc-tw->u with substitute with value did not match.\n");
2443
2444 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2445 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2446 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2447 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2448
2449 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2450 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2451 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2452 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2453
2454 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2455 {
2456 static const UChar iso_2022_jptoUnicodeDec[]={
2457 0x0041,
2458 0x0026, 0x0023, 0x0034, 0x0032, 0x003b,
2459 0x0026, 0x0023, 0x0036, 0x0038, 0x003b,
2460 0x0042 };
2461 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2462 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2463 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2464 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2465 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2466 }
2467 {
2468 static const UChar iso_2022_jptoUnicodeHex[]={
2469 0x0041,
2470 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2471 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2472 0x0042 };
2473 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2474 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2475 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2476 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2477 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2478 }
2479 {
2480 static const UChar iso_2022_jptoUnicodeC[]={
2481 0x0041,
2482 0x005C, 0x0078, 0x0032, 0x0041,
2483 0x005C, 0x0078, 0x0034, 0x0034,
2484 0x0042 };
2485 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2486 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2487 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2488 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2489 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2490 }
2491 }
2492 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2493 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2494 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2495 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2496
2497 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2498 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2500 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2501
2502 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2503 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2504 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2505 log_err("hz->u with substitute with value did not match.\n");
2506
2507 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2508 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2509 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2510 log_err("ISCII ->u with substitute with value did not match.\n");
73c04bcf
A
2511#endif
2512
b75a7d8f
A
2513 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2514 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2515 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2516 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2517 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2518 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2519 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2520 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2521 }
2522}
2523
73c04bcf 2524#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f
A
2525static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2526{
2527 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2528 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2529 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2530
2531
2532 static const uint8_t text943[] = {
fd0068a8
A
2533 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2534 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2535 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
b75a7d8f
A
2536 static const UChar toUnicode943stop[]= { 0x304b};
2537
fd0068a8
A
2538 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2539 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
b75a7d8f
A
2540 static const int32_t fromIBM943Offsstop[] = { 0};
2541
2542 gInBufferSize = inputsize;
2543 gOutBufferSize = outputsize;
2544 /*checking with a legal value*/
2545 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2546 templegal949, sizeof(templegal949), "ibm-949",
2547 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2548 log_err("u-> ibm-949 with skip did not match.\n");
2549
2550 /*checking illegal value for ibm-943 with substitute*/
2551 if(!testConvertToUnicode(text943, sizeof(text943),
2552 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2553 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2554 log_err("ibm-943->u with subst did not match.\n");
2555 /*checking illegal value for ibm-943 with skip */
2556 if(!testConvertToUnicode(text943, sizeof(text943),
2557 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2558 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2559 log_err("ibm-943->u with skip did not match.\n");
2560
2561 /*checking illegal value for ibm-943 with stop */
2562 if(!testConvertToUnicode(text943, sizeof(text943),
2563 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2564 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2565 log_err("ibm-943->u with stop did not match.\n");
2566
2567}
2568
2569static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2570{
2571 static const uint8_t sampleText[] = {
2572 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
fd0068a8
A
2573 0xff, 0x32, 0x33};
2574 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2575 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
b75a7d8f
A
2576 /*checking illegal value for ibm-943 with substitute*/
2577 gInBufferSize = inputsize;
2578 gOutBufferSize = outputsize;
2579
2580 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2581 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2582 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2583 log_err("ibm-943->u with subst did not match.\n");
2584}
2585
2586static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2587{
2588 /*EBCDIC_STATEFUL*/
2589 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2590 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2591 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2592/* s SO doubl SI sng s SO fe fe SI s */
2593
2594 /*EBCDIC_STATEFUL with subChar=3f*/
2595 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2596 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2597 static const char mySubChar[]={ 0x3f};
2598
2599 gInBufferSize = inputsize;
2600 gOutBufferSize = outputsize;
2601
2602 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2603 toIBM930, sizeof(toIBM930), "ibm-930",
2604 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2605 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2606
2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2608 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2611}
73c04bcf 2612#endif
b75a7d8f
A
2613
2614UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2615 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2616 const char *mySubChar, int8_t len)
2617{
2618
2619
2620 UErrorCode status = U_ZERO_ERROR;
2621 UConverter *conv = 0;
73c04bcf 2622 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
2623 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2624 const UChar *src;
73c04bcf
A
2625 char *end;
2626 char *targ;
b75a7d8f
A
2627 int32_t *offs;
2628 int i;
2629 int32_t realBufferSize;
73c04bcf 2630 char *realBufferEnd;
b75a7d8f
A
2631 const UChar *realSourceEnd;
2632 const UChar *sourceLimit;
2633 UBool checkOffsets = TRUE;
2634 UBool doFlush;
2635 char junk[9999];
2636 char offset_str[9999];
73c04bcf 2637 char *p;
b75a7d8f
A
2638 UConverterFromUCallback oldAction = NULL;
2639 const void* oldContext = NULL;
2640
2641
2642 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 2643 junkout[i] = (char)0xF0;
b75a7d8f
A
2644 for(i=0;i<NEW_MAX_BUFFER;i++)
2645 junokout[i] = 0xFF;
2646 setNuConvTestName(codepage, "FROM");
2647
2648 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2649 gOutBufferSize);
2650
2651 conv = ucnv_open(codepage, &status);
2652 if(U_FAILURE(status))
2653 {
2654 log_data_err("Couldn't open converter %s\n",codepage);
2655 return TRUE;
2656 }
2657
2658 log_verbose("Converter opened..\n");
2659
2660 /*----setting the callback routine----*/
2661 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2662 if (U_FAILURE(status))
2663 {
2664 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2665 }
2666 /*------------------------*/
2667 /*setting the subChar*/
2668 if(mySubChar != NULL){
2669 ucnv_setSubstChars(conv, mySubChar, len, &status);
2670 if (U_FAILURE(status)) {
2671 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2672 }
2673 }
2674 /*------------*/
2675
2676 src = source;
2677 targ = junkout;
2678 offs = junokout;
2679
2680 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2681 realBufferEnd = junkout + realBufferSize;
2682 realSourceEnd = source + sourceLen;
2683
2684 if ( gOutBufferSize != realBufferSize )
2685 checkOffsets = FALSE;
2686
2687 if( gInBufferSize != NEW_MAX_BUFFER )
2688 checkOffsets = FALSE;
2689
2690 do
2691 {
2692 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2693 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2694
2695 doFlush = (UBool)(sourceLimit == realSourceEnd);
2696
2697 if(targ == realBufferEnd)
2698 {
2699 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2700 return FALSE;
2701 }
2702 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2703
2704
2705 status = U_ZERO_ERROR;
2706
2707 ucnv_fromUnicode (conv,
2708 (char **)&targ,
2709 (const char *)end,
2710 &src,
2711 sourceLimit,
2712 checkOffsets ? offs : NULL,
2713 doFlush, /* flush if we're at the end of the input data */
2714 &status);
2715 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2716
2717
2718 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2719 UChar errChars[50]; /* should be sufficient */
2720 int8_t errLen = 50;
2721 UErrorCode err = U_ZERO_ERROR;
2722 const UChar* limit= NULL;
2723 const UChar* start= NULL;
2724 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2725 if(U_FAILURE(err)){
2726 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2727 }
2728 /* src points to limit of invalid chars */
2729 limit = src;
2730 /* length of in invalid chars should be equal to returned length*/
2731 start = src - errLen;
2732 if(u_strncmp(errChars,start,errLen)!=0){
2733 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2734 }
2735 }
2736 /* allow failure codes for the stop callback */
2737 if(U_FAILURE(status) &&
2738 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2739 {
2740 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2741 return FALSE;
2742 }
2743
2744 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2745 sourceLen, targ-junkout);
2746 if(VERBOSITY)
2747 {
2748
2749 junk[0] = 0;
2750 offset_str[0] = 0;
2751 for(p = junkout;p<targ;p++)
2752 {
2753 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2754 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2755 }
2756
2757 log_verbose(junk);
2758 printSeq(expect, expectLen);
2759 if ( checkOffsets )
2760 {
2761 log_verbose("\nOffsets:");
2762 log_verbose(offset_str);
2763 }
2764 log_verbose("\n");
2765 }
2766 ucnv_close(conv);
2767
2768
2769 if(expectLen != targ-junkout)
2770 {
2771 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2772 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 2773 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2774 printSeqErr(expect, expectLen);
2775 return FALSE;
2776 }
2777
2778 if (checkOffsets && (expectOffsets != 0) )
2779 {
2780 log_verbose("comparing %d offsets..\n", targ-junkout);
2781 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2782 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2783 log_err("Got Output : ");
73c04bcf 2784 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
2785 log_err("Got Offsets: ");
2786 for(p=junkout;p<targ;p++)
2787 log_err("%d,", junokout[p-junkout]);
2788 log_err("\n");
2789 log_err("Expected Offsets: ");
2790 for(i=0; i<(targ-junkout); i++)
2791 log_err("%d,", expectOffsets[i]);
2792 log_err("\n");
2793 return FALSE;
2794 }
2795 }
2796
2797 if(!memcmp(junkout, expect, expectLen))
2798 {
2799 log_verbose("String matches! %s\n", gNuConvTestName);
2800 return TRUE;
2801 }
2802 else
2803 {
2804 log_err("String does not match. %s\n", gNuConvTestName);
2805 log_err("source: ");
2806 printUSeqErr(source, sourceLen);
2807 log_err("Got: ");
73c04bcf 2808 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
2809 log_err("Expected: ");
2810 printSeqErr(expect, expectLen);
2811 return FALSE;
2812 }
2813}
2814
2815UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2816 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2817 const char *mySubChar, int8_t len)
2818{
2819 UErrorCode status = U_ZERO_ERROR;
2820 UConverter *conv = 0;
2821 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2822 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
2823 const char *src;
2824 const char *realSourceEnd;
2825 const char *srcLimit;
b75a7d8f
A
2826 UChar *targ;
2827 UChar *end;
2828 int32_t *offs;
2829 int i;
2830 UBool checkOffsets = TRUE;
2831 char junk[9999];
2832 char offset_str[9999];
2833 UChar *p;
2834 UConverterToUCallback oldAction = NULL;
2835 const void* oldContext = NULL;
2836
2837 int32_t realBufferSize;
2838 UChar *realBufferEnd;
2839
2840
2841 for(i=0;i<NEW_MAX_BUFFER;i++)
2842 junkout[i] = 0xFFFE;
2843
2844 for(i=0;i<NEW_MAX_BUFFER;i++)
2845 junokout[i] = -1;
2846
2847 setNuConvTestName(codepage, "TO");
2848
2849 log_verbose("\n========= %s\n", gNuConvTestName);
2850
2851 conv = ucnv_open(codepage, &status);
2852 if(U_FAILURE(status))
2853 {
2854 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2855 return TRUE;
2856 }
2857
2858 log_verbose("Converter opened..\n");
2859
73c04bcf 2860 src = (const char *)source;
b75a7d8f
A
2861 targ = junkout;
2862 offs = junokout;
2863
2864 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2865 realBufferEnd = junkout + realBufferSize;
2866 realSourceEnd = src + sourcelen;
2867 /*----setting the callback routine----*/
2868 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2869 if (U_FAILURE(status))
2870 {
2871 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2872 }
2873 /*-------------------------------------*/
2874 /*setting the subChar*/
2875 if(mySubChar != NULL){
2876 ucnv_setSubstChars(conv, mySubChar, len, &status);
2877 if (U_FAILURE(status)) {
2878 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2879 }
2880 }
2881 /*------------*/
2882
2883
2884 if ( gOutBufferSize != realBufferSize )
2885 checkOffsets = FALSE;
2886
2887 if( gInBufferSize != NEW_MAX_BUFFER )
2888 checkOffsets = FALSE;
2889
2890 do
2891 {
2892 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2893 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2894
2895 if(targ == realBufferEnd)
2896 {
2897 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2898 return FALSE;
2899 }
2900 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2901
2902
2903
2904 status = U_ZERO_ERROR;
2905
2906 ucnv_toUnicode (conv,
2907 &targ,
2908 end,
2909 (const char **)&src,
2910 (const char *)srcLimit,
2911 checkOffsets ? offs : NULL,
2912 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2913 &status);
2914 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2915
2916 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2917 char errChars[50]; /* should be sufficient */
2918 int8_t errLen = 50;
2919 UErrorCode err = U_ZERO_ERROR;
73c04bcf
A
2920 const char* limit= NULL;
2921 const char* start= NULL;
b75a7d8f
A
2922 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2923 if(U_FAILURE(err)){
2924 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2925 }
2926 /* src points to limit of invalid chars */
2927 limit = src;
2928 /* length of in invalid chars should be equal to returned length*/
2929 start = src - errLen;
73c04bcf 2930 if(uprv_strncmp(errChars,start,errLen)!=0){
b75a7d8f
A
2931 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2932 }
2933 }
2934 /* allow failure codes for the stop callback */
2935 if(U_FAILURE(status) &&
2936 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2937 {
2938 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2939 return FALSE;
2940 }
2941
2942 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2943 sourcelen, targ-junkout);
2944 if(VERBOSITY)
2945 {
2946
2947 junk[0] = 0;
2948 offset_str[0] = 0;
2949
2950 for(p = junkout;p<targ;p++)
2951 {
2952 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2953 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2954 }
2955
2956 log_verbose(junk);
2957 printUSeq(expect, expectlen);
2958 if ( checkOffsets )
2959 {
2960 log_verbose("\nOffsets:");
2961 log_verbose(offset_str);
2962 }
2963 log_verbose("\n");
2964 }
2965 ucnv_close(conv);
2966
2967 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2968
2969 if (checkOffsets && (expectOffsets != 0))
2970 {
2971 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2972 {
2973 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2974 log_err("Got offsets: ");
2975 for(p=junkout;p<targ;p++)
2976 log_err(" %2d,", junokout[p-junkout]);
2977 log_err("\n");
2978 log_err("Expected offsets: ");
2979 for(i=0; i<(targ-junkout); i++)
2980 log_err(" %2d,", expectOffsets[i]);
2981 log_err("\n");
2982 log_err("Got output: ");
2983 for(i=0; i<(targ-junkout); i++)
2984 log_err("0x%04x,", junkout[i]);
2985 log_err("\n");
2986 log_err("From source: ");
73c04bcf 2987 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
2988 log_err(" 0x%02x,", (unsigned char)source[i]);
2989 log_err("\n");
2990 }
2991 }
2992
2993 if(!memcmp(junkout, expect, expectlen*2))
2994 {
2995 log_verbose("Matches!\n");
2996 return TRUE;
2997 }
2998 else
2999 {
3000 log_err("String does not match. %s\n", gNuConvTestName);
3001 log_verbose("String does not match. %s\n", gNuConvTestName);
3002 log_err("Got: ");
3003 printUSeqErr(junkout, expectlen);
3004 log_err("Expected: ");
3005 printUSeqErr(expect, expectlen);
3006 log_err("\n");
3007 return FALSE;
3008 }
3009}
3010
3011UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3012 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3013 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3014{
3015
3016
3017 UErrorCode status = U_ZERO_ERROR;
3018 UConverter *conv = 0;
73c04bcf 3019 char junkout[NEW_MAX_BUFFER]; /* FIX */
b75a7d8f
A
3020 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3021 const UChar *src;
73c04bcf
A
3022 char *end;
3023 char *targ;
b75a7d8f
A
3024 int32_t *offs;
3025 int i;
3026 int32_t realBufferSize;
73c04bcf 3027 char *realBufferEnd;
b75a7d8f
A
3028 const UChar *realSourceEnd;
3029 const UChar *sourceLimit;
3030 UBool checkOffsets = TRUE;
3031 UBool doFlush;
3032 char junk[9999];
3033 char offset_str[9999];
73c04bcf 3034 char *p;
b75a7d8f
A
3035 UConverterFromUCallback oldAction = NULL;
3036 const void* oldContext = NULL;
3037
3038
3039 for(i=0;i<NEW_MAX_BUFFER;i++)
73c04bcf 3040 junkout[i] = (char)0xF0;
b75a7d8f
A
3041 for(i=0;i<NEW_MAX_BUFFER;i++)
3042 junokout[i] = 0xFF;
3043 setNuConvTestName(codepage, "FROM");
3044
3045 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3046 gOutBufferSize);
3047
3048 conv = ucnv_open(codepage, &status);
3049 if(U_FAILURE(status))
3050 {
3051 log_data_err("Couldn't open converter %s\n",codepage);
3052 return TRUE; /* Because the err has already been logged. */
3053 }
3054
3055 log_verbose("Converter opened..\n");
3056
3057 /*----setting the callback routine----*/
3058 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3059 if (U_FAILURE(status))
3060 {
3061 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3062 }
3063 /*------------------------*/
3064 /*setting the subChar*/
3065 if(mySubChar != NULL){
3066 ucnv_setSubstChars(conv, mySubChar, len, &status);
3067 if (U_FAILURE(status)) {
3068 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3069 }
3070 }
3071 /*------------*/
3072
3073 src = source;
3074 targ = junkout;
3075 offs = junokout;
3076
3077 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3078 realBufferEnd = junkout + realBufferSize;
3079 realSourceEnd = source + sourceLen;
3080
3081 if ( gOutBufferSize != realBufferSize )
3082 checkOffsets = FALSE;
3083
3084 if( gInBufferSize != NEW_MAX_BUFFER )
3085 checkOffsets = FALSE;
3086
3087 do
3088 {
3089 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3090 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3091
3092 doFlush = (UBool)(sourceLimit == realSourceEnd);
3093
3094 if(targ == realBufferEnd)
3095 {
3096 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3097 return FALSE;
3098 }
3099 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3100
3101
3102 status = U_ZERO_ERROR;
3103
3104 ucnv_fromUnicode (conv,
3105 (char **)&targ,
3106 (const char *)end,
3107 &src,
3108 sourceLimit,
3109 checkOffsets ? offs : NULL,
3110 doFlush, /* flush if we're at the end of the input data */
3111 &status);
3112 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3113
3114 /* allow failure codes for the stop callback */
3115 if(U_FAILURE(status) && status != expectedError)
3116 {
3117 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3118 return FALSE;
3119 }
3120
3121 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3122 sourceLen, targ-junkout);
3123 if(VERBOSITY)
3124 {
3125
3126 junk[0] = 0;
3127 offset_str[0] = 0;
3128 for(p = junkout;p<targ;p++)
3129 {
3130 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3131 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3132 }
3133
3134 log_verbose(junk);
3135 printSeq(expect, expectLen);
3136 if ( checkOffsets )
3137 {
3138 log_verbose("\nOffsets:");
3139 log_verbose(offset_str);
3140 }
3141 log_verbose("\n");
3142 }
3143 ucnv_close(conv);
3144
3145
3146 if(expectLen != targ-junkout)
3147 {
3148 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3149 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
73c04bcf 3150 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3151 printSeqErr(expect, expectLen);
3152 return FALSE;
3153 }
3154
3155 if (checkOffsets && (expectOffsets != 0) )
3156 {
3157 log_verbose("comparing %d offsets..\n", targ-junkout);
3158 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3159 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3160 log_err("Got Output : ");
73c04bcf 3161 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
b75a7d8f
A
3162 log_err("Got Offsets: ");
3163 for(p=junkout;p<targ;p++)
3164 log_err("%d,", junokout[p-junkout]);
3165 log_err("\n");
3166 log_err("Expected Offsets: ");
3167 for(i=0; i<(targ-junkout); i++)
3168 log_err("%d,", expectOffsets[i]);
3169 log_err("\n");
3170 return FALSE;
3171 }
3172 }
3173
3174 if(!memcmp(junkout, expect, expectLen))
3175 {
3176 log_verbose("String matches! %s\n", gNuConvTestName);
3177 return TRUE;
3178 }
3179 else
3180 {
3181 log_err("String does not match. %s\n", gNuConvTestName);
3182 log_err("source: ");
3183 printUSeqErr(source, sourceLen);
3184 log_err("Got: ");
73c04bcf 3185 printSeqErr((const uint8_t *)junkout, expectLen);
b75a7d8f
A
3186 log_err("Expected: ");
3187 printSeqErr(expect, expectLen);
3188 return FALSE;
3189 }
3190}
3191UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3192 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3193 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3194{
3195 UErrorCode status = U_ZERO_ERROR;
3196 UConverter *conv = 0;
3197 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3198 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
73c04bcf
A
3199 const char *src;
3200 const char *realSourceEnd;
3201 const char *srcLimit;
b75a7d8f
A
3202 UChar *targ;
3203 UChar *end;
3204 int32_t *offs;
3205 int i;
3206 UBool checkOffsets = TRUE;
3207 char junk[9999];
3208 char offset_str[9999];
3209 UChar *p;
3210 UConverterToUCallback oldAction = NULL;
3211 const void* oldContext = NULL;
3212
3213 int32_t realBufferSize;
3214 UChar *realBufferEnd;
3215
3216
3217 for(i=0;i<NEW_MAX_BUFFER;i++)
3218 junkout[i] = 0xFFFE;
3219
3220 for(i=0;i<NEW_MAX_BUFFER;i++)
3221 junokout[i] = -1;
3222
3223 setNuConvTestName(codepage, "TO");
3224
3225 log_verbose("\n========= %s\n", gNuConvTestName);
3226
3227 conv = ucnv_open(codepage, &status);
3228 if(U_FAILURE(status))
3229 {
3230 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3231 return TRUE;
3232 }
3233
3234 log_verbose("Converter opened..\n");
3235
73c04bcf 3236 src = (const char *)source;
b75a7d8f
A
3237 targ = junkout;
3238 offs = junokout;
3239
3240 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3241 realBufferEnd = junkout + realBufferSize;
3242 realSourceEnd = src + sourcelen;
3243 /*----setting the callback routine----*/
3244 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3245 if (U_FAILURE(status))
3246 {
3247 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3248 }
3249 /*-------------------------------------*/
3250 /*setting the subChar*/
3251 if(mySubChar != NULL){
3252 ucnv_setSubstChars(conv, mySubChar, len, &status);
3253 if (U_FAILURE(status)) {
3254 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3255 }
3256 }
3257 /*------------*/
3258
3259
3260 if ( gOutBufferSize != realBufferSize )
3261 checkOffsets = FALSE;
3262
3263 if( gInBufferSize != NEW_MAX_BUFFER )
3264 checkOffsets = FALSE;
3265
3266 do
3267 {
3268 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3269 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3270
3271 if(targ == realBufferEnd)
3272 {
3273 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3274 return FALSE;
3275 }
3276 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3277
3278
3279
3280 status = U_ZERO_ERROR;
3281
3282 ucnv_toUnicode (conv,
3283 &targ,
3284 end,
3285 (const char **)&src,
3286 (const char *)srcLimit,
3287 checkOffsets ? offs : NULL,
3288 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3289 &status);
3290 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3291
3292 /* allow failure codes for the stop callback */
3293 if(U_FAILURE(status) && status!=expectedError)
3294 {
3295 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3296 return FALSE;
3297 }
3298
3299 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3300 sourcelen, targ-junkout);
3301 if(VERBOSITY)
3302 {
3303
3304 junk[0] = 0;
3305 offset_str[0] = 0;
3306
3307 for(p = junkout;p<targ;p++)
3308 {
3309 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3310 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3311 }
3312
3313 log_verbose(junk);
3314 printUSeq(expect, expectlen);
3315 if ( checkOffsets )
3316 {
3317 log_verbose("\nOffsets:");
3318 log_verbose(offset_str);
3319 }
3320 log_verbose("\n");
3321 }
3322 ucnv_close(conv);
3323
3324 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3325
3326 if (checkOffsets && (expectOffsets != 0))
3327 {
3328 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3329 {
3330 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3331 log_err("Got offsets: ");
3332 for(p=junkout;p<targ;p++)
3333 log_err(" %2d,", junokout[p-junkout]);
3334 log_err("\n");
3335 log_err("Expected offsets: ");
3336 for(i=0; i<(targ-junkout); i++)
3337 log_err(" %2d,", expectOffsets[i]);
3338 log_err("\n");
3339 log_err("Got output: ");
3340 for(i=0; i<(targ-junkout); i++)
3341 log_err("0x%04x,", junkout[i]);
3342 log_err("\n");
3343 log_err("From source: ");
73c04bcf 3344 for(i=0; i<(src-(const char *)source); i++)
b75a7d8f
A
3345 log_err(" 0x%02x,", (unsigned char)source[i]);
3346 log_err("\n");
3347 }
3348 }
3349
3350 if(!memcmp(junkout, expect, expectlen*2))
3351 {
3352 log_verbose("Matches!\n");
3353 return TRUE;
3354 }
3355 else
3356 {
3357 log_err("String does not match. %s\n", gNuConvTestName);
3358 log_verbose("String does not match. %s\n", gNuConvTestName);
3359 log_err("Got: ");
3360 printUSeqErr(junkout, expectlen);
3361 log_err("Expected: ");
3362 printUSeqErr(expect, expectlen);
3363 log_err("\n");
3364 return FALSE;
3365 }
3366}
73c04bcf
A
3367
3368static void TestCallBackFailure(void) {
3369 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3370 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3371 if (status != U_USELESS_COLLATOR_ERROR) {
3372 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3373 }
3374 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3375 if (status != U_USELESS_COLLATOR_ERROR) {
3376 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3377 }
3378 ucnv_cbFromUWriteSub(NULL, -1, &status);
3379 if (status != U_USELESS_COLLATOR_ERROR) {
3380 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3381 }
3382 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3383 if (status != U_USELESS_COLLATOR_ERROR) {
3384 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3385 }
3386}