]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nccbtst.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nccbtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*
7 * File NCCBTST.C
8 *
9 * Modification History:
10 * Name Description
11 * Madhu Katragadda 7/21/1999 Testing error callback routines
12 **************************************************************************************
13 */
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "cintltst.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "nccbtst.h"
/* Buffer size passed by the *CallBack drivers when a "large" buffer is wanted. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Both arguments are fully parenthesized so that expressions containing
 * low-precedence operators (e.g. nct_min(a|b, c)) are compared as a whole.
 * Note: the selected argument is evaluated twice; avoid side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
/* Number of elements in a real array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Input/output buffer sizes of the test currently running; assigned by the Test* functions. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Description of the running conversion test; filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
34
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed,
 * comma-separated list of two-digit upper-case hex values.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
43
44 static void printUSeq(const UChar* a, int len)
45 {
46 int i=0;
47 log_verbose("{");
48 while (i<len)
49 log_verbose(" 0x%04x, ", a[i++]);
50 log_verbose("}\n");
51 }
52
/*
 * Same idea as printSeq, but writes directly to stderr:
 * the first len bytes of a as two-digit lower-case hex values in braces.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
61
62 static void printUSeqErr(const UChar* a, int len)
63 {
64 int i=0;
65 fprintf(stderr, "{");
66 while (i<len)
67 fprintf(stderr, "0x%04x, ", a[i++]);
68 fprintf(stderr,"}\n");
69 }
70
71 static void setNuConvTestName(const char *codepage, const char *direction)
72 {
73 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
74 codepage,
75 direction,
76 (int)gInBufferSize,
77 (int)gOutBufferSize);
78 }
79
80
81 void addTestConvertErrorCallBack(TestNode** root);
82
83 void addTestConvertErrorCallBack(TestNode** root)
84 {
85 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
86 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
87 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
88 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
89 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
90 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
91 }
92
93 static void TestSkipCallBack()
94 {
95 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
96 TestSkip(1,NEW_MAX_BUFFER);
97 TestSkip(1,1);
98 TestSkip(NEW_MAX_BUFFER, 1);
99 }
100
101 static void TestStopCallBack()
102 {
103 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
104 TestStop(1,NEW_MAX_BUFFER);
105 TestStop(1,1);
106 TestStop(NEW_MAX_BUFFER, 1);
107 }
108
109 static void TestSubCallBack()
110 {
111 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
112 TestSub(1,NEW_MAX_BUFFER);
113 TestSub(1,1);
114 TestSub(NEW_MAX_BUFFER, 1);
115 TestEBCDIC_STATEFUL_Sub(1, 1);
116 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
117 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
118 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
119
120
121 }
122
123 static void TestSubWithValueCallBack()
124 {
125 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
126 TestSubWithValue(1,NEW_MAX_BUFFER);
127 TestSubWithValue(1,1);
128 TestSubWithValue(NEW_MAX_BUFFER, 1);
129 }
130
131 static void TestLegalAndOtherCallBack()
132 {
133 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
134 TestLegalAndOthers(1,NEW_MAX_BUFFER);
135 TestLegalAndOthers(1,1);
136 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
137 }
138
139 static void TestSingleByteCallBack()
140 {
141 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
142 TestSingleByte(1,NEW_MAX_BUFFER);
143 TestSingleByte(1,1);
144 TestSingleByte(NEW_MAX_BUFFER, 1);
145 }
146
147 static void TestSkip(int32_t inputsize, int32_t outputsize)
148 {
149 static const uint8_t expskipIBM_949[]= {
150 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
151
152 static const uint8_t expskipIBM_943[] = {
153 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
154
155 static const uint8_t expskipIBM_930[] = {
156 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
157
158 gInBufferSize = inputsize;
159 gOutBufferSize = outputsize;
160
161 /*From Unicode*/
162 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
163
164 {
165 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
166 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
167
168 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
169 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
170
171 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
172 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
173 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
174 log_err("u-> ibm-949 with skip did not match.\n");
175 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
176 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
177 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
178 log_err("u-> ibm-943 with skip did not match.\n");
179 }
180
181 {
182 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
183 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
184 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
185
186 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
187 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
188 fromUBytes, sizeof(fromUBytes),
189 "ibm-930",
190 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
191 NULL, 0)
192 ) {
193 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
194 }
195 }
196
197 {
198 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
199 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
200 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
201
202 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
203 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
204 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
205
206 /* US-ASCII */
207 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
208 usasciiFromUBytes, sizeof(usasciiFromUBytes),
209 "US-ASCII",
210 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
211 NULL, 0)
212 ) {
213 log_err("u->US-ASCII with skip did not match.\n");
214 }
215
216 /* SBCS NLTC codepage 367 for US-ASCII */
217 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
218 usasciiFromUBytes, sizeof(usasciiFromUBytes),
219 "ibm-367",
220 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
221 NULL, 0)
222 ) {
223 log_err("u->ibm-367 with skip did not match.\n");
224 }
225
226 /* ISO-Latin-1 */
227 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
228 latin1FromUBytes, sizeof(latin1FromUBytes),
229 "LATIN_1",
230 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
231 NULL, 0)
232 ) {
233 log_err("u->LATIN_1 with skip did not match.\n");
234 }
235
236 /* windows-1252 */
237 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
238 latin1FromUBytes, sizeof(latin1FromUBytes),
239 "windows-1252",
240 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
241 NULL, 0)
242 ) {
243 log_err("u->windows-1252 with skip did not match.\n");
244 }
245 }
246
247 {
248 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
249 static const uint8_t toIBM943[]= { 0x61, 0x61 };
250 static const int32_t offset[]= {0, 4};
251
252 /* EUC_JP*/
253 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
254 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
255 0x61, 0x8e, 0xe0,
256 };
257 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
258
259 /*EUC_TW*/
260 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
261 static const uint8_t to_euc_tw[]={
262 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
263 0x61, 0xe6, 0xca, 0x8a,
264 };
265 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
266
267 /*ISO-2022-JP*/
268 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
269 static const uint8_t to_iso_2022_jp[]={
270 0x41,
271 0x42,
272
273 };
274 static const int32_t from_iso_2022_jpOffs [] ={0,2};
275
276 /*ISO-2022-JP*/
277 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
278 static const uint8_t to_iso_2022_jp2[]={
279 0x41,
280 0x43,
281
282 };
283 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
284
285 /*ISO-2022-cn*/
286 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
287 static const uint8_t to_iso_2022_cn[]={
288 0x41, 0x42
289 };
290 static const int32_t from_iso_2022_cnOffs [] ={
291 0, 2
292 };
293
294 /*ISO-2022-CN*/
295 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
296 static const uint8_t to_iso_2022_cn1[]={
297 0x41, 0x43
298
299 };
300 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
301
302 /*ISO-2022-kr*/
303 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
304 static const uint8_t to_iso_2022_kr[]={
305 0x1b, 0x24, 0x29, 0x43,
306 0x41,
307 0x0e, 0x25, 0x50,
308 0x25, 0x50,
309 0x0f, 0x42,
310 };
311 static const int32_t from_iso_2022_krOffs [] ={
312 -1,-1,-1,-1,
313 0,
314 1,1,1,
315 3,3,
316 4,4
317 };
318
319 /*ISO-2022-kr*/
320 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
321 static const uint8_t to_iso_2022_kr1[]={
322 0x1b, 0x24, 0x29, 0x43,
323 0x41,
324 0x0e, 0x25, 0x50,
325 0x25, 0x50,
326
327 };
328 static const int32_t from_iso_2022_krOffs1 [] ={
329 -1,-1,-1,-1,
330 0,
331 1,1,1,
332 3,3,
333
334 };
335 /* HZ encoding */
336 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
337
338 static const uint8_t to_hz[]={
339 0x7e, 0x7d, 0x41,
340 0x7e, 0x7b, 0x26, 0x30,
341 0x26, 0x30,
342 0x7e, 0x7d, 0x42,
343
344 };
345 static const int32_t from_hzOffs [] ={
346 0,0,0,
347 1,1,1,1,
348 3,3,
349 4,4,4,4
350 };
351
352 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
353
354 static const uint8_t to_hz1[]={
355 0x7e, 0x7d, 0x41,
356 0x7e, 0x7b, 0x26, 0x30,
357 0x26, 0x30,
358
359
360 };
361 static const int32_t from_hzOffs1 [] ={
362 0,0,0,
363 1,1,1,1,
364 3,3,
365
366 };
367
368
369 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
370
371 static const uint8_t to_SCSU[]={
372 0x41,
373 0x42
374
375
376 };
377 static const int32_t from_SCSUOffs [] ={
378 0,
379 2,
380
381 };
382 /* ISCII */
383 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
384 static const uint8_t to_iscii[]={
385 0x41,
386 0x42,
387 };
388 static const int32_t from_isciiOffs [] ={
389 0,2,
390
391 };
392 /*ISCII*/
393 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
394 static const uint8_t to_iscii1[]={
395 0x44,
396 0x43,
397
398 };
399 static const int32_t from_isciiOffs1 [] ={0,2};
400
401 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
402 toIBM943, sizeof(toIBM943), "ibm-943",
403 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
404 log_err("u-> ibm-943 with skip did not match.\n");
405
406 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
407 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
408 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
409 log_err("u-> euc-jp with skip did not match.\n");
410
411 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
412 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
413 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
414 log_err("u-> euc-tw with skip did not match.\n");
415
416 /*iso_2022_jp*/
417 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
418 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
419 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
420 log_err("u-> iso-2022-jp with skip did not match.\n");
421
422 /* with context */
423 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
424 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
425 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
426 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
427
428 /*iso_2022_cn*/
429 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
430 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
431 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
432 log_err("u-> iso-2022-cn with skip did not match.\n");
433 /*with context*/
434 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
435 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
436 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
437 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
438
439 /*iso_2022_kr*/
440 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
441 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
442 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
443 log_err("u-> iso-2022-kr with skip did not match.\n");
444 /*with context*/
445 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
446 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
448 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
449
450 /*hz*/
451 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
452 to_hz, sizeof(to_hz), "HZ",
453 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
454 log_err("u-> HZ with skip did not match.\n");
455 /*with context*/
456 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
457 to_hz1, sizeof(to_hz1), "hz",
458 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
459 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
460
461 /*SCSU*/
462 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
463 to_SCSU, sizeof(to_SCSU), "SCSU",
464 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
465 log_err("u-> SCSU with skip did not match.\n");
466
467 /*ISCII*/
468 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
469 to_iscii, sizeof(to_iscii), "ISCII,version=0",
470 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
471 log_err("u-> iscii with skip did not match.\n");
472 /*with context*/
473 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
474 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
475 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
476 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
477
478 }
479
480 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
481 {
482 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
483 0xFB, 0xEE, 0x28, /* from source offset 0 */
484 0x24, 0x1E, 0x52,
485 0xB2,
486 0x20,
487 0xB3,
488 0xB1,
489 0x0D,
490 0x0A,
491
492 0x20, /* from 8 */
493 0x00,
494 0xD0, 0x6C,
495 0xB6,
496 0xD8, 0xA5,
497 0x20,
498 0x68,
499 0x59,
500
501 0xF9, 0x28, /* from 16 */
502 0x6D,
503 0x20,
504 0x73,
505 0xE0, 0x2D,
506 0xDE, 0x43,
507 0xD0, 0x33,
508 0x20,
509
510 0xFA, 0x83, /* from 24 */
511 0x25, 0x01,
512 0xFB, 0x16, 0x87,
513 0x4B, 0x16,
514 0x20,
515 0xE6, 0xBD,
516 0xEB, 0x5B,
517 0x4B, 0xCC,
518
519 0xF9, 0xA2, /* from 32 */
520 0xFC, 0x10, 0x3E,
521 0xFE, 0x16, 0x3A, 0x8C,
522 0x20,
523 0xFC, 0x03, 0xAC,
524
525 0x01, /* from 41 */
526 0xDE, 0x83,
527 0x20,
528 0x09
529 };
530 static const UChar expected[]={
531 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
532 0x0063, 0x0061, 0x000D, 0x000A,
533
534 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
535 0x0930, 0x0020, 0x0918, 0x0909,
536
537 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
538 0x4000, 0x4E00, 0x7777, 0x0020,
539
540 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
541 0x0020, 0xD7A3, 0xDC00, 0xD800,
542
543 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
544 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
545
546 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
547 0x0009
548 };
549 static const int32_t offsets[]={
550 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
551 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
552 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
553 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
554 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
555 41, 42, 42, 43, 44
556 };
557
558 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
559 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
560 sampleText, sizeof(sampleText),
561 "BOCU-1",
562 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
563 ) {
564 log_err("u->BOCU-1 with skip did not match.\n");
565 }
566 }
567
568 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
569 {
570 const uint8_t sampleText[]={
571 0x61, /* 'a' */
572 0xc4, 0xb5, /* U+0135 */
573 0xed, 0x80, 0xa0, /* Hangul U+d020 */
574 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
575 0xee, 0x80, 0x80, /* PUA U+e000 */
576 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
577 0x62, /* 'b' */
578 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
579 0xd0, 0x80 /* U+0400 */
580 };
581 UChar expected[]={
582 0x0061,
583 0x0135,
584 0xd020,
585 0xd801, 0xdc01,
586 0xe000,
587 0xdc01,
588 0x0062,
589 0xd801,
590 0x0400
591 };
592 int32_t offsets[]={
593 0,
594 1, 1,
595 2, 2, 2,
596 3, 3, 3, 4, 4, 4,
597 5, 5, 5,
598 6, 6, 6,
599 7,
600 8, 8, 8,
601 9, 9
602 };
603
604 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
605
606 /* without offsets */
607 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
608 sampleText, sizeof(sampleText),
609 "CESU-8",
610 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
611 ) {
612 log_err("u->CESU-8 with skip did not match.\n");
613 }
614
615 /* with offsets */
616 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
617 sampleText, sizeof(sampleText),
618 "CESU-8",
619 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
620 ) {
621 log_err("u->CESU-8 with skip did not match.\n");
622 }
623 }
624
625 /*to Unicode*/
626 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
627
628 {
629
630 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
631 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
632 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
633
634 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
635 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
636 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
637
638 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
639 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
640 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
641 log_err("ibm-949->u with skip did not match.\n");
642 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
643 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
644 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
645 log_err("ibm-943->u with skip did not match.\n");
646
647
648 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
649 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
650 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
651 log_err("ibm-930->u with skip did not match.\n");
652
653
654 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
655 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
656 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
657 log_err("ibm-930->u with skip did not match.\n");
658 }
659
660 {
661 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
662 static const UChar usasciiToU[] = { 0x61, 0x31 };
663 static const int32_t usasciiToUOffsets[] = { 0, 2 };
664
665 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
666 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
667 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
668
669 /* US-ASCII */
670 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
671 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
672 "US-ASCII",
673 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
674 NULL, 0)
675 ) {
676 log_err("US-ASCII->u with skip did not match.\n");
677 }
678
679 /* SBCS NLTC codepage 367 for US-ASCII */
680 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
681 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
682 "ibm-367",
683 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
684 NULL, 0)
685 ) {
686 log_err("ibm-367->u with skip did not match.\n");
687 }
688
689 /* ISO-Latin-1 */
690 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
691 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
692 "LATIN_1",
693 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
694 NULL, 0)
695 ) {
696 log_err("LATIN_1->u with skip did not match.\n");
697 }
698
699 /* windows-1252 */
700 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
701 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
702 "windows-1252",
703 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
704 NULL, 0)
705 ) {
706 log_err("windows-1252->u with skip did not match.\n");
707 }
708 }
709
710 {
711 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
712 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
713 };
714 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
715 };
716 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
717
718
719 /* euc-jp*/
720 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
721 0x8f, 0xda, 0xa1, /*unassigned*/
722 0x8e, 0xe0,
723 };
724 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
725 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
726
727 /*EUC_TW*/
728 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
729 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
730 0xe6, 0xca, 0x8a,
731 };
732 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
733 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
734 /*iso-2022-jp*/
735 static const uint8_t sampleTxt_iso_2022_jp[]={
736 0x41,
737 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
738 0x1b, 0x28, 0x42, 0x42,
739
740 };
741 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
742 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
743
744 /*iso-2022-cn*/
745 static const uint8_t sampleTxt_iso_2022_cn[]={
746 0x0f, 0x41, 0x44,
747 0x1B, 0x24, 0x29, 0x47,
748 0x0E, 0x40, 0x6f, /*unassigned*/
749 0x0f, 0x42,
750
751 };
752
753 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
754 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
755
756 /*iso-2022-kr*/
757 static const uint8_t sampleTxt_iso_2022_kr[]={
758 0x1b, 0x24, 0x29, 0x43,
759 0x41,
760 0x0E, 0x7f, 0x1E,
761 0x0e, 0x25, 0x50,
762 0x0f, 0x51,
763 0x42, 0x43,
764
765 };
766 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
767 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
768
769 /*hz*/
770 static const uint8_t sampleTxt_hz[]={
771 0x41,
772 0x7e, 0x7b, 0x26, 0x30,
773 0x7f, 0x1E, /*unassigned*/
774 0x26, 0x30,
775 0x7e, 0x7d, 0x42,
776 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
777 0x7e, 0x7d, 0x42,
778 };
779 static const UChar hztoUnicode[]={
780 0x41,
781 0x03a0,
782 0x03A0,
783 0x42,
784 0x42,};
785
786 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
787
788 /*ISCII*/
789 static const uint8_t sampleTxt_iscii[]={
790 0x41,
791 0xa1,
792 0xEB, /*unassigned*/
793 0x26,
794 0x30,
795 0xa2,
796 0xEC, /*unassigned*/
797 0x42,
798 };
799 static const UChar isciitoUnicode[]={
800 0x41,
801 0x0901,
802 0x26,
803 0x30,
804 0x0902,
805 0x42,
806 };
807
808 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
809
810 /*LMBCS*/
811 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
812 0x12, 0x92, 0xa0, /*unassigned*/
813 0x12, 0x92, 0xA1,
814 };
815 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
816 static const int32_t fromLMBCS[] = {0, 6};
817
818 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
819 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
820 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
821 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
822
823 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
824 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
825 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
826 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
827
828 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
829 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
830 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
831 log_err("euc-jp->u with skip did not match.\n");
832
833
834
835 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
836 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
837 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
838 log_err("euc-tw->u with skip did not match.\n");
839
840
841 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
842 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
843 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
844 log_err("iso-2022-jp->u with skip did not match.\n");
845
846 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
847 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
848 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
849 log_err("iso-2022-cn->u with skip did not match.\n");
850
851 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
852 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
853 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
854 log_err("iso-2022-kr->u with skip did not match.\n");
855
856 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
857 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
858 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
859 log_err("HZ->u with skip did not match.\n");
860
861 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
862 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
863 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
864 log_err("iscii->u with skip did not match.\n");
865
866 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
867 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
868 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
869 log_err("LMBCS->u with skip did not match.\n");
870
871 }
872 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
873 {
874 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
875 0xe0, 0x80, 0x61,};
876 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
877 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
878
879 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
880 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
881 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
882 log_err("utf8->u with skip did not match.\n");;
883 }
884
885 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
886 {
887 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
888 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
889 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
890
891 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
892 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
893 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
894 log_err("scsu->u with skip did not match.\n");
895 }
896
897 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
898 {
899 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
900 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
901 0x24, 0x1E, 0x52, /* 3 */
902 0xB2, /* 6 */
903 0x20, /* 7 */
904 0x40, 0x07, /* 8 - wrong trail byte */
905 0xB3, /* 10 */
906 0xB1, /* 11 */
907 0xD0, 0x20, /* 12 - wrong trail byte */
908 0x0D, /* 14 */
909 0x0A, /* 15 */
910 0x20, /* 16 */
911 0x00, /* 17 */
912 0xD0, 0x6C, /* 18 */
913 0xB6, /* 20 */
914 0xD8, 0xA5, /* 21 */
915 0x20, /* 23 */
916 0x68, /* 24 */
917 0x59, /* 25 */
918 0xF9, 0x28, /* 26 */
919 0x6D, /* 28 */
920 0x20, /* 29 */
921 0x73, /* 30 */
922 0xE0, 0x2D, /* 31 */
923 0xDE, 0x43, /* 33 */
924 0xD0, 0x33, /* 35 */
925 0x20, /* 37 */
926 0xFA, 0x83, /* 38 */
927 0x25, 0x01, /* 40 */
928 0xFB, 0x16, 0x87, /* 42 */
929 0x4B, 0x16, /* 45 */
930 0x20, /* 47 */
931 0xE6, 0xBD, /* 48 */
932 0xEB, 0x5B, /* 50 */
933 0x4B, 0xCC, /* 52 */
934 0xF9, 0xA2, /* 54 */
935 0xFC, 0x10, 0x3E, /* 56 */
936 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
937 0x20, /* 63 */
938 0xFC, 0x03, 0xAC, /* 64 */
939 0xFF, /* 67 - FF just resets the state without encoding anything */
940 0x01, /* 68 */
941 0xDE, 0x83, /* 69 */
942 0x20, /* 71 */
943 0x09 /* 72 */
944 };
945 UChar expected[]={
946 0xFEFF, 0x0061, 0x0062, 0x0020,
947 0x0063, 0x0061, 0x000D, 0x000A,
948 0x0020, 0x0000, 0x00DF, 0x00E6,
949 0x0930, 0x0020, 0x0918, 0x0909,
950 0x3086, 0x304D, 0x0020, 0x3053,
951 0x4000, 0x4E00, 0x7777, 0x0020,
952 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
953 0x0020, 0xD7A3, 0xDC00, 0xD800,
954 0xD800, 0xDC00, 0xD845, 0xDDDD,
955 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
956 0xDFFF, 0x0001, 0x0E40, 0x0020,
957 0x0009
958 };
959 int32_t offsets[]={
960 0, 3, 6, 7, /* skip 8, */
961 10, 11, /* skip 12, */
962 14, 15, 16, 17, 18,
963 20, 21, 23, 24, 25, 26, 28, 29,
964 30, 31, 33, 35, 37, 38,
965 40, 42, 45, 47, 48,
966 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
967 63, 64, /* trail */ 64, /* reset only 67, */
968 68, 69,
969 71, 72
970 };
971
972 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
973 expected, ARRAY_LENGTH(expected), "BOCU-1",
974 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
975 ) {
976 log_err("BOCU-1->u with skip did not match.\n");
977 }
978 }
979
980 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
981 {
982 const uint8_t sampleText[]={
983 0x61, /* 0 'a' */
984 0xc0, 0x80, /* 1 non-shortest form */
985 0xc4, 0xb5, /* 3 U+0135 */
986 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
987 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
988 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
989 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
990 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
991 0x62, /* 24 'b' */
992 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
993 0xed, 0xa0, /* 28 incomplete sequence */
994 0xd0, 0x80 /* 30 U+0400 */
995 };
996 UChar expected[]={
997 0x0061,
998 /* skip */
999 0x0135,
1000 0xd020,
1001 0xd801, 0xdc01,
1002 0xe000,
1003 0xdc01,
1004 /* skip */
1005 0x0062,
1006 0xd801,
1007 0x0400
1008 };
1009 int32_t offsets[]={
1010 0,
1011 /* skip 1, */
1012 3,
1013 5,
1014 8, 11,
1015 14,
1016 17,
1017 /* skip 20, 20, */
1018 24,
1019 25,
1020 /* skip 28 */
1021 30
1022 };
1023
1024 /* without offsets */
1025 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1026 expected, ARRAY_LENGTH(expected), "CESU-8",
1027 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1028 ) {
1029 log_err("CESU-8->u with skip did not match.\n");
1030 }
1031
1032 /* with offsets */
1033 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1034 expected, ARRAY_LENGTH(expected), "CESU-8",
1035 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1036 ) {
1037 log_err("CESU-8->u with skip did not match.\n");
1038 }
1039 }
1040 }
1041
1042 static void TestStop(int32_t inputsize, int32_t outputsize)
1043 {
1044 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1045 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1046
1047 static const uint8_t expstopIBM_949[]= {
1048 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1049
1050 static const uint8_t expstopIBM_943[] = {
1051 0x9f, 0xaf, 0x9f, 0xb1};
1052
1053 static const uint8_t expstopIBM_930[] = {
1054 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1055
1056 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1057 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1058 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1059
1060
1061 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1062 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1063 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1064
1065 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1066 static const int32_t fromIBM943Offs [] = { 0, 2};
1067 static const int32_t fromIBM930Offs [] = { 1, 3};
1068
1069 gInBufferSize = inputsize;
1070 gOutBufferSize = outputsize;
1071 /*From Unicode*/
1072 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1073 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1074 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1075 log_err("u-> ibm-949 with stop did not match.\n");
1076 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1077 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1078 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1079 log_err("u-> ibm-943 with stop did not match.\n");
1080 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1081 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1082 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1083 log_err("u-> ibm-930 with stop did not match.\n");
1084
1085 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1086 {
1087 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1088 static const uint8_t toIBM943[]= { 0x61,};
1089 static const int32_t offset[]= {0,} ;
1090
1091 /*EUC_JP*/
1092 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1093 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1094 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1095
1096 /*EUC_TW*/
1097 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1098 static const uint8_t to_euc_tw[]={
1099 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1100 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1101
1102 /*ISO-2022-JP*/
1103 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1104 static const uint8_t to_iso_2022_jp[]={
1105 0x41,
1106
1107 };
1108 static const int32_t from_iso_2022_jpOffs [] ={0,};
1109
1110 /*ISO-2022-cn*/
1111 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1112 static const uint8_t to_iso_2022_cn[]={
1113 0x41,
1114
1115 };
1116 static const int32_t from_iso_2022_cnOffs [] ={
1117 0,0,
1118 2,2,
1119 };
1120
1121 /*ISO-2022-kr*/
1122 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1123 static const uint8_t to_iso_2022_kr[]={
1124 0x1b, 0x24, 0x29, 0x43,
1125 0x41,
1126 0x0e, 0x25, 0x50,
1127 };
1128 static const int32_t from_iso_2022_krOffs [] ={
1129 -1,-1,-1,-1,
1130 0,
1131 1,1,1,
1132 };
1133
1134 /* HZ encoding */
1135 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1136
1137 static const uint8_t to_hz[]={
1138 0x7e, 0x7d, 0x41,
1139 0x7e, 0x7b, 0x26, 0x30,
1140
1141 };
1142 static const int32_t from_hzOffs [] ={
1143 0, 0,0,
1144 1,1,1,1,
1145 };
1146
1147 /*ISCII*/
1148 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1149 static const uint8_t to_iscii[]={
1150 0x41,
1151 };
1152 static const int32_t from_isciiOffs [] ={
1153 0,
1154 };
1155
1156 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1157 toIBM943, sizeof(toIBM943), "ibm-943",
1158 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1159 log_err("u-> ibm-943 with stop did not match.\n");
1160
1161 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1162 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1163 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1164 log_err("u-> euc-jp with stop did not match.\n");
1165
1166 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1167 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1168 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1169 log_err("u-> euc-tw with stop did not match.\n");
1170
1171 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1172 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1173 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1174 log_err("u-> iso-2022-jp with stop did not match.\n");
1175
1176 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1177 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1178 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1179 log_err("u-> iso-2022-jp with stop did not match.\n");
1180
1181 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1182 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1183 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1184 log_err("u-> iso-2022-cn with stop did not match.\n");
1185
1186 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1187 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1188 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1189 log_err("u-> iso-2022-kr with stop did not match.\n");
1190
1191 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1192 to_hz, sizeof(to_hz), "HZ",
1193 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1194 log_err("u-> HZ with stop did not match.\n");\
1195
1196 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1197 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1198 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1199 log_err("u-> iscii with stop did not match.\n");
1200
1201
1202 }
1203 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1204 {
1205 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1206
1207 static const uint8_t to_SCSU[]={
1208 0x41,
1209
1210 };
1211 int32_t from_SCSUOffs [] ={
1212 0,
1213
1214 };
1215 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1216 to_SCSU, sizeof(to_SCSU), "SCSU",
1217 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1218 log_err("u-> SCSU with skip did not match.\n");
1219
1220 }
1221 /*to Unicode*/
1222 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1223 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1224 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1225 log_err("ibm-949->u with stop did not match.\n");
1226 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1227 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1228 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1229 log_err("ibm-943->u with stop did not match.\n");
1230 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1231 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1232 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1233 log_err("ibm-930->u with stop did not match.\n");
1234
1235 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1236 {
1237
1238 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1239 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1240 };
1241 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1242 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1243
1244
1245 /*EUC-JP*/
1246 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1247 0x8f, 0xda, 0xa1, /*unassigned*/
1248 0x8e, 0xe0,
1249 };
1250 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1251 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1252
1253 /*EUC_TW*/
1254 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1255 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1256 0xe6, 0xca, 0x8a,
1257 };
1258 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1259 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1260
1261
1262
1263 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1264 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1265 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1266 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1267
1268 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1269 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1270 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1271 log_err("euc-jp->u with stop did not match.\n");
1272
1273 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1274 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1275 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1276 log_err("euc-tw->u with stop did not match.\n");
1277 }
1278 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1279 {
1280 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1281 0xe0, 0x80, 0x61,};
1282 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1283 static const int32_t offsets1[] = { 0x0000, 0x0001};
1284
1285 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1286 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1287 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1288 log_err("utf8->u with stop did not match.\n");;
1289 }
1290 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1291 {
1292 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1293 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1294 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1295
1296 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1297 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1298 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1299 log_err("scsu->u with stop did not match.\n");;
1300 }
1301
1302 }
1303
1304 static void TestSub(int32_t inputsize, int32_t outputsize)
1305 {
1306 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1307 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1308
1309 static const uint8_t expsubIBM_949[] =
1310 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1311
1312 static const uint8_t expsubIBM_943[] = {
1313 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1314
1315 static const uint8_t expsubIBM_930[] = {
1316 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1317
1318 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1319 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1320 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1321
1322 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1323 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1324 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1325
1326 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1327 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1328 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1329
1330 gInBufferSize = inputsize;
1331 gOutBufferSize = outputsize;
1332
1333 /*from unicode*/
1334 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1335 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1336 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1337 log_err("u-> ibm-949 with subst did not match.\n");
1338 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1339 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1340 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1341 log_err("u-> ibm-943 with subst did not match.\n");
1342 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1343 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1344 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1345 log_err("u-> ibm-930 with subst did not match.\n");
1346
1347 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1348 {
1349 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1350 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1351 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1352
1353
1354 /* EUC_JP*/
1355 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1356 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1357 0xf4, 0xfe, 0xf4, 0xfe,
1358 0x61, 0x8e, 0xe0,
1359 };
1360 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1361
1362 /*EUC_TW*/
1363 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1364 static const uint8_t to_euc_tw[]={
1365 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1366 0xfd, 0xfe, 0xfd, 0xfe,
1367 0x61, 0xe6, 0xca, 0x8a,
1368 };
1369
1370 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1371
1372 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1373 toIBM943, sizeof(toIBM943), "ibm-943",
1374 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1375 log_err("u-> ibm-943 with substitute did not match.\n");
1376
1377 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1378 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1379 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1380 log_err("u-> euc-jp with substitute did not match.\n");
1381
1382 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1383 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1384 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1385 log_err("u-> euc-tw with substitute did not match.\n");
1386 }
1387
1388 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1389 {
1390 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1391
1392 const uint8_t to_SCSU[]={
1393 0x41,
1394 0x0e, 0xff,0xfd,
1395 0x42
1396
1397
1398 };
1399 int32_t from_SCSUOffs [] ={
1400 0,
1401 1,1,1,
1402 2,
1403
1404 };
1405 const uint8_t to_SCSU_1[]={
1406 0x41,
1407
1408 };
1409 int32_t from_SCSUOffs_1 [] ={
1410 0,
1411
1412 };
1413 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1414 to_SCSU, sizeof(to_SCSU), "SCSU",
1415 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1416 log_err("u-> SCSU with substitute did not match.\n");
1417
1418 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1419 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1420 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1421 log_err("u-> SCSU with substitute did not match.\n");
1422 }
1423
1424 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1425 {
1426 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1427 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1428 0xf0, 0x90, 0x90, 0x81,
1429 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1430 0xef, 0xbf, 0xbf, 0x61,
1431
1432 };
1433 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1434 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1435 expectedUTF8, sizeof(expectedUTF8), "utf8",
1436 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1437 log_err("u-> utf8 with stop did not match.\n");
1438 }
1439 }
1440
1441 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1442 {
1443 static const UChar in[]={ 0x0041, 0xfeff };
1444
1445 static const uint8_t out[]={
1446 #if U_IS_BIG_ENDIAN
1447 0xfe, 0xff,
1448 0x00, 0x41,
1449 0xfe, 0xff
1450 #else
1451 0xff, 0xfe,
1452 0x41, 0x00,
1453 0xff, 0xfe
1454 #endif
1455 };
1456 static const int32_t offsets[]={
1457 -1, -1, 0, 0, 1, 1
1458 };
1459
1460 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1461 out, sizeof(out), "UTF-16",
1462 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1463 ) {
1464 log_err("u->UTF-16 with substitute did not match.\n");
1465 }
1466 }
1467
1468 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1469 {
1470 static const UChar in[]={ 0x0041, 0xfeff };
1471
1472 static const uint8_t out[]={
1473 #if U_IS_BIG_ENDIAN
1474 0x00, 0x00, 0xfe, 0xff,
1475 0x00, 0x00, 0x00, 0x41,
1476 0x00, 0x00, 0xfe, 0xff
1477 #else
1478 0xff, 0xfe, 0x00, 0x00,
1479 0x41, 0x00, 0x00, 0x00,
1480 0xff, 0xfe, 0x00, 0x00
1481 #endif
1482 };
1483 static const int32_t offsets[]={
1484 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1485 };
1486
1487 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1488 out, sizeof(out), "UTF-32",
1489 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1490 ) {
1491 log_err("u->UTF-32 with substitute did not match.\n");
1492 }
1493 }
1494
1495 /*to unicode*/
1496 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1497 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1498 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1499 log_err("ibm-949->u with substitute did not match.\n");
1500 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1501 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1502 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1503 log_err("ibm-943->u with substitute did not match.\n");
1504 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1505 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1506 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1507 log_err("ibm-930->u with substitute did not match.\n");
1508
1509 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1510 {
1511
1512 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1513 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1514 };
1515 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1516 };
1517 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1518
1519
1520 /* EUC_JP*/
1521 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1522 0x8f, 0xda, 0xa1, /*unassigned*/
1523 0x8e, 0xe0, 0x8a
1524 };
1525 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1526 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1527
1528 /*EUC_TW*/
1529 const uint8_t sampleTxt_euc_tw[]={
1530 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1531 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1532 0xe6, 0xca, 0x8a,
1533 };
1534 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1535 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1536
1537
1538 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1539 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1540 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1541 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1542
1543
1544 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1545 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1547 log_err("euc-jp->u with substitute did not match.\n");
1548
1549
1550 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1551 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1553 log_err("euc-tw->u with substitute did not match.\n");
1554
1555
1556 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1557 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1558 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1559 log_err("euc-jp->u with substitute did not match.\n");
1560
1561
1562
1563 }
1564 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1565 {
1566 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1567 0xe0, 0x80, 0x61,};
1568 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1569 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1570
1571 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1572 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1573 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1574 log_err("utf8->u with substitute did not match.\n");;
1575 }
1576 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1577 {
1578 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1579 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1580 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1581
1582 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1583 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1584 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1585 log_err("scsu->u with stop did not match.\n");;
1586 }
1587
1588 log_verbose("Testing ibm-930 subchar/subchar1\n");
1589 {
1590 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1591 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1592 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1593
1594 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1595 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1596 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1597
1598 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1599 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1600 ) {
1601 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1602 }
1603
1604 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1605 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1606 ) {
1607 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1608 }
1609 }
1610
1611 log_verbose("Testing GB 18030 with substitute callbacks\n");
1612 {
1613 static const UChar u2[]={
1614 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1615 static const uint8_t gb2[]={
1616 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1617 static const int32_t offsets2[]={
1618 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1619
1620 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1621 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1622 ) {
1623 log_err("gb18030->u with substitute did not match.\n");
1624 }
1625 }
1626
1627 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1628 {
1629 static const uint8_t utf7[]={
1630 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1631 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1632 };
1633 static const UChar unicode[]={
1634 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd
1635 };
1636 static const int32_t offsets[]={
1637 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23
1638 };
1639
1640 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1641 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1642 ) {
1643 log_err("UTF-7->u with substitute did not match.\n");
1644 }
1645 }
1646
1647 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1648 {
1649 static const uint8_t
1650 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1651 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1652 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1653
1654 static const UChar
1655 out1[]={ 0x4e00, 0xfeff },
1656 out2[]={ 0x004e, 0xfffe },
1657 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1658
1659 static const int32_t
1660 offsets1[]={ 2, 4 },
1661 offsets2[]={ 2, 4 },
1662 offsets3[]={ 0, 2, 4 };
1663
1664 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1665 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1666 ) {
1667 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1668 }
1669
1670 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1671 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1672 ) {
1673 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1674 }
1675
1676 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1677 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1678 ) {
1679 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1680 }
1681 }
1682
1683 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1684 {
1685 static const uint8_t
1686 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1687 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1688 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1689 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1690
1691 static const UChar
1692 out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
1693 out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
1694 out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1695 out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1696
1697 static const int32_t
1698 offsets1[]={ 4, 4, 8 },
1699 offsets2[]={ 4, 4, 8 },
1700 offsets3[]={ 0, 4, 4, 8, 12 },
1701 offsets4[]={ 0, 0, 4, 8 };
1702
1703 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1704 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1705 ) {
1706 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1707 }
1708
1709 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1710 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1711 ) {
1712 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1713 }
1714
1715 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1716 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1717 ) {
1718 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1719 }
1720
1721 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1722 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1723 ) {
1724 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1725 }
1726 }
1727 }
1728
1729 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1730 {
1731 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1732 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1733
1734 const uint8_t expsubwvalIBM_949[]= {
1735 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1736 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1737
1738 const uint8_t expsubwvalIBM_943[]= {
1739 0x9f, 0xaf, 0x9f, 0xb1,
1740 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1741
1742 const uint8_t expsubwvalIBM_930[] = {
1743 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1744
1745 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1746 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1747 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1748
1749 gInBufferSize = inputsize;
1750 gOutBufferSize = outputsize;
1751
1752 /*from Unicode*/
1753 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1754 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1755 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1756 log_err("u-> ibm-949 with subst with value did not match.\n");
1757
1758 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1759 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1760 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1761 log_err("u-> ibm-943 with sub with value did not match.\n");
1762
1763 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1764 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1765 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1766 log_err("u-> ibm-930 with subst with value did not match.\n");
1767
1768
1769 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1770 {
1771 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1772 static const uint8_t toIBM943[]= { 0x61,
1773 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1774 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1775 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1776 0x61 };
1777 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1778
1779
1780 /* EUC_JP*/
1781 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1782 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1783 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1784 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1785 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1786 0x61, 0x8e, 0xe0,
1787 };
1788 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1789 3, 3, 3, 3, 3, 3,
1790 3, 3, 3, 3, 3, 3,
1791 5, 5, 5, 5, 5, 5,
1792 6, 7, 7,
1793 };
1794
1795 /*EUC_TW*/
1796 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1797 static const uint8_t to_euc_tw[]={
1798 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1799 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1800 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1801 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1802 0x61, 0xe6, 0xca, 0x8a,
1803 };
1804 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1805 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1806 6, 7, 7, 8,
1807 };
1808 /*ISO-2022-JP*/
1809 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1810 static const uint8_t to_iso_2022_jp1[]={
1811 0x1b, 0x24, 0x42, 0x21, 0x21,
1812 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1813 0x1b, 0x24, 0x42, 0x21, 0x22,
1814 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1815 0x42,
1816 };
1817
1818 static const int32_t from_iso_2022_jpOffs1 [] ={
1819 0,0,0,0,0,
1820 1,1,1,1,1,1,1,1,1,
1821 2,2,2,2,2,
1822 3,3,3,3,3,3,3,3,3,
1823 4,
1824 };
1825 /* surrogate pair*/
1826 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1827 static const uint8_t to_iso_2022_jp2[]={
1828 0x1b, 0x24, 0x42, 0x21, 0x21,
1829 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1830 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1831 0x1b, 0x24, 0x42, 0x21, 0x22,
1832 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1833 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1834 0x42,
1835 };
1836 static const int32_t from_iso_2022_jpOffs2 [] ={
1837 0,0,0,0,0,
1838 1,1,1,1,1,1,1,1,1,
1839 1,1,1,1,1,1,
1840 3,3,3,3,3,
1841 4,4,4,4,4,4,4,4,4,
1842 4,4,4,4,4,4,
1843 6,
1844 };
1845
1846 /*ISO-2022-cn*/
1847 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1848 static const uint8_t to_iso_2022_cn[]={
1849 0x41,
1850 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1851 0x42,
1852 };
1853 static const int32_t from_iso_2022_cnOffs [] ={
1854 0,
1855 1,1,1,1,1,1,
1856 2,
1857 };
1858
1859 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1860
1861 static const uint8_t to_iso_2022_cn4[]={
1862 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1863 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1864 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1865 0x0e, 0x21, 0x22,
1866 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1867 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1868 0x42,
1869 };
1870 static const int32_t from_iso_2022_cnOffs4 [] ={
1871 0,0,0,0,0,0,0,
1872 1,1,1,1,1,1,1,
1873 1,1,1,1,1,1,
1874 3,3,3,
1875 4,4,4,4,4,4,4,
1876 4,4,4,4,4,4,
1877 6
1878
1879 };
1880
1881 /*ISO-2022-kr*/
1882 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1883 static const uint8_t to_iso_2022_kr2[]={
1884 0x1b, 0x24, 0x29, 0x43,
1885 0x41,
1886 0x0e, 0x25, 0x50,
1887 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1888 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1889 0x0e, 0x25, 0x50,
1890 0x0f, 0x42,
1891 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1892 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1893 0x43
1894 };
1895 static const int32_t from_iso_2022_krOffs2 [] ={
1896 -1,-1,-1,-1,
1897 0,
1898 1,1,1,
1899 2,2,2,2,2,2,2,
1900 2,2,2,2,2,2,
1901 4,4,4,
1902 5,5,
1903 6,6,6,6,6,6,
1904 6,6,6,6,6,6,
1905 8,
1906 };
1907
1908 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1909 static const uint8_t to_iso_2022_kr[]={
1910 0x1b, 0x24, 0x29, 0x43,
1911 0x41,
1912 0x0e, 0x25, 0x50,
1913 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1914 0x0e, 0x25, 0x50,
1915 0x0f, 0x42,
1916 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1917 0x43
1918 };
1919
1920
1921 static const int32_t from_iso_2022_krOffs [] ={
1922 -1,-1,-1,-1,
1923 0,
1924 1,1,1,
1925 2,2,2,2,2,2,2,
1926 3,3,3,
1927 4,4,
1928 5,5,5,5,5,5,
1929 6,
1930 };
1931 /* HZ encoding */
1932 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1933
1934 static const uint8_t to_hz[]={
1935 0x7e, 0x7d, 0x41,
1936 0x7e, 0x7b, 0x26, 0x30,
1937 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1938 0x7e, 0x7b, 0x26, 0x30,
1939 0x7e, 0x7d, 0x42,
1940
1941 };
1942 static const int32_t from_hzOffs [] ={
1943 0,0,0,
1944 1,1,1,1,
1945 2,2,2,2,2,2,2,2,
1946 3,3,3,3,
1947 4,4,4
1948 };
1949
1950 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1951 static const uint8_t to_hz2[]={
1952 0x7e, 0x7d, 0x41,
1953 0x7e, 0x7b, 0x26, 0x30,
1954 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1955 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1956 0x7e, 0x7b, 0x26, 0x30,
1957 0x7e, 0x7d, 0x42,
1958 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1959 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1960 0x43
1961 };
1962 static const int32_t from_hzOffs2 [] ={
1963 0,0,0,
1964 1,1,1,1,
1965 2,2,2,2,2,2,2,2,
1966 2,2,2,2,2,2,
1967 4,4,4,4,
1968 5,5,5,
1969 6,6,6,6,6,6,
1970 6,6,6,6,6,6,
1971 8,
1972 };
1973
1974 /*ISCII*/
1975 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
1976 static const uint8_t to_iscii[]={
1977 0x41,
1978 0xef, 0x42, 0xa1,
1979 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1980 0xa2,
1981 0x42,
1982 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1983 0x43
1984 };
1985
1986
1987 static const int32_t from_isciiOffs [] ={
1988 0,
1989 1,1,1,
1990 2,2,2,2,2,2,
1991 3,
1992 4,
1993 5,5,5,5,5,5,
1994 6,
1995 };
1996
1997 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1998 toIBM943, sizeof(toIBM943), "ibm-943",
1999 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2000 log_err("u-> ibm-943 with subst with value did not match.\n");
2001
2002 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2003 to_euc_jp, sizeof(to_euc_jp), "euc-jp",
2004 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2005 log_err("u-> euc-jp with subst with value did not match.\n");
2006
2007 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2008 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2009 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2010 log_err("u-> euc-tw with subst with value did not match.\n");
2011
2012 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2013 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2014 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2015 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2016
2017 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2018 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2019 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2020 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2021
2022 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2023 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2024 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2025 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2026 /*ESCAPE OPTIONS*/
2027 {
2028 /* surrogate pair*/
2029 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2030 static const uint8_t to_iso_2022_jp3_v2[]={
2031 0x1b, 0x24, 0x42, 0x21, 0x21,
2032 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2033
2034 0x1b, 0x24, 0x42, 0x21, 0x22,
2035 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2036
2037 0x42,
2038 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2039 };
2040
2041 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2042 0,0,0,0,0,
2043 1,1,1,1,1,1,1,1,1,1,1,1,
2044
2045 3,3,3,3,3,
2046 4,4,4,4,4,4,4,4,4,4,4,4,
2047
2048 6,
2049 7,7,7,7,7,7,7,7,7
2050 };
2051
2052 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2053 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2054 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2055 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2056 }
2057 {
2058 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2059 static const uint8_t to_iso_2022_cn5_v2[]={
2060 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2061 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2062 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2063 0x0e, 0x21, 0x22,
2064 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2065 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2066 0x42,
2067 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2068 };
2069 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2070 0,0,0,0,0,0,0,
2071 1,1,1,1,1,1,1,
2072 1,1,1,1,1,1,
2073 3,3,3,
2074 4,4,4,4,4,4,4,
2075 4,4,4,4,4,4,
2076 6,
2077 7,7,7,7,7,7
2078 };
2079 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2080 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2081 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2082 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2083
2084 }
2085 {
2086 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2087 static const uint8_t to_iso_2022_cn6_v2[]={
2088 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2089 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2090 0x0e, 0x21, 0x22,
2091 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2092 0x42,
2093 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2094 };
2095 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2096 0, 0, 0, 0, 0, 0, 0,
2097 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2098 3, 3, 3,
2099 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2100 6,
2101 7, 7, 7, 7, 7, 7, 7, 7,
2102 };
2103 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2104 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2106 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2107
2108 }
2109 {
2110 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2111 static const uint8_t to_iso_2022_cn7_v2[]={
2112 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2113 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2114 0x0e, 0x21, 0x22,
2115 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2116 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2117 };
2118 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2119 0, 0, 0, 0, 0, 0, 0,
2120 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2121 3, 3, 3,
2122 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2123 6,
2124 7, 7, 7, 7, 7, 7,
2125 };
2126 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2127 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2128 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2129 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2130
2131 }
2132 {
2133 static const uint8_t to_iso_2022_cn4_v3[]={
2134 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2135 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2136 0x0e, 0x21, 0x22,
2137 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2138 0x42
2139 };
2140
2141
2142 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2143 0,0,0,0,0,0,0,
2144 1,1,1,1,1,1,1,1,1,1,1,
2145
2146 3,3,3,
2147 4,4,4,4,4,4,4,4,4,4,4,
2148
2149 6
2150
2151 };
2152 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2153 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2154 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2155 {
2156 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2157 }
2158 }
2159 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2160 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2161 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2162 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2163
2164 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2165 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2166 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2167 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2168 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2169 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2170 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2171 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2172 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2173 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2174 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2175 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2176 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2177 to_hz, sizeof(to_hz), "HZ",
2178 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2179 log_err("u-> hz with subst with value did not match.\n");
2180 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2181 to_hz2, sizeof(to_hz2), "HZ",
2182 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2183 log_err("u-> hz with subst with value did not match.\n");
2184
2185 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2186 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2187 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2188 log_err("u-> iscii with subst with value did not match.\n");
2189 }
2190
2191
2192 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2193 /*to Unicode*/
2194 {
2195 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2196 0x81, 0xad, /*unassigned*/
2197 0x89, 0xd3 };
2198 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2199 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2200 0x7B87};
2201 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2202
2203 /* EUC_JP*/
2204 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2205 0x8f, 0xda, 0xa1, /*unassigned*/
2206 0x8e, 0xe0,
2207 };
2208 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2209 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2210 0x00a2 };
2211 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2212 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2213 9,
2214 };
2215
2216 /*EUC_TW*/
2217 static const uint8_t sampleTxt_euc_tw[]={
2218 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2219 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2220 0xe6, 0xca, 0x8a,
2221 };
2222 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2223 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2224 0x8706, 0x8a, };
2225 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2226 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2227 11, 13};
2228
2229 /*iso-2022-jp*/
2230 static const uint8_t sampleTxt_iso_2022_jp[]={
2231 0x1b, 0x28, 0x42, 0x41,
2232 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/
2233 0x1b, 0x28, 0x42, 0x42,
2234
2235 };
2236 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2237 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2238
2239 /*iso-2022-cn*/
2240 static const uint8_t sampleTxt_iso_2022_cn[]={
2241 0x0f, 0x41, 0x44,
2242 0x1B, 0x24, 0x29, 0x47,
2243 0x0E, 0x40, 0x6c, /*unassigned*/
2244 0x0f, 0x42,
2245
2246 };
2247 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2248 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2249
2250 /*iso-2022-kr*/
2251 static const uint8_t sampleTxt_iso_2022_kr[]={
2252 0x1b, 0x24, 0x29, 0x43,
2253 0x41,
2254 0x0E, 0x7f, 0x1E,
2255 0x0e, 0x25, 0x50,
2256 0x0f, 0x51,
2257 0x42, 0x43,
2258
2259 };
2260 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2261 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2262
2263 /*hz*/
2264 static const uint8_t sampleTxt_hz[]={
2265 0x41,
2266 0x7e, 0x7b, 0x26, 0x30,
2267 0x7f, 0x1E, /*unassigned*/
2268 0x26, 0x30,
2269 0x7e, 0x7d, 0x42,
2270 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2271 0x7e, 0x7d, 0x42,
2272 };
2273 static const UChar hztoUnicode[]={
2274 0x41,
2275 0x03a0,
2276 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2277 0x03A0,
2278 0x42,
2279 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2280 0x42,};
2281
2282 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2283
2284
2285 /*iscii*/
2286 static const uint8_t sampleTxt_iscii[]={
2287 0x41,
2288 0x30,
2289 0xEB, /*unassigned*/
2290 0xa3,
2291 0x42,
2292 0xEC, /*unassigned*/
2293 0x42,
2294 };
2295 static const UChar isciitoUnicode[]={
2296 0x41,
2297 0x30,
2298 0x25, 0x58, 0x45, 0x42,
2299 0x0903,
2300 0x42,
2301 0x25, 0x58, 0x45, 0x43,
2302 0x42,};
2303
2304 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2305
2306
2307 /*UTF8*/
2308 static const uint8_t sampleTxtUTF8[]={
2309 0x20, 0x64, 0x50,
2310 0xC2, 0x7E, /* truncated char */
2311 0x20,
2312 0xE0, 0xB5, 0x7E, /* truncated char */
2313 0x40,
2314 };
2315 static const UChar UTF8ToUnicode[]={
2316 0x0020, 0x0064, 0x0050,
2317 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2318 0x0020,
2319 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2320 0x0040
2321 };
2322 static const int32_t fromUTF8[] = {
2323 0, 1, 2,
2324 3, 3, 3, 3, 4,
2325 5,
2326 6, 6, 6, 6, 6, 6, 6, 6, 8,
2327 9
2328 };
2329 static const UChar UTF8ToUnicodeXML_DEC[]={
2330 0x0020, 0x0064, 0x0050,
2331 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* &#194;~ */
2332 0x0020,
2333 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2334 0x0040
2335 };
2336 static const int32_t fromUTF8XML_DEC[] = {
2337 0, 1, 2,
2338 3, 3, 3, 3, 3, 3, 4,
2339 5,
2340 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2341 9
2342 };
2343
2344
2345 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2346 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2347 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2348 log_err("ibm-943->u with substitute with value did not match.\n");
2349
2350 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2351 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
2352 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2353 log_err("euc-jp->u with substitute with value did not match.\n");
2354
2355 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2356 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2357 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2358 log_err("euc-tw->u with substitute with value did not match.\n");
2359
2360 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2361 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2362 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2363 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2364
2365 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2366 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2367 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2368 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2369
2370 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2371 {
2372 static const UChar iso_2022_jptoUnicodeDec[]={
2373 0x0041,
2374 0x0026, 0x0023, 0x0034, 0x0032, 0x003b,
2375 0x0026, 0x0023, 0x0036, 0x0038, 0x003b,
2376 0x0042 };
2377 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2378 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2379 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2380 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2381 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2382 }
2383 {
2384 static const UChar iso_2022_jptoUnicodeHex[]={
2385 0x0041,
2386 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2387 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2388 0x0042 };
2389 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2390 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2391 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2392 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2393 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2394 }
2395 {
2396 static const UChar iso_2022_jptoUnicodeC[]={
2397 0x0041,
2398 0x005C, 0x0078, 0x0032, 0x0041,
2399 0x005C, 0x0078, 0x0034, 0x0034,
2400 0x0042 };
2401 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2402 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2403 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2404 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2405 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2406 }
2407 }
2408 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2409 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2410 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2411 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2412
2413 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2414 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2415 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2416 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2417
2418 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2419 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2420 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2421 log_err("hz->u with substitute with value did not match.\n");
2422
2423 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2424 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2425 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2426 log_err("ISCII ->u with substitute with value did not match.\n");
2427 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2428 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2429 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2430 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2431 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2432 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2433 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2434 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2435 }
2436 }
2437
2438 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2439 {
2440 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2441 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2442 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2443
2444
2445 static const uint8_t text943[] = {
2446 0x82, 0xa9, 0x82, 0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2447 static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
2448 static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57};
2449 static const UChar toUnicode943stop[]= { 0x304b};
2450
2451 static const int32_t fromIBM943Offssub[] = {0, 2, 4, 5, 7};
2452 static const int32_t fromIBM943Offsskip[] = { 0, 4, 5, 7};
2453 static const int32_t fromIBM943Offsstop[] = { 0};
2454
2455 gInBufferSize = inputsize;
2456 gOutBufferSize = outputsize;
2457 /*checking with a legal value*/
2458 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2459 templegal949, sizeof(templegal949), "ibm-949",
2460 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2461 log_err("u-> ibm-949 with skip did not match.\n");
2462
2463 /*checking illegal value for ibm-943 with substitute*/
2464 if(!testConvertToUnicode(text943, sizeof(text943),
2465 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2466 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2467 log_err("ibm-943->u with subst did not match.\n");
2468 /*checking illegal value for ibm-943 with skip */
2469 if(!testConvertToUnicode(text943, sizeof(text943),
2470 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2471 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2472 log_err("ibm-943->u with skip did not match.\n");
2473
2474 /*checking illegal value for ibm-943 with stop */
2475 if(!testConvertToUnicode(text943, sizeof(text943),
2476 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2477 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2478 log_err("ibm-943->u with stop did not match.\n");
2479
2480 }
2481
2482 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2483 {
2484 static const uint8_t sampleText[] = {
2485 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2486 0xff, /*0x82, 0xa9,*/ 0x32, 0x33};
2487 static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033};
2488 static const int32_t fromIBM943Offssub[] = {0, 2, 3, 4, 5, 7, 8};
2489 /*checking illegal value for ibm-943 with substitute*/
2490 gInBufferSize = inputsize;
2491 gOutBufferSize = outputsize;
2492
2493 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2494 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2495 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2496 log_err("ibm-943->u with subst did not match.\n");
2497 }
2498
2499 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2500 {
2501 /*EBCDIC_STATEFUL*/
2502 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2503 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2504 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2505 /* s SO doubl SI sng s SO fe fe SI s */
2506
2507 /*EBCDIC_STATEFUL with subChar=3f*/
2508 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2509 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2510 static const char mySubChar[]={ 0x3f};
2511
2512 gInBufferSize = inputsize;
2513 gOutBufferSize = outputsize;
2514
2515 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2516 toIBM930, sizeof(toIBM930), "ibm-930",
2517 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2518 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2519
2520 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2521 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2522 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2523 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2524 }
2525
2526
2527
2528 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2529 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2530 const char *mySubChar, int8_t len)
2531 {
2532
2533
2534 UErrorCode status = U_ZERO_ERROR;
2535 UConverter *conv = 0;
2536 uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */
2537 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2538 const UChar *src;
2539 uint8_t *end;
2540 uint8_t *targ;
2541 int32_t *offs;
2542 int i;
2543 int32_t realBufferSize;
2544 uint8_t *realBufferEnd;
2545 const UChar *realSourceEnd;
2546 const UChar *sourceLimit;
2547 UBool checkOffsets = TRUE;
2548 UBool doFlush;
2549 char junk[9999];
2550 char offset_str[9999];
2551 uint8_t *p;
2552 UConverterFromUCallback oldAction = NULL;
2553 const void* oldContext = NULL;
2554
2555
2556 for(i=0;i<NEW_MAX_BUFFER;i++)
2557 junkout[i] = 0xF0;
2558 for(i=0;i<NEW_MAX_BUFFER;i++)
2559 junokout[i] = 0xFF;
2560 setNuConvTestName(codepage, "FROM");
2561
2562 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2563 gOutBufferSize);
2564
2565 conv = ucnv_open(codepage, &status);
2566 if(U_FAILURE(status))
2567 {
2568 log_data_err("Couldn't open converter %s\n",codepage);
2569 return TRUE;
2570 }
2571
2572 log_verbose("Converter opened..\n");
2573
2574 /*----setting the callback routine----*/
2575 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2576 if (U_FAILURE(status))
2577 {
2578 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2579 }
2580 /*------------------------*/
2581 /*setting the subChar*/
2582 if(mySubChar != NULL){
2583 ucnv_setSubstChars(conv, mySubChar, len, &status);
2584 if (U_FAILURE(status)) {
2585 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2586 }
2587 }
2588 /*------------*/
2589
2590 src = source;
2591 targ = junkout;
2592 offs = junokout;
2593
2594 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2595 realBufferEnd = junkout + realBufferSize;
2596 realSourceEnd = source + sourceLen;
2597
2598 if ( gOutBufferSize != realBufferSize )
2599 checkOffsets = FALSE;
2600
2601 if( gInBufferSize != NEW_MAX_BUFFER )
2602 checkOffsets = FALSE;
2603
2604 do
2605 {
2606 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2607 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2608
2609 doFlush = (UBool)(sourceLimit == realSourceEnd);
2610
2611 if(targ == realBufferEnd)
2612 {
2613 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2614 return FALSE;
2615 }
2616 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2617
2618
2619 status = U_ZERO_ERROR;
2620
2621 ucnv_fromUnicode (conv,
2622 (char **)&targ,
2623 (const char *)end,
2624 &src,
2625 sourceLimit,
2626 checkOffsets ? offs : NULL,
2627 doFlush, /* flush if we're at the end of the input data */
2628 &status);
2629 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2630
2631
2632 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2633 UChar errChars[50]; /* should be sufficient */
2634 int8_t errLen = 50;
2635 UErrorCode err = U_ZERO_ERROR;
2636 const UChar* limit= NULL;
2637 const UChar* start= NULL;
2638 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2639 if(U_FAILURE(err)){
2640 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2641 }
2642 /* src points to limit of invalid chars */
2643 limit = src;
2644 /* length of in invalid chars should be equal to returned length*/
2645 start = src - errLen;
2646 if(u_strncmp(errChars,start,errLen)!=0){
2647 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2648 }
2649 }
2650 /* allow failure codes for the stop callback */
2651 if(U_FAILURE(status) &&
2652 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2653 {
2654 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2655 return FALSE;
2656 }
2657
2658 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2659 sourceLen, targ-junkout);
2660 if(VERBOSITY)
2661 {
2662
2663 junk[0] = 0;
2664 offset_str[0] = 0;
2665 for(p = junkout;p<targ;p++)
2666 {
2667 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2668 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2669 }
2670
2671 log_verbose(junk);
2672 printSeq(expect, expectLen);
2673 if ( checkOffsets )
2674 {
2675 log_verbose("\nOffsets:");
2676 log_verbose(offset_str);
2677 }
2678 log_verbose("\n");
2679 }
2680 ucnv_close(conv);
2681
2682
2683 if(expectLen != targ-junkout)
2684 {
2685 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2686 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2687 printSeqErr(junkout, targ-junkout);
2688 printSeqErr(expect, expectLen);
2689 return FALSE;
2690 }
2691
2692 if (checkOffsets && (expectOffsets != 0) )
2693 {
2694 log_verbose("comparing %d offsets..\n", targ-junkout);
2695 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2696 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2697 log_err("Got Output : ");
2698 printSeqErr(junkout, targ-junkout);
2699 log_err("Got Offsets: ");
2700 for(p=junkout;p<targ;p++)
2701 log_err("%d,", junokout[p-junkout]);
2702 log_err("\n");
2703 log_err("Expected Offsets: ");
2704 for(i=0; i<(targ-junkout); i++)
2705 log_err("%d,", expectOffsets[i]);
2706 log_err("\n");
2707 return FALSE;
2708 }
2709 }
2710
2711 if(!memcmp(junkout, expect, expectLen))
2712 {
2713 log_verbose("String matches! %s\n", gNuConvTestName);
2714 return TRUE;
2715 }
2716 else
2717 {
2718 log_err("String does not match. %s\n", gNuConvTestName);
2719 log_err("source: ");
2720 printUSeqErr(source, sourceLen);
2721 log_err("Got: ");
2722 printSeqErr(junkout, expectLen);
2723 log_err("Expected: ");
2724 printSeqErr(expect, expectLen);
2725 return FALSE;
2726 }
2727 }
2728
2729 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2730 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2731 const char *mySubChar, int8_t len)
2732 {
2733 UErrorCode status = U_ZERO_ERROR;
2734 UConverter *conv = 0;
2735 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2736 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2737 const uint8_t *src;
2738 const uint8_t *realSourceEnd;
2739 const uint8_t *srcLimit;
2740 UChar *targ;
2741 UChar *end;
2742 int32_t *offs;
2743 int i;
2744 UBool checkOffsets = TRUE;
2745 char junk[9999];
2746 char offset_str[9999];
2747 UChar *p;
2748 UConverterToUCallback oldAction = NULL;
2749 const void* oldContext = NULL;
2750
2751 int32_t realBufferSize;
2752 UChar *realBufferEnd;
2753
2754
2755 for(i=0;i<NEW_MAX_BUFFER;i++)
2756 junkout[i] = 0xFFFE;
2757
2758 for(i=0;i<NEW_MAX_BUFFER;i++)
2759 junokout[i] = -1;
2760
2761 setNuConvTestName(codepage, "TO");
2762
2763 log_verbose("\n========= %s\n", gNuConvTestName);
2764
2765 conv = ucnv_open(codepage, &status);
2766 if(U_FAILURE(status))
2767 {
2768 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2769 return TRUE;
2770 }
2771
2772 log_verbose("Converter opened..\n");
2773
2774 src = source;
2775 targ = junkout;
2776 offs = junokout;
2777
2778 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2779 realBufferEnd = junkout + realBufferSize;
2780 realSourceEnd = src + sourcelen;
2781 /*----setting the callback routine----*/
2782 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2783 if (U_FAILURE(status))
2784 {
2785 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2786 }
2787 /*-------------------------------------*/
2788 /*setting the subChar*/
2789 if(mySubChar != NULL){
2790 ucnv_setSubstChars(conv, mySubChar, len, &status);
2791 if (U_FAILURE(status)) {
2792 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2793 }
2794 }
2795 /*------------*/
2796
2797
2798 if ( gOutBufferSize != realBufferSize )
2799 checkOffsets = FALSE;
2800
2801 if( gInBufferSize != NEW_MAX_BUFFER )
2802 checkOffsets = FALSE;
2803
2804 do
2805 {
2806 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2807 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2808
2809 if(targ == realBufferEnd)
2810 {
2811 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2812 return FALSE;
2813 }
2814 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2815
2816
2817
2818 status = U_ZERO_ERROR;
2819
2820 ucnv_toUnicode (conv,
2821 &targ,
2822 end,
2823 (const char **)&src,
2824 (const char *)srcLimit,
2825 checkOffsets ? offs : NULL,
2826 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2827 &status);
2828 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2829
2830 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2831 char errChars[50]; /* should be sufficient */
2832 int8_t errLen = 50;
2833 UErrorCode err = U_ZERO_ERROR;
2834 const uint8_t* limit= NULL;
2835 const uint8_t* start= NULL;
2836 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2837 if(U_FAILURE(err)){
2838 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2839 }
2840 /* src points to limit of invalid chars */
2841 limit = src;
2842 /* length of in invalid chars should be equal to returned length*/
2843 start = src - errLen;
2844 if(uprv_strncmp(errChars,(char*)start,errLen)!=0){
2845 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2846 }
2847 }
2848 /* allow failure codes for the stop callback */
2849 if(U_FAILURE(status) &&
2850 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2851 {
2852 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2853 return FALSE;
2854 }
2855
2856 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2857 sourcelen, targ-junkout);
2858 if(VERBOSITY)
2859 {
2860
2861 junk[0] = 0;
2862 offset_str[0] = 0;
2863
2864 for(p = junkout;p<targ;p++)
2865 {
2866 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2867 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2868 }
2869
2870 log_verbose(junk);
2871 printUSeq(expect, expectlen);
2872 if ( checkOffsets )
2873 {
2874 log_verbose("\nOffsets:");
2875 log_verbose(offset_str);
2876 }
2877 log_verbose("\n");
2878 }
2879 ucnv_close(conv);
2880
2881 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2882
2883 if (checkOffsets && (expectOffsets != 0))
2884 {
2885 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2886 {
2887 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2888 log_err("Got offsets: ");
2889 for(p=junkout;p<targ;p++)
2890 log_err(" %2d,", junokout[p-junkout]);
2891 log_err("\n");
2892 log_err("Expected offsets: ");
2893 for(i=0; i<(targ-junkout); i++)
2894 log_err(" %2d,", expectOffsets[i]);
2895 log_err("\n");
2896 log_err("Got output: ");
2897 for(i=0; i<(targ-junkout); i++)
2898 log_err("0x%04x,", junkout[i]);
2899 log_err("\n");
2900 log_err("From source: ");
2901 for(i=0; i<(src-source); i++)
2902 log_err(" 0x%02x,", (unsigned char)source[i]);
2903 log_err("\n");
2904 }
2905 }
2906
2907 if(!memcmp(junkout, expect, expectlen*2))
2908 {
2909 log_verbose("Matches!\n");
2910 return TRUE;
2911 }
2912 else
2913 {
2914 log_err("String does not match. %s\n", gNuConvTestName);
2915 log_verbose("String does not match. %s\n", gNuConvTestName);
2916 log_err("Got: ");
2917 printUSeqErr(junkout, expectlen);
2918 log_err("Expected: ");
2919 printUSeqErr(expect, expectlen);
2920 log_err("\n");
2921 return FALSE;
2922 }
2923 }
2924
2925 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2926 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2927 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
2928 {
2929
2930
2931 UErrorCode status = U_ZERO_ERROR;
2932 UConverter *conv = 0;
2933 uint8_t junkout[NEW_MAX_BUFFER]; /* FIX */
2934 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2935 const UChar *src;
2936 uint8_t *end;
2937 uint8_t *targ;
2938 int32_t *offs;
2939 int i;
2940 int32_t realBufferSize;
2941 uint8_t *realBufferEnd;
2942 const UChar *realSourceEnd;
2943 const UChar *sourceLimit;
2944 UBool checkOffsets = TRUE;
2945 UBool doFlush;
2946 char junk[9999];
2947 char offset_str[9999];
2948 uint8_t *p;
2949 UConverterFromUCallback oldAction = NULL;
2950 const void* oldContext = NULL;
2951
2952
2953 for(i=0;i<NEW_MAX_BUFFER;i++)
2954 junkout[i] = 0xF0;
2955 for(i=0;i<NEW_MAX_BUFFER;i++)
2956 junokout[i] = 0xFF;
2957 setNuConvTestName(codepage, "FROM");
2958
2959 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2960 gOutBufferSize);
2961
2962 conv = ucnv_open(codepage, &status);
2963 if(U_FAILURE(status))
2964 {
2965 log_data_err("Couldn't open converter %s\n",codepage);
2966 return TRUE; /* Because the err has already been logged. */
2967 }
2968
2969 log_verbose("Converter opened..\n");
2970
2971 /*----setting the callback routine----*/
2972 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
2973 if (U_FAILURE(status))
2974 {
2975 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2976 }
2977 /*------------------------*/
2978 /*setting the subChar*/
2979 if(mySubChar != NULL){
2980 ucnv_setSubstChars(conv, mySubChar, len, &status);
2981 if (U_FAILURE(status)) {
2982 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
2983 }
2984 }
2985 /*------------*/
2986
2987 src = source;
2988 targ = junkout;
2989 offs = junokout;
2990
2991 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2992 realBufferEnd = junkout + realBufferSize;
2993 realSourceEnd = source + sourceLen;
2994
2995 if ( gOutBufferSize != realBufferSize )
2996 checkOffsets = FALSE;
2997
2998 if( gInBufferSize != NEW_MAX_BUFFER )
2999 checkOffsets = FALSE;
3000
3001 do
3002 {
3003 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3004 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3005
3006 doFlush = (UBool)(sourceLimit == realSourceEnd);
3007
3008 if(targ == realBufferEnd)
3009 {
3010 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3011 return FALSE;
3012 }
3013 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3014
3015
3016 status = U_ZERO_ERROR;
3017
3018 ucnv_fromUnicode (conv,
3019 (char **)&targ,
3020 (const char *)end,
3021 &src,
3022 sourceLimit,
3023 checkOffsets ? offs : NULL,
3024 doFlush, /* flush if we're at the end of the input data */
3025 &status);
3026 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3027
3028 /* allow failure codes for the stop callback */
3029 if(U_FAILURE(status) && status != expectedError)
3030 {
3031 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3032 return FALSE;
3033 }
3034
3035 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3036 sourceLen, targ-junkout);
3037 if(VERBOSITY)
3038 {
3039
3040 junk[0] = 0;
3041 offset_str[0] = 0;
3042 for(p = junkout;p<targ;p++)
3043 {
3044 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3045 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3046 }
3047
3048 log_verbose(junk);
3049 printSeq(expect, expectLen);
3050 if ( checkOffsets )
3051 {
3052 log_verbose("\nOffsets:");
3053 log_verbose(offset_str);
3054 }
3055 log_verbose("\n");
3056 }
3057 ucnv_close(conv);
3058
3059
3060 if(expectLen != targ-junkout)
3061 {
3062 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3063 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3064 printSeqErr(junkout, targ-junkout);
3065 printSeqErr(expect, expectLen);
3066 return FALSE;
3067 }
3068
3069 if (checkOffsets && (expectOffsets != 0) )
3070 {
3071 log_verbose("comparing %d offsets..\n", targ-junkout);
3072 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3073 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3074 log_err("Got Output : ");
3075 printSeqErr(junkout, targ-junkout);
3076 log_err("Got Offsets: ");
3077 for(p=junkout;p<targ;p++)
3078 log_err("%d,", junokout[p-junkout]);
3079 log_err("\n");
3080 log_err("Expected Offsets: ");
3081 for(i=0; i<(targ-junkout); i++)
3082 log_err("%d,", expectOffsets[i]);
3083 log_err("\n");
3084 return FALSE;
3085 }
3086 }
3087
3088 if(!memcmp(junkout, expect, expectLen))
3089 {
3090 log_verbose("String matches! %s\n", gNuConvTestName);
3091 return TRUE;
3092 }
3093 else
3094 {
3095 log_err("String does not match. %s\n", gNuConvTestName);
3096 log_err("source: ");
3097 printUSeqErr(source, sourceLen);
3098 log_err("Got: ");
3099 printSeqErr(junkout, expectLen);
3100 log_err("Expected: ");
3101 printSeqErr(expect, expectLen);
3102 return FALSE;
3103 }
3104 }
3105 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3106 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3107 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3108 {
3109 UErrorCode status = U_ZERO_ERROR;
3110 UConverter *conv = 0;
3111 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3112 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3113 const uint8_t *src;
3114 const uint8_t *realSourceEnd;
3115 const uint8_t *srcLimit;
3116 UChar *targ;
3117 UChar *end;
3118 int32_t *offs;
3119 int i;
3120 UBool checkOffsets = TRUE;
3121 char junk[9999];
3122 char offset_str[9999];
3123 UChar *p;
3124 UConverterToUCallback oldAction = NULL;
3125 const void* oldContext = NULL;
3126
3127 int32_t realBufferSize;
3128 UChar *realBufferEnd;
3129
3130
3131 for(i=0;i<NEW_MAX_BUFFER;i++)
3132 junkout[i] = 0xFFFE;
3133
3134 for(i=0;i<NEW_MAX_BUFFER;i++)
3135 junokout[i] = -1;
3136
3137 setNuConvTestName(codepage, "TO");
3138
3139 log_verbose("\n========= %s\n", gNuConvTestName);
3140
3141 conv = ucnv_open(codepage, &status);
3142 if(U_FAILURE(status))
3143 {
3144 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3145 return TRUE;
3146 }
3147
3148 log_verbose("Converter opened..\n");
3149
3150 src = source;
3151 targ = junkout;
3152 offs = junokout;
3153
3154 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3155 realBufferEnd = junkout + realBufferSize;
3156 realSourceEnd = src + sourcelen;
3157 /*----setting the callback routine----*/
3158 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3159 if (U_FAILURE(status))
3160 {
3161 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3162 }
3163 /*-------------------------------------*/
3164 /*setting the subChar*/
3165 if(mySubChar != NULL){
3166 ucnv_setSubstChars(conv, mySubChar, len, &status);
3167 if (U_FAILURE(status)) {
3168 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3169 }
3170 }
3171 /*------------*/
3172
3173
3174 if ( gOutBufferSize != realBufferSize )
3175 checkOffsets = FALSE;
3176
3177 if( gInBufferSize != NEW_MAX_BUFFER )
3178 checkOffsets = FALSE;
3179
3180 do
3181 {
3182 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3183 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3184
3185 if(targ == realBufferEnd)
3186 {
3187 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3188 return FALSE;
3189 }
3190 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3191
3192
3193
3194 status = U_ZERO_ERROR;
3195
3196 ucnv_toUnicode (conv,
3197 &targ,
3198 end,
3199 (const char **)&src,
3200 (const char *)srcLimit,
3201 checkOffsets ? offs : NULL,
3202 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3203 &status);
3204 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3205
3206 /* allow failure codes for the stop callback */
3207 if(U_FAILURE(status) && status!=expectedError)
3208 {
3209 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3210 return FALSE;
3211 }
3212
3213 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3214 sourcelen, targ-junkout);
3215 if(VERBOSITY)
3216 {
3217
3218 junk[0] = 0;
3219 offset_str[0] = 0;
3220
3221 for(p = junkout;p<targ;p++)
3222 {
3223 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3224 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3225 }
3226
3227 log_verbose(junk);
3228 printUSeq(expect, expectlen);
3229 if ( checkOffsets )
3230 {
3231 log_verbose("\nOffsets:");
3232 log_verbose(offset_str);
3233 }
3234 log_verbose("\n");
3235 }
3236 ucnv_close(conv);
3237
3238 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3239
3240 if (checkOffsets && (expectOffsets != 0))
3241 {
3242 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3243 {
3244 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3245 log_err("Got offsets: ");
3246 for(p=junkout;p<targ;p++)
3247 log_err(" %2d,", junokout[p-junkout]);
3248 log_err("\n");
3249 log_err("Expected offsets: ");
3250 for(i=0; i<(targ-junkout); i++)
3251 log_err(" %2d,", expectOffsets[i]);
3252 log_err("\n");
3253 log_err("Got output: ");
3254 for(i=0; i<(targ-junkout); i++)
3255 log_err("0x%04x,", junkout[i]);
3256 log_err("\n");
3257 log_err("From source: ");
3258 for(i=0; i<(src-source); i++)
3259 log_err(" 0x%02x,", (unsigned char)source[i]);
3260 log_err("\n");
3261 }
3262 }
3263
3264 if(!memcmp(junkout, expect, expectlen*2))
3265 {
3266 log_verbose("Matches!\n");
3267 return TRUE;
3268 }
3269 else
3270 {
3271 log_err("String does not match. %s\n", gNuConvTestName);
3272 log_verbose("String does not match. %s\n", gNuConvTestName);
3273 log_err("Got: ");
3274 printUSeqErr(junkout, expectlen);
3275 log_err("Expected: ");
3276 printUSeqErr(expect, expectlen);
3277 log_err("\n");
3278 return FALSE;
3279 }
3280 }