]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/nccbtst.c
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / nccbtst.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda 7/21/1999 Testing error callback routines
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
26 #include "cintltst.h"
27 #include "unicode/utypes.h"
28 #include "unicode/ustring.h"
29 #include "nccbtst.h"
30 #include "unicode/ucnv_cb.h"
31 #include "unicode/utf16.h"
32
/* Non-trivial buffer size handed to the Test*() routines by the *CallBack drivers (the other size they use is 1). */
#define NEW_MAX_BUFFER 999

/*
 * Yields the smaller of x and y (y when they compare equal).
 * Both arguments and the whole expansion are parenthesized so that operands
 * containing lower-precedence operators (e.g. `a | b`) expand correctly.
 * Each argument may be evaluated twice, so do not pass expressions with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Input and output buffer sizes of the sub-test in progress; read by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Label describing the conversion under test; written by setNuConvTestName(). */
static char gNuConvTestName[1024];
40
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed
 * list of two-digit upper-case hex values, preceded by a newline.
 * Nothing is listed between the braces when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
49
50 static void printUSeq(const UChar* a, int len)
51 {
52 int i=0;
53 log_verbose("{");
54 while (i<len)
55 log_verbose(" 0x%04x, ", a[i++]);
56 log_verbose("}\n");
57 }
58
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed list of
 * two-digit lower-case hex values.
 * Nothing is listed between the braces when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
67
68 static void printUSeqErr(const UChar* a, int len)
69 {
70 int i=0;
71 fprintf(stderr, "{");
72 while (i<len)
73 fprintf(stderr, "0x%04x, ", a[i++]);
74 fprintf(stderr,"}\n");
75 }
76
77 static void setNuConvTestName(const char *codepage, const char *direction)
78 {
79 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
80 codepage,
81 direction,
82 (int)gInBufferSize,
83 (int)gOutBufferSize);
84 }
85
86
87 static void TestCallBackFailure(void);
88
89 void addTestConvertErrorCallBack(TestNode** root);
90
91 void addTestConvertErrorCallBack(TestNode** root)
92 {
93 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
94 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
95 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
96 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
97
98 #if !UCONFIG_NO_LEGACY_CONVERSION
99 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
100 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
101 #endif
102
103 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
104 }
105
106 static void TestSkipCallBack()
107 {
108 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
109 TestSkip(1,NEW_MAX_BUFFER);
110 TestSkip(1,1);
111 TestSkip(NEW_MAX_BUFFER, 1);
112 }
113
114 static void TestStopCallBack()
115 {
116 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
117 TestStop(1,NEW_MAX_BUFFER);
118 TestStop(1,1);
119 TestStop(NEW_MAX_BUFFER, 1);
120 }
121
122 static void TestSubCallBack()
123 {
124 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
125 TestSub(1,NEW_MAX_BUFFER);
126 TestSub(1,1);
127 TestSub(NEW_MAX_BUFFER, 1);
128
129 #if !UCONFIG_NO_LEGACY_CONVERSION
130 TestEBCDIC_STATEFUL_Sub(1, 1);
131 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
132 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
133 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
134 #endif
135 }
136
137 static void TestSubWithValueCallBack()
138 {
139 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
140 TestSubWithValue(1,NEW_MAX_BUFFER);
141 TestSubWithValue(1,1);
142 TestSubWithValue(NEW_MAX_BUFFER, 1);
143 }
144
145 #if !UCONFIG_NO_LEGACY_CONVERSION
146 static void TestLegalAndOtherCallBack()
147 {
148 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
149 TestLegalAndOthers(1,NEW_MAX_BUFFER);
150 TestLegalAndOthers(1,1);
151 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
152 }
153
154 static void TestSingleByteCallBack()
155 {
156 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
157 TestSingleByte(1,NEW_MAX_BUFFER);
158 TestSingleByte(1,1);
159 TestSingleByte(NEW_MAX_BUFFER, 1);
160 }
161 #endif
162
163 static void TestSkip(int32_t inputsize, int32_t outputsize)
164 {
165 static const uint8_t expskipIBM_949[]= {
166 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
167
168 static const uint8_t expskipIBM_943[] = {
169 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
170
171 static const uint8_t expskipIBM_930[] = {
172 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
173
174 gInBufferSize = inputsize;
175 gOutBufferSize = outputsize;
176
177 /*From Unicode*/
178 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
179
180 #if !UCONFIG_NO_LEGACY_CONVERSION
181 {
182 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
183 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
184
185 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
186 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
187
188 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
189 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
190 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
191 log_err("u-> ibm-949 with skip did not match.\n");
192 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
193 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
194 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
195 log_err("u-> ibm-943 with skip did not match.\n");
196 }
197
198 {
199 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
200 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
201 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
202
203 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
204 if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
205 fromUBytes, UPRV_LENGTHOF(fromUBytes),
206 "ibm-930",
207 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
208 NULL, 0)
209 ) {
210 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
211 }
212 }
213 #endif
214
215 {
216 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
217 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
218 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
219
220 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
222 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
223
224 /* US-ASCII */
225 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
226 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
227 "US-ASCII",
228 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
229 NULL, 0)
230 ) {
231 log_err("u->US-ASCII with skip did not match.\n");
232 }
233
234 #if !UCONFIG_NO_LEGACY_CONVERSION
235 /* SBCS NLTC codepage 367 for US-ASCII */
236 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
237 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
238 "ibm-367",
239 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
240 NULL, 0)
241 ) {
242 log_err("u->ibm-367 with skip did not match.\n");
243 }
244 #endif
245
246 /* ISO-Latin-1 */
247 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
248 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
249 "LATIN_1",
250 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
251 NULL, 0)
252 ) {
253 log_err("u->LATIN_1 with skip did not match.\n");
254 }
255
256 #if !UCONFIG_NO_LEGACY_CONVERSION
257 /* windows-1252 */
258 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
259 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
260 "windows-1252",
261 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
262 NULL, 0)
263 ) {
264 log_err("u->windows-1252 with skip did not match.\n");
265 }
266 }
267
268 {
269 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
270 static const uint8_t toIBM943[]= { 0x61, 0x61 };
271 static const int32_t offset[]= {0, 4};
272
273 /* EUC_JP*/
274 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
275 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
276 0x61, 0x8e, 0xe0,
277 };
278 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
279
280 /*EUC_TW*/
281 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
282 static const uint8_t to_euc_tw[]={
283 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
284 0x61, 0xe6, 0xca, 0x8a,
285 };
286 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
287
288 /*ISO-2022-JP*/
289 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
290 static const uint8_t to_iso_2022_jp[]={
291 0x41,
292 0x42,
293
294 };
295 static const int32_t from_iso_2022_jpOffs [] ={0,2};
296
297 /*ISO-2022-JP*/
298 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
299 static const uint8_t to_iso_2022_jp2[]={
300 0x41,
301 0x43,
302
303 };
304 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
305
306 /*ISO-2022-cn*/
307 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
308 static const uint8_t to_iso_2022_cn[]={
309 0x41, 0x42
310 };
311 static const int32_t from_iso_2022_cnOffs [] ={
312 0, 2
313 };
314
315 /*ISO-2022-CN*/
316 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
317 static const uint8_t to_iso_2022_cn1[]={
318 0x41, 0x43
319
320 };
321 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
322
323 /*ISO-2022-kr*/
324 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
325 static const uint8_t to_iso_2022_kr[]={
326 0x1b, 0x24, 0x29, 0x43,
327 0x41,
328 0x0e, 0x25, 0x50,
329 0x25, 0x50,
330 0x0f, 0x42,
331 };
332 static const int32_t from_iso_2022_krOffs [] ={
333 -1,-1,-1,-1,
334 0,
335 1,1,1,
336 3,3,
337 4,4
338 };
339
340 /*ISO-2022-kr*/
341 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
342 static const uint8_t to_iso_2022_kr1[]={
343 0x1b, 0x24, 0x29, 0x43,
344 0x41,
345 0x0e, 0x25, 0x50,
346 0x25, 0x50,
347
348 };
349 static const int32_t from_iso_2022_krOffs1 [] ={
350 -1,-1,-1,-1,
351 0,
352 1,1,1,
353 3,3,
354
355 };
356 /* HZ encoding */
357 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
358
359 static const uint8_t to_hz[]={
360 0x7e, 0x7d, 0x41,
361 0x7e, 0x7b, 0x26, 0x30,
362 0x26, 0x30,
363 0x7e, 0x7d, 0x42,
364
365 };
366 static const int32_t from_hzOffs [] ={
367 0,0,0,
368 1,1,1,1,
369 3,3,
370 4,4,4,4
371 };
372
373 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
374
375 static const uint8_t to_hz1[]={
376 0x7e, 0x7d, 0x41,
377 0x7e, 0x7b, 0x26, 0x30,
378 0x26, 0x30,
379
380
381 };
382 static const int32_t from_hzOffs1 [] ={
383 0,0,0,
384 1,1,1,1,
385 3,3,
386
387 };
388
389 #endif
390
391 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
392
393 static const uint8_t to_SCSU[]={
394 0x41,
395 0x42
396
397
398 };
399 static const int32_t from_SCSUOffs [] ={
400 0,
401 2,
402
403 };
404
405 #if !UCONFIG_NO_LEGACY_CONVERSION
406 /* ISCII */
407 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
408 static const uint8_t to_iscii[]={
409 0x41,
410 0x42,
411 };
412 static const int32_t from_isciiOffs [] ={
413 0,2,
414
415 };
416 /*ISCII*/
417 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
418 static const uint8_t to_iscii1[]={
419 0x44,
420 0x43,
421
422 };
423 static const int32_t from_isciiOffs1 [] ={0,2};
424
425 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
426 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
427 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
428 log_err("u-> ibm-943 with skip did not match.\n");
429
430 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
431 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
432 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
433 log_err("u-> euc-jp with skip did not match.\n");
434
435 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
436 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
437 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
438 log_err("u-> euc-tw with skip did not match.\n");
439
440 /*iso_2022_jp*/
441 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
442 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
443 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
444 log_err("u-> iso-2022-jp with skip did not match.\n");
445
446 /* with context */
447 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
448 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
449 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
450 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451
452 /*iso_2022_cn*/
453 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
454 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
455 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
456 log_err("u-> iso-2022-cn with skip did not match.\n");
457 /*with context*/
458 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
459 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
460 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
461 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462
463 /*iso_2022_kr*/
464 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
465 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
466 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
467 log_err("u-> iso-2022-kr with skip did not match.\n");
468 /*with context*/
469 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
470 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
471 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
472 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
473
474 /*hz*/
475 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
476 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
477 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
478 log_err("u-> HZ with skip did not match.\n");
479 /*with context*/
480 if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
481 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
482 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
483 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
484 #endif
485
486 /*SCSU*/
487 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
488 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
489 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
490 log_err("u-> SCSU with skip did not match.\n");
491
492 #if !UCONFIG_NO_LEGACY_CONVERSION
493 /*ISCII*/
494 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
495 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
496 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
497 log_err("u-> iscii with skip did not match.\n");
498 /*with context*/
499 if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
500 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
501 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
502 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
503 #endif
504 }
505
506 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
507 {
508 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
509 0xFB, 0xEE, 0x28, /* from source offset 0 */
510 0x24, 0x1E, 0x52,
511 0xB2,
512 0x20,
513 0xB3,
514 0xB1,
515 0x0D,
516 0x0A,
517
518 0x20, /* from 8 */
519 0x00,
520 0xD0, 0x6C,
521 0xB6,
522 0xD8, 0xA5,
523 0x20,
524 0x68,
525 0x59,
526
527 0xF9, 0x28, /* from 16 */
528 0x6D,
529 0x20,
530 0x73,
531 0xE0, 0x2D,
532 0xDE, 0x43,
533 0xD0, 0x33,
534 0x20,
535
536 0xFA, 0x83, /* from 24 */
537 0x25, 0x01,
538 0xFB, 0x16, 0x87,
539 0x4B, 0x16,
540 0x20,
541 0xE6, 0xBD,
542 0xEB, 0x5B,
543 0x4B, 0xCC,
544
545 0xF9, 0xA2, /* from 32 */
546 0xFC, 0x10, 0x3E,
547 0xFE, 0x16, 0x3A, 0x8C,
548 0x20,
549 0xFC, 0x03, 0xAC,
550
551 0x01, /* from 41 */
552 0xDE, 0x83,
553 0x20,
554 0x09
555 };
556 static const UChar expected[]={
557 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
558 0x0063, 0x0061, 0x000D, 0x000A,
559
560 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
561 0x0930, 0x0020, 0x0918, 0x0909,
562
563 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
564 0x4000, 0x4E00, 0x7777, 0x0020,
565
566 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
567 0x0020, 0xD7A3, 0xDC00, 0xD800,
568
569 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
570 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
571
572 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
573 0x0009
574 };
575 static const int32_t offsets[]={
576 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
577 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
578 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
579 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
580 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
581 41, 42, 42, 43, 44
582 };
583
584 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
585 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
586 sampleText, UPRV_LENGTHOF(sampleText),
587 "BOCU-1",
588 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
589 ) {
590 log_err("u->BOCU-1 with skip did not match.\n");
591 }
592 }
593
594 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
595 {
596 const uint8_t sampleText[]={
597 0x61, /* 'a' */
598 0xc4, 0xb5, /* U+0135 */
599 0xed, 0x80, 0xa0, /* Hangul U+d020 */
600 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
601 0xee, 0x80, 0x80, /* PUA U+e000 */
602 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
603 0x62, /* 'b' */
604 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
605 0xd0, 0x80 /* U+0400 */
606 };
607 UChar expected[]={
608 0x0061,
609 0x0135,
610 0xd020,
611 0xd801, 0xdc01,
612 0xe000,
613 0xdc01,
614 0x0062,
615 0xd801,
616 0x0400
617 };
618 int32_t offsets[]={
619 0,
620 1, 1,
621 2, 2, 2,
622 3, 3, 3, 4, 4, 4,
623 5, 5, 5,
624 6, 6, 6,
625 7,
626 8, 8, 8,
627 9, 9
628 };
629
630 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
631
632 /* without offsets */
633 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
634 sampleText, UPRV_LENGTHOF(sampleText),
635 "CESU-8",
636 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
637 ) {
638 log_err("u->CESU-8 with skip did not match.\n");
639 }
640
641 /* with offsets */
642 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
643 sampleText, UPRV_LENGTHOF(sampleText),
644 "CESU-8",
645 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
646 ) {
647 log_err("u->CESU-8 with skip did not match.\n");
648 }
649 }
650
651 /*to Unicode*/
652 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
653
654 #if !UCONFIG_NO_LEGACY_CONVERSION
655 {
656
657 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
658 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
659 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
660
661 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
662 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
663 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
664
665 if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
666 IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
667 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
668 log_err("ibm-949->u with skip did not match.\n");
669 if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
670 IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
671 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
672 log_err("ibm-943->u with skip did not match.\n");
673
674
675 if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
676 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
677 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
678 log_err("ibm-930->u with skip did not match.\n");
679
680
681 if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
682 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
683 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
684 log_err("ibm-930->u with skip did not match.\n");
685 }
686 #endif
687
688 {
689 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
690 static const UChar usasciiToU[] = { 0x61, 0x31 };
691 static const int32_t usasciiToUOffsets[] = { 0, 2 };
692
693 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
694 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
695 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
696
697 /* US-ASCII */
698 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
699 usasciiToU, UPRV_LENGTHOF(usasciiToU),
700 "US-ASCII",
701 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
702 NULL, 0)
703 ) {
704 log_err("US-ASCII->u with skip did not match.\n");
705 }
706
707 #if !UCONFIG_NO_LEGACY_CONVERSION
708 /* SBCS NLTC codepage 367 for US-ASCII */
709 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
710 usasciiToU, UPRV_LENGTHOF(usasciiToU),
711 "ibm-367",
712 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
713 NULL, 0)
714 ) {
715 log_err("ibm-367->u with skip did not match.\n");
716 }
717 #endif
718
719 /* ISO-Latin-1 */
720 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
721 latin1ToU, UPRV_LENGTHOF(latin1ToU),
722 "LATIN_1",
723 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
724 NULL, 0)
725 ) {
726 log_err("LATIN_1->u with skip did not match.\n");
727 }
728
729 #if !UCONFIG_NO_LEGACY_CONVERSION
730 /* windows-1252 */
731 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
732 latin1ToU, UPRV_LENGTHOF(latin1ToU),
733 "windows-1252",
734 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
735 NULL, 0)
736 ) {
737 log_err("windows-1252->u with skip did not match.\n");
738 }
739 #endif
740 }
741
742 #if !UCONFIG_NO_LEGACY_CONVERSION
743 {
744 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
745 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
746 };
747 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
748 };
749 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
750
751
752 /* euc-jp*/
753 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
754 0x8f, 0xda, 0xa1, /*unassigned*/
755 0x8e, 0xe0,
756 };
757 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
758 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
759
760 /*EUC_TW*/
761 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
762 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
763 0xe6, 0xca, 0x8a,
764 };
765 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
766 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
767 /*iso-2022-jp*/
768 static const uint8_t sampleTxt_iso_2022_jp[]={
769 0x41,
770 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
771 0x1b, 0x28, 0x42, 0x42,
772
773 };
774 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
775 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
776
777 /*iso-2022-cn*/
778 static const uint8_t sampleTxt_iso_2022_cn[]={
779 0x0f, 0x41, 0x44,
780 0x1B, 0x24, 0x29, 0x47,
781 0x0E, 0x40, 0x6f, /*unassigned*/
782 0x0f, 0x42,
783
784 };
785
786 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
787 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
788
789 /*iso-2022-kr*/
790 static const uint8_t sampleTxt_iso_2022_kr[]={
791 0x1b, 0x24, 0x29, 0x43,
792 0x41,
793 0x0E, 0x7f, 0x1E,
794 0x0e, 0x25, 0x50,
795 0x0f, 0x51,
796 0x42, 0x43,
797
798 };
799 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
800 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
801
802 /*hz*/
803 static const uint8_t sampleTxt_hz[]={
804 0x41,
805 0x7e, 0x7b, 0x26, 0x30,
806 0x7f, 0x1E, /*unassigned*/
807 0x26, 0x30,
808 0x7e, 0x7d, 0x42,
809 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
810 0x7e, 0x7d, 0x42,
811 };
812 static const UChar hztoUnicode[]={
813 0x41,
814 0x03a0,
815 0x03A0,
816 0x42,
817 0x42,};
818
819 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
820
821 /*ISCII*/
822 static const uint8_t sampleTxt_iscii[]={
823 0x41,
824 0xa1,
825 0xEB, /*unassigned*/
826 0x26,
827 0x30,
828 0xa2,
829 0xEC, /*unassigned*/
830 0x42,
831 };
832 static const UChar isciitoUnicode[]={
833 0x41,
834 0x0901,
835 0x26,
836 0x30,
837 0x0902,
838 0x42,
839 };
840
841 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
842
843 /*LMBCS*/
844 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
845 0x12, 0x92, 0xa0, /*unassigned*/
846 0x12, 0x92, 0xA1,
847 };
848 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
849 static const int32_t fromLMBCS[] = {0, 6};
850
851 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
852 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
853 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
854 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
855
856 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
857 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
858 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
859 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
860
861 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
862 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
863 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
864 log_err("euc-jp->u with skip did not match.\n");
865
866
867
868 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
869 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
870 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
871 log_err("euc-tw->u with skip did not match.\n");
872
873
874 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
875 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
876 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
877 log_err("iso-2022-jp->u with skip did not match.\n");
878
879 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
880 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
881 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
882 log_err("iso-2022-cn->u with skip did not match.\n");
883
884 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
885 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
886 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
887 log_err("iso-2022-kr->u with skip did not match.\n");
888
889 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
890 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
891 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
892 log_err("HZ->u with skip did not match.\n");
893
894 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
895 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
896 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
897 log_err("iscii->u with skip did not match.\n");
898
899 if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
900 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
901 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
902 log_err("LMBCS->u with skip did not match.\n");
903
904 }
905 #endif
906
907 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
908 {
909 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
910 0xe0, 0x80, 0x61,};
911 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
912 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
913
914 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
915 expected1, UPRV_LENGTHOF(expected1),"utf8",
916 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
917 log_err("utf8->u with skip did not match.\n");;
918 }
919
920 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
921 {
922 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
923 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
924 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
925
926 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
927 expected1, UPRV_LENGTHOF(expected1),"SCSU",
928 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
929 log_err("scsu->u with skip did not match.\n");
930 }
931
932 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
933 {
934 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
935 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
936 0x24, 0x1E, 0x52, /* 3 */
937 0xB2, /* 6 */
938 0x20, /* 7 */
939 0x40, 0x07, /* 8 - wrong trail byte */
940 0xB3, /* 10 */
941 0xB1, /* 11 */
942 0xD0, 0x20, /* 12 - wrong trail byte */
943 0x0D, /* 14 */
944 0x0A, /* 15 */
945 0x20, /* 16 */
946 0x00, /* 17 */
947 0xD0, 0x6C, /* 18 */
948 0xB6, /* 20 */
949 0xD8, 0xA5, /* 21 */
950 0x20, /* 23 */
951 0x68, /* 24 */
952 0x59, /* 25 */
953 0xF9, 0x28, /* 26 */
954 0x6D, /* 28 */
955 0x20, /* 29 */
956 0x73, /* 30 */
957 0xE0, 0x2D, /* 31 */
958 0xDE, 0x43, /* 33 */
959 0xD0, 0x33, /* 35 */
960 0x20, /* 37 */
961 0xFA, 0x83, /* 38 */
962 0x25, 0x01, /* 40 */
963 0xFB, 0x16, 0x87, /* 42 */
964 0x4B, 0x16, /* 45 */
965 0x20, /* 47 */
966 0xE6, 0xBD, /* 48 */
967 0xEB, 0x5B, /* 50 */
968 0x4B, 0xCC, /* 52 */
969 0xF9, 0xA2, /* 54 */
970 0xFC, 0x10, 0x3E, /* 56 */
971 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
972 0x20, /* 63 */
973 0xFC, 0x03, 0xAC, /* 64 */
974 0xFF, /* 67 - FF just resets the state without encoding anything */
975 0x01, /* 68 */
976 0xDE, 0x83, /* 69 */
977 0x20, /* 71 */
978 0x09 /* 72 */
979 };
980 UChar expected[]={
981 0xFEFF, 0x0061, 0x0062, 0x0020,
982 0x0063, 0x0061, 0x000D, 0x000A,
983 0x0020, 0x0000, 0x00DF, 0x00E6,
984 0x0930, 0x0020, 0x0918, 0x0909,
985 0x3086, 0x304D, 0x0020, 0x3053,
986 0x4000, 0x4E00, 0x7777, 0x0020,
987 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
988 0x0020, 0xD7A3, 0xDC00, 0xD800,
989 0xD800, 0xDC00, 0xD845, 0xDDDD,
990 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
991 0xDFFF, 0x0001, 0x0E40, 0x0020,
992 0x0009
993 };
994 int32_t offsets[]={
995 0, 3, 6, 7, /* skip 8, */
996 10, 11, /* skip 12, */
997 14, 15, 16, 17, 18,
998 20, 21, 23, 24, 25, 26, 28, 29,
999 30, 31, 33, 35, 37, 38,
1000 40, 42, 45, 47, 48,
1001 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1002 63, 64, /* trail */ 64, /* reset only 67, */
1003 68, 69,
1004 71, 72
1005 };
1006
1007 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1008 expected, UPRV_LENGTHOF(expected), "BOCU-1",
1009 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1010 ) {
1011 log_err("BOCU-1->u with skip did not match.\n");
1012 }
1013 }
1014
1015 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1016 {
1017 const uint8_t sampleText[]={
1018 0x61, /* 0 'a' */
1019 0xc0, 0x80, /* 1 non-shortest form */
1020 0xc4, 0xb5, /* 3 U+0135 */
1021 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1022 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1023 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1024 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1025 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1026 0x62, /* 24 'b' */
1027 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1028 0xed, 0xa0, /* 28 incomplete sequence */
1029 0xd0, 0x80 /* 30 U+0400 */
1030 };
1031 UChar expected[]={
1032 0x0061,
1033 /* skip */
1034 0x0135,
1035 0xd020,
1036 0xd801, 0xdc01,
1037 0xe000,
1038 0xdc01,
1039 /* skip */
1040 0x0062,
1041 0xd801,
1042 0x0400
1043 };
1044 int32_t offsets[]={
1045 0,
1046 /* skip 1, */
1047 3,
1048 5,
1049 8, 11,
1050 14,
1051 17,
1052 /* skip 20, 20, */
1053 24,
1054 25,
1055 /* skip 28 */
1056 30
1057 };
1058
1059 /* without offsets */
1060 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1061 expected, UPRV_LENGTHOF(expected), "CESU-8",
1062 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1063 ) {
1064 log_err("CESU-8->u with skip did not match.\n");
1065 }
1066
1067 /* with offsets */
1068 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1069 expected, UPRV_LENGTHOF(expected), "CESU-8",
1070 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1071 ) {
1072 log_err("CESU-8->u with skip did not match.\n");
1073 }
1074 }
1075 }
1076
1077 static void TestStop(int32_t inputsize, int32_t outputsize)
1078 {
1079 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1080 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1081
1082 static const uint8_t expstopIBM_949[]= {
1083 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1084
1085 static const uint8_t expstopIBM_943[] = {
1086 0x9f, 0xaf, 0x9f, 0xb1};
1087
1088 static const uint8_t expstopIBM_930[] = {
1089 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1090
1091 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1092 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1093 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1094
1095
1096 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1097 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1098 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1099
1100 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1101 static const int32_t fromIBM943Offs [] = { 0, 2};
1102 static const int32_t fromIBM930Offs [] = { 1, 3};
1103
1104 gInBufferSize = inputsize;
1105 gOutBufferSize = outputsize;
1106
1107 /*From Unicode*/
1108
1109 #if !UCONFIG_NO_LEGACY_CONVERSION
1110 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1111 expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1112 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1113 log_err("u-> ibm-949 with stop did not match.\n");
1114 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1115 expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1116 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1117 log_err("u-> ibm-943 with stop did not match.\n");
1118 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1119 expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1120 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1121 log_err("u-> ibm-930 with stop did not match.\n");
1122
1123 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1124 {
1125 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1126 static const uint8_t toIBM943[]= { 0x61,};
1127 static const int32_t offset[]= {0,} ;
1128
1129 /*EUC_JP*/
1130 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1131 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1132 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1133
1134 /*EUC_TW*/
1135 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1136 static const uint8_t to_euc_tw[]={
1137 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1138 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1139
1140 /*ISO-2022-JP*/
1141 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1142 static const uint8_t to_iso_2022_jp[]={
1143 0x41,
1144
1145 };
1146 static const int32_t from_iso_2022_jpOffs [] ={0,};
1147
1148 /*ISO-2022-cn*/
1149 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1150 static const uint8_t to_iso_2022_cn[]={
1151 0x41,
1152
1153 };
1154 static const int32_t from_iso_2022_cnOffs [] ={
1155 0,0,
1156 2,2,
1157 };
1158
1159 /*ISO-2022-kr*/
1160 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1161 static const uint8_t to_iso_2022_kr[]={
1162 0x1b, 0x24, 0x29, 0x43,
1163 0x41,
1164 0x0e, 0x25, 0x50,
1165 };
1166 static const int32_t from_iso_2022_krOffs [] ={
1167 -1,-1,-1,-1,
1168 0,
1169 1,1,1,
1170 };
1171
1172 /* HZ encoding */
1173 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1174
1175 static const uint8_t to_hz[]={
1176 0x7e, 0x7d, 0x41,
1177 0x7e, 0x7b, 0x26, 0x30,
1178
1179 };
1180 static const int32_t from_hzOffs [] ={
1181 0, 0,0,
1182 1,1,1,1,
1183 };
1184
1185 /*ISCII*/
1186 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1187 static const uint8_t to_iscii[]={
1188 0x41,
1189 };
1190 static const int32_t from_isciiOffs [] ={
1191 0,
1192 };
1193
1194 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1195 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1196 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1197 log_err("u-> ibm-943 with stop did not match.\n");
1198
1199 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1200 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1201 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1202 log_err("u-> euc-jp with stop did not match.\n");
1203
1204 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1205 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1206 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1207 log_err("u-> euc-tw with stop did not match.\n");
1208
1209 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1210 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1211 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1212 log_err("u-> iso-2022-jp with stop did not match.\n");
1213
1214 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1215 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1216 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1217 log_err("u-> iso-2022-jp with stop did not match.\n");
1218
1219 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1220 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1221 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1222 log_err("u-> iso-2022-cn with stop did not match.\n");
1223
1224 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1225 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1226 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1227 log_err("u-> iso-2022-kr with stop did not match.\n");
1228
1229 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1230 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1231 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1232 log_err("u-> HZ with stop did not match.\n");\
1233
1234 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1235 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1236 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1237 log_err("u-> iscii with stop did not match.\n");
1238
1239
1240 }
1241 #endif
1242
1243 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1244 {
1245 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1246
1247 static const uint8_t to_SCSU[]={
1248 0x41,
1249
1250 };
1251 int32_t from_SCSUOffs [] ={
1252 0,
1253
1254 };
1255 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1256 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1257 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1258 log_err("u-> SCSU with skip did not match.\n");
1259
1260 }
1261
1262 /*to Unicode*/
1263
1264 #if !UCONFIG_NO_LEGACY_CONVERSION
1265 if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1266 IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1267 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1268 log_err("ibm-949->u with stop did not match.\n");
1269 if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1270 IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1271 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1272 log_err("ibm-943->u with stop did not match.\n");
1273 if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1274 IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1275 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1276 log_err("ibm-930->u with stop did not match.\n");
1277
1278 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1279 {
1280
1281 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1282 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1283 };
1284 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1285 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1286
1287
1288 /*EUC-JP*/
1289 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1290 0x8f, 0xda, 0xa1, /*unassigned*/
1291 0x8e, 0xe0,
1292 };
1293 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1294 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1295
1296 /*EUC_TW*/
1297 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1298 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1299 0xe6, 0xca, 0x8a,
1300 };
1301 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1302 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1303
1304
1305
1306 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1307 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1308 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1309 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1310
1311 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1312 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1313 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1314 log_err("euc-jp->u with stop did not match.\n");
1315
1316 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1317 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1318 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1319 log_err("euc-tw->u with stop did not match.\n");
1320 }
1321 #endif
1322
1323 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1324 {
1325 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1326 0xe0, 0x80, 0x61,};
1327 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1328 static const int32_t offsets1[] = { 0x0000, 0x0001};
1329
1330 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1331 expected1, UPRV_LENGTHOF(expected1),"utf8",
1332 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1333 log_err("utf8->u with stop did not match.\n");;
1334 }
1335 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1336 {
1337 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1338 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1339 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1340
1341 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1342 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1343 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1344 log_err("scsu->u with stop did not match.\n");;
1345 }
1346
1347 }
1348
1349 static void TestSub(int32_t inputsize, int32_t outputsize)
1350 {
1351 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1352 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1353
1354 static const uint8_t expsubIBM_949[] =
1355 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1356
1357 static const uint8_t expsubIBM_943[] = {
1358 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1359
1360 static const uint8_t expsubIBM_930[] = {
1361 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1362
1363 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1364 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1365 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1366
1367 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1368 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1369 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1370
1371 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1372 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1373 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1374
1375 gInBufferSize = inputsize;
1376 gOutBufferSize = outputsize;
1377
1378 /*from unicode*/
1379
1380 #if !UCONFIG_NO_LEGACY_CONVERSION
1381 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1382 expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1383 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1384 log_err("u-> ibm-949 with subst did not match.\n");
1385 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1386 expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1388 log_err("u-> ibm-943 with subst did not match.\n");
1389 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1390 expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1391 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1392 log_err("u-> ibm-930 with subst did not match.\n");
1393
1394 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1395 {
1396 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1397 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1398 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1399
1400
1401 /* EUC_JP*/
1402 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1403 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1404 0xf4, 0xfe, 0xf4, 0xfe,
1405 0x61, 0x8e, 0xe0,
1406 };
1407 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1408
1409 /*EUC_TW*/
1410 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1411 static const uint8_t to_euc_tw[]={
1412 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1413 0xfd, 0xfe, 0xfd, 0xfe,
1414 0x61, 0xe6, 0xca, 0x8a,
1415 };
1416
1417 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1418
1419 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1420 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1421 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1422 log_err("u-> ibm-943 with substitute did not match.\n");
1423
1424 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1425 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1426 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1427 log_err("u-> euc-jp with substitute did not match.\n");
1428
1429 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1430 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1431 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1432 log_err("u-> euc-tw with substitute did not match.\n");
1433 }
1434 #endif
1435
1436 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1437 {
1438 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1439
1440 const uint8_t to_SCSU[]={
1441 0x41,
1442 0x0e, 0xff,0xfd,
1443 0x42
1444
1445
1446 };
1447 int32_t from_SCSUOffs [] ={
1448 0,
1449 1,1,1,
1450 2,
1451
1452 };
1453 const uint8_t to_SCSU_1[]={
1454 0x41,
1455
1456 };
1457 int32_t from_SCSUOffs_1 [] ={
1458 0,
1459
1460 };
1461 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1462 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1463 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1464 log_err("u-> SCSU with substitute did not match.\n");
1465
1466 if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1467 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1468 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1469 log_err("u-> SCSU with substitute did not match.\n");
1470 }
1471
1472 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1473 {
1474 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1475 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1476 0xf0, 0x90, 0x90, 0x81,
1477 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1478 0xef, 0xbf, 0xbf, 0x61,
1479
1480 };
1481 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1482 if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1483 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1484 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1485 log_err("u-> utf8 with substitute did not match.\n");
1486 }
1487 }
1488
1489 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1490 {
1491 static const UChar in[]={ 0x0041, 0xfeff };
1492
1493 static const uint8_t out[]={
1494 #if U_IS_BIG_ENDIAN
1495 0xfe, 0xff,
1496 0x00, 0x41,
1497 0xfe, 0xff
1498 #else
1499 0xff, 0xfe,
1500 0x41, 0x00,
1501 0xff, 0xfe
1502 #endif
1503 };
1504 static const int32_t offsets[]={
1505 -1, -1, 0, 0, 1, 1
1506 };
1507
1508 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1509 out, UPRV_LENGTHOF(out), "UTF-16",
1510 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1511 ) {
1512 log_err("u->UTF-16 with substitute did not match.\n");
1513 }
1514 }
1515
1516 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1517 {
1518 static const UChar in[]={ 0x0041, 0xfeff };
1519
1520 static const uint8_t out[]={
1521 #if U_IS_BIG_ENDIAN
1522 0x00, 0x00, 0xfe, 0xff,
1523 0x00, 0x00, 0x00, 0x41,
1524 0x00, 0x00, 0xfe, 0xff
1525 #else
1526 0xff, 0xfe, 0x00, 0x00,
1527 0x41, 0x00, 0x00, 0x00,
1528 0xff, 0xfe, 0x00, 0x00
1529 #endif
1530 };
1531 static const int32_t offsets[]={
1532 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1533 };
1534
1535 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1536 out, UPRV_LENGTHOF(out), "UTF-32",
1537 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1538 ) {
1539 log_err("u->UTF-32 with substitute did not match.\n");
1540 }
1541 }
1542
1543 /*to unicode*/
1544
1545 #if !UCONFIG_NO_LEGACY_CONVERSION
1546 if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1547 IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1548 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1549 log_err("ibm-949->u with substitute did not match.\n");
1550 if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1551 IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1553 log_err("ibm-943->u with substitute did not match.\n");
1554 if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1555 IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1556 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1557 log_err("ibm-930->u with substitute did not match.\n");
1558
1559 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1560 {
1561
1562 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1563 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1564 };
1565 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1566 };
1567 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1568
1569
1570 /* EUC_JP*/
1571 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1572 0x8f, 0xda, 0xa1, /*unassigned*/
1573 0x8e, 0xe0, 0x8a
1574 };
1575 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1576 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1577
1578 /*EUC_TW*/
1579 const uint8_t sampleTxt_euc_tw[]={
1580 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1581 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1582 0xe6, 0xca, 0x8a,
1583 };
1584 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1585 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1586
1587
1588 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1589 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1590 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1591 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1592
1593
1594 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1595 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1596 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1597 log_err("euc-jp->u with substitute did not match.\n");
1598
1599
1600 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1601 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1602 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1603 log_err("euc-tw->u with substitute did not match.\n");
1604
1605
1606 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1607 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1608 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1609 log_err("euc-jp->u with substitute did not match.\n");
1610 }
1611 #endif
1612
1613 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1614 {
1615 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1616 0xe0, 0x80, 0x61,};
1617 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1618 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1619
1620 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1621 expected1, UPRV_LENGTHOF(expected1),"utf8",
1622 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1623 log_err("utf8->u with substitute did not match.\n");;
1624 }
1625 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1626 {
1627 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1628 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1629 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1630
1631 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1632 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1633 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1634 log_err("scsu->u with stop did not match.\n");;
1635 }
1636
1637 #if !UCONFIG_NO_LEGACY_CONVERSION
1638 log_verbose("Testing ibm-930 subchar/subchar1\n");
1639 {
1640 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1641 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1642 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1643
1644 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1645 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1646 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1647
1648 if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1649 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1650 ) {
1651 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1652 }
1653
1654 if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1655 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1656 ) {
1657 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1658 }
1659 }
1660
1661 log_verbose("Testing GB 18030 with substitute callbacks\n");
1662 {
1663 static const UChar u2[]={
1664 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1665 static const uint8_t gb2[]={
1666 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1667 static const int32_t offsets2[]={
1668 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1669
1670 if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1671 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1672 ) {
1673 log_err("gb18030->u with substitute did not match.\n");
1674 }
1675 }
1676 #endif
1677
1678 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1679 {
1680 static const uint8_t utf7[]={
1681 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1682 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1683 };
1684 static const UChar unicode[]={
1685 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1686 };
1687 static const int32_t offsets[]={
1688 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1689 };
1690
1691 if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1692 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1693 ) {
1694 log_err("UTF-7->u with substitute did not match.\n");
1695 }
1696 }
1697
1698 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1699 {
1700 static const uint8_t
1701 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1702 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1703 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1704
1705 static const UChar
1706 out1[]={ 0x4e00, 0xfeff },
1707 out2[]={ 0x004e, 0xfffe },
1708 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1709
1710 static const int32_t
1711 offsets1[]={ 2, 4 },
1712 offsets2[]={ 2, 4 },
1713 offsets3[]={ 0, 2, 4 };
1714
1715 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1716 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1717 ) {
1718 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1719 }
1720
1721 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1722 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1723 ) {
1724 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1725 }
1726
1727 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1728 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1729 ) {
1730 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1731 }
1732 }
1733
1734 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1735 {
1736 static const uint8_t
1737 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1738 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1739 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1740 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1741
1742 static const UChar
1743 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1744 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1745 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1746 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1747
1748 static const int32_t
1749 offsets1[]={ 4, 4, 8 },
1750 offsets2[]={ 4, 4, 8 },
1751 offsets3[]={ 0, 4, 4, 8, 12 },
1752 offsets4[]={ 0, 0, 4, 8 };
1753
1754 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1755 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1756 ) {
1757 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1758 }
1759
1760 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1761 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1762 ) {
1763 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1764 }
1765
1766 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1767 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1768 ) {
1769 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1770 }
1771
1772 if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1773 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1774 ) {
1775 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1776 }
1777 }
1778 }
1779
1780 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1781 {
1782 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1783 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1784
1785 const uint8_t expsubwvalIBM_949[]= {
1786 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1787 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1788
1789 const uint8_t expsubwvalIBM_943[]= {
1790 0x9f, 0xaf, 0x9f, 0xb1,
1791 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1792
1793 const uint8_t expsubwvalIBM_930[] = {
1794 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1795
1796 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1797 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1798 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1799
1800 gInBufferSize = inputsize;
1801 gOutBufferSize = outputsize;
1802
1803 /*from Unicode*/
1804
1805 #if !UCONFIG_NO_LEGACY_CONVERSION
1806 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1807 expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1808 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1809 log_err("u-> ibm-949 with subst with value did not match.\n");
1810
1811 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1812 expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1813 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1814 log_err("u-> ibm-943 with sub with value did not match.\n");
1815
1816 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1817 expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1818 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1819 log_err("u-> ibm-930 with subst with value did not match.\n");
1820
1821
1822 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1823 {
1824 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1825 static const uint8_t toIBM943[]= { 0x61,
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1827 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1828 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1829 0x61 };
1830 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1831
1832
1833 /* EUC_JP*/
1834 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1835 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1837 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1838 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839 0x61, 0x8e, 0xe0,
1840 };
1841 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1842 3, 3, 3, 3, 3, 3,
1843 3, 3, 3, 3, 3, 3,
1844 5, 5, 5, 5, 5, 5,
1845 6, 7, 7,
1846 };
1847
1848 /*EUC_TW*/
1849 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1850 static const uint8_t to_euc_tw[]={
1851 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1854 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855 0x61, 0xe6, 0xca, 0x8a,
1856 };
1857 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1858 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1859 6, 7, 7, 8,
1860 };
1861 /*ISO-2022-JP*/
1862 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1863 static const uint8_t to_iso_2022_jp1[]={
1864 0x1b, 0x24, 0x42, 0x21, 0x21,
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1866 0x1b, 0x24, 0x42, 0x21, 0x22,
1867 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1868 0x42,
1869 };
1870
1871 static const int32_t from_iso_2022_jpOffs1 [] ={
1872 0,0,0,0,0,
1873 1,1,1,1,1,1,1,1,1,
1874 2,2,2,2,2,
1875 3,3,3,3,3,3,3,3,3,
1876 4,
1877 };
1878 /* surrogate pair*/
1879 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1880 static const uint8_t to_iso_2022_jp2[]={
1881 0x1b, 0x24, 0x42, 0x21, 0x21,
1882 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1883 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1884 0x1b, 0x24, 0x42, 0x21, 0x22,
1885 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1886 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1887 0x42,
1888 };
1889 static const int32_t from_iso_2022_jpOffs2 [] ={
1890 0,0,0,0,0,
1891 1,1,1,1,1,1,1,1,1,
1892 1,1,1,1,1,1,
1893 3,3,3,3,3,
1894 4,4,4,4,4,4,4,4,4,
1895 4,4,4,4,4,4,
1896 6,
1897 };
1898
1899 /*ISO-2022-cn*/
1900 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1901 static const uint8_t to_iso_2022_cn[]={
1902 0x41,
1903 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1904 0x42,
1905 };
1906 static const int32_t from_iso_2022_cnOffs [] ={
1907 0,
1908 1,1,1,1,1,1,
1909 2,
1910 };
1911
1912 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1913
1914 static const uint8_t to_iso_2022_cn4[]={
1915 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1916 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1917 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1918 0x0e, 0x21, 0x22,
1919 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1920 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1921 0x42,
1922 };
1923 static const int32_t from_iso_2022_cnOffs4 [] ={
1924 0,0,0,0,0,0,0,
1925 1,1,1,1,1,1,1,
1926 1,1,1,1,1,1,
1927 3,3,3,
1928 4,4,4,4,4,4,4,
1929 4,4,4,4,4,4,
1930 6
1931
1932 };
1933
1934 /*ISO-2022-kr*/
1935 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1936 static const uint8_t to_iso_2022_kr2[]={
1937 0x1b, 0x24, 0x29, 0x43,
1938 0x41,
1939 0x0e, 0x25, 0x50,
1940 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1941 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1942 0x0e, 0x25, 0x50,
1943 0x0f, 0x42,
1944 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1945 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1946 0x43
1947 };
1948 static const int32_t from_iso_2022_krOffs2 [] ={
1949 -1,-1,-1,-1,
1950 0,
1951 1,1,1,
1952 2,2,2,2,2,2,2,
1953 2,2,2,2,2,2,
1954 4,4,4,
1955 5,5,
1956 6,6,6,6,6,6,
1957 6,6,6,6,6,6,
1958 8,
1959 };
1960
1961 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1962 static const uint8_t to_iso_2022_kr[]={
1963 0x1b, 0x24, 0x29, 0x43,
1964 0x41,
1965 0x0e, 0x25, 0x50,
1966 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1967 0x0e, 0x25, 0x50,
1968 0x0f, 0x42,
1969 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1970 0x43
1971 };
1972
1973
1974 static const int32_t from_iso_2022_krOffs [] ={
1975 -1,-1,-1,-1,
1976 0,
1977 1,1,1,
1978 2,2,2,2,2,2,2,
1979 3,3,3,
1980 4,4,
1981 5,5,5,5,5,5,
1982 6,
1983 };
1984 /* HZ encoding */
1985 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1986
1987 static const uint8_t to_hz[]={
1988 0x7e, 0x7d, 0x41,
1989 0x7e, 0x7b, 0x26, 0x30,
1990 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1991 0x7e, 0x7b, 0x26, 0x30,
1992 0x7e, 0x7d, 0x42,
1993
1994 };
1995 static const int32_t from_hzOffs [] ={
1996 0,0,0,
1997 1,1,1,1,
1998 2,2,2,2,2,2,2,2,
1999 3,3,3,3,
2000 4,4,4
2001 };
2002
2003 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2004 static const uint8_t to_hz2[]={
2005 0x7e, 0x7d, 0x41,
2006 0x7e, 0x7b, 0x26, 0x30,
2007 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2008 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2009 0x7e, 0x7b, 0x26, 0x30,
2010 0x7e, 0x7d, 0x42,
2011 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2012 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2013 0x43
2014 };
2015 static const int32_t from_hzOffs2 [] ={
2016 0,0,0,
2017 1,1,1,1,
2018 2,2,2,2,2,2,2,2,
2019 2,2,2,2,2,2,
2020 4,4,4,4,
2021 5,5,5,
2022 6,6,6,6,6,6,
2023 6,6,6,6,6,6,
2024 8,
2025 };
2026
2027 /*ISCII*/
2028 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2029 static const uint8_t to_iscii[]={
2030 0x41,
2031 0xef, 0x42, 0xa1,
2032 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2033 0xa2,
2034 0x42,
2035 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2036 0x43
2037 };
2038
2039
2040 static const int32_t from_isciiOffs [] ={
2041 0,
2042 1,1,1,
2043 2,2,2,2,2,2,
2044 3,
2045 4,
2046 5,5,5,5,5,5,
2047 6,
2048 };
2049
2050 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2051 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2052 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2053 log_err("u-> ibm-943 with subst with value did not match.\n");
2054
2055 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2056 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2057 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2058 log_err("u-> euc-jp with subst with value did not match.\n");
2059
2060 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2061 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2062 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2063 log_err("u-> euc-tw with subst with value did not match.\n");
2064
2065 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2066 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2067 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2068 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2069
2070 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2071 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2072 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2073 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2074
2075 if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2076 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2077 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2078 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2079 /*ESCAPE OPTIONS*/
2080 {
2081 /* surrogate pair*/
2082 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2083 static const uint8_t to_iso_2022_jp3_v2[]={
2084 0x1b, 0x24, 0x42, 0x21, 0x21,
2085 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2086
2087 0x1b, 0x24, 0x42, 0x21, 0x22,
2088 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2089
2090 0x42,
2091 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2092 };
2093
2094 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2095 0,0,0,0,0,
2096 1,1,1,1,1,1,1,1,1,1,1,1,
2097
2098 3,3,3,3,3,
2099 4,4,4,4,4,4,4,4,4,4,4,4,
2100
2101 6,
2102 7,7,7,7,7,7,7,7,7
2103 };
2104
2105 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2106 to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2107 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2108 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2109 }
2110 {
2111 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2112 static const uint8_t to_iso_2022_cn5_v2[]={
2113 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2114 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2115 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2116 0x0e, 0x21, 0x22,
2117 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2118 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2119 0x42,
2120 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2121 };
2122 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2123 0,0,0,0,0,0,0,
2124 1,1,1,1,1,1,1,
2125 1,1,1,1,1,1,
2126 3,3,3,
2127 4,4,4,4,4,4,4,
2128 4,4,4,4,4,4,
2129 6,
2130 7,7,7,7,7,7
2131 };
2132 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2133 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2134 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2135 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2136
2137 }
2138 {
2139 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2140 static const uint8_t to_iso_2022_cn6_v2[]={
2141 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2143 0x0e, 0x21, 0x22,
2144 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2145 0x42,
2146 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2147 };
2148 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2149 0, 0, 0, 0, 0, 0, 0,
2150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2151 3, 3, 3,
2152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2153 6,
2154 7, 7, 7, 7, 7, 7, 7, 7,
2155 };
2156 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2157 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2158 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2159 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2160
2161 }
2162 {
2163 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2164 static const uint8_t to_iso_2022_cn7_v2[]={
2165 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2167 0x0e, 0x21, 0x22,
2168 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2169 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2170 };
2171 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2172 0, 0, 0, 0, 0, 0, 0,
2173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2174 3, 3, 3,
2175 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2176 6,
2177 7, 7, 7, 7, 7, 7,
2178 };
2179 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2180 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2181 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2182 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2183
2184 }
2185 {
2186 static const UChar iso_2022_cn_inputText8[]={
2187 0x3000,
2188 0xD84D, 0xDC56,
2189 0x3001,
2190 0xD84D, 0xDC56,
2191 0xDBFF, 0xDFFF,
2192 0x0042,
2193 0x0902};
2194 static const uint8_t to_iso_2022_cn8_v2[]={
2195 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2197 0x0e, 0x21, 0x22,
2198 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2199 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2200 0x42,
2201 0x5c, 0x39, 0x30, 0x32, 0x20
2202 };
2203 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2204 0, 0, 0, 0, 0, 0, 0,
2205 1, 1, 1, 1, 1, 1, 1, 1,
2206 3, 3, 3,
2207 4, 4, 4, 4, 4, 4, 4, 4,
2208 6, 6, 6, 6, 6, 6, 6, 6,
2209 8,
2210 9, 9, 9, 9, 9
2211 };
2212 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2213 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2214 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2215 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2216
2217 }
2218 {
2219 static const uint8_t to_iso_2022_cn4_v3[]={
2220 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2222 0x0e, 0x21, 0x22,
2223 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2224 0x42
2225 };
2226
2227
2228 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2229 0,0,0,0,0,0,0,
2230 1,1,1,1,1,1,1,1,1,1,1,
2231
2232 3,3,3,
2233 4,4,4,4,4,4,4,4,4,4,4,
2234
2235 6
2236
2237 };
2238 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2239 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2240 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2241 {
2242 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2243 }
2244 }
2245 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2246 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2247 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2248 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2249
2250 if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2251 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2252 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2253 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2254 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2255 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2256 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2257 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2258 if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2259 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2260 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2261 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2262 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2263 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2265 log_err("u-> hz with subst with value did not match.\n");
2266 if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2267 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2268 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2269 log_err("u-> hz with subst with value did not match.\n");
2270
2271 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2272 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2273 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2274 log_err("u-> iscii with subst with value did not match.\n");
2275 }
2276 #endif
2277
2278 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2279 /*to Unicode*/
2280 {
2281 #if !UCONFIG_NO_LEGACY_CONVERSION
2282 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2283 0x81, 0xad, /*unassigned*/
2284 0x89, 0xd3 };
2285 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2286 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2287 0x7B87};
2288 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2289
2290 /* EUC_JP*/
2291 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2292 0x8f, 0xda, 0xa1, /*unassigned*/
2293 0x8e, 0xe0,
2294 };
2295 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2296 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2297 0x00a2 };
2298 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2299 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2300 9,
2301 };
2302
2303 /*EUC_TW*/
2304 static const uint8_t sampleTxt_euc_tw[]={
2305 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2306 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2307 0xe6, 0xca, 0x8a,
2308 };
2309 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2310 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2311 0x8706, 0x8a, };
2312 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2313 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2314 11, 13};
2315
2316 /*iso-2022-jp*/
2317 static const uint8_t sampleTxt_iso_2022_jp[]={
2318 0x1b, 0x28, 0x42, 0x41,
2319 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2320 0x1b, 0x28, 0x42, 0x42,
2321
2322 };
2323 /* A % X 3 A % X 1 A B */
2324 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2325 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2326
2327 /*iso-2022-cn*/
2328 static const uint8_t sampleTxt_iso_2022_cn[]={
2329 0x0f, 0x41, 0x44,
2330 0x1B, 0x24, 0x29, 0x47,
2331 0x0E, 0x40, 0x6c, /*unassigned*/
2332 0x0f, 0x42,
2333
2334 };
2335 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2336 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2337
2338 /*iso-2022-kr*/
2339 static const uint8_t sampleTxt_iso_2022_kr[]={
2340 0x1b, 0x24, 0x29, 0x43,
2341 0x41,
2342 0x0E, 0x7f, 0x1E,
2343 0x0e, 0x25, 0x50,
2344 0x0f, 0x51,
2345 0x42, 0x43,
2346
2347 };
2348 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2349 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2350
2351 /*hz*/
2352 static const uint8_t sampleTxt_hz[]={
2353 0x41,
2354 0x7e, 0x7b, 0x26, 0x30,
2355 0x7f, 0x1E, /*unassigned*/
2356 0x26, 0x30,
2357 0x7e, 0x7d, 0x42,
2358 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2359 0x7e, 0x7d, 0x42,
2360 };
2361 static const UChar hztoUnicode[]={
2362 0x41,
2363 0x03a0,
2364 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2365 0x03A0,
2366 0x42,
2367 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2368 0x42,};
2369
2370 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2371
2372
2373 /*iscii*/
2374 static const uint8_t sampleTxt_iscii[]={
2375 0x41,
2376 0x30,
2377 0xEB, /*unassigned*/
2378 0xa3,
2379 0x42,
2380 0xEC, /*unassigned*/
2381 0x42,
2382 };
2383 static const UChar isciitoUnicode[]={
2384 0x41,
2385 0x30,
2386 0x25, 0x58, 0x45, 0x42,
2387 0x0903,
2388 0x42,
2389 0x25, 0x58, 0x45, 0x43,
2390 0x42,};
2391
2392 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2393 #endif
2394
2395 /*UTF8*/
2396 static const uint8_t sampleTxtUTF8[]={
2397 0x20, 0x64, 0x50,
2398 0xC2, 0x7E, /* truncated char */
2399 0x20,
2400 0xE0, 0xB5, 0x7E, /* truncated char */
2401 0x40,
2402 };
2403 static const UChar UTF8ToUnicode[]={
2404 0x0020, 0x0064, 0x0050,
2405 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2406 0x0020,
2407 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2408 0x0040
2409 };
2410 static const int32_t fromUTF8[] = {
2411 0, 1, 2,
2412 3, 3, 3, 3, 4,
2413 5,
2414 6, 6, 6, 6, 6, 6, 6, 6, 8,
2415 9
2416 };
2417 static const UChar UTF8ToUnicodeXML_DEC[]={
2418 0x0020, 0x0064, 0x0050,
2419 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* &#194;~ */
2420 0x0020,
2421 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2422 0x0040
2423 };
2424 static const int32_t fromUTF8XML_DEC[] = {
2425 0, 1, 2,
2426 3, 3, 3, 3, 3, 3, 4,
2427 5,
2428 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2429 9
2430 };
2431
2432
2433 #if !UCONFIG_NO_LEGACY_CONVERSION
2434 if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2435 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2436 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2437 log_err("ibm-943->u with substitute with value did not match.\n");
2438
2439 if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2440 EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2441 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2442 log_err("euc-jp->u with substitute with value did not match.\n");
2443
2444 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2445 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2446 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2447 log_err("euc-tw->u with substitute with value did not match.\n");
2448
2449 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2450 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2451 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2452 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2453
2454 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2455 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2456 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2457 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2458
2459 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2460 {
2461 static const UChar iso_2022_jptoUnicodeDec[]={
2462 0x0041,
2463 /* & # 5 8 ; */
2464 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2465 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2466 0x0042 };
2467 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2468 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2469 iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2470 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2471 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2472 }
2473 {
2474 static const UChar iso_2022_jptoUnicodeHex[]={
2475 0x0041,
2476 /* & # x 3 A ; */
2477 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2478 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2479 0x0042 };
2480 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2481 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2482 iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2483 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2484 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2485 }
2486 {
2487 static const UChar iso_2022_jptoUnicodeC[]={
2488 0x0041,
2489 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2490 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2491 0x0042 };
2492 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2493 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2494 iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2495 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2496 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2497 }
2498 }
2499 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2500 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2501 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2502 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2503
2504 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2505 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2506 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2507 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2508
2509 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2510 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2511 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2512 log_err("hz->u with substitute with value did not match.\n");
2513
2514 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2515 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2516 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2517 log_err("ISCII ->u with substitute with value did not match.\n");
2518 #endif
2519
2520 if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2521 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2522 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2523 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2524 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2525 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2526 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2527 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2528 }
2529 }
2530
2531 #if !UCONFIG_NO_LEGACY_CONVERSION
2532 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2533 {
2534 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2535 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2536 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2537
2538
2539 static const uint8_t text943[] = {
2540 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2541 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2542 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2543 static const UChar toUnicode943stop[]= { 0x304b};
2544
2545 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2546 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2547 static const int32_t fromIBM943Offsstop[] = { 0};
2548
2549 gInBufferSize = inputsize;
2550 gOutBufferSize = outputsize;
2551 /*checking with a legal value*/
2552 if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2553 templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2554 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2555 log_err("u-> ibm-949 with skip did not match.\n");
2556
2557 /*checking illegal value for ibm-943 with substitute*/
2558 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2559 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2560 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2561 log_err("ibm-943->u with subst did not match.\n");
2562 /*checking illegal value for ibm-943 with skip */
2563 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2564 toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2565 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2566 log_err("ibm-943->u with skip did not match.\n");
2567
2568 /*checking illegal value for ibm-943 with stop */
2569 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2570 toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2571 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2572 log_err("ibm-943->u with stop did not match.\n");
2573
2574 }
2575
2576 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2577 {
2578 static const uint8_t sampleText[] = {
2579 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2580 0xff, 0x32, 0x33};
2581 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2582 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2583 /*checking illegal value for ibm-943 with substitute*/
2584 gInBufferSize = inputsize;
2585 gOutBufferSize = outputsize;
2586
2587 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2588 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2589 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2590 log_err("ibm-943->u with subst did not match.\n");
2591 }
2592
2593 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2594 {
2595 /*EBCDIC_STATEFUL*/
2596 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2597 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2598 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2599 /* s SO doubl SI sng s SO fe fe SI s */
2600
2601 /*EBCDIC_STATEFUL with subChar=3f*/
2602 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2603 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2604 static const char mySubChar[]={ 0x3f};
2605
2606 gInBufferSize = inputsize;
2607 gOutBufferSize = outputsize;
2608
2609 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2610 toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2611 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2612 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2613
2614 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2615 toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2616 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2617 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2618 }
2619 #endif
2620
2621 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2622 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2623 const char *mySubChar, int8_t len)
2624 {
2625
2626
2627 UErrorCode status = U_ZERO_ERROR;
2628 UConverter *conv = 0;
2629 char junkout[NEW_MAX_BUFFER]; /* FIX */
2630 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2631 const UChar *src;
2632 char *end;
2633 char *targ;
2634 int32_t *offs;
2635 int i;
2636 int32_t realBufferSize;
2637 char *realBufferEnd;
2638 const UChar *realSourceEnd;
2639 const UChar *sourceLimit;
2640 UBool checkOffsets = TRUE;
2641 UBool doFlush;
2642 char junk[9999];
2643 char offset_str[9999];
2644 char *p;
2645 UConverterFromUCallback oldAction = NULL;
2646 const void* oldContext = NULL;
2647
2648
2649 for(i=0;i<NEW_MAX_BUFFER;i++)
2650 junkout[i] = (char)0xF0;
2651 for(i=0;i<NEW_MAX_BUFFER;i++)
2652 junokout[i] = 0xFF;
2653 setNuConvTestName(codepage, "FROM");
2654
2655 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2656 gOutBufferSize);
2657
2658 conv = ucnv_open(codepage, &status);
2659 if(U_FAILURE(status))
2660 {
2661 log_data_err("Couldn't open converter %s\n",codepage);
2662 return TRUE;
2663 }
2664
2665 log_verbose("Converter opened..\n");
2666
2667 /*----setting the callback routine----*/
2668 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2669 if (U_FAILURE(status))
2670 {
2671 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2672 }
2673 /*------------------------*/
2674 /*setting the subChar*/
2675 if(mySubChar != NULL){
2676 ucnv_setSubstChars(conv, mySubChar, len, &status);
2677 if (U_FAILURE(status)) {
2678 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2679 }
2680 }
2681 /*------------*/
2682
2683 src = source;
2684 targ = junkout;
2685 offs = junokout;
2686
2687 realBufferSize = UPRV_LENGTHOF(junkout);
2688 realBufferEnd = junkout + realBufferSize;
2689 realSourceEnd = source + sourceLen;
2690
2691 if ( gOutBufferSize != realBufferSize )
2692 checkOffsets = FALSE;
2693
2694 if( gInBufferSize != NEW_MAX_BUFFER )
2695 checkOffsets = FALSE;
2696
2697 do
2698 {
2699 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2700 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2701
2702 doFlush = (UBool)(sourceLimit == realSourceEnd);
2703
2704 if(targ == realBufferEnd)
2705 {
2706 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2707 return FALSE;
2708 }
2709 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2710
2711
2712 status = U_ZERO_ERROR;
2713
2714 ucnv_fromUnicode (conv,
2715 (char **)&targ,
2716 (const char *)end,
2717 &src,
2718 sourceLimit,
2719 checkOffsets ? offs : NULL,
2720 doFlush, /* flush if we're at the end of the input data */
2721 &status);
2722 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2723
2724
2725 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2726 UChar errChars[50]; /* should be sufficient */
2727 int8_t errLen = 50;
2728 UErrorCode err = U_ZERO_ERROR;
2729 const UChar* start= NULL;
2730 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2731 if(U_FAILURE(err)){
2732 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2733 }
2734 /* length of in invalid chars should be equal to returned length*/
2735 start = src - errLen;
2736 if(u_strncmp(errChars,start,errLen)!=0){
2737 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2738 }
2739 }
2740 /* allow failure codes for the stop callback */
2741 if(U_FAILURE(status) &&
2742 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2743 {
2744 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2745 return FALSE;
2746 }
2747
2748 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2749 sourceLen, targ-junkout);
2750 if(getTestOption(VERBOSITY_OPTION))
2751 {
2752
2753 junk[0] = 0;
2754 offset_str[0] = 0;
2755 for(p = junkout;p<targ;p++)
2756 {
2757 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2758 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2759 }
2760
2761 log_verbose(junk);
2762 printSeq(expect, expectLen);
2763 if ( checkOffsets )
2764 {
2765 log_verbose("\nOffsets:");
2766 log_verbose(offset_str);
2767 }
2768 log_verbose("\n");
2769 }
2770 ucnv_close(conv);
2771
2772
2773 if(expectLen != targ-junkout)
2774 {
2775 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2776 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2777 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2778 printSeqErr(expect, expectLen);
2779 return FALSE;
2780 }
2781
2782 if (checkOffsets && (expectOffsets != 0) )
2783 {
2784 log_verbose("comparing %d offsets..\n", targ-junkout);
2785 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2786 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2787 log_err("Got Output : ");
2788 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2789 log_err("Got Offsets: ");
2790 for(p=junkout;p<targ;p++)
2791 log_err("%d,", junokout[p-junkout]);
2792 log_err("\n");
2793 log_err("Expected Offsets: ");
2794 for(i=0; i<(targ-junkout); i++)
2795 log_err("%d,", expectOffsets[i]);
2796 log_err("\n");
2797 return FALSE;
2798 }
2799 }
2800
2801 if(!memcmp(junkout, expect, expectLen))
2802 {
2803 log_verbose("String matches! %s\n", gNuConvTestName);
2804 return TRUE;
2805 }
2806 else
2807 {
2808 log_err("String does not match. %s\n", gNuConvTestName);
2809 log_err("source: ");
2810 printUSeqErr(source, sourceLen);
2811 log_err("Got: ");
2812 printSeqErr((const uint8_t *)junkout, expectLen);
2813 log_err("Expected: ");
2814 printSeqErr(expect, expectLen);
2815 return FALSE;
2816 }
2817 }
2818
2819 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2820 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2821 const char *mySubChar, int8_t len)
2822 {
2823 UErrorCode status = U_ZERO_ERROR;
2824 UConverter *conv = 0;
2825 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2826 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2827 const char *src;
2828 const char *realSourceEnd;
2829 const char *srcLimit;
2830 UChar *targ;
2831 UChar *end;
2832 int32_t *offs;
2833 int i;
2834 UBool checkOffsets = TRUE;
2835 char junk[9999];
2836 char offset_str[9999];
2837 UChar *p;
2838 UConverterToUCallback oldAction = NULL;
2839 const void* oldContext = NULL;
2840
2841 int32_t realBufferSize;
2842 UChar *realBufferEnd;
2843
2844
2845 for(i=0;i<NEW_MAX_BUFFER;i++)
2846 junkout[i] = 0xFFFE;
2847
2848 for(i=0;i<NEW_MAX_BUFFER;i++)
2849 junokout[i] = -1;
2850
2851 setNuConvTestName(codepage, "TO");
2852
2853 log_verbose("\n========= %s\n", gNuConvTestName);
2854
2855 conv = ucnv_open(codepage, &status);
2856 if(U_FAILURE(status))
2857 {
2858 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2859 return TRUE;
2860 }
2861
2862 log_verbose("Converter opened..\n");
2863
2864 src = (const char *)source;
2865 targ = junkout;
2866 offs = junokout;
2867
2868 realBufferSize = UPRV_LENGTHOF(junkout);
2869 realBufferEnd = junkout + realBufferSize;
2870 realSourceEnd = src + sourcelen;
2871 /*----setting the callback routine----*/
2872 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2873 if (U_FAILURE(status))
2874 {
2875 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2876 }
2877 /*-------------------------------------*/
2878 /*setting the subChar*/
2879 if(mySubChar != NULL){
2880 ucnv_setSubstChars(conv, mySubChar, len, &status);
2881 if (U_FAILURE(status)) {
2882 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2883 }
2884 }
2885 /*------------*/
2886
2887
2888 if ( gOutBufferSize != realBufferSize )
2889 checkOffsets = FALSE;
2890
2891 if( gInBufferSize != NEW_MAX_BUFFER )
2892 checkOffsets = FALSE;
2893
2894 do
2895 {
2896 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2897 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2898
2899 if(targ == realBufferEnd)
2900 {
2901 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2902 return FALSE;
2903 }
2904 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2905
2906
2907
2908 status = U_ZERO_ERROR;
2909
2910 ucnv_toUnicode (conv,
2911 &targ,
2912 end,
2913 (const char **)&src,
2914 (const char *)srcLimit,
2915 checkOffsets ? offs : NULL,
2916 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2917 &status);
2918 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2919
2920 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2921 char errChars[50]; /* should be sufficient */
2922 int8_t errLen = 50;
2923 UErrorCode err = U_ZERO_ERROR;
2924 const char* start= NULL;
2925 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2926 if(U_FAILURE(err)){
2927 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2928 }
2929 /* length of in invalid chars should be equal to returned length*/
2930 start = src - errLen;
2931 if(uprv_strncmp(errChars,start,errLen)!=0){
2932 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2933 }
2934 }
2935 /* allow failure codes for the stop callback */
2936 if(U_FAILURE(status) &&
2937 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2938 {
2939 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2940 return FALSE;
2941 }
2942
2943 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2944 sourcelen, targ-junkout);
2945 if(getTestOption(VERBOSITY_OPTION))
2946 {
2947
2948 junk[0] = 0;
2949 offset_str[0] = 0;
2950
2951 for(p = junkout;p<targ;p++)
2952 {
2953 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2954 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2955 }
2956
2957 log_verbose(junk);
2958 printUSeq(expect, expectlen);
2959 if ( checkOffsets )
2960 {
2961 log_verbose("\nOffsets:");
2962 log_verbose(offset_str);
2963 }
2964 log_verbose("\n");
2965 }
2966 ucnv_close(conv);
2967
2968 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2969
2970 if (checkOffsets && (expectOffsets != 0))
2971 {
2972 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2973 {
2974 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2975 log_err("Got offsets: ");
2976 for(p=junkout;p<targ;p++)
2977 log_err(" %2d,", junokout[p-junkout]);
2978 log_err("\n");
2979 log_err("Expected offsets: ");
2980 for(i=0; i<(targ-junkout); i++)
2981 log_err(" %2d,", expectOffsets[i]);
2982 log_err("\n");
2983 log_err("Got output: ");
2984 for(i=0; i<(targ-junkout); i++)
2985 log_err("0x%04x,", junkout[i]);
2986 log_err("\n");
2987 log_err("From source: ");
2988 for(i=0; i<(src-(const char *)source); i++)
2989 log_err(" 0x%02x,", (unsigned char)source[i]);
2990 log_err("\n");
2991 }
2992 }
2993
2994 if(!memcmp(junkout, expect, expectlen*2))
2995 {
2996 log_verbose("Matches!\n");
2997 return TRUE;
2998 }
2999 else
3000 {
3001 log_err("String does not match. %s\n", gNuConvTestName);
3002 log_verbose("String does not match. %s\n", gNuConvTestName);
3003 log_err("Got: ");
3004 printUSeqErr(junkout, expectlen);
3005 log_err("Expected: ");
3006 printUSeqErr(expect, expectlen);
3007 log_err("\n");
3008 return FALSE;
3009 }
3010 }
3011
3012 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3013 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3014 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3015 {
3016
3017
3018 UErrorCode status = U_ZERO_ERROR;
3019 UConverter *conv = 0;
3020 char junkout[NEW_MAX_BUFFER]; /* FIX */
3021 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3022 const UChar *src;
3023 char *end;
3024 char *targ;
3025 int32_t *offs;
3026 int i;
3027 int32_t realBufferSize;
3028 char *realBufferEnd;
3029 const UChar *realSourceEnd;
3030 const UChar *sourceLimit;
3031 UBool checkOffsets = TRUE;
3032 UBool doFlush;
3033 char junk[9999];
3034 char offset_str[9999];
3035 char *p;
3036 UConverterFromUCallback oldAction = NULL;
3037 const void* oldContext = NULL;
3038
3039
3040 for(i=0;i<NEW_MAX_BUFFER;i++)
3041 junkout[i] = (char)0xF0;
3042 for(i=0;i<NEW_MAX_BUFFER;i++)
3043 junokout[i] = 0xFF;
3044 setNuConvTestName(codepage, "FROM");
3045
3046 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3047 gOutBufferSize);
3048
3049 conv = ucnv_open(codepage, &status);
3050 if(U_FAILURE(status))
3051 {
3052 log_data_err("Couldn't open converter %s\n",codepage);
3053 return TRUE; /* Because the err has already been logged. */
3054 }
3055
3056 log_verbose("Converter opened..\n");
3057
3058 /*----setting the callback routine----*/
3059 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3060 if (U_FAILURE(status))
3061 {
3062 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3063 }
3064 /*------------------------*/
3065 /*setting the subChar*/
3066 if(mySubChar != NULL){
3067 ucnv_setSubstChars(conv, mySubChar, len, &status);
3068 if (U_FAILURE(status)) {
3069 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3070 }
3071 }
3072 /*------------*/
3073
3074 src = source;
3075 targ = junkout;
3076 offs = junokout;
3077
3078 realBufferSize = UPRV_LENGTHOF(junkout);
3079 realBufferEnd = junkout + realBufferSize;
3080 realSourceEnd = source + sourceLen;
3081
3082 if ( gOutBufferSize != realBufferSize )
3083 checkOffsets = FALSE;
3084
3085 if( gInBufferSize != NEW_MAX_BUFFER )
3086 checkOffsets = FALSE;
3087
3088 do
3089 {
3090 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3091 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3092
3093 doFlush = (UBool)(sourceLimit == realSourceEnd);
3094
3095 if(targ == realBufferEnd)
3096 {
3097 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3098 return FALSE;
3099 }
3100 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3101
3102
3103 status = U_ZERO_ERROR;
3104
3105 ucnv_fromUnicode (conv,
3106 (char **)&targ,
3107 (const char *)end,
3108 &src,
3109 sourceLimit,
3110 checkOffsets ? offs : NULL,
3111 doFlush, /* flush if we're at the end of the input data */
3112 &status);
3113 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3114
3115 /* allow failure codes for the stop callback */
3116 if(U_FAILURE(status) && status != expectedError)
3117 {
3118 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3119 return FALSE;
3120 }
3121
3122 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3123 sourceLen, targ-junkout);
3124 if(getTestOption(VERBOSITY_OPTION))
3125 {
3126
3127 junk[0] = 0;
3128 offset_str[0] = 0;
3129 for(p = junkout;p<targ;p++)
3130 {
3131 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3132 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3133 }
3134
3135 log_verbose(junk);
3136 printSeq(expect, expectLen);
3137 if ( checkOffsets )
3138 {
3139 log_verbose("\nOffsets:");
3140 log_verbose(offset_str);
3141 }
3142 log_verbose("\n");
3143 }
3144 ucnv_close(conv);
3145
3146
3147 if(expectLen != targ-junkout)
3148 {
3149 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3150 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3151 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3152 printSeqErr(expect, expectLen);
3153 return FALSE;
3154 }
3155
3156 if (checkOffsets && (expectOffsets != 0) )
3157 {
3158 log_verbose("comparing %d offsets..\n", targ-junkout);
3159 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3160 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3161 log_err("Got Output : ");
3162 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3163 log_err("Got Offsets: ");
3164 for(p=junkout;p<targ;p++)
3165 log_err("%d,", junokout[p-junkout]);
3166 log_err("\n");
3167 log_err("Expected Offsets: ");
3168 for(i=0; i<(targ-junkout); i++)
3169 log_err("%d,", expectOffsets[i]);
3170 log_err("\n");
3171 return FALSE;
3172 }
3173 }
3174
3175 if(!memcmp(junkout, expect, expectLen))
3176 {
3177 log_verbose("String matches! %s\n", gNuConvTestName);
3178 return TRUE;
3179 }
3180 else
3181 {
3182 log_err("String does not match. %s\n", gNuConvTestName);
3183 log_err("source: ");
3184 printUSeqErr(source, sourceLen);
3185 log_err("Got: ");
3186 printSeqErr((const uint8_t *)junkout, expectLen);
3187 log_err("Expected: ");
3188 printSeqErr(expect, expectLen);
3189 return FALSE;
3190 }
3191 }
3192 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3193 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3194 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3195 {
3196 UErrorCode status = U_ZERO_ERROR;
3197 UConverter *conv = 0;
3198 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3199 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3200 const char *src;
3201 const char *realSourceEnd;
3202 const char *srcLimit;
3203 UChar *targ;
3204 UChar *end;
3205 int32_t *offs;
3206 int i;
3207 UBool checkOffsets = TRUE;
3208 char junk[9999];
3209 char offset_str[9999];
3210 UChar *p;
3211 UConverterToUCallback oldAction = NULL;
3212 const void* oldContext = NULL;
3213
3214 int32_t realBufferSize;
3215 UChar *realBufferEnd;
3216
3217
3218 for(i=0;i<NEW_MAX_BUFFER;i++)
3219 junkout[i] = 0xFFFE;
3220
3221 for(i=0;i<NEW_MAX_BUFFER;i++)
3222 junokout[i] = -1;
3223
3224 setNuConvTestName(codepage, "TO");
3225
3226 log_verbose("\n========= %s\n", gNuConvTestName);
3227
3228 conv = ucnv_open(codepage, &status);
3229 if(U_FAILURE(status))
3230 {
3231 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3232 return TRUE;
3233 }
3234
3235 log_verbose("Converter opened..\n");
3236
3237 src = (const char *)source;
3238 targ = junkout;
3239 offs = junokout;
3240
3241 realBufferSize = UPRV_LENGTHOF(junkout);
3242 realBufferEnd = junkout + realBufferSize;
3243 realSourceEnd = src + sourcelen;
3244 /*----setting the callback routine----*/
3245 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3246 if (U_FAILURE(status))
3247 {
3248 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3249 }
3250 /*-------------------------------------*/
3251 /*setting the subChar*/
3252 if(mySubChar != NULL){
3253 ucnv_setSubstChars(conv, mySubChar, len, &status);
3254 if (U_FAILURE(status)) {
3255 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3256 }
3257 }
3258 /*------------*/
3259
3260
3261 if ( gOutBufferSize != realBufferSize )
3262 checkOffsets = FALSE;
3263
3264 if( gInBufferSize != NEW_MAX_BUFFER )
3265 checkOffsets = FALSE;
3266
3267 do
3268 {
3269 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3270 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3271
3272 if(targ == realBufferEnd)
3273 {
3274 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3275 return FALSE;
3276 }
3277 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3278
3279
3280
3281 status = U_ZERO_ERROR;
3282
3283 ucnv_toUnicode (conv,
3284 &targ,
3285 end,
3286 (const char **)&src,
3287 (const char *)srcLimit,
3288 checkOffsets ? offs : NULL,
3289 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3290 &status);
3291 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3292
3293 /* allow failure codes for the stop callback */
3294 if(U_FAILURE(status) && status!=expectedError)
3295 {
3296 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3297 return FALSE;
3298 }
3299
3300 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3301 sourcelen, targ-junkout);
3302 if(getTestOption(VERBOSITY_OPTION))
3303 {
3304
3305 junk[0] = 0;
3306 offset_str[0] = 0;
3307
3308 for(p = junkout;p<targ;p++)
3309 {
3310 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3311 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3312 }
3313
3314 log_verbose(junk);
3315 printUSeq(expect, expectlen);
3316 if ( checkOffsets )
3317 {
3318 log_verbose("\nOffsets:");
3319 log_verbose(offset_str);
3320 }
3321 log_verbose("\n");
3322 }
3323 ucnv_close(conv);
3324
3325 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3326
3327 if (checkOffsets && (expectOffsets != 0))
3328 {
3329 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3330 {
3331 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3332 log_err("Got offsets: ");
3333 for(p=junkout;p<targ;p++)
3334 log_err(" %2d,", junokout[p-junkout]);
3335 log_err("\n");
3336 log_err("Expected offsets: ");
3337 for(i=0; i<(targ-junkout); i++)
3338 log_err(" %2d,", expectOffsets[i]);
3339 log_err("\n");
3340 log_err("Got output: ");
3341 for(i=0; i<(targ-junkout); i++)
3342 log_err("0x%04x,", junkout[i]);
3343 log_err("\n");
3344 log_err("From source: ");
3345 for(i=0; i<(src-(const char *)source); i++)
3346 log_err(" 0x%02x,", (unsigned char)source[i]);
3347 log_err("\n");
3348 }
3349 }
3350
3351 if(!memcmp(junkout, expect, expectlen*2))
3352 {
3353 log_verbose("Matches!\n");
3354 return TRUE;
3355 }
3356 else
3357 {
3358 log_err("String does not match. %s\n", gNuConvTestName);
3359 log_verbose("String does not match. %s\n", gNuConvTestName);
3360 log_err("Got: ");
3361 printUSeqErr(junkout, expectlen);
3362 log_err("Expected: ");
3363 printUSeqErr(expect, expectlen);
3364 log_err("\n");
3365 return FALSE;
3366 }
3367 }
3368
3369 static void TestCallBackFailure(void) {
3370 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3371 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3372 if (status != U_USELESS_COLLATOR_ERROR) {
3373 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3374 }
3375 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3376 if (status != U_USELESS_COLLATOR_ERROR) {
3377 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3378 }
3379 ucnv_cbFromUWriteSub(NULL, -1, &status);
3380 if (status != U_USELESS_COLLATOR_ERROR) {
3381 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3382 }
3383 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3384 if (status != U_USELESS_COLLATOR_ERROR) {
3385 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3386 }
3387 }