]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /******************************************************************** | |
4 | * COPYRIGHT: | |
5 | * Copyright (c) 1997-2016, International Business Machines Corporation and | |
6 | * others. All Rights Reserved. | |
7 | ********************************************************************/ | |
8 | /* | |
9 | ******************************************************************************** | |
10 | * File NCCBTST.C | |
11 | * | |
12 | * Modification History: | |
13 | * Name Description | |
14 | * Madhu Katragadda 7/21/1999 Testing error callback routines | |
15 | ******************************************************************************** | |
16 | */ | |
17 | #include <stdio.h> | |
18 | #include <stdlib.h> | |
19 | #include <string.h> | |
20 | #include <ctype.h> | |
21 | #include "cmemory.h" | |
22 | #include "cstring.h" | |
23 | #include "unicode/uloc.h" | |
24 | #include "unicode/ucnv.h" | |
25 | #include "unicode/ucnv_err.h" | |
26 | #include "cintltst.h" | |
27 | #include "unicode/utypes.h" | |
28 | #include "unicode/ustring.h" | |
29 | #include "nccbtst.h" | |
30 | #include "unicode/ucnv_cb.h" | |
31 | #include "unicode/utf16.h" | |
32 | ||
/* Large buffer size passed by the *CallBack drivers below as the "big" input/output size. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every use of a parameter is parenthesized so that arguments containing
 * low-precedence operators (|, &, ?:) are compared as a whole.
 * The selected argument is evaluated twice: do not pass expressions with
 * side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Input/output buffer sizes for the test run in progress; read by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable label of the current test, filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
40 | ||
/*
 * Emits the first `len` bytes of `a` through log_verbose() as a
 * brace-enclosed list of two-digit uppercase hex values.
 * The list is preceded by a newline; nothing is listed when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
49 | ||
50 | static void printUSeq(const UChar* a, int len) | |
51 | { | |
52 | int i=0; | |
53 | log_verbose("{"); | |
54 | while (i<len) | |
55 | log_verbose(" 0x%04x, ", a[i++]); | |
56 | log_verbose("}\n"); | |
57 | } | |
58 | ||
/*
 * Writes the first `len` bytes of `a` to stderr as a brace-enclosed list
 * of two-digit lowercase hex values. Nothing is listed when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
67 | ||
68 | static void printUSeqErr(const UChar* a, int len) | |
69 | { | |
70 | int i=0; | |
71 | fprintf(stderr, "{"); | |
72 | while (i<len) | |
73 | fprintf(stderr, "0x%04x, ", a[i++]); | |
74 | fprintf(stderr,"}\n"); | |
75 | } | |
76 | ||
77 | static void setNuConvTestName(const char *codepage, const char *direction) | |
78 | { | |
79 | sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", | |
80 | codepage, | |
81 | direction, | |
82 | (int)gInBufferSize, | |
83 | (int)gOutBufferSize); | |
84 | } | |
85 | ||
86 | ||
87 | static void TestCallBackFailure(void); | |
88 | ||
89 | void addTestConvertErrorCallBack(TestNode** root); | |
90 | ||
91 | void addTestConvertErrorCallBack(TestNode** root) | |
92 | { | |
93 | addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); | |
94 | addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); | |
95 | addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); | |
96 | addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); | |
97 | ||
98 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
99 | addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); | |
100 | addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); | |
101 | #endif | |
102 | ||
103 | addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); | |
104 | } | |
105 | ||
106 | static void TestSkipCallBack() | |
107 | { | |
108 | TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
109 | TestSkip(1,NEW_MAX_BUFFER); | |
110 | TestSkip(1,1); | |
111 | TestSkip(NEW_MAX_BUFFER, 1); | |
112 | } | |
113 | ||
114 | static void TestStopCallBack() | |
115 | { | |
116 | TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
117 | TestStop(1,NEW_MAX_BUFFER); | |
118 | TestStop(1,1); | |
119 | TestStop(NEW_MAX_BUFFER, 1); | |
120 | } | |
121 | ||
122 | static void TestSubCallBack() | |
123 | { | |
124 | TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
125 | TestSub(1,NEW_MAX_BUFFER); | |
126 | TestSub(1,1); | |
127 | TestSub(NEW_MAX_BUFFER, 1); | |
128 | ||
129 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
130 | TestEBCDIC_STATEFUL_Sub(1, 1); | |
131 | TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); | |
132 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); | |
133 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
134 | #endif | |
135 | } | |
136 | ||
137 | static void TestSubWithValueCallBack() | |
138 | { | |
139 | TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
140 | TestSubWithValue(1,NEW_MAX_BUFFER); | |
141 | TestSubWithValue(1,1); | |
142 | TestSubWithValue(NEW_MAX_BUFFER, 1); | |
143 | } | |
144 | ||
145 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
146 | static void TestLegalAndOtherCallBack() | |
147 | { | |
148 | TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
149 | TestLegalAndOthers(1,NEW_MAX_BUFFER); | |
150 | TestLegalAndOthers(1,1); | |
151 | TestLegalAndOthers(NEW_MAX_BUFFER, 1); | |
152 | } | |
153 | ||
154 | static void TestSingleByteCallBack() | |
155 | { | |
156 | TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
157 | TestSingleByte(1,NEW_MAX_BUFFER); | |
158 | TestSingleByte(1,1); | |
159 | TestSingleByte(NEW_MAX_BUFFER, 1); | |
160 | } | |
161 | #endif | |
162 | ||
163 | static void TestSkip(int32_t inputsize, int32_t outputsize) | |
164 | { | |
165 | static const uint8_t expskipIBM_949[]= { | |
166 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
167 | ||
168 | static const uint8_t expskipIBM_943[] = { | |
169 | 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; | |
170 | ||
171 | static const uint8_t expskipIBM_930[] = { | |
172 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; | |
173 | ||
174 | gInBufferSize = inputsize; | |
175 | gOutBufferSize = outputsize; | |
176 | ||
177 | /*From Unicode*/ | |
178 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); | |
179 | ||
180 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
181 | { | |
182 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
183 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
184 | ||
185 | static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; | |
186 | static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; | |
187 | ||
188 | if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
189 | expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949", | |
190 | UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) | |
191 | log_err("u-> ibm-949 with skip did not match.\n"); | |
192 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
193 | expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943", | |
194 | UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) | |
195 | log_err("u-> ibm-943 with skip did not match.\n"); | |
196 | } | |
197 | ||
198 | { | |
199 | static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; | |
200 | static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; | |
201 | static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; | |
202 | ||
203 | /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ | |
204 | if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU), | |
205 | fromUBytes, UPRV_LENGTHOF(fromUBytes), | |
206 | "ibm-930", | |
207 | UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, | |
208 | NULL, 0) | |
209 | ) { | |
210 | log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); | |
211 | } | |
212 | } | |
213 | #endif | |
214 | ||
215 | { | |
216 | static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
217 | static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; | |
218 | static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; | |
219 | ||
220 | static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
221 | static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; | |
222 | static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; | |
223 | ||
224 | /* US-ASCII */ | |
225 | if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU), | |
226 | usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes), | |
227 | "US-ASCII", | |
228 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
229 | NULL, 0) | |
230 | ) { | |
231 | log_err("u->US-ASCII with skip did not match.\n"); | |
232 | } | |
233 | ||
234 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
235 | /* SBCS NLTC codepage 367 for US-ASCII */ | |
236 | if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU), | |
237 | usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes), | |
238 | "ibm-367", | |
239 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
240 | NULL, 0) | |
241 | ) { | |
242 | log_err("u->ibm-367 with skip did not match.\n"); | |
243 | } | |
244 | #endif | |
245 | ||
246 | /* ISO-Latin-1 */ | |
247 | if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU), | |
248 | latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes), | |
249 | "LATIN_1", | |
250 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
251 | NULL, 0) | |
252 | ) { | |
253 | log_err("u->LATIN_1 with skip did not match.\n"); | |
254 | } | |
255 | ||
256 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
257 | /* windows-1252 */ | |
258 | if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU), | |
259 | latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes), | |
260 | "windows-1252", | |
261 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
262 | NULL, 0) | |
263 | ) { | |
264 | log_err("u->windows-1252 with skip did not match.\n"); | |
265 | } | |
266 | } | |
267 | ||
268 | { | |
269 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
270 | static const uint8_t toIBM943[]= { 0x61, 0x61 }; | |
271 | static const int32_t offset[]= {0, 4}; | |
272 | ||
273 | /* EUC_JP*/ | |
274 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
275 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
276 | 0x61, 0x8e, 0xe0, | |
277 | }; | |
278 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; | |
279 | ||
280 | /*EUC_TW*/ | |
281 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
282 | static const uint8_t to_euc_tw[]={ | |
283 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
284 | 0x61, 0xe6, 0xca, 0x8a, | |
285 | }; | |
286 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; | |
287 | ||
288 | /*ISO-2022-JP*/ | |
289 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; | |
290 | static const uint8_t to_iso_2022_jp[]={ | |
291 | 0x41, | |
292 | 0x42, | |
293 | ||
294 | }; | |
295 | static const int32_t from_iso_2022_jpOffs [] ={0,2}; | |
296 | ||
297 | /*ISO-2022-JP*/ | |
298 | UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
299 | static const uint8_t to_iso_2022_jp2[]={ | |
300 | 0x41, | |
301 | 0x43, | |
302 | ||
303 | }; | |
304 | static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; | |
305 | ||
306 | /*ISO-2022-cn*/ | |
307 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
308 | static const uint8_t to_iso_2022_cn[]={ | |
309 | 0x41, 0x42 | |
310 | }; | |
311 | static const int32_t from_iso_2022_cnOffs [] ={ | |
312 | 0, 2 | |
313 | }; | |
314 | ||
315 | /*ISO-2022-CN*/ | |
316 | static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
317 | static const uint8_t to_iso_2022_cn1[]={ | |
318 | 0x41, 0x43 | |
319 | ||
320 | }; | |
321 | static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; | |
322 | ||
323 | /*ISO-2022-kr*/ | |
324 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
325 | static const uint8_t to_iso_2022_kr[]={ | |
326 | 0x1b, 0x24, 0x29, 0x43, | |
327 | 0x41, | |
328 | 0x0e, 0x25, 0x50, | |
329 | 0x25, 0x50, | |
330 | 0x0f, 0x42, | |
331 | }; | |
332 | static const int32_t from_iso_2022_krOffs [] ={ | |
333 | -1,-1,-1,-1, | |
334 | 0, | |
335 | 1,1,1, | |
336 | 3,3, | |
337 | 4,4 | |
338 | }; | |
339 | ||
340 | /*ISO-2022-kr*/ | |
341 | static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
342 | static const uint8_t to_iso_2022_kr1[]={ | |
343 | 0x1b, 0x24, 0x29, 0x43, | |
344 | 0x41, | |
345 | 0x0e, 0x25, 0x50, | |
346 | 0x25, 0x50, | |
347 | ||
348 | }; | |
349 | static const int32_t from_iso_2022_krOffs1 [] ={ | |
350 | -1,-1,-1,-1, | |
351 | 0, | |
352 | 1,1,1, | |
353 | 3,3, | |
354 | ||
355 | }; | |
356 | /* HZ encoding */ | |
357 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
358 | ||
359 | static const uint8_t to_hz[]={ | |
360 | 0x7e, 0x7d, 0x41, | |
361 | 0x7e, 0x7b, 0x26, 0x30, | |
362 | 0x26, 0x30, | |
363 | 0x7e, 0x7d, 0x42, | |
364 | ||
365 | }; | |
366 | static const int32_t from_hzOffs [] ={ | |
367 | 0,0,0, | |
368 | 1,1,1,1, | |
369 | 3,3, | |
370 | 4,4,4,4 | |
371 | }; | |
372 | ||
373 | static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
374 | ||
375 | static const uint8_t to_hz1[]={ | |
376 | 0x7e, 0x7d, 0x41, | |
377 | 0x7e, 0x7b, 0x26, 0x30, | |
378 | 0x26, 0x30, | |
379 | ||
380 | ||
381 | }; | |
382 | static const int32_t from_hzOffs1 [] ={ | |
383 | 0,0,0, | |
384 | 1,1,1,1, | |
385 | 3,3, | |
386 | ||
387 | }; | |
388 | ||
389 | #endif | |
390 | ||
391 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
392 | ||
393 | static const uint8_t to_SCSU[]={ | |
394 | 0x41, | |
395 | 0x42 | |
396 | ||
397 | ||
398 | }; | |
399 | static const int32_t from_SCSUOffs [] ={ | |
400 | 0, | |
401 | 2, | |
402 | ||
403 | }; | |
404 | ||
405 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
406 | /* ISCII */ | |
407 | static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
408 | static const uint8_t to_iscii[]={ | |
409 | 0x41, | |
410 | 0x42, | |
411 | }; | |
412 | static const int32_t from_isciiOffs [] ={ | |
413 | 0,2, | |
414 | ||
415 | }; | |
416 | /*ISCII*/ | |
417 | static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
418 | static const uint8_t to_iscii1[]={ | |
419 | 0x44, | |
420 | 0x43, | |
421 | ||
422 | }; | |
423 | static const int32_t from_isciiOffs1 [] ={0,2}; | |
424 | ||
425 | if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), | |
426 | toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", | |
427 | UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) | |
428 | log_err("u-> ibm-943 with skip did not match.\n"); | |
429 | ||
430 | if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), | |
431 | to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", | |
432 | UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) | |
433 | log_err("u-> euc-jp with skip did not match.\n"); | |
434 | ||
435 | if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), | |
436 | to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", | |
437 | UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) | |
438 | log_err("u-> euc-tw with skip did not match.\n"); | |
439 | ||
440 | /*iso_2022_jp*/ | |
441 | if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), | |
442 | to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", | |
443 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) | |
444 | log_err("u-> iso-2022-jp with skip did not match.\n"); | |
445 | ||
446 | /* with context */ | |
447 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2), | |
448 | to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp", | |
449 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
450 | log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
451 | ||
452 | /*iso_2022_cn*/ | |
453 | if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), | |
454 | to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", | |
455 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) | |
456 | log_err("u-> iso-2022-cn with skip did not match.\n"); | |
457 | /*with context*/ | |
458 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1), | |
459 | to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn", | |
460 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
461 | log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
462 | ||
463 | /*iso_2022_kr*/ | |
464 | if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), | |
465 | to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", | |
466 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) | |
467 | log_err("u-> iso-2022-kr with skip did not match.\n"); | |
468 | /*with context*/ | |
469 | if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1), | |
470 | to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr", | |
471 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
472 | log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
473 | ||
474 | /*hz*/ | |
475 | if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), | |
476 | to_hz, UPRV_LENGTHOF(to_hz), "HZ", | |
477 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) | |
478 | log_err("u-> HZ with skip did not match.\n"); | |
479 | /*with context*/ | |
480 | if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1), | |
481 | to_hz1, UPRV_LENGTHOF(to_hz1), "hz", | |
482 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
483 | log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
484 | #endif | |
485 | ||
486 | /*SCSU*/ | |
487 | if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), | |
488 | to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", | |
489 | UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) | |
490 | log_err("u-> SCSU with skip did not match.\n"); | |
491 | ||
492 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
493 | /*ISCII*/ | |
494 | if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), | |
495 | to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", | |
496 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) | |
497 | log_err("u-> iscii with skip did not match.\n"); | |
498 | /*with context*/ | |
499 | if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1), | |
500 | to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0", | |
501 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
502 | log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
503 | #endif | |
504 | } | |
505 | ||
506 | log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
507 | { | |
508 | static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
509 | 0xFB, 0xEE, 0x28, /* from source offset 0 */ | |
510 | 0x24, 0x1E, 0x52, | |
511 | 0xB2, | |
512 | 0x20, | |
513 | 0xB3, | |
514 | 0xB1, | |
515 | 0x0D, | |
516 | 0x0A, | |
517 | ||
518 | 0x20, /* from 8 */ | |
519 | 0x00, | |
520 | 0xD0, 0x6C, | |
521 | 0xB6, | |
522 | 0xD8, 0xA5, | |
523 | 0x20, | |
524 | 0x68, | |
525 | 0x59, | |
526 | ||
527 | 0xF9, 0x28, /* from 16 */ | |
528 | 0x6D, | |
529 | 0x20, | |
530 | 0x73, | |
531 | 0xE0, 0x2D, | |
532 | 0xDE, 0x43, | |
533 | 0xD0, 0x33, | |
534 | 0x20, | |
535 | ||
536 | 0xFA, 0x83, /* from 24 */ | |
537 | 0x25, 0x01, | |
538 | 0xFB, 0x16, 0x87, | |
539 | 0x4B, 0x16, | |
540 | 0x20, | |
541 | 0xE6, 0xBD, | |
542 | 0xEB, 0x5B, | |
543 | 0x4B, 0xCC, | |
544 | ||
545 | 0xF9, 0xA2, /* from 32 */ | |
546 | 0xFC, 0x10, 0x3E, | |
547 | 0xFE, 0x16, 0x3A, 0x8C, | |
548 | 0x20, | |
549 | 0xFC, 0x03, 0xAC, | |
550 | ||
551 | 0x01, /* from 41 */ | |
552 | 0xDE, 0x83, | |
553 | 0x20, | |
554 | 0x09 | |
555 | }; | |
556 | static const UChar expected[]={ | |
557 | 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ | |
558 | 0x0063, 0x0061, 0x000D, 0x000A, | |
559 | ||
560 | 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ | |
561 | 0x0930, 0x0020, 0x0918, 0x0909, | |
562 | ||
563 | 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ | |
564 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
565 | ||
566 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ | |
567 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
568 | ||
569 | 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ | |
570 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
571 | ||
572 | 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ | |
573 | 0x0009 | |
574 | }; | |
575 | static const int32_t offsets[]={ | |
576 | 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, | |
577 | 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, | |
578 | 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, | |
579 | 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, | |
580 | 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, | |
581 | 41, 42, 42, 43, 44 | |
582 | }; | |
583 | ||
584 | /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ | |
585 | if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), | |
586 | sampleText, UPRV_LENGTHOF(sampleText), | |
587 | "BOCU-1", | |
588 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
589 | ) { | |
590 | log_err("u->BOCU-1 with skip did not match.\n"); | |
591 | } | |
592 | } | |
593 | ||
594 | log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
595 | { | |
596 | const uint8_t sampleText[]={ | |
597 | 0x61, /* 'a' */ | |
598 | 0xc4, 0xb5, /* U+0135 */ | |
599 | 0xed, 0x80, 0xa0, /* Hangul U+d020 */ | |
600 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ | |
601 | 0xee, 0x80, 0x80, /* PUA U+e000 */ | |
602 | 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ | |
603 | 0x62, /* 'b' */ | |
604 | 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ | |
605 | 0xd0, 0x80 /* U+0400 */ | |
606 | }; | |
607 | UChar expected[]={ | |
608 | 0x0061, | |
609 | 0x0135, | |
610 | 0xd020, | |
611 | 0xd801, 0xdc01, | |
612 | 0xe000, | |
613 | 0xdc01, | |
614 | 0x0062, | |
615 | 0xd801, | |
616 | 0x0400 | |
617 | }; | |
618 | int32_t offsets[]={ | |
619 | 0, | |
620 | 1, 1, | |
621 | 2, 2, 2, | |
622 | 3, 3, 3, 4, 4, 4, | |
623 | 5, 5, 5, | |
624 | 6, 6, 6, | |
625 | 7, | |
626 | 8, 8, 8, | |
627 | 9, 9 | |
628 | }; | |
629 | ||
630 | /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ | |
631 | ||
632 | /* without offsets */ | |
633 | if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), | |
634 | sampleText, UPRV_LENGTHOF(sampleText), | |
635 | "CESU-8", | |
636 | UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) | |
637 | ) { | |
638 | log_err("u->CESU-8 with skip did not match.\n"); | |
639 | } | |
640 | ||
641 | /* with offsets */ | |
642 | if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected), | |
643 | sampleText, UPRV_LENGTHOF(sampleText), | |
644 | "CESU-8", | |
645 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
646 | ) { | |
647 | log_err("u->CESU-8 with skip did not match.\n"); | |
648 | } | |
649 | } | |
650 | ||
651 | /*to Unicode*/ | |
652 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); | |
653 | ||
654 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
655 | { | |
656 | ||
657 | static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
658 | static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
659 | static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
660 | ||
661 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; | |
662 | static const int32_t fromIBM943Offs [] = { 0, 2, 4}; | |
663 | static const int32_t fromIBM930Offs [] = { 1, 3, 5}; | |
664 | ||
665 | if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), | |
666 | IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949", | |
667 | UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) | |
668 | log_err("ibm-949->u with skip did not match.\n"); | |
669 | if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), | |
670 | IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943", | |
671 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) | |
672 | log_err("ibm-943->u with skip did not match.\n"); | |
673 | ||
674 | ||
675 | if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930), | |
676 | IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930", | |
677 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) | |
678 | log_err("ibm-930->u with skip did not match.\n"); | |
679 | ||
680 | ||
681 | if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930), | |
682 | IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930", | |
683 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
684 | log_err("ibm-930->u with skip did not match.\n"); | |
685 | } | |
686 | #endif | |
687 | ||
688 | { | |
689 | static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; | |
690 | static const UChar usasciiToU[] = { 0x61, 0x31 }; | |
691 | static const int32_t usasciiToUOffsets[] = { 0, 2 }; | |
692 | ||
693 | static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; | |
694 | static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; | |
695 | static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; | |
696 | ||
697 | /* US-ASCII */ | |
698 | if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes), | |
699 | usasciiToU, UPRV_LENGTHOF(usasciiToU), | |
700 | "US-ASCII", | |
701 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
702 | NULL, 0) | |
703 | ) { | |
704 | log_err("US-ASCII->u with skip did not match.\n"); | |
705 | } | |
706 | ||
707 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
708 | /* SBCS NLTC codepage 367 for US-ASCII */ | |
709 | if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes), | |
710 | usasciiToU, UPRV_LENGTHOF(usasciiToU), | |
711 | "ibm-367", | |
712 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
713 | NULL, 0) | |
714 | ) { | |
715 | log_err("ibm-367->u with skip did not match.\n"); | |
716 | } | |
717 | #endif | |
718 | ||
719 | /* ISO-Latin-1 */ | |
720 | if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes), | |
721 | latin1ToU, UPRV_LENGTHOF(latin1ToU), | |
722 | "LATIN_1", | |
723 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
724 | NULL, 0) | |
725 | ) { | |
726 | log_err("LATIN_1->u with skip did not match.\n"); | |
727 | } | |
728 | ||
729 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
730 | /* windows-1252 */ | |
731 | if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes), | |
732 | latin1ToU, UPRV_LENGTHOF(latin1ToU), | |
733 | "windows-1252", | |
734 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
735 | NULL, 0) | |
736 | ) { | |
737 | log_err("windows-1252->u with skip did not match.\n"); | |
738 | } | |
739 | #endif | |
740 | } | |
741 | ||
742 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
743 | { | |
744 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
745 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
746 | }; | |
747 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 | |
748 | }; | |
749 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; | |
750 | ||
751 | ||
752 | /* euc-jp*/ | |
753 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
754 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
755 | 0x8e, 0xe0, | |
756 | }; | |
757 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; | |
758 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; | |
759 | ||
760 | /*EUC_TW*/ | |
761 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
762 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
763 | 0xe6, 0xca, 0x8a, | |
764 | }; | |
765 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; | |
766 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; | |
767 | /*iso-2022-jp*/ | |
768 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
769 | 0x41, | |
770 | 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
771 | 0x1b, 0x28, 0x42, 0x42, | |
772 | ||
773 | }; | |
774 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; | |
775 | static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; | |
776 | ||
777 | /*iso-2022-cn*/ | |
778 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
779 | 0x0f, 0x41, 0x44, | |
780 | 0x1B, 0x24, 0x29, 0x47, | |
781 | 0x0E, 0x40, 0x6f, /*unassigned*/ | |
782 | 0x0f, 0x42, | |
783 | ||
784 | }; | |
785 | ||
786 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; | |
787 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; | |
788 | ||
789 | /*iso-2022-kr*/ | |
790 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
791 | 0x1b, 0x24, 0x29, 0x43, | |
792 | 0x41, | |
793 | 0x0E, 0x7f, 0x1E, | |
794 | 0x0e, 0x25, 0x50, | |
795 | 0x0f, 0x51, | |
796 | 0x42, 0x43, | |
797 | ||
798 | }; | |
799 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; | |
800 | static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; | |
801 | ||
802 | /*hz*/ | |
803 | static const uint8_t sampleTxt_hz[]={ | |
804 | 0x41, | |
805 | 0x7e, 0x7b, 0x26, 0x30, | |
806 | 0x7f, 0x1E, /*unassigned*/ | |
807 | 0x26, 0x30, | |
808 | 0x7e, 0x7d, 0x42, | |
809 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
810 | 0x7e, 0x7d, 0x42, | |
811 | }; | |
812 | static const UChar hztoUnicode[]={ | |
813 | 0x41, | |
814 | 0x03a0, | |
815 | 0x03A0, | |
816 | 0x42, | |
817 | 0x42,}; | |
818 | ||
819 | static const int32_t from_hzOffs [] ={0,3,7,11,18, }; | |
820 | ||
821 | /*ISCII*/ | |
822 | static const uint8_t sampleTxt_iscii[]={ | |
823 | 0x41, | |
824 | 0xa1, | |
825 | 0xEB, /*unassigned*/ | |
826 | 0x26, | |
827 | 0x30, | |
828 | 0xa2, | |
829 | 0xEC, /*unassigned*/ | |
830 | 0x42, | |
831 | }; | |
832 | static const UChar isciitoUnicode[]={ | |
833 | 0x41, | |
834 | 0x0901, | |
835 | 0x26, | |
836 | 0x30, | |
837 | 0x0902, | |
838 | 0x42, | |
839 | }; | |
840 | ||
841 | static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; | |
842 | ||
843 | /*LMBCS*/ | |
844 | static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, | |
845 | 0x12, 0x92, 0xa0, /*unassigned*/ | |
846 | 0x12, 0x92, 0xA1, | |
847 | }; | |
848 | static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; | |
849 | static const int32_t fromLMBCS[] = {0, 6}; | |
850 | ||
851 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), | |
852 | EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", | |
853 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
854 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
855 | ||
856 | if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), | |
857 | EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", | |
858 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
859 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
860 | ||
861 | if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), | |
862 | euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", | |
863 | UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) | |
864 | log_err("euc-jp->u with skip did not match.\n"); | |
865 | ||
866 | ||
867 | ||
868 | if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), | |
869 | euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", | |
870 | UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) | |
871 | log_err("euc-tw->u with skip did not match.\n"); | |
872 | ||
873 | ||
874 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
875 | iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", | |
876 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) | |
877 | log_err("iso-2022-jp->u with skip did not match.\n"); | |
878 | ||
879 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn), | |
880 | iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn", | |
881 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) | |
882 | log_err("iso-2022-cn->u with skip did not match.\n"); | |
883 | ||
884 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr), | |
885 | iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr", | |
886 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) | |
887 | log_err("iso-2022-kr->u with skip did not match.\n"); | |
888 | ||
889 | if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz), | |
890 | hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ", | |
891 | UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) | |
892 | log_err("HZ->u with skip did not match.\n"); | |
893 | ||
894 | if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii), | |
895 | isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0", | |
896 | UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) | |
897 | log_err("iscii->u with skip did not match.\n"); | |
898 | ||
899 | if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS), | |
900 | LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1", | |
901 | UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) | |
902 | log_err("LMBCS->u with skip did not match.\n"); | |
903 | ||
904 | } | |
905 | #endif | |
906 | ||
907 | log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); | |
908 | { | |
909 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
910 | 0xe0, 0x80, 0x61,}; | |
911 | UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; | |
912 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; | |
913 | ||
914 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
915 | expected1, UPRV_LENGTHOF(expected1),"utf8", | |
916 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
917 | log_err("utf8->u with skip did not match.\n"); | |
918 | } | |
919 | ||
920 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); | |
921 | { | |
922 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
923 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; | |
924 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
925 | ||
926 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
927 | expected1, UPRV_LENGTHOF(expected1),"SCSU", | |
928 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
929 | log_err("scsu->u with skip did not match.\n"); | |
930 | } | |
931 | ||
932 | log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
933 | { | |
934 | const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
935 | 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ | |
936 | 0x24, 0x1E, 0x52, /* 3 */ | |
937 | 0xB2, /* 6 */ | |
938 | 0x20, /* 7 */ | |
939 | 0x40, 0x07, /* 8 - wrong trail byte */ | |
940 | 0xB3, /* 10 */ | |
941 | 0xB1, /* 11 */ | |
942 | 0xD0, 0x20, /* 12 - wrong trail byte */ | |
943 | 0x0D, /* 14 */ | |
944 | 0x0A, /* 15 */ | |
945 | 0x20, /* 16 */ | |
946 | 0x00, /* 17 */ | |
947 | 0xD0, 0x6C, /* 18 */ | |
948 | 0xB6, /* 20 */ | |
949 | 0xD8, 0xA5, /* 21 */ | |
950 | 0x20, /* 23 */ | |
951 | 0x68, /* 24 */ | |
952 | 0x59, /* 25 */ | |
953 | 0xF9, 0x28, /* 26 */ | |
954 | 0x6D, /* 28 */ | |
955 | 0x20, /* 29 */ | |
956 | 0x73, /* 30 */ | |
957 | 0xE0, 0x2D, /* 31 */ | |
958 | 0xDE, 0x43, /* 33 */ | |
959 | 0xD0, 0x33, /* 35 */ | |
960 | 0x20, /* 37 */ | |
961 | 0xFA, 0x83, /* 38 */ | |
962 | 0x25, 0x01, /* 40 */ | |
963 | 0xFB, 0x16, 0x87, /* 42 */ | |
964 | 0x4B, 0x16, /* 45 */ | |
965 | 0x20, /* 47 */ | |
966 | 0xE6, 0xBD, /* 48 */ | |
967 | 0xEB, 0x5B, /* 50 */ | |
968 | 0x4B, 0xCC, /* 52 */ | |
969 | 0xF9, 0xA2, /* 54 */ | |
970 | 0xFC, 0x10, 0x3E, /* 56 */ | |
971 | 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ | |
972 | 0x20, /* 63 */ | |
973 | 0xFC, 0x03, 0xAC, /* 64 */ | |
974 | 0xFF, /* 67 - FF just resets the state without encoding anything */ | |
975 | 0x01, /* 68 */ | |
976 | 0xDE, 0x83, /* 69 */ | |
977 | 0x20, /* 71 */ | |
978 | 0x09 /* 72 */ | |
979 | }; | |
980 | UChar expected[]={ | |
981 | 0xFEFF, 0x0061, 0x0062, 0x0020, | |
982 | 0x0063, 0x0061, 0x000D, 0x000A, | |
983 | 0x0020, 0x0000, 0x00DF, 0x00E6, | |
984 | 0x0930, 0x0020, 0x0918, 0x0909, | |
985 | 0x3086, 0x304D, 0x0020, 0x3053, | |
986 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
987 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, | |
988 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
989 | 0xD800, 0xDC00, 0xD845, 0xDDDD, | |
990 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
991 | 0xDFFF, 0x0001, 0x0E40, 0x0020, | |
992 | 0x0009 | |
993 | }; | |
994 | int32_t offsets[]={ | |
995 | 0, 3, 6, 7, /* skip 8, */ | |
996 | 10, 11, /* skip 12, */ | |
997 | 14, 15, 16, 17, 18, | |
998 | 20, 21, 23, 24, 25, 26, 28, 29, | |
999 | 30, 31, 33, 35, 37, 38, | |
1000 | 40, 42, 45, 47, 48, | |
1001 | 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, | |
1002 | 63, 64, /* trail */ 64, /* reset only 67, */ | |
1003 | 68, 69, | |
1004 | 71, 72 | |
1005 | }; | |
1006 | ||
1007 | if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1008 | expected, UPRV_LENGTHOF(expected), "BOCU-1", | |
1009 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1010 | ) { | |
1011 | log_err("BOCU-1->u with skip did not match.\n"); | |
1012 | } | |
1013 | } | |
1014 | ||
1015 | log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
1016 | { | |
1017 | const uint8_t sampleText[]={ | |
1018 | 0x61, /* 0 'a' */ | |
1019 | 0xc0, 0x80, /* 1 non-shortest form */ | |
1020 | 0xc4, 0xb5, /* 3 U+0135 */ | |
1021 | 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ | |
1022 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ | |
1023 | 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ | |
1024 | 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ | |
1025 | 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ | |
1026 | 0x62, /* 24 'b' */ | |
1027 | 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ | |
1028 | 0xed, 0xa0, /* 28 incomplete sequence */ | |
1029 | 0xd0, 0x80 /* 30 U+0400 */ | |
1030 | }; | |
1031 | UChar expected[]={ | |
1032 | 0x0061, | |
1033 | /* skip */ | |
1034 | 0x0135, | |
1035 | 0xd020, | |
1036 | 0xd801, 0xdc01, | |
1037 | 0xe000, | |
1038 | 0xdc01, | |
1039 | /* skip */ | |
1040 | 0x0062, | |
1041 | 0xd801, | |
1042 | 0x0400 | |
1043 | }; | |
1044 | int32_t offsets[]={ | |
1045 | 0, | |
1046 | /* skip 1, */ | |
1047 | 3, | |
1048 | 5, | |
1049 | 8, 11, | |
1050 | 14, | |
1051 | 17, | |
1052 | /* skip 20, 20, */ | |
1053 | 24, | |
1054 | 25, | |
1055 | /* skip 28 */ | |
1056 | 30 | |
1057 | }; | |
1058 | ||
1059 | /* without offsets */ | |
1060 | if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1061 | expected, UPRV_LENGTHOF(expected), "CESU-8", | |
1062 | UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) | |
1063 | ) { | |
1064 | log_err("CESU-8->u with skip did not match.\n"); | |
1065 | } | |
1066 | ||
1067 | /* with offsets */ | |
1068 | if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1069 | expected, UPRV_LENGTHOF(expected), "CESU-8", | |
1070 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1071 | ) { | |
1072 | log_err("CESU-8->u with skip did not match.\n"); | |
1073 | } | |
1074 | } | |
1075 | } | |
1076 | ||
1077 | static void TestStop(int32_t inputsize, int32_t outputsize) | |
1078 | { | |
1079 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1080 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1081 | ||
1082 | static const uint8_t expstopIBM_949[]= { | |
1083 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; | |
1084 | ||
1085 | static const uint8_t expstopIBM_943[] = { | |
1086 | 0x9f, 0xaf, 0x9f, 0xb1}; | |
1087 | ||
1088 | static const uint8_t expstopIBM_930[] = { | |
1089 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; | |
1090 | ||
1091 | static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; | |
1092 | static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1093 | static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1094 | ||
1095 | ||
1096 | static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; | |
1097 | static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; | |
1098 | static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; | |
1099 | ||
1100 | static const int32_t fromIBM949Offs [] = { 0, 1, 3}; | |
1101 | static const int32_t fromIBM943Offs [] = { 0, 2}; | |
1102 | static const int32_t fromIBM930Offs [] = { 1, 3}; | |
1103 | ||
1104 | gInBufferSize = inputsize; | |
1105 | gOutBufferSize = outputsize; | |
1106 | ||
1107 | /*From Unicode*/ | |
1108 | ||
1109 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1110 | if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1111 | expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949", | |
1112 | UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) | |
1113 | log_err("u-> ibm-949 with stop did not match.\n"); | |
1114 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1115 | expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943", | |
1116 | UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) | |
1117 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1118 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1119 | expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930", | |
1120 | UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) | |
1121 | log_err("u-> ibm-930 with stop did not match.\n"); | |
1122 | ||
1123 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); | |
1124 | { | |
1125 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1126 | static const uint8_t toIBM943[]= { 0x61,}; | |
1127 | static const int32_t offset[]= {0,} ; | |
1128 | ||
1129 | /*EUC_JP*/ | |
1130 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1131 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; | |
1132 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; | |
1133 | ||
1134 | /*EUC_TW*/ | |
1135 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1136 | static const uint8_t to_euc_tw[]={ | |
1137 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; | |
1138 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; | |
1139 | ||
1140 | /*ISO-2022-JP*/ | |
1141 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; | |
1142 | static const uint8_t to_iso_2022_jp[]={ | |
1143 | 0x41, | |
1144 | ||
1145 | }; | |
1146 | static const int32_t from_iso_2022_jpOffs [] ={0,}; | |
1147 | ||
1148 | /*ISO-2022-cn*/ | |
1149 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1150 | static const uint8_t to_iso_2022_cn[]={ | |
1151 | 0x41, | |
1152 | ||
1153 | }; | |
1154 | static const int32_t from_iso_2022_cnOffs [] ={ | |
1155 | 0,0, | |
1156 | 2,2, | |
1157 | }; | |
1158 | ||
1159 | /*ISO-2022-kr*/ | |
1160 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
1161 | static const uint8_t to_iso_2022_kr[]={ | |
1162 | 0x1b, 0x24, 0x29, 0x43, | |
1163 | 0x41, | |
1164 | 0x0e, 0x25, 0x50, | |
1165 | }; | |
1166 | static const int32_t from_iso_2022_krOffs [] ={ | |
1167 | -1,-1,-1,-1, | |
1168 | 0, | |
1169 | 1,1,1, | |
1170 | }; | |
1171 | ||
1172 | /* HZ encoding */ | |
1173 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1174 | ||
1175 | static const uint8_t to_hz[]={ | |
1176 | 0x7e, 0x7d, 0x41, | |
1177 | 0x7e, 0x7b, 0x26, 0x30, | |
1178 | ||
1179 | }; | |
1180 | static const int32_t from_hzOffs [] ={ | |
1181 | 0, 0,0, | |
1182 | 1,1,1,1, | |
1183 | }; | |
1184 | ||
1185 | /*ISCII*/ | |
1186 | static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1187 | static const uint8_t to_iscii[]={ | |
1188 | 0x41, | |
1189 | }; | |
1190 | static const int32_t from_isciiOffs [] ={ | |
1191 | 0, | |
1192 | }; | |
1193 | ||
1194 | if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), | |
1195 | toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", | |
1196 | UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) | |
1197 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1198 | ||
1199 | if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), | |
1200 | to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", | |
1201 | UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) | |
1202 | log_err("u-> euc-jp with stop did not match.\n"); | |
1203 | ||
1204 | if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), | |
1205 | to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", | |
1206 | UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1207 | log_err("u-> euc-tw with stop did not match.\n"); | |
1208 | ||
1209 | if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), | |
1210 | to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", | |
1211 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1212 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1213 | ||
1214 | if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText), | |
1215 | to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp", | |
1216 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1217 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1218 | ||
1219 | if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), | |
1220 | to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", | |
1221 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) | |
1222 | log_err("u-> iso-2022-cn with stop did not match.\n"); | |
1223 | ||
1224 | if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), | |
1225 | to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", | |
1226 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) | |
1227 | log_err("u-> iso-2022-kr with stop did not match.\n"); | |
1228 | ||
1229 | if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), | |
1230 | to_hz, UPRV_LENGTHOF(to_hz), "HZ", | |
1231 | UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) | |
1232 | log_err("u-> HZ with stop did not match.\n");\ | |
1233 | ||
1234 | if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), | |
1235 | to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", | |
1236 | UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) | |
1237 | log_err("u-> iscii with stop did not match.\n"); | |
1238 | ||
1239 | ||
1240 | } | |
1241 | #endif | |
1242 | ||
1243 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); | |
1244 | { | |
1245 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1246 | ||
1247 | static const uint8_t to_SCSU[]={ | |
1248 | 0x41, | |
1249 | ||
1250 | }; | |
1251 | int32_t from_SCSUOffs [] ={ | |
1252 | 0, | |
1253 | ||
1254 | }; | |
1255 | if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), | |
1256 | to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", | |
1257 | UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) | |
1258 | log_err("u-> SCSU with skip did not match.\n"); | |
1259 | ||
1260 | } | |
1261 | ||
1262 | /*to Unicode*/ | |
1263 | ||
1264 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1265 | if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), | |
1266 | IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949", | |
1267 | UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) | |
1268 | log_err("ibm-949->u with stop did not match.\n"); | |
1269 | if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), | |
1270 | IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943", | |
1271 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) | |
1272 | log_err("ibm-943->u with stop did not match.\n"); | |
1273 | if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), | |
1274 | IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930", | |
1275 | UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) | |
1276 | log_err("ibm-930->u with stop did not match.\n"); | |
1277 | ||
1278 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); | |
1279 | { | |
1280 | ||
1281 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1282 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1283 | }; | |
1284 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; | |
1285 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; | |
1286 | ||
1287 | ||
1288 | /*EUC-JP*/ | |
1289 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1290 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1291 | 0x8e, 0xe0, | |
1292 | }; | |
1293 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; | |
1294 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; | |
1295 | ||
1296 | /*EUC_TW*/ | |
1297 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1298 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1299 | 0xe6, 0xca, 0x8a, | |
1300 | }; | |
1301 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; | |
1302 | int32_t from_euc_twOffs [] ={ 0, 1, 3}; | |
1303 | ||
1304 | ||
1305 | ||
1306 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), | |
1307 | EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", | |
1308 | UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1309 | log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); | |
1310 | ||
1311 | if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), | |
1312 | euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", | |
1313 | UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) | |
1314 | log_err("euc-jp->u with stop did not match.\n"); | |
1315 | ||
1316 | if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), | |
1317 | euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", | |
1318 | UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1319 | log_err("euc-tw->u with stop did not match.\n"); | |
1320 | } | |
1321 | #endif | |
1322 | ||
1323 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); | |
1324 | { | |
1325 | static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1326 | 0xe0, 0x80, 0x61,}; | |
1327 | static const UChar expected1[] = { 0x0031, 0x4e8c,}; | |
1328 | static const int32_t offsets1[] = { 0x0000, 0x0001}; | |
1329 | ||
1330 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
1331 | expected1, UPRV_LENGTHOF(expected1),"utf8", | |
1332 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1333 | log_err("utf8->u with stop did not match.\n"); | |
1334 | } | |
1335 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); | |
1336 | { | |
1337 | static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; | |
1338 | static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; | |
1339 | static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; | |
1340 | ||
1341 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
1342 | expected1, UPRV_LENGTHOF(expected1),"SCSU", | |
1343 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1344 | log_err("scsu->u with stop did not match.\n"); | |
1345 | } | |
1346 | ||
1347 | } | |
1348 | ||
1349 | static void TestSub(int32_t inputsize, int32_t outputsize) | |
1350 | { | |
1351 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1352 | static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1353 | ||
1354 | static const uint8_t expsubIBM_949[] = | |
1355 | { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; | |
1356 | ||
1357 | static const uint8_t expsubIBM_943[] = { | |
1358 | 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; | |
1359 | ||
1360 | static const uint8_t expsubIBM_930[] = { | |
1361 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; | |
1362 | ||
1363 | static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; | |
1364 | static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1365 | static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1366 | ||
1367 | static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1368 | static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; | |
1369 | static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; | |
1370 | ||
1371 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; | |
1372 | static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; | |
1373 | static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; | |
1374 | ||
1375 | gInBufferSize = inputsize; | |
1376 | gOutBufferSize = outputsize; | |
1377 | ||
1378 | /*from unicode*/ | |
1379 | ||
1380 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1381 | if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1382 | expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949", | |
1383 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) | |
1384 | log_err("u-> ibm-949 with subst did not match.\n"); | |
1385 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1386 | expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943", | |
1387 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) | |
1388 | log_err("u-> ibm-943 with subst did not match.\n"); | |
1389 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1390 | expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930", | |
1391 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) | |
1392 | log_err("u-> ibm-930 with subst did not match.\n"); | |
1393 | ||
1394 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1395 | { | |
1396 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1397 | static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; | |
1398 | static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; | |
1399 | ||
1400 | ||
1401 | /* EUC_JP*/ | |
1402 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1403 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1404 | 0xf4, 0xfe, 0xf4, 0xfe, | |
1405 | 0x61, 0x8e, 0xe0, | |
1406 | }; | |
1407 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; | |
1408 | ||
1409 | /*EUC_TW*/ | |
1410 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1411 | static const uint8_t to_euc_tw[]={ | |
1412 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1413 | 0xfd, 0xfe, 0xfd, 0xfe, | |
1414 | 0x61, 0xe6, 0xca, 0x8a, | |
1415 | }; | |
1416 | ||
1417 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; | |
1418 | ||
1419 | if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), | |
1420 | toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", | |
1421 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) | |
1422 | log_err("u-> ibm-943 with substitute did not match.\n"); | |
1423 | ||
1424 | if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), | |
1425 | to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", | |
1426 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) | |
1427 | log_err("u-> euc-jp with substitute did not match.\n"); | |
1428 | ||
1429 | if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), | |
1430 | to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", | |
1431 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1432 | log_err("u-> euc-tw with substitute did not match.\n"); | |
1433 | } | |
1434 | #endif | |
1435 | ||
1436 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1437 | { | |
1438 | UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1439 | ||
1440 | const uint8_t to_SCSU[]={ | |
1441 | 0x41, | |
1442 | 0x0e, 0xff,0xfd, | |
1443 | 0x42 | |
1444 | ||
1445 | ||
1446 | }; | |
1447 | int32_t from_SCSUOffs [] ={ | |
1448 | 0, | |
1449 | 1,1,1, | |
1450 | 2, | |
1451 | ||
1452 | }; | |
1453 | const uint8_t to_SCSU_1[]={ | |
1454 | 0x41, | |
1455 | ||
1456 | }; | |
1457 | int32_t from_SCSUOffs_1 [] ={ | |
1458 | 0, | |
1459 | ||
1460 | }; | |
1461 | if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), | |
1462 | to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU", | |
1463 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) | |
1464 | log_err("u-> SCSU with substitute did not match.\n"); | |
1465 | ||
1466 | if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText), | |
1467 | to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU", | |
1468 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
1469 | log_err("u-> SCSU with substitute did not match.\n"); | |
1470 | } | |
1471 | ||
1472 | log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1473 | { | |
1474 | static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; | |
1475 | static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, | |
1476 | 0xf0, 0x90, 0x90, 0x81, | |
1477 | 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, | |
1478 | 0xef, 0xbf, 0xbf, 0x61, | |
1479 | ||
1480 | }; | |
1481 | static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; | |
1482 | if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput), | |
1483 | expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8", | |
1484 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { | |
1485 | log_err("u-> utf8 with substitute did not match.\n"); | |
1486 | } | |
1487 | } | |
1488 | ||
1489 | log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1490 | { | |
1491 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1492 | ||
1493 | static const uint8_t out[]={ | |
1494 | #if U_IS_BIG_ENDIAN | |
1495 | 0xfe, 0xff, | |
1496 | 0x00, 0x41, | |
1497 | 0xfe, 0xff | |
1498 | #else | |
1499 | 0xff, 0xfe, | |
1500 | 0x41, 0x00, | |
1501 | 0xff, 0xfe | |
1502 | #endif | |
1503 | }; | |
1504 | static const int32_t offsets[]={ | |
1505 | -1, -1, 0, 0, 1, 1 | |
1506 | }; | |
1507 | ||
1508 | if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in), | |
1509 | out, UPRV_LENGTHOF(out), "UTF-16", | |
1510 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1511 | ) { | |
1512 | log_err("u->UTF-16 with substitute did not match.\n"); | |
1513 | } | |
1514 | } | |
1515 | ||
1516 | log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1517 | { | |
1518 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1519 | ||
1520 | static const uint8_t out[]={ | |
1521 | #if U_IS_BIG_ENDIAN | |
1522 | 0x00, 0x00, 0xfe, 0xff, | |
1523 | 0x00, 0x00, 0x00, 0x41, | |
1524 | 0x00, 0x00, 0xfe, 0xff | |
1525 | #else | |
1526 | 0xff, 0xfe, 0x00, 0x00, | |
1527 | 0x41, 0x00, 0x00, 0x00, | |
1528 | 0xff, 0xfe, 0x00, 0x00 | |
1529 | #endif | |
1530 | }; | |
1531 | static const int32_t offsets[]={ | |
1532 | -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 | |
1533 | }; | |
1534 | ||
1535 | if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in), | |
1536 | out, UPRV_LENGTHOF(out), "UTF-32", | |
1537 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1538 | ) { | |
1539 | log_err("u->UTF-32 with substitute did not match.\n"); | |
1540 | } | |
1541 | } | |
1542 | ||
1543 | /*to unicode*/ | |
1544 | ||
1545 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1546 | if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), | |
1547 | IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949", | |
1548 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) | |
1549 | log_err("ibm-949->u with substitute did not match.\n"); | |
1550 | if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), | |
1551 | IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943", | |
1552 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) | |
1553 | log_err("ibm-943->u with substitute did not match.\n"); | |
1554 | if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), | |
1555 | IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930", | |
1556 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) | |
1557 | log_err("ibm-930->u with substitute did not match.\n"); | |
1558 | ||
1559 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1560 | { | |
1561 | ||
1562 | const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1563 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1564 | }; | |
1565 | UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 | |
1566 | }; | |
1567 | int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; | |
1568 | ||
1569 | ||
1570 | /* EUC_JP*/ | |
1571 | const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1572 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1573 | 0x8e, 0xe0, 0x8a | |
1574 | }; | |
1575 | UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; | |
1576 | int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; | |
1577 | ||
1578 | /*EUC_TW*/ | |
1579 | const uint8_t sampleTxt_euc_tw[]={ | |
1580 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1581 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1582 | 0xe6, 0xca, 0x8a, | |
1583 | }; | |
1584 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; | |
1585 | int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; | |
1586 | ||
1587 | ||
1588 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL), | |
1589 | EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930", | |
1590 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1591 | log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); | |
1592 | ||
1593 | ||
1594 | if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), | |
1595 | euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", | |
1596 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) | |
1597 | log_err("euc-jp->u with substitute did not match.\n"); | |
1598 | ||
1599 | ||
1600 | if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), | |
1601 | euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", | |
1602 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1603 | log_err("euc-tw->u with substitute did not match.\n"); | |
1604 | ||
1605 | ||
1606 | if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp), | |
1607 | euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP", | |
1608 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) | |
1609 | log_err("euc-jp->u with substitute did not match.\n"); | |
1610 | } | |
1611 | #endif | |
1612 | ||
1613 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1614 | { | |
1615 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1616 | 0xe0, 0x80, 0x61,}; | |
1617 | UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061}; | |
1618 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006}; | |
1619 | ||
1620 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
1621 | expected1, UPRV_LENGTHOF(expected1),"utf8", | |
1622 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1623 | log_err("utf8->u with substitute did not match.\n"); | |
1624 | } | |
1625 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1626 | { | |
1627 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
1628 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; | |
1629 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
1630 | ||
1631 | if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1), | |
1632 | expected1, UPRV_LENGTHOF(expected1),"SCSU", | |
1633 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1634 | log_err("scsu->u with stop did not match.\n"); | |
1635 | } | |
1636 | ||
1637 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1638 | log_verbose("Testing ibm-930 subchar/subchar1\n"); | |
1639 | { | |
1640 | static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; | |
1641 | static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; | |
1642 | static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1643 | ||
1644 | static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; | |
1645 | static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; | |
1646 | static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; | |
1647 | ||
1648 | if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930", | |
1649 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1650 | ) { | |
1651 | log_err("u->ibm-930 subchar/subchar1 did not match.\n"); | |
1652 | } | |
1653 | ||
1654 | if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930", | |
1655 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1656 | ) { | |
1657 | log_err("ibm-930->u subchar/subchar1 did not match.\n"); | |
1658 | } | |
1659 | } | |
1660 | ||
1661 | log_verbose("Testing GB 18030 with substitute callbacks\n"); | |
1662 | { | |
1663 | static const UChar u2[]={ | |
1664 | 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; | |
1665 | static const uint8_t gb2[]={ | |
1666 | 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; | |
1667 | static const int32_t offsets2[]={ | |
1668 | 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; | |
1669 | ||
1670 | if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030", | |
1671 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1672 | ) { | |
1673 | log_err("gb18030->u with substitute did not match.\n"); | |
1674 | } | |
1675 | } | |
1676 | #endif | |
1677 | ||
1678 | log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); | |
1679 | { | |
1680 | static const uint8_t utf7[]={ | |
1681 | /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ | |
1682 | 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e | |
1683 | }; | |
1684 | static const UChar unicode[]={ | |
1685 | 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e | |
1686 | }; | |
1687 | static const int32_t offsets[]={ | |
1688 | 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 | |
1689 | }; | |
1690 | ||
1691 | if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", | |
1692 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1693 | ) { | |
1694 | log_err("UTF-7->u with substitute did not match.\n"); | |
1695 | } | |
1696 | } | |
1697 | ||
1698 | log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); | |
1699 | { | |
1700 | static const uint8_t | |
1701 | in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, | |
1702 | in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, | |
1703 | in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; | |
1704 | ||
1705 | static const UChar | |
1706 | out1[]={ 0x4e00, 0xfeff }, | |
1707 | out2[]={ 0x004e, 0xfffe }, | |
1708 | out3[]={ 0xfefd, 0x4e00, 0xfeff }; | |
1709 | ||
1710 | static const int32_t | |
1711 | offsets1[]={ 2, 4 }, | |
1712 | offsets2[]={ 2, 4 }, | |
1713 | offsets3[]={ 0, 2, 4 }; | |
1714 | ||
1715 | if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16", | |
1716 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1717 | ) { | |
1718 | log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); | |
1719 | } | |
1720 | ||
1721 | if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16", | |
1722 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1723 | ) { | |
1724 | log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); | |
1725 | } | |
1726 | ||
1727 | if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16", | |
1728 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1729 | ) { | |
1730 | log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); | |
1731 | } | |
1732 | } | |
1733 | ||
1734 | log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); | |
1735 | { | |
1736 | static const uint8_t | |
1737 | in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, | |
1738 | in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, | |
1739 | in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, | |
1740 | in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; | |
1741 | ||
1742 | static const UChar | |
1743 | out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, | |
1744 | out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, | |
1745 | out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, | |
1746 | out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; | |
1747 | ||
1748 | static const int32_t | |
1749 | offsets1[]={ 4, 4, 8 }, | |
1750 | offsets2[]={ 4, 4, 8 }, | |
1751 | offsets3[]={ 0, 4, 4, 8, 12 }, | |
1752 | offsets4[]={ 0, 0, 4, 8 }; | |
1753 | ||
1754 | if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32", | |
1755 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1756 | ) { | |
1757 | log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); | |
1758 | } | |
1759 | ||
1760 | if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32", | |
1761 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1762 | ) { | |
1763 | log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); | |
1764 | } | |
1765 | ||
1766 | if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32", | |
1767 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1768 | ) { | |
1769 | log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); | |
1770 | } | |
1771 | ||
1772 | if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32", | |
1773 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) | |
1774 | ) { | |
1775 | log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); | |
1776 | } | |
1777 | } | |
1778 | } | |
1779 | ||
1780 | static void TestSubWithValue(int32_t inputsize, int32_t outputsize) | |
1781 | { | |
1782 | UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1783 | UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1784 | ||
1785 | const uint8_t expsubwvalIBM_949[]= { | |
1786 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, | |
1787 | 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; | |
1788 | ||
1789 | const uint8_t expsubwvalIBM_943[]= { | |
1790 | 0x9f, 0xaf, 0x9f, 0xb1, | |
1791 | 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; | |
1792 | ||
1793 | const uint8_t expsubwvalIBM_930[] = { | |
1794 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; | |
1795 | ||
1796 | int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; | |
1797 | int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; | |
1798 | int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ | |
1799 | ||
1800 | gInBufferSize = inputsize; | |
1801 | gOutBufferSize = outputsize; | |
1802 | ||
1803 | /*from Unicode*/ | |
1804 | ||
1805 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1806 | if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
1807 | expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949", | |
1808 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) | |
1809 | log_err("u-> ibm-949 with subst with value did not match.\n"); | |
1810 | ||
1811 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1812 | expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943", | |
1813 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) | |
1814 | log_err("u-> ibm-943 with sub with value did not match.\n"); | |
1815 | ||
1816 | if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2), | |
1817 | expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930", | |
1818 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) | |
1819 | log_err("u-> ibm-930 with subst with value did not match.\n"); | |
1820 | ||
1821 | ||
1822 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); | |
1823 | { | |
1824 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1825 | static const uint8_t toIBM943[]= { 0x61, | |
1826 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1827 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1828 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1829 | 0x61 }; | |
1830 | static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; | |
1831 | ||
1832 | ||
1833 | /* EUC_JP*/ | |
1834 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; | |
1835 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1836 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1837 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1838 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1839 | 0x61, 0x8e, 0xe0, | |
1840 | }; | |
1841 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, | |
1842 | 3, 3, 3, 3, 3, 3, | |
1843 | 3, 3, 3, 3, 3, 3, | |
1844 | 5, 5, 5, 5, 5, 5, | |
1845 | 6, 7, 7, | |
1846 | }; | |
1847 | ||
1848 | /*EUC_TW*/ | |
1849 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1850 | static const uint8_t to_euc_tw[]={ | |
1851 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1852 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1853 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1854 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1855 | 0x61, 0xe6, 0xca, 0x8a, | |
1856 | }; | |
1857 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, | |
1858 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, | |
1859 | 6, 7, 7, 8, | |
1860 | }; | |
1861 | /*ISO-2022-JP*/ | |
1862 | static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; | |
1863 | static const uint8_t to_iso_2022_jp1[]={ | |
1864 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1865 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1866 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1867 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1868 | 0x42, | |
1869 | }; | |
1870 | ||
1871 | static const int32_t from_iso_2022_jpOffs1 [] ={ | |
1872 | 0,0,0,0,0, | |
1873 | 1,1,1,1,1,1,1,1,1, | |
1874 | 2,2,2,2,2, | |
1875 | 3,3,3,3,3,3,3,3,3, | |
1876 | 4, | |
1877 | }; | |
1878 | /* surrogate pair*/ | |
1879 | static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; | |
1880 | static const uint8_t to_iso_2022_jp2[]={ | |
1881 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1882 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1883 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1884 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1885 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1886 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1887 | 0x42, | |
1888 | }; | |
1889 | static const int32_t from_iso_2022_jpOffs2 [] ={ | |
1890 | 0,0,0,0,0, | |
1891 | 1,1,1,1,1,1,1,1,1, | |
1892 | 1,1,1,1,1,1, | |
1893 | 3,3,3,3,3, | |
1894 | 4,4,4,4,4,4,4,4,4, | |
1895 | 4,4,4,4,4,4, | |
1896 | 6, | |
1897 | }; | |
1898 | ||
1899 | /*ISO-2022-cn*/ | |
1900 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1901 | static const uint8_t to_iso_2022_cn[]={ | |
1902 | 0x41, | |
1903 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, | |
1904 | 0x42, | |
1905 | }; | |
1906 | static const int32_t from_iso_2022_cnOffs [] ={ | |
1907 | 0, | |
1908 | 1,1,1,1,1,1, | |
1909 | 2, | |
1910 | }; | |
1911 | ||
1912 | static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; | |
1913 | ||
1914 | static const uint8_t to_iso_2022_cn4[]={ | |
1915 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
1916 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1917 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1918 | 0x0e, 0x21, 0x22, | |
1919 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1920 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1921 | 0x42, | |
1922 | }; | |
1923 | static const int32_t from_iso_2022_cnOffs4 [] ={ | |
1924 | 0,0,0,0,0,0,0, | |
1925 | 1,1,1,1,1,1,1, | |
1926 | 1,1,1,1,1,1, | |
1927 | 3,3,3, | |
1928 | 4,4,4,4,4,4,4, | |
1929 | 4,4,4,4,4,4, | |
1930 | 6 | |
1931 | ||
1932 | }; | |
1933 | ||
1934 | /*ISO-2022-kr*/ | |
1935 | static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
1936 | static const uint8_t to_iso_2022_kr2[]={ | |
1937 | 0x1b, 0x24, 0x29, 0x43, | |
1938 | 0x41, | |
1939 | 0x0e, 0x25, 0x50, | |
1940 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1941 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1942 | 0x0e, 0x25, 0x50, | |
1943 | 0x0f, 0x42, | |
1944 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1945 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1946 | 0x43 | |
1947 | }; | |
1948 | static const int32_t from_iso_2022_krOffs2 [] ={ | |
1949 | -1,-1,-1,-1, | |
1950 | 0, | |
1951 | 1,1,1, | |
1952 | 2,2,2,2,2,2,2, | |
1953 | 2,2,2,2,2,2, | |
1954 | 4,4,4, | |
1955 | 5,5, | |
1956 | 6,6,6,6,6,6, | |
1957 | 6,6,6,6,6,6, | |
1958 | 8, | |
1959 | }; | |
1960 | ||
1961 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; | |
1962 | static const uint8_t to_iso_2022_kr[]={ | |
1963 | 0x1b, 0x24, 0x29, 0x43, | |
1964 | 0x41, | |
1965 | 0x0e, 0x25, 0x50, | |
1966 | 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1967 | 0x0e, 0x25, 0x50, | |
1968 | 0x0f, 0x42, | |
1969 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1970 | 0x43 | |
1971 | }; | |
1972 | ||
1973 | ||
1974 | static const int32_t from_iso_2022_krOffs [] ={ | |
1975 | -1,-1,-1,-1, | |
1976 | 0, | |
1977 | 1,1,1, | |
1978 | 2,2,2,2,2,2,2, | |
1979 | 3,3,3, | |
1980 | 4,4, | |
1981 | 5,5,5,5,5,5, | |
1982 | 6, | |
1983 | }; | |
1984 | /* HZ encoding */ | |
1985 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1986 | ||
1987 | static const uint8_t to_hz[]={ | |
1988 | 0x7e, 0x7d, 0x41, | |
1989 | 0x7e, 0x7b, 0x26, 0x30, | |
1990 | 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ | |
1991 | 0x7e, 0x7b, 0x26, 0x30, | |
1992 | 0x7e, 0x7d, 0x42, | |
1993 | ||
1994 | }; | |
1995 | static const int32_t from_hzOffs [] ={ | |
1996 | 0,0,0, | |
1997 | 1,1,1,1, | |
1998 | 2,2,2,2,2,2,2,2, | |
1999 | 3,3,3,3, | |
2000 | 4,4,4 | |
2001 | }; | |
2002 | ||
2003 | static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
2004 | static const uint8_t to_hz2[]={ | |
2005 | 0x7e, 0x7d, 0x41, | |
2006 | 0x7e, 0x7b, 0x26, 0x30, | |
2007 | 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2008 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2009 | 0x7e, 0x7b, 0x26, 0x30, | |
2010 | 0x7e, 0x7d, 0x42, | |
2011 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2012 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2013 | 0x43 | |
2014 | }; | |
2015 | static const int32_t from_hzOffs2 [] ={ | |
2016 | 0,0,0, | |
2017 | 1,1,1,1, | |
2018 | 2,2,2,2,2,2,2,2, | |
2019 | 2,2,2,2,2,2, | |
2020 | 4,4,4,4, | |
2021 | 5,5,5, | |
2022 | 6,6,6,6,6,6, | |
2023 | 6,6,6,6,6,6, | |
2024 | 8, | |
2025 | }; | |
2026 | ||
2027 | /*ISCII*/ | |
2028 | static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; | |
2029 | static const uint8_t to_iscii[]={ | |
2030 | 0x41, | |
2031 | 0xef, 0x42, 0xa1, | |
2032 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2033 | 0xa2, | |
2034 | 0x42, | |
2035 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2036 | 0x43 | |
2037 | }; | |
2038 | ||
2039 | ||
2040 | static const int32_t from_isciiOffs [] ={ | |
2041 | 0, | |
2042 | 1,1,1, | |
2043 | 2,2,2,2,2,2, | |
2044 | 3, | |
2045 | 4, | |
2046 | 5,5,5,5,5,5, | |
2047 | 6, | |
2048 | }; | |
2049 | ||
2050 | if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest), | |
2051 | toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943", | |
2052 | UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) | |
2053 | log_err("u-> ibm-943 with subst with value did not match.\n"); | |
2054 | ||
2055 | if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText), | |
2056 | to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP", | |
2057 | UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) | |
2058 | log_err("u-> euc-jp with subst with value did not match.\n"); | |
2059 | ||
2060 | if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText), | |
2061 | to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw", | |
2062 | UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) | |
2063 | log_err("u-> euc-tw with subst with value did not match.\n"); | |
2064 | ||
2065 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1), | |
2066 | to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp", | |
2067 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2068 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2069 | ||
2070 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1), | |
2071 | to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp", | |
2072 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2073 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2074 | ||
2075 | if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2), | |
2076 | to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp", | |
2077 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) | |
2078 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2079 | /*ESCAPE OPTIONS*/ | |
2080 | { | |
2081 | /* surrogate pair*/ | |
2082 | static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; | |
2083 | static const uint8_t to_iso_2022_jp3_v2[]={ | |
2084 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
2085 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2086 | ||
2087 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
2088 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2089 | ||
2090 | 0x42, | |
2091 | 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, | |
2092 | }; | |
2093 | ||
2094 | static const int32_t from_iso_2022_jpOffs3_v2 [] ={ | |
2095 | 0,0,0,0,0, | |
2096 | 1,1,1,1,1,1,1,1,1,1,1,1, | |
2097 | ||
2098 | 3,3,3,3,3, | |
2099 | 4,4,4,4,4,4,4,4,4,4,4,4, | |
2100 | ||
2101 | 6, | |
2102 | 7,7,7,7,7,7,7,7,7 | |
2103 | }; | |
2104 | ||
2105 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3), | |
2106 | to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp", | |
2107 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2108 | log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2109 | } | |
2110 | { | |
2111 | static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2112 | static const uint8_t to_iso_2022_cn5_v2[]={ | |
2113 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2114 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, | |
2115 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2116 | 0x0e, 0x21, 0x22, | |
2117 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, | |
2118 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2119 | 0x42, | |
2120 | 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, | |
2121 | }; | |
2122 | static const int32_t from_iso_2022_cnOffs5_v2 [] ={ | |
2123 | 0,0,0,0,0,0,0, | |
2124 | 1,1,1,1,1,1,1, | |
2125 | 1,1,1,1,1,1, | |
2126 | 3,3,3, | |
2127 | 4,4,4,4,4,4,4, | |
2128 | 4,4,4,4,4,4, | |
2129 | 6, | |
2130 | 7,7,7,7,7,7 | |
2131 | }; | |
2132 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5), | |
2133 | to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn", | |
2134 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) | |
2135 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); | |
2136 | ||
2137 | } | |
2138 | { | |
2139 | static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2140 | static const uint8_t to_iso_2022_cn6_v2[]={ | |
2141 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2142 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, | |
2143 | 0x0e, 0x21, 0x22, | |
2144 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, | |
2145 | 0x42, | |
2146 | 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d | |
2147 | }; | |
2148 | static const int32_t from_iso_2022_cnOffs6_v2 [] ={ | |
2149 | 0, 0, 0, 0, 0, 0, 0, | |
2150 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
2151 | 3, 3, 3, | |
2152 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
2153 | 6, | |
2154 | 7, 7, 7, 7, 7, 7, 7, 7, | |
2155 | }; | |
2156 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6), | |
2157 | to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn", | |
2158 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) | |
2159 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); | |
2160 | ||
2161 | } | |
2162 | { | |
2163 | static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2164 | static const uint8_t to_iso_2022_cn7_v2[]={ | |
2165 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2166 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2167 | 0x0e, 0x21, 0x22, | |
2168 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2169 | 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, | |
2170 | }; | |
2171 | static const int32_t from_iso_2022_cnOffs7_v2 [] ={ | |
2172 | 0, 0, 0, 0, 0, 0, 0, | |
2173 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
2174 | 3, 3, 3, | |
2175 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
2176 | 6, | |
2177 | 7, 7, 7, 7, 7, 7, | |
2178 | }; | |
2179 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7), | |
2180 | to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn", | |
2181 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) | |
2182 | log_err("u-> iso-2022-cn with sub & K did not match.\n"); | |
2183 | ||
2184 | } | |
2185 | { | |
2186 | static const UChar iso_2022_cn_inputText8[]={ | |
2187 | 0x3000, | |
2188 | 0xD84D, 0xDC56, | |
2189 | 0x3001, | |
2190 | 0xD84D, 0xDC56, | |
2191 | 0xDBFF, 0xDFFF, | |
2192 | 0x0042, | |
2193 | 0x0902}; | |
2194 | static const uint8_t to_iso_2022_cn8_v2[]={ | |
2195 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2196 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2197 | 0x0e, 0x21, 0x22, | |
2198 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2199 | 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, | |
2200 | 0x42, | |
2201 | 0x5c, 0x39, 0x30, 0x32, 0x20 | |
2202 | }; | |
2203 | static const int32_t from_iso_2022_cnOffs8_v2 [] ={ | |
2204 | 0, 0, 0, 0, 0, 0, 0, | |
2205 | 1, 1, 1, 1, 1, 1, 1, 1, | |
2206 | 3, 3, 3, | |
2207 | 4, 4, 4, 4, 4, 4, 4, 4, | |
2208 | 6, 6, 6, 6, 6, 6, 6, 6, | |
2209 | 8, | |
2210 | 9, 9, 9, 9, 9 | |
2211 | }; | |
2212 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8), | |
2213 | to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn", | |
2214 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) | |
2215 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); | |
2216 | ||
2217 | } | |
2218 | { | |
2219 | static const uint8_t to_iso_2022_cn4_v3[]={ | |
2220 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2221 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, | |
2222 | 0x0e, 0x21, 0x22, | |
2223 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, | |
2224 | 0x42 | |
2225 | }; | |
2226 | ||
2227 | ||
2228 | static const int32_t from_iso_2022_cnOffs4_v3 [] ={ | |
2229 | 0,0,0,0,0,0,0, | |
2230 | 1,1,1,1,1,1,1,1,1,1,1, | |
2231 | ||
2232 | 3,3,3, | |
2233 | 4,4,4,4,4,4,4,4,4,4,4, | |
2234 | ||
2235 | 6 | |
2236 | ||
2237 | }; | |
2238 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4), | |
2239 | to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn", | |
2240 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2241 | { | |
2242 | log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); | |
2243 | } | |
2244 | } | |
2245 | if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText), | |
2246 | to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn", | |
2247 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) | |
2248 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2249 | ||
2250 | if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4), | |
2251 | to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn", | |
2252 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) | |
2253 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2254 | if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText), | |
2255 | to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr", | |
2256 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) | |
2257 | log_err("u-> iso_2022_kr with subst with value did not match.\n"); | |
2258 | if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2), | |
2259 | to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr", | |
2260 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) | |
2261 | log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); | |
2262 | if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText), | |
2263 | to_hz, UPRV_LENGTHOF(to_hz), "HZ", | |
2264 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) | |
2265 | log_err("u-> hz with subst with value did not match.\n"); | |
2266 | if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2), | |
2267 | to_hz2, UPRV_LENGTHOF(to_hz2), "HZ", | |
2268 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) | |
2269 | log_err("u-> hz with subst with value did not match.\n"); | |
2270 | ||
2271 | if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText), | |
2272 | to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0", | |
2273 | UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) | |
2274 | log_err("u-> iscii with subst with value did not match.\n"); | |
2275 | } | |
2276 | #endif | |
2277 | ||
2278 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); | |
2279 | /*to Unicode*/ | |
2280 | { | |
2281 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
2282 | static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, | |
2283 | 0x81, 0xad, /*unassigned*/ | |
2284 | 0x89, 0xd3 }; | |
2285 | static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, | |
2286 | 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, | |
2287 | 0x7B87}; | |
2288 | static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; | |
2289 | ||
2290 | /* EUC_JP*/ | |
2291 | static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
2292 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
2293 | 0x8e, 0xe0, | |
2294 | }; | |
2295 | static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, | |
2296 | 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, | |
2297 | 0x00a2 }; | |
2298 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, | |
2299 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
2300 | 9, | |
2301 | }; | |
2302 | ||
2303 | /*EUC_TW*/ | |
2304 | static const uint8_t sampleTxt_euc_tw[]={ | |
2305 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
2306 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
2307 | 0xe6, 0xca, 0x8a, | |
2308 | }; | |
2309 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, | |
2310 | 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, | |
2311 | 0x8706, 0x8a, }; | |
2312 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, | |
2313 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, | |
2314 | 11, 13}; | |
2315 | ||
2316 | /*iso-2022-jp*/ | |
2317 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
2318 | 0x1b, 0x28, 0x42, 0x41, | |
2319 | 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
2320 | 0x1b, 0x28, 0x42, 0x42, | |
2321 | ||
2322 | }; | |
2323 | /* A % X 3 A % X 1 A B */ | |
2324 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 }; | |
2325 | static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; | |
2326 | ||
2327 | /*iso-2022-cn*/ | |
2328 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
2329 | 0x0f, 0x41, 0x44, | |
2330 | 0x1B, 0x24, 0x29, 0x47, | |
2331 | 0x0E, 0x40, 0x6c, /*unassigned*/ | |
2332 | 0x0f, 0x42, | |
2333 | ||
2334 | }; | |
2335 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; | |
2336 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; | |
2337 | ||
2338 | /*iso-2022-kr*/ | |
2339 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
2340 | 0x1b, 0x24, 0x29, 0x43, | |
2341 | 0x41, | |
2342 | 0x0E, 0x7f, 0x1E, | |
2343 | 0x0e, 0x25, 0x50, | |
2344 | 0x0f, 0x51, | |
2345 | 0x42, 0x43, | |
2346 | ||
2347 | }; | |
2348 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; | |
2349 | static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; | |
2350 | ||
2351 | /*hz*/ | |
2352 | static const uint8_t sampleTxt_hz[]={ | |
2353 | 0x41, | |
2354 | 0x7e, 0x7b, 0x26, 0x30, | |
2355 | 0x7f, 0x1E, /*unassigned*/ | |
2356 | 0x26, 0x30, | |
2357 | 0x7e, 0x7d, 0x42, | |
2358 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
2359 | 0x7e, 0x7d, 0x42, | |
2360 | }; | |
2361 | static const UChar hztoUnicode[]={ | |
2362 | 0x41, | |
2363 | 0x03a0, | |
2364 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2365 | 0x03A0, | |
2366 | 0x42, | |
2367 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2368 | 0x42,}; | |
2369 | ||
2370 | static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; | |
2371 | ||
2372 | ||
2373 | /*iscii*/ | |
2374 | static const uint8_t sampleTxt_iscii[]={ | |
2375 | 0x41, | |
2376 | 0x30, | |
2377 | 0xEB, /*unassigned*/ | |
2378 | 0xa3, | |
2379 | 0x42, | |
2380 | 0xEC, /*unassigned*/ | |
2381 | 0x42, | |
2382 | }; | |
2383 | static const UChar isciitoUnicode[]={ | |
2384 | 0x41, | |
2385 | 0x30, | |
2386 | 0x25, 0x58, 0x45, 0x42, | |
2387 | 0x0903, | |
2388 | 0x42, | |
2389 | 0x25, 0x58, 0x45, 0x43, | |
2390 | 0x42,}; | |
2391 | ||
2392 | static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; | |
2393 | #endif | |
2394 | ||
2395 | /*UTF8*/ | |
2396 | static const uint8_t sampleTxtUTF8[]={ | |
2397 | 0x20, 0x64, 0x50, | |
2398 | 0xC2, 0x7E, /* truncated char */ | |
2399 | 0x20, | |
2400 | 0xE0, 0xB5, 0x7E, /* truncated char */ | |
2401 | 0x40, | |
2402 | }; | |
2403 | static const UChar UTF8ToUnicode[]={ | |
2404 | 0x0020, 0x0064, 0x0050, | |
2405 | 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ | |
2406 | 0x0020, | |
2407 | 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, | |
2408 | 0x0040 | |
2409 | }; | |
2410 | static const int32_t fromUTF8[] = { | |
2411 | 0, 1, 2, | |
2412 | 3, 3, 3, 3, 4, | |
2413 | 5, | |
2414 | 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2415 | 9 | |
2416 | }; | |
2417 | static const UChar UTF8ToUnicodeXML_DEC[]={ | |
2418 | 0x0020, 0x0064, 0x0050, | |
2419 | 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ | |
2420 | 0x0020, | |
2421 | 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, | |
2422 | 0x0040 | |
2423 | }; | |
2424 | static const int32_t fromUTF8XML_DEC[] = { | |
2425 | 0, 1, 2, | |
2426 | 3, 3, 3, 3, 3, 3, 4, | |
2427 | 5, | |
2428 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2429 | 9 | |
2430 | }; | |
2431 | ||
2432 | ||
2433 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
2434 | if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU), | |
2435 | IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943", | |
2436 | UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) | |
2437 | log_err("ibm-943->u with substitute with value did not match.\n"); | |
2438 | ||
2439 | if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP), | |
2440 | EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP", | |
2441 | UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) | |
2442 | log_err("euc-jp->u with substitute with value did not match.\n"); | |
2443 | ||
2444 | if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw), | |
2445 | euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw", | |
2446 | UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) | |
2447 | log_err("euc-tw->u with substitute with value did not match.\n"); | |
2448 | ||
2449 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
2450 | iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", | |
2451 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) | |
2452 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2453 | ||
2454 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
2455 | iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp", | |
2456 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) | |
2457 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2458 | ||
2459 | {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ | |
2460 | { | |
2461 | static const UChar iso_2022_jptoUnicodeDec[]={ | |
2462 | 0x0041, | |
2463 | /* & # 5 8 ; */ | |
2464 | 0x0026, 0x0023, 0x0035, 0x0038, 0x003b, | |
2465 | 0x0026, 0x0023, 0x0032, 0x0036, 0x003b, | |
2466 | 0x0042 }; | |
2467 | static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; | |
2468 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
2469 | iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp", | |
2470 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2471 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2472 | } | |
2473 | { | |
2474 | static const UChar iso_2022_jptoUnicodeHex[]={ | |
2475 | 0x0041, | |
2476 | /* & # x 3 A ; */ | |
2477 | 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b, | |
2478 | 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b, | |
2479 | 0x0042 }; | |
2480 | static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; | |
2481 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
2482 | iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp", | |
2483 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) | |
2484 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); | |
2485 | } | |
2486 | { | |
2487 | static const UChar iso_2022_jptoUnicodeC[]={ | |
2488 | 0x0041, | |
2489 | 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */ | |
2490 | 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */ | |
2491 | 0x0042 }; | |
2492 | int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; | |
2493 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp), | |
2494 | iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp", | |
2495 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2496 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); | |
2497 | } | |
2498 | } | |
2499 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn), | |
2500 | iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn", | |
2501 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) | |
2502 | log_err("iso-2022-cn->u with substitute with value did not match.\n"); | |
2503 | ||
2504 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr), | |
2505 | iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr", | |
2506 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) | |
2507 | log_err("iso-2022-kr->u with substitute with value did not match.\n"); | |
2508 | ||
2509 | if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz), | |
2510 | hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ", | |
2511 | UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) | |
2512 | log_err("hz->u with substitute with value did not match.\n"); | |
2513 | ||
2514 | if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii), | |
2515 | isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0", | |
2516 | UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) | |
2517 | log_err("ISCII ->u with substitute with value did not match.\n"); | |
2518 | #endif | |
2519 | ||
2520 | if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8), | |
2521 | UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8", | |
2522 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) | |
2523 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2524 | if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8), | |
2525 | UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8", | |
2526 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) | |
2527 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2528 | } | |
2529 | } | |
2530 | ||
2531 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
2532 | static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) | |
2533 | { | |
2534 | static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
2535 | static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
2536 | static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; | |
2537 | ||
2538 | ||
2539 | static const uint8_t text943[] = { | |
2540 | 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; | |
2541 | static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
2542 | static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
2543 | static const UChar toUnicode943stop[]= { 0x304b}; | |
2544 | ||
2545 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; | |
2546 | static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; | |
2547 | static const int32_t fromIBM943Offsstop[] = { 0}; | |
2548 | ||
2549 | gInBufferSize = inputsize; | |
2550 | gOutBufferSize = outputsize; | |
2551 | /*checking with a legal value*/ | |
2552 | if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText), | |
2553 | templegal949, UPRV_LENGTHOF(templegal949), "ibm-949", | |
2554 | UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) | |
2555 | log_err("u-> ibm-949 with skip did not match.\n"); | |
2556 | ||
2557 | /*checking illegal value for ibm-943 with substitute*/ | |
2558 | if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), | |
2559 | toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943", | |
2560 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2561 | log_err("ibm-943->u with subst did not match.\n"); | |
2562 | /*checking illegal value for ibm-943 with skip */ | |
2563 | if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), | |
2564 | toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943", | |
2565 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) | |
2566 | log_err("ibm-943->u with skip did not match.\n"); | |
2567 | ||
2568 | /*checking illegal value for ibm-943 with stop */ | |
2569 | if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943), | |
2570 | toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943", | |
2571 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) | |
2572 | log_err("ibm-943->u with stop did not match.\n"); | |
2573 | ||
2574 | } | |
2575 | ||
2576 | static void TestSingleByte(int32_t inputsize, int32_t outputsize) | |
2577 | { | |
2578 | static const uint8_t sampleText[] = { | |
2579 | 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, | |
2580 | 0xff, 0x32, 0x33}; | |
2581 | static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; | |
2582 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; | |
2583 | /*checking illegal value for ibm-943 with substitute*/ | |
2584 | gInBufferSize = inputsize; | |
2585 | gOutBufferSize = outputsize; | |
2586 | ||
2587 | if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText), | |
2588 | toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943", | |
2589 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2590 | log_err("ibm-943->u with subst did not match.\n"); | |
2591 | } | |
2592 | ||
2593 | static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) | |
2594 | { | |
2595 | /*EBCDIC_STATEFUL*/ | |
2596 | static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; | |
2597 | static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; | |
2598 | static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; | |
2599 | /* s SO doubl SI sng s SO fe fe SI s */ | |
2600 | ||
2601 | /*EBCDIC_STATEFUL with subChar=3f*/ | |
2602 | static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; | |
2603 | static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; | |
2604 | static const char mySubChar[]={ 0x3f}; | |
2605 | ||
2606 | gInBufferSize = inputsize; | |
2607 | gOutBufferSize = outputsize; | |
2608 | ||
2609 | if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest), | |
2610 | toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930", | |
2611 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) | |
2612 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); | |
2613 | ||
2614 | if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest), | |
2615 | toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930", | |
2616 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) | |
2617 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); | |
2618 | } | |
2619 | #endif | |
2620 | ||
2621 | UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
2622 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
2623 | const char *mySubChar, int8_t len) | |
2624 | { | |
2625 | ||
2626 | ||
2627 | UErrorCode status = U_ZERO_ERROR; | |
2628 | UConverter *conv = 0; | |
2629 | char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2630 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
2631 | const UChar *src; | |
2632 | char *end; | |
2633 | char *targ; | |
2634 | int32_t *offs; | |
2635 | int i; | |
2636 | int32_t realBufferSize; | |
2637 | char *realBufferEnd; | |
2638 | const UChar *realSourceEnd; | |
2639 | const UChar *sourceLimit; | |
2640 | UBool checkOffsets = TRUE; | |
2641 | UBool doFlush; | |
2642 | char junk[9999]; | |
2643 | char offset_str[9999]; | |
2644 | char *p; | |
2645 | UConverterFromUCallback oldAction = NULL; | |
2646 | const void* oldContext = NULL; | |
2647 | ||
2648 | ||
2649 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2650 | junkout[i] = (char)0xF0; | |
2651 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2652 | junokout[i] = 0xFF; | |
2653 | setNuConvTestName(codepage, "FROM"); | |
2654 | ||
2655 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
2656 | gOutBufferSize); | |
2657 | ||
2658 | conv = ucnv_open(codepage, &status); | |
2659 | if(U_FAILURE(status)) | |
2660 | { | |
2661 | log_data_err("Couldn't open converter %s\n",codepage); | |
2662 | return TRUE; | |
2663 | } | |
2664 | ||
2665 | log_verbose("Converter opened..\n"); | |
2666 | ||
2667 | /*----setting the callback routine----*/ | |
2668 | ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2669 | if (U_FAILURE(status)) | |
2670 | { | |
2671 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2672 | } | |
2673 | /*------------------------*/ | |
2674 | /*setting the subChar*/ | |
2675 | if(mySubChar != NULL){ | |
2676 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2677 | if (U_FAILURE(status)) { | |
2678 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2679 | } | |
2680 | } | |
2681 | /*------------*/ | |
2682 | ||
2683 | src = source; | |
2684 | targ = junkout; | |
2685 | offs = junokout; | |
2686 | ||
2687 | realBufferSize = UPRV_LENGTHOF(junkout); | |
2688 | realBufferEnd = junkout + realBufferSize; | |
2689 | realSourceEnd = source + sourceLen; | |
2690 | ||
2691 | if ( gOutBufferSize != realBufferSize ) | |
2692 | checkOffsets = FALSE; | |
2693 | ||
2694 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2695 | checkOffsets = FALSE; | |
2696 | ||
2697 | do | |
2698 | { | |
2699 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
2700 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
2701 | ||
2702 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
2703 | ||
2704 | if(targ == realBufferEnd) | |
2705 | { | |
2706 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
2707 | return FALSE; | |
2708 | } | |
2709 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
2710 | ||
2711 | ||
2712 | status = U_ZERO_ERROR; | |
2713 | ||
2714 | ucnv_fromUnicode (conv, | |
2715 | (char **)&targ, | |
2716 | (const char *)end, | |
2717 | &src, | |
2718 | sourceLimit, | |
2719 | checkOffsets ? offs : NULL, | |
2720 | doFlush, /* flush if we're at the end of the input data */ | |
2721 | &status); | |
2722 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
2723 | ||
2724 | ||
2725 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2726 | UChar errChars[50]; /* should be sufficient */ | |
2727 | int8_t errLen = 50; | |
2728 | UErrorCode err = U_ZERO_ERROR; | |
2729 | const UChar* start= NULL; | |
2730 | ucnv_getInvalidUChars(conv,errChars, &errLen, &err); | |
2731 | if(U_FAILURE(err)){ | |
2732 | log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); | |
2733 | } | |
2734 | /* length of in invalid chars should be equal to returned length*/ | |
2735 | start = src - errLen; | |
2736 | if(u_strncmp(errChars,start,errLen)!=0){ | |
2737 | log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); | |
2738 | } | |
2739 | } | |
2740 | /* allow failure codes for the stop callback */ | |
2741 | if(U_FAILURE(status) && | |
2742 | (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) | |
2743 | { | |
2744 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2745 | return FALSE; | |
2746 | } | |
2747 | ||
2748 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
2749 | sourceLen, targ-junkout); | |
2750 | if(getTestOption(VERBOSITY_OPTION)) | |
2751 | { | |
2752 | ||
2753 | junk[0] = 0; | |
2754 | offset_str[0] = 0; | |
2755 | for(p = junkout;p<targ;p++) | |
2756 | { | |
2757 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
2758 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
2759 | } | |
2760 | ||
2761 | log_verbose(junk); | |
2762 | printSeq(expect, expectLen); | |
2763 | if ( checkOffsets ) | |
2764 | { | |
2765 | log_verbose("\nOffsets:"); | |
2766 | log_verbose(offset_str); | |
2767 | } | |
2768 | log_verbose("\n"); | |
2769 | } | |
2770 | ucnv_close(conv); | |
2771 | ||
2772 | ||
2773 | if(expectLen != targ-junkout) | |
2774 | { | |
2775 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
2776 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
2777 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
2778 | printSeqErr(expect, expectLen); | |
2779 | return FALSE; | |
2780 | } | |
2781 | ||
2782 | if (checkOffsets && (expectOffsets != 0) ) | |
2783 | { | |
2784 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
2785 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
2786 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2787 | log_err("Got Output : "); | |
2788 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
2789 | log_err("Got Offsets: "); | |
2790 | for(p=junkout;p<targ;p++) | |
2791 | log_err("%d,", junokout[p-junkout]); | |
2792 | log_err("\n"); | |
2793 | log_err("Expected Offsets: "); | |
2794 | for(i=0; i<(targ-junkout); i++) | |
2795 | log_err("%d,", expectOffsets[i]); | |
2796 | log_err("\n"); | |
2797 | return FALSE; | |
2798 | } | |
2799 | } | |
2800 | ||
2801 | if(!memcmp(junkout, expect, expectLen)) | |
2802 | { | |
2803 | log_verbose("String matches! %s\n", gNuConvTestName); | |
2804 | return TRUE; | |
2805 | } | |
2806 | else | |
2807 | { | |
2808 | log_err("String does not match. %s\n", gNuConvTestName); | |
2809 | log_err("source: "); | |
2810 | printUSeqErr(source, sourceLen); | |
2811 | log_err("Got: "); | |
2812 | printSeqErr((const uint8_t *)junkout, expectLen); | |
2813 | log_err("Expected: "); | |
2814 | printSeqErr(expect, expectLen); | |
2815 | return FALSE; | |
2816 | } | |
2817 | } | |
2818 | ||
2819 | UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
2820 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
2821 | const char *mySubChar, int8_t len) | |
2822 | { | |
2823 | UErrorCode status = U_ZERO_ERROR; | |
2824 | UConverter *conv = 0; | |
2825 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2826 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
2827 | const char *src; | |
2828 | const char *realSourceEnd; | |
2829 | const char *srcLimit; | |
2830 | UChar *targ; | |
2831 | UChar *end; | |
2832 | int32_t *offs; | |
2833 | int i; | |
2834 | UBool checkOffsets = TRUE; | |
2835 | char junk[9999]; | |
2836 | char offset_str[9999]; | |
2837 | UChar *p; | |
2838 | UConverterToUCallback oldAction = NULL; | |
2839 | const void* oldContext = NULL; | |
2840 | ||
2841 | int32_t realBufferSize; | |
2842 | UChar *realBufferEnd; | |
2843 | ||
2844 | ||
2845 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2846 | junkout[i] = 0xFFFE; | |
2847 | ||
2848 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2849 | junokout[i] = -1; | |
2850 | ||
2851 | setNuConvTestName(codepage, "TO"); | |
2852 | ||
2853 | log_verbose("\n========= %s\n", gNuConvTestName); | |
2854 | ||
2855 | conv = ucnv_open(codepage, &status); | |
2856 | if(U_FAILURE(status)) | |
2857 | { | |
2858 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
2859 | return TRUE; | |
2860 | } | |
2861 | ||
2862 | log_verbose("Converter opened..\n"); | |
2863 | ||
2864 | src = (const char *)source; | |
2865 | targ = junkout; | |
2866 | offs = junokout; | |
2867 | ||
2868 | realBufferSize = UPRV_LENGTHOF(junkout); | |
2869 | realBufferEnd = junkout + realBufferSize; | |
2870 | realSourceEnd = src + sourcelen; | |
2871 | /*----setting the callback routine----*/ | |
2872 | ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2873 | if (U_FAILURE(status)) | |
2874 | { | |
2875 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2876 | } | |
2877 | /*-------------------------------------*/ | |
2878 | /*setting the subChar*/ | |
2879 | if(mySubChar != NULL){ | |
2880 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2881 | if (U_FAILURE(status)) { | |
2882 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2883 | } | |
2884 | } | |
2885 | /*------------*/ | |
2886 | ||
2887 | ||
2888 | if ( gOutBufferSize != realBufferSize ) | |
2889 | checkOffsets = FALSE; | |
2890 | ||
2891 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2892 | checkOffsets = FALSE; | |
2893 | ||
2894 | do | |
2895 | { | |
2896 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
2897 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
2898 | ||
2899 | if(targ == realBufferEnd) | |
2900 | { | |
2901 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
2902 | return FALSE; | |
2903 | } | |
2904 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
2905 | ||
2906 | ||
2907 | ||
2908 | status = U_ZERO_ERROR; | |
2909 | ||
2910 | ucnv_toUnicode (conv, | |
2911 | &targ, | |
2912 | end, | |
2913 | (const char **)&src, | |
2914 | (const char *)srcLimit, | |
2915 | checkOffsets ? offs : NULL, | |
2916 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
2917 | &status); | |
2918 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
2919 | ||
2920 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2921 | char errChars[50]; /* should be sufficient */ | |
2922 | int8_t errLen = 50; | |
2923 | UErrorCode err = U_ZERO_ERROR; | |
2924 | const char* start= NULL; | |
2925 | ucnv_getInvalidChars(conv,errChars, &errLen, &err); | |
2926 | if(U_FAILURE(err)){ | |
2927 | log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); | |
2928 | } | |
2929 | /* length of in invalid chars should be equal to returned length*/ | |
2930 | start = src - errLen; | |
2931 | if(uprv_strncmp(errChars,start,errLen)!=0){ | |
2932 | log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); | |
2933 | } | |
2934 | } | |
2935 | /* allow failure codes for the stop callback */ | |
2936 | if(U_FAILURE(status) && | |
2937 | (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) | |
2938 | { | |
2939 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2940 | return FALSE; | |
2941 | } | |
2942 | ||
2943 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
2944 | sourcelen, targ-junkout); | |
2945 | if(getTestOption(VERBOSITY_OPTION)) | |
2946 | { | |
2947 | ||
2948 | junk[0] = 0; | |
2949 | offset_str[0] = 0; | |
2950 | ||
2951 | for(p = junkout;p<targ;p++) | |
2952 | { | |
2953 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
2954 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
2955 | } | |
2956 | ||
2957 | log_verbose(junk); | |
2958 | printUSeq(expect, expectlen); | |
2959 | if ( checkOffsets ) | |
2960 | { | |
2961 | log_verbose("\nOffsets:"); | |
2962 | log_verbose(offset_str); | |
2963 | } | |
2964 | log_verbose("\n"); | |
2965 | } | |
2966 | ucnv_close(conv); | |
2967 | ||
2968 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
2969 | ||
2970 | if (checkOffsets && (expectOffsets != 0)) | |
2971 | { | |
2972 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
2973 | { | |
2974 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2975 | log_err("Got offsets: "); | |
2976 | for(p=junkout;p<targ;p++) | |
2977 | log_err(" %2d,", junokout[p-junkout]); | |
2978 | log_err("\n"); | |
2979 | log_err("Expected offsets: "); | |
2980 | for(i=0; i<(targ-junkout); i++) | |
2981 | log_err(" %2d,", expectOffsets[i]); | |
2982 | log_err("\n"); | |
2983 | log_err("Got output: "); | |
2984 | for(i=0; i<(targ-junkout); i++) | |
2985 | log_err("0x%04x,", junkout[i]); | |
2986 | log_err("\n"); | |
2987 | log_err("From source: "); | |
2988 | for(i=0; i<(src-(const char *)source); i++) | |
2989 | log_err(" 0x%02x,", (unsigned char)source[i]); | |
2990 | log_err("\n"); | |
2991 | } | |
2992 | } | |
2993 | ||
2994 | if(!memcmp(junkout, expect, expectlen*2)) | |
2995 | { | |
2996 | log_verbose("Matches!\n"); | |
2997 | return TRUE; | |
2998 | } | |
2999 | else | |
3000 | { | |
3001 | log_err("String does not match. %s\n", gNuConvTestName); | |
3002 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3003 | log_err("Got: "); | |
3004 | printUSeqErr(junkout, expectlen); | |
3005 | log_err("Expected: "); | |
3006 | printUSeqErr(expect, expectlen); | |
3007 | log_err("\n"); | |
3008 | return FALSE; | |
3009 | } | |
3010 | } | |
3011 | ||
3012 | UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
3013 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
3014 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3015 | { | |
3016 | ||
3017 | ||
3018 | UErrorCode status = U_ZERO_ERROR; | |
3019 | UConverter *conv = 0; | |
3020 | char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3021 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
3022 | const UChar *src; | |
3023 | char *end; | |
3024 | char *targ; | |
3025 | int32_t *offs; | |
3026 | int i; | |
3027 | int32_t realBufferSize; | |
3028 | char *realBufferEnd; | |
3029 | const UChar *realSourceEnd; | |
3030 | const UChar *sourceLimit; | |
3031 | UBool checkOffsets = TRUE; | |
3032 | UBool doFlush; | |
3033 | char junk[9999]; | |
3034 | char offset_str[9999]; | |
3035 | char *p; | |
3036 | UConverterFromUCallback oldAction = NULL; | |
3037 | const void* oldContext = NULL; | |
3038 | ||
3039 | ||
3040 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3041 | junkout[i] = (char)0xF0; | |
3042 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3043 | junokout[i] = 0xFF; | |
3044 | setNuConvTestName(codepage, "FROM"); | |
3045 | ||
3046 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
3047 | gOutBufferSize); | |
3048 | ||
3049 | conv = ucnv_open(codepage, &status); | |
3050 | if(U_FAILURE(status)) | |
3051 | { | |
3052 | log_data_err("Couldn't open converter %s\n",codepage); | |
3053 | return TRUE; /* Because the err has already been logged. */ | |
3054 | } | |
3055 | ||
3056 | log_verbose("Converter opened..\n"); | |
3057 | ||
3058 | /*----setting the callback routine----*/ | |
3059 | ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3060 | if (U_FAILURE(status)) | |
3061 | { | |
3062 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3063 | } | |
3064 | /*------------------------*/ | |
3065 | /*setting the subChar*/ | |
3066 | if(mySubChar != NULL){ | |
3067 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3068 | if (U_FAILURE(status)) { | |
3069 | log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); | |
3070 | } | |
3071 | } | |
3072 | /*------------*/ | |
3073 | ||
3074 | src = source; | |
3075 | targ = junkout; | |
3076 | offs = junokout; | |
3077 | ||
3078 | realBufferSize = UPRV_LENGTHOF(junkout); | |
3079 | realBufferEnd = junkout + realBufferSize; | |
3080 | realSourceEnd = source + sourceLen; | |
3081 | ||
3082 | if ( gOutBufferSize != realBufferSize ) | |
3083 | checkOffsets = FALSE; | |
3084 | ||
3085 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3086 | checkOffsets = FALSE; | |
3087 | ||
3088 | do | |
3089 | { | |
3090 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
3091 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
3092 | ||
3093 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
3094 | ||
3095 | if(targ == realBufferEnd) | |
3096 | { | |
3097 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
3098 | return FALSE; | |
3099 | } | |
3100 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
3101 | ||
3102 | ||
3103 | status = U_ZERO_ERROR; | |
3104 | ||
3105 | ucnv_fromUnicode (conv, | |
3106 | (char **)&targ, | |
3107 | (const char *)end, | |
3108 | &src, | |
3109 | sourceLimit, | |
3110 | checkOffsets ? offs : NULL, | |
3111 | doFlush, /* flush if we're at the end of the input data */ | |
3112 | &status); | |
3113 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
3114 | ||
3115 | /* allow failure codes for the stop callback */ | |
3116 | if(U_FAILURE(status) && status != expectedError) | |
3117 | { | |
3118 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3119 | return FALSE; | |
3120 | } | |
3121 | ||
3122 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
3123 | sourceLen, targ-junkout); | |
3124 | if(getTestOption(VERBOSITY_OPTION)) | |
3125 | { | |
3126 | ||
3127 | junk[0] = 0; | |
3128 | offset_str[0] = 0; | |
3129 | for(p = junkout;p<targ;p++) | |
3130 | { | |
3131 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
3132 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
3133 | } | |
3134 | ||
3135 | log_verbose(junk); | |
3136 | printSeq(expect, expectLen); | |
3137 | if ( checkOffsets ) | |
3138 | { | |
3139 | log_verbose("\nOffsets:"); | |
3140 | log_verbose(offset_str); | |
3141 | } | |
3142 | log_verbose("\n"); | |
3143 | } | |
3144 | ucnv_close(conv); | |
3145 | ||
3146 | ||
3147 | if(expectLen != targ-junkout) | |
3148 | { | |
3149 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
3150 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
3151 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
3152 | printSeqErr(expect, expectLen); | |
3153 | return FALSE; | |
3154 | } | |
3155 | ||
3156 | if (checkOffsets && (expectOffsets != 0) ) | |
3157 | { | |
3158 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
3159 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
3160 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3161 | log_err("Got Output : "); | |
3162 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
3163 | log_err("Got Offsets: "); | |
3164 | for(p=junkout;p<targ;p++) | |
3165 | log_err("%d,", junokout[p-junkout]); | |
3166 | log_err("\n"); | |
3167 | log_err("Expected Offsets: "); | |
3168 | for(i=0; i<(targ-junkout); i++) | |
3169 | log_err("%d,", expectOffsets[i]); | |
3170 | log_err("\n"); | |
3171 | return FALSE; | |
3172 | } | |
3173 | } | |
3174 | ||
3175 | if(!memcmp(junkout, expect, expectLen)) | |
3176 | { | |
3177 | log_verbose("String matches! %s\n", gNuConvTestName); | |
3178 | return TRUE; | |
3179 | } | |
3180 | else | |
3181 | { | |
3182 | log_err("String does not match. %s\n", gNuConvTestName); | |
3183 | log_err("source: "); | |
3184 | printUSeqErr(source, sourceLen); | |
3185 | log_err("Got: "); | |
3186 | printSeqErr((const uint8_t *)junkout, expectLen); | |
3187 | log_err("Expected: "); | |
3188 | printSeqErr(expect, expectLen); | |
3189 | return FALSE; | |
3190 | } | |
3191 | } | |
3192 | UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
3193 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
3194 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3195 | { | |
3196 | UErrorCode status = U_ZERO_ERROR; | |
3197 | UConverter *conv = 0; | |
3198 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3199 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
3200 | const char *src; | |
3201 | const char *realSourceEnd; | |
3202 | const char *srcLimit; | |
3203 | UChar *targ; | |
3204 | UChar *end; | |
3205 | int32_t *offs; | |
3206 | int i; | |
3207 | UBool checkOffsets = TRUE; | |
3208 | char junk[9999]; | |
3209 | char offset_str[9999]; | |
3210 | UChar *p; | |
3211 | UConverterToUCallback oldAction = NULL; | |
3212 | const void* oldContext = NULL; | |
3213 | ||
3214 | int32_t realBufferSize; | |
3215 | UChar *realBufferEnd; | |
3216 | ||
3217 | ||
3218 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3219 | junkout[i] = 0xFFFE; | |
3220 | ||
3221 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3222 | junokout[i] = -1; | |
3223 | ||
3224 | setNuConvTestName(codepage, "TO"); | |
3225 | ||
3226 | log_verbose("\n========= %s\n", gNuConvTestName); | |
3227 | ||
3228 | conv = ucnv_open(codepage, &status); | |
3229 | if(U_FAILURE(status)) | |
3230 | { | |
3231 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
3232 | return TRUE; | |
3233 | } | |
3234 | ||
3235 | log_verbose("Converter opened..\n"); | |
3236 | ||
3237 | src = (const char *)source; | |
3238 | targ = junkout; | |
3239 | offs = junokout; | |
3240 | ||
3241 | realBufferSize = UPRV_LENGTHOF(junkout); | |
3242 | realBufferEnd = junkout + realBufferSize; | |
3243 | realSourceEnd = src + sourcelen; | |
3244 | /*----setting the callback routine----*/ | |
3245 | ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3246 | if (U_FAILURE(status)) | |
3247 | { | |
3248 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3249 | } | |
3250 | /*-------------------------------------*/ | |
3251 | /*setting the subChar*/ | |
3252 | if(mySubChar != NULL){ | |
3253 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3254 | if (U_FAILURE(status)) { | |
3255 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3256 | } | |
3257 | } | |
3258 | /*------------*/ | |
3259 | ||
3260 | ||
3261 | if ( gOutBufferSize != realBufferSize ) | |
3262 | checkOffsets = FALSE; | |
3263 | ||
3264 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3265 | checkOffsets = FALSE; | |
3266 | ||
3267 | do | |
3268 | { | |
3269 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
3270 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
3271 | ||
3272 | if(targ == realBufferEnd) | |
3273 | { | |
3274 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
3275 | return FALSE; | |
3276 | } | |
3277 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
3278 | ||
3279 | ||
3280 | ||
3281 | status = U_ZERO_ERROR; | |
3282 | ||
3283 | ucnv_toUnicode (conv, | |
3284 | &targ, | |
3285 | end, | |
3286 | (const char **)&src, | |
3287 | (const char *)srcLimit, | |
3288 | checkOffsets ? offs : NULL, | |
3289 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
3290 | &status); | |
3291 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
3292 | ||
3293 | /* allow failure codes for the stop callback */ | |
3294 | if(U_FAILURE(status) && status!=expectedError) | |
3295 | { | |
3296 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3297 | return FALSE; | |
3298 | } | |
3299 | ||
3300 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
3301 | sourcelen, targ-junkout); | |
3302 | if(getTestOption(VERBOSITY_OPTION)) | |
3303 | { | |
3304 | ||
3305 | junk[0] = 0; | |
3306 | offset_str[0] = 0; | |
3307 | ||
3308 | for(p = junkout;p<targ;p++) | |
3309 | { | |
3310 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
3311 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
3312 | } | |
3313 | ||
3314 | log_verbose(junk); | |
3315 | printUSeq(expect, expectlen); | |
3316 | if ( checkOffsets ) | |
3317 | { | |
3318 | log_verbose("\nOffsets:"); | |
3319 | log_verbose(offset_str); | |
3320 | } | |
3321 | log_verbose("\n"); | |
3322 | } | |
3323 | ucnv_close(conv); | |
3324 | ||
3325 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
3326 | ||
3327 | if (checkOffsets && (expectOffsets != 0)) | |
3328 | { | |
3329 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
3330 | { | |
3331 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3332 | log_err("Got offsets: "); | |
3333 | for(p=junkout;p<targ;p++) | |
3334 | log_err(" %2d,", junokout[p-junkout]); | |
3335 | log_err("\n"); | |
3336 | log_err("Expected offsets: "); | |
3337 | for(i=0; i<(targ-junkout); i++) | |
3338 | log_err(" %2d,", expectOffsets[i]); | |
3339 | log_err("\n"); | |
3340 | log_err("Got output: "); | |
3341 | for(i=0; i<(targ-junkout); i++) | |
3342 | log_err("0x%04x,", junkout[i]); | |
3343 | log_err("\n"); | |
3344 | log_err("From source: "); | |
3345 | for(i=0; i<(src-(const char *)source); i++) | |
3346 | log_err(" 0x%02x,", (unsigned char)source[i]); | |
3347 | log_err("\n"); | |
3348 | } | |
3349 | } | |
3350 | ||
3351 | if(!memcmp(junkout, expect, expectlen*2)) | |
3352 | { | |
3353 | log_verbose("Matches!\n"); | |
3354 | return TRUE; | |
3355 | } | |
3356 | else | |
3357 | { | |
3358 | log_err("String does not match. %s\n", gNuConvTestName); | |
3359 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3360 | log_err("Got: "); | |
3361 | printUSeqErr(junkout, expectlen); | |
3362 | log_err("Expected: "); | |
3363 | printUSeqErr(expect, expectlen); | |
3364 | log_err("\n"); | |
3365 | return FALSE; | |
3366 | } | |
3367 | } | |
3368 | ||
3369 | static void TestCallBackFailure(void) { | |
3370 | UErrorCode status = U_USELESS_COLLATOR_ERROR; | |
3371 | ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); | |
3372 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3373 | log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); | |
3374 | } | |
3375 | ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); | |
3376 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3377 | log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3378 | } | |
3379 | ucnv_cbFromUWriteSub(NULL, -1, &status); | |
3380 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3381 | log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); | |
3382 | } | |
3383 | ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); | |
3384 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3385 | log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3386 | } | |
3387 | } |