]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
46f4442e | 3 | * Copyright (c) 1997-2008, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | /* | |
73c04bcf | 7 | ******************************************************************************** |
b75a7d8f A |
8 | * File NCCBTST.C |
9 | * | |
10 | * Modification History: | |
11 | * Name Description | |
12 | * Madhu Katragadda 7/21/1999 Testing error callback routines | |
73c04bcf | 13 | ******************************************************************************** |
b75a7d8f A |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <string.h> | |
18 | #include <ctype.h> | |
19 | #include "cstring.h" | |
20 | #include "unicode/uloc.h" | |
21 | #include "unicode/ucnv.h" | |
22 | #include "unicode/ucnv_err.h" | |
23 | #include "cintltst.h" | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/ustring.h" | |
26 | #include "nccbtst.h" | |
73c04bcf | 27 | #include "unicode/ucnv_cb.h" |
b75a7d8f A |
/* Large buffer size handed to the Test* drivers; the wrappers below pair it with 1. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Both arguments and the whole expansion are parenthesized so that operands
 * containing low-precedence operators (e.g. nct_min(a|b, c)) compare and
 * yield the intended values. Note: each argument may be evaluated twice.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Element count of a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the test currently running; reported by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;

/* Description of the current test, formatted by setNuConvTestName(). */
static char gNuConvTestName[1024];
36 | ||
/*
 * Write the first `len` bytes of `a` through log_verbose() as a brace-enclosed
 * list of two-digit uppercase hex values, preceded by a newline.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
45 | ||
46 | static void printUSeq(const UChar* a, int len) | |
47 | { | |
48 | int i=0; | |
49 | log_verbose("{"); | |
50 | while (i<len) | |
51 | log_verbose(" 0x%04x, ", a[i++]); | |
52 | log_verbose("}\n"); | |
53 | } | |
54 | ||
/*
 * Write the first `len` bytes of `a` to stderr as a brace-enclosed list of
 * two-digit lowercase hex values.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
63 | ||
64 | static void printUSeqErr(const UChar* a, int len) | |
65 | { | |
66 | int i=0; | |
67 | fprintf(stderr, "{"); | |
68 | while (i<len) | |
69 | fprintf(stderr, "0x%04x, ", a[i++]); | |
70 | fprintf(stderr,"}\n"); | |
71 | } | |
72 | ||
73 | static void setNuConvTestName(const char *codepage, const char *direction) | |
74 | { | |
75 | sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", | |
76 | codepage, | |
77 | direction, | |
374ca955 A |
78 | (int)gInBufferSize, |
79 | (int)gOutBufferSize); | |
b75a7d8f A |
80 | } |
81 | ||
82 | ||
73c04bcf A |
83 | static void TestCallBackFailure(void); |
84 | ||
b75a7d8f A |
85 | void addTestConvertErrorCallBack(TestNode** root); |
86 | ||
87 | void addTestConvertErrorCallBack(TestNode** root) | |
88 | { | |
89 | addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); | |
90 | addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); | |
91 | addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); | |
92 | addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); | |
73c04bcf A |
93 | |
94 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
95 | addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); |
96 | addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); | |
73c04bcf A |
97 | #endif |
98 | ||
99 | addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); | |
b75a7d8f A |
100 | } |
101 | ||
102 | static void TestSkipCallBack() | |
103 | { | |
104 | TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
105 | TestSkip(1,NEW_MAX_BUFFER); | |
106 | TestSkip(1,1); | |
107 | TestSkip(NEW_MAX_BUFFER, 1); | |
108 | } | |
109 | ||
110 | static void TestStopCallBack() | |
111 | { | |
112 | TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
113 | TestStop(1,NEW_MAX_BUFFER); | |
114 | TestStop(1,1); | |
115 | TestStop(NEW_MAX_BUFFER, 1); | |
116 | } | |
117 | ||
118 | static void TestSubCallBack() | |
119 | { | |
120 | TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
121 | TestSub(1,NEW_MAX_BUFFER); | |
122 | TestSub(1,1); | |
123 | TestSub(NEW_MAX_BUFFER, 1); | |
73c04bcf A |
124 | |
125 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
126 | TestEBCDIC_STATEFUL_Sub(1, 1); |
127 | TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); | |
128 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); | |
129 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
73c04bcf | 130 | #endif |
b75a7d8f A |
131 | } |
132 | ||
133 | static void TestSubWithValueCallBack() | |
134 | { | |
135 | TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
136 | TestSubWithValue(1,NEW_MAX_BUFFER); | |
137 | TestSubWithValue(1,1); | |
138 | TestSubWithValue(NEW_MAX_BUFFER, 1); | |
139 | } | |
140 | ||
73c04bcf | 141 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
142 | static void TestLegalAndOtherCallBack() |
143 | { | |
144 | TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
145 | TestLegalAndOthers(1,NEW_MAX_BUFFER); | |
146 | TestLegalAndOthers(1,1); | |
147 | TestLegalAndOthers(NEW_MAX_BUFFER, 1); | |
148 | } | |
149 | ||
150 | static void TestSingleByteCallBack() | |
151 | { | |
152 | TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
153 | TestSingleByte(1,NEW_MAX_BUFFER); | |
154 | TestSingleByte(1,1); | |
155 | TestSingleByte(NEW_MAX_BUFFER, 1); | |
156 | } | |
73c04bcf | 157 | #endif |
b75a7d8f A |
158 | |
159 | static void TestSkip(int32_t inputsize, int32_t outputsize) | |
160 | { | |
161 | static const uint8_t expskipIBM_949[]= { | |
162 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
163 | ||
164 | static const uint8_t expskipIBM_943[] = { | |
165 | 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; | |
166 | ||
167 | static const uint8_t expskipIBM_930[] = { | |
168 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; | |
169 | ||
170 | gInBufferSize = inputsize; | |
171 | gOutBufferSize = outputsize; | |
172 | ||
173 | /*From Unicode*/ | |
174 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); | |
175 | ||
73c04bcf | 176 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
177 | { |
178 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
179 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
180 | ||
181 | static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; | |
182 | static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; | |
b75a7d8f A |
183 | |
184 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
185 | expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", | |
186 | UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) | |
187 | log_err("u-> ibm-949 with skip did not match.\n"); | |
188 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
189 | expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", | |
190 | UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) | |
191 | log_err("u-> ibm-943 with skip did not match.\n"); | |
b75a7d8f A |
192 | } |
193 | ||
194 | { | |
195 | static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; | |
196 | static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; | |
197 | static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; | |
198 | ||
199 | /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ | |
200 | if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, | |
201 | fromUBytes, sizeof(fromUBytes), | |
202 | "ibm-930", | |
203 | UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, | |
204 | NULL, 0) | |
205 | ) { | |
206 | log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); | |
207 | } | |
208 | } | |
73c04bcf | 209 | #endif |
b75a7d8f A |
210 | |
211 | { | |
212 | static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
213 | static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; | |
214 | static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; | |
215 | ||
216 | static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
217 | static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; | |
218 | static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; | |
219 | ||
220 | /* US-ASCII */ | |
221 | if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, | |
222 | usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
223 | "US-ASCII", | |
224 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
225 | NULL, 0) | |
226 | ) { | |
227 | log_err("u->US-ASCII with skip did not match.\n"); | |
228 | } | |
229 | ||
73c04bcf | 230 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
231 | /* SBCS NLTC codepage 367 for US-ASCII */ |
232 | if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, | |
233 | usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
234 | "ibm-367", | |
235 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
236 | NULL, 0) | |
237 | ) { | |
238 | log_err("u->ibm-367 with skip did not match.\n"); | |
239 | } | |
73c04bcf | 240 | #endif |
b75a7d8f A |
241 | |
242 | /* ISO-Latin-1 */ | |
243 | if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, | |
244 | latin1FromUBytes, sizeof(latin1FromUBytes), | |
245 | "LATIN_1", | |
246 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
247 | NULL, 0) | |
248 | ) { | |
249 | log_err("u->LATIN_1 with skip did not match.\n"); | |
250 | } | |
251 | ||
73c04bcf | 252 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
253 | /* windows-1252 */ |
254 | if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, | |
255 | latin1FromUBytes, sizeof(latin1FromUBytes), | |
256 | "windows-1252", | |
257 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
258 | NULL, 0) | |
259 | ) { | |
260 | log_err("u->windows-1252 with skip did not match.\n"); | |
261 | } | |
262 | } | |
263 | ||
264 | { | |
265 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
266 | static const uint8_t toIBM943[]= { 0x61, 0x61 }; | |
267 | static const int32_t offset[]= {0, 4}; | |
268 | ||
269 | /* EUC_JP*/ | |
270 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
271 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
272 | 0x61, 0x8e, 0xe0, | |
273 | }; | |
274 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; | |
275 | ||
276 | /*EUC_TW*/ | |
277 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
278 | static const uint8_t to_euc_tw[]={ | |
279 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
280 | 0x61, 0xe6, 0xca, 0x8a, | |
281 | }; | |
282 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; | |
283 | ||
284 | /*ISO-2022-JP*/ | |
285 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; | |
286 | static const uint8_t to_iso_2022_jp[]={ | |
287 | 0x41, | |
288 | 0x42, | |
289 | ||
290 | }; | |
291 | static const int32_t from_iso_2022_jpOffs [] ={0,2}; | |
292 | ||
b75a7d8f A |
293 | /*ISO-2022-JP*/ |
294 | UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
295 | static const uint8_t to_iso_2022_jp2[]={ | |
296 | 0x41, | |
297 | 0x43, | |
298 | ||
299 | }; | |
300 | static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; | |
301 | ||
302 | /*ISO-2022-cn*/ | |
303 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
304 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 | 305 | 0x41, 0x42 |
b75a7d8f A |
306 | }; |
307 | static const int32_t from_iso_2022_cnOffs [] ={ | |
374ca955 | 308 | 0, 2 |
b75a7d8f A |
309 | }; |
310 | ||
311 | /*ISO-2022-CN*/ | |
312 | static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
313 | static const uint8_t to_iso_2022_cn1[]={ | |
374ca955 | 314 | 0x41, 0x43 |
b75a7d8f A |
315 | |
316 | }; | |
374ca955 | 317 | static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; |
b75a7d8f A |
318 | |
319 | /*ISO-2022-kr*/ | |
320 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
321 | static const uint8_t to_iso_2022_kr[]={ | |
322 | 0x1b, 0x24, 0x29, 0x43, | |
323 | 0x41, | |
324 | 0x0e, 0x25, 0x50, | |
325 | 0x25, 0x50, | |
326 | 0x0f, 0x42, | |
327 | }; | |
328 | static const int32_t from_iso_2022_krOffs [] ={ | |
329 | -1,-1,-1,-1, | |
330 | 0, | |
331 | 1,1,1, | |
332 | 3,3, | |
333 | 4,4 | |
334 | }; | |
335 | ||
336 | /*ISO-2022-kr*/ | |
337 | static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
338 | static const uint8_t to_iso_2022_kr1[]={ | |
339 | 0x1b, 0x24, 0x29, 0x43, | |
340 | 0x41, | |
341 | 0x0e, 0x25, 0x50, | |
342 | 0x25, 0x50, | |
343 | ||
344 | }; | |
345 | static const int32_t from_iso_2022_krOffs1 [] ={ | |
346 | -1,-1,-1,-1, | |
347 | 0, | |
348 | 1,1,1, | |
349 | 3,3, | |
350 | ||
351 | }; | |
352 | /* HZ encoding */ | |
353 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
354 | ||
355 | static const uint8_t to_hz[]={ | |
356 | 0x7e, 0x7d, 0x41, | |
357 | 0x7e, 0x7b, 0x26, 0x30, | |
358 | 0x26, 0x30, | |
359 | 0x7e, 0x7d, 0x42, | |
360 | ||
361 | }; | |
362 | static const int32_t from_hzOffs [] ={ | |
363 | 0,0,0, | |
364 | 1,1,1,1, | |
365 | 3,3, | |
366 | 4,4,4,4 | |
367 | }; | |
368 | ||
369 | static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
370 | ||
371 | static const uint8_t to_hz1[]={ | |
372 | 0x7e, 0x7d, 0x41, | |
373 | 0x7e, 0x7b, 0x26, 0x30, | |
374 | 0x26, 0x30, | |
375 | ||
376 | ||
377 | }; | |
378 | static const int32_t from_hzOffs1 [] ={ | |
379 | 0,0,0, | |
380 | 1,1,1,1, | |
381 | 3,3, | |
382 | ||
383 | }; | |
384 | ||
73c04bcf | 385 | #endif |
b75a7d8f A |
386 | |
387 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
388 | ||
389 | static const uint8_t to_SCSU[]={ | |
390 | 0x41, | |
391 | 0x42 | |
392 | ||
393 | ||
394 | }; | |
395 | static const int32_t from_SCSUOffs [] ={ | |
396 | 0, | |
397 | 2, | |
398 | ||
399 | }; | |
73c04bcf A |
400 | |
401 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
402 | /* ISCII */ |
403 | static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
404 | static const uint8_t to_iscii[]={ | |
405 | 0x41, | |
406 | 0x42, | |
407 | }; | |
408 | static const int32_t from_isciiOffs [] ={ | |
409 | 0,2, | |
410 | ||
411 | }; | |
412 | /*ISCII*/ | |
413 | static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
414 | static const uint8_t to_iscii1[]={ | |
415 | 0x44, | |
416 | 0x43, | |
417 | ||
418 | }; | |
419 | static const int32_t from_isciiOffs1 [] ={0,2}; | |
420 | ||
421 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
422 | toIBM943, sizeof(toIBM943), "ibm-943", | |
423 | UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) | |
424 | log_err("u-> ibm-943 with skip did not match.\n"); | |
425 | ||
426 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
427 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
428 | UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) | |
429 | log_err("u-> euc-jp with skip did not match.\n"); | |
430 | ||
431 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
432 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
433 | UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) | |
434 | log_err("u-> euc-tw with skip did not match.\n"); | |
435 | ||
436 | /*iso_2022_jp*/ | |
437 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
438 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
439 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) | |
440 | log_err("u-> iso-2022-jp with skip did not match.\n"); | |
441 | ||
b75a7d8f A |
442 | /* with context */ |
443 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
444 | to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
445 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
446 | log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
447 | ||
448 | /*iso_2022_cn*/ | |
449 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
450 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
451 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) | |
452 | log_err("u-> iso-2022-cn with skip did not match.\n"); | |
453 | /*with context*/ | |
454 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), | |
455 | to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", | |
456 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
457 | log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
458 | ||
459 | /*iso_2022_kr*/ | |
460 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
461 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
462 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) | |
463 | log_err("u-> iso-2022-kr with skip did not match.\n"); | |
464 | /*with context*/ | |
465 | if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), | |
466 | to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", | |
467 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
468 | log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
469 | ||
470 | /*hz*/ | |
471 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
472 | to_hz, sizeof(to_hz), "HZ", | |
473 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) | |
474 | log_err("u-> HZ with skip did not match.\n"); | |
475 | /*with context*/ | |
476 | if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), | |
477 | to_hz1, sizeof(to_hz1), "hz", | |
478 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
73c04bcf A |
479 | log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); |
480 | #endif | |
b75a7d8f A |
481 | |
482 | /*SCSU*/ | |
483 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
484 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
485 | UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) | |
486 | log_err("u-> SCSU with skip did not match.\n"); | |
487 | ||
73c04bcf | 488 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
489 | /*ISCII*/ |
490 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
491 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
492 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) | |
493 | log_err("u-> iscii with skip did not match.\n"); | |
494 | /*with context*/ | |
495 | if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), | |
496 | to_iscii1, sizeof(to_iscii1), "ISCII,version=0", | |
497 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
498 | log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
73c04bcf | 499 | #endif |
b75a7d8f A |
500 | } |
501 | ||
502 | log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
503 | { | |
504 | static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
505 | 0xFB, 0xEE, 0x28, /* from source offset 0 */ | |
506 | 0x24, 0x1E, 0x52, | |
507 | 0xB2, | |
508 | 0x20, | |
509 | 0xB3, | |
510 | 0xB1, | |
511 | 0x0D, | |
512 | 0x0A, | |
513 | ||
514 | 0x20, /* from 8 */ | |
515 | 0x00, | |
516 | 0xD0, 0x6C, | |
517 | 0xB6, | |
518 | 0xD8, 0xA5, | |
519 | 0x20, | |
520 | 0x68, | |
521 | 0x59, | |
522 | ||
523 | 0xF9, 0x28, /* from 16 */ | |
524 | 0x6D, | |
525 | 0x20, | |
526 | 0x73, | |
527 | 0xE0, 0x2D, | |
528 | 0xDE, 0x43, | |
529 | 0xD0, 0x33, | |
530 | 0x20, | |
531 | ||
532 | 0xFA, 0x83, /* from 24 */ | |
533 | 0x25, 0x01, | |
534 | 0xFB, 0x16, 0x87, | |
535 | 0x4B, 0x16, | |
536 | 0x20, | |
537 | 0xE6, 0xBD, | |
538 | 0xEB, 0x5B, | |
539 | 0x4B, 0xCC, | |
540 | ||
541 | 0xF9, 0xA2, /* from 32 */ | |
542 | 0xFC, 0x10, 0x3E, | |
543 | 0xFE, 0x16, 0x3A, 0x8C, | |
544 | 0x20, | |
545 | 0xFC, 0x03, 0xAC, | |
546 | ||
547 | 0x01, /* from 41 */ | |
548 | 0xDE, 0x83, | |
549 | 0x20, | |
550 | 0x09 | |
551 | }; | |
552 | static const UChar expected[]={ | |
553 | 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ | |
554 | 0x0063, 0x0061, 0x000D, 0x000A, | |
555 | ||
556 | 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ | |
557 | 0x0930, 0x0020, 0x0918, 0x0909, | |
558 | ||
559 | 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ | |
560 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
561 | ||
562 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ | |
563 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
564 | ||
565 | 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ | |
566 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
567 | ||
568 | 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ | |
569 | 0x0009 | |
570 | }; | |
571 | static const int32_t offsets[]={ | |
572 | 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, | |
573 | 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, | |
574 | 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, | |
575 | 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, | |
576 | 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, | |
577 | 41, 42, 42, 43, 44 | |
578 | }; | |
579 | ||
580 | /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ | |
581 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
582 | sampleText, sizeof(sampleText), | |
583 | "BOCU-1", | |
584 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
585 | ) { | |
586 | log_err("u->BOCU-1 with skip did not match.\n"); | |
587 | } | |
588 | } | |
589 | ||
590 | log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
591 | { | |
592 | const uint8_t sampleText[]={ | |
593 | 0x61, /* 'a' */ | |
594 | 0xc4, 0xb5, /* U+0135 */ | |
595 | 0xed, 0x80, 0xa0, /* Hangul U+d020 */ | |
596 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ | |
597 | 0xee, 0x80, 0x80, /* PUA U+e000 */ | |
598 | 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ | |
599 | 0x62, /* 'b' */ | |
600 | 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ | |
601 | 0xd0, 0x80 /* U+0400 */ | |
602 | }; | |
603 | UChar expected[]={ | |
604 | 0x0061, | |
605 | 0x0135, | |
606 | 0xd020, | |
607 | 0xd801, 0xdc01, | |
608 | 0xe000, | |
609 | 0xdc01, | |
610 | 0x0062, | |
611 | 0xd801, | |
612 | 0x0400 | |
613 | }; | |
614 | int32_t offsets[]={ | |
615 | 0, | |
616 | 1, 1, | |
617 | 2, 2, 2, | |
618 | 3, 3, 3, 4, 4, 4, | |
619 | 5, 5, 5, | |
620 | 6, 6, 6, | |
621 | 7, | |
622 | 8, 8, 8, | |
623 | 9, 9 | |
624 | }; | |
625 | ||
626 | /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ | |
627 | ||
628 | /* without offsets */ | |
629 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
630 | sampleText, sizeof(sampleText), | |
631 | "CESU-8", | |
632 | UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) | |
633 | ) { | |
634 | log_err("u->CESU-8 with skip did not match.\n"); | |
635 | } | |
636 | ||
637 | /* with offsets */ | |
638 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
639 | sampleText, sizeof(sampleText), | |
640 | "CESU-8", | |
641 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
642 | ) { | |
643 | log_err("u->CESU-8 with skip did not match.\n"); | |
644 | } | |
645 | } | |
646 | ||
647 | /*to Unicode*/ | |
648 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); | |
649 | ||
73c04bcf | 650 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
651 | { |
652 | ||
653 | static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
654 | static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
655 | static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
656 | ||
657 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; | |
658 | static const int32_t fromIBM943Offs [] = { 0, 2, 4}; | |
659 | static const int32_t fromIBM930Offs [] = { 1, 3, 5}; | |
660 | ||
661 | if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), | |
662 | IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", | |
663 | UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) | |
664 | log_err("ibm-949->u with skip did not match.\n"); | |
665 | if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), | |
666 | IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", | |
667 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) | |
668 | log_err("ibm-943->u with skip did not match.\n"); | |
669 | ||
670 | ||
671 | if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), | |
672 | IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", | |
673 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) | |
674 | log_err("ibm-930->u with skip did not match.\n"); | |
675 | ||
676 | ||
677 | if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), | |
678 | IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", | |
679 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
680 | log_err("ibm-930->u with skip did not match.\n"); | |
681 | } | |
73c04bcf | 682 | #endif |
b75a7d8f A |
683 | |
684 | { | |
685 | static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; | |
686 | static const UChar usasciiToU[] = { 0x61, 0x31 }; | |
687 | static const int32_t usasciiToUOffsets[] = { 0, 2 }; | |
688 | ||
689 | static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; | |
690 | static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; | |
691 | static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; | |
692 | ||
693 | /* US-ASCII */ | |
694 | if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
695 | usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
696 | "US-ASCII", | |
697 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
698 | NULL, 0) | |
699 | ) { | |
700 | log_err("US-ASCII->u with skip did not match.\n"); | |
701 | } | |
702 | ||
73c04bcf | 703 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
704 | /* SBCS NLTC codepage 367 for US-ASCII */ |
705 | if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
706 | usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
707 | "ibm-367", | |
708 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
709 | NULL, 0) | |
710 | ) { | |
711 | log_err("ibm-367->u with skip did not match.\n"); | |
712 | } | |
73c04bcf | 713 | #endif |
b75a7d8f A |
714 | |
715 | /* ISO-Latin-1 */ | |
716 | if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
717 | latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
718 | "LATIN_1", | |
719 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
720 | NULL, 0) | |
721 | ) { | |
722 | log_err("LATIN_1->u with skip did not match.\n"); | |
723 | } | |
724 | ||
73c04bcf | 725 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
726 | /* windows-1252 */ |
727 | if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
728 | latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
729 | "windows-1252", | |
730 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
731 | NULL, 0) | |
732 | ) { | |
733 | log_err("windows-1252->u with skip did not match.\n"); | |
734 | } | |
73c04bcf | 735 | #endif |
b75a7d8f A |
736 | } |
737 | ||
73c04bcf | 738 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
739 | { |
740 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
741 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
742 | }; | |
743 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 | |
744 | }; | |
745 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; | |
746 | ||
747 | ||
748 | /* euc-jp*/ | |
749 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
750 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
751 | 0x8e, 0xe0, | |
752 | }; | |
753 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; | |
754 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; | |
755 | ||
756 | /*EUC_TW*/ | |
757 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
758 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
759 | 0xe6, 0xca, 0x8a, | |
760 | }; | |
761 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; | |
762 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; | |
763 | /*iso-2022-jp*/ | |
764 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
765 | 0x41, | |
766 | 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ | |
767 | 0x1b, 0x28, 0x42, 0x42, | |
768 | ||
769 | }; | |
770 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; | |
771 | static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; | |
772 | ||
773 | /*iso-2022-cn*/ | |
774 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
775 | 0x0f, 0x41, 0x44, | |
776 | 0x1B, 0x24, 0x29, 0x47, | |
777 | 0x0E, 0x40, 0x6f, /*unassigned*/ | |
778 | 0x0f, 0x42, | |
779 | ||
780 | }; | |
781 | ||
782 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; | |
783 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; | |
784 | ||
785 | /*iso-2022-kr*/ | |
786 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
787 | 0x1b, 0x24, 0x29, 0x43, | |
788 | 0x41, | |
789 | 0x0E, 0x7f, 0x1E, | |
790 | 0x0e, 0x25, 0x50, | |
791 | 0x0f, 0x51, | |
792 | 0x42, 0x43, | |
793 | ||
794 | }; | |
795 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; | |
796 | static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; | |
797 | ||
798 | /*hz*/ | |
799 | static const uint8_t sampleTxt_hz[]={ | |
800 | 0x41, | |
801 | 0x7e, 0x7b, 0x26, 0x30, | |
802 | 0x7f, 0x1E, /*unassigned*/ | |
803 | 0x26, 0x30, | |
804 | 0x7e, 0x7d, 0x42, | |
805 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
806 | 0x7e, 0x7d, 0x42, | |
807 | }; | |
808 | static const UChar hztoUnicode[]={ | |
809 | 0x41, | |
810 | 0x03a0, | |
811 | 0x03A0, | |
812 | 0x42, | |
813 | 0x42,}; | |
814 | ||
815 | static const int32_t from_hzOffs [] ={0,3,7,11,18, }; | |
816 | ||
817 | /*ISCII*/ | |
818 | static const uint8_t sampleTxt_iscii[]={ | |
819 | 0x41, | |
820 | 0xa1, | |
821 | 0xEB, /*unassigned*/ | |
822 | 0x26, | |
823 | 0x30, | |
824 | 0xa2, | |
825 | 0xEC, /*unassigned*/ | |
826 | 0x42, | |
827 | }; | |
828 | static const UChar isciitoUnicode[]={ | |
829 | 0x41, | |
830 | 0x0901, | |
831 | 0x26, | |
832 | 0x30, | |
833 | 0x0902, | |
834 | 0x42, | |
835 | }; | |
836 | ||
837 | static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; | |
838 | ||
839 | /*LMBCS*/ | |
840 | static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, | |
841 | 0x12, 0x92, 0xa0, /*unassigned*/ | |
842 | 0x12, 0x92, 0xA1, | |
843 | }; | |
844 | static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; | |
845 | static const int32_t fromLMBCS[] = {0, 6}; | |
846 | ||
847 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
848 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
849 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
850 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
851 | ||
852 | if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
853 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
854 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
855 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
856 | ||
857 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
858 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
859 | UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) | |
860 | log_err("euc-jp->u with skip did not match.\n"); | |
861 | ||
862 | ||
863 | ||
864 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
865 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
866 | UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) | |
867 | log_err("euc-tw->u with skip did not match.\n"); | |
868 | ||
869 | ||
870 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
871 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
872 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) | |
873 | log_err("iso-2022-jp->u with skip did not match.\n"); | |
874 | ||
875 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), | |
876 | iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", | |
877 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) | |
878 | log_err("iso-2022-cn->u with skip did not match.\n"); | |
879 | ||
880 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), | |
881 | iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", | |
882 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) | |
883 | log_err("iso-2022-kr->u with skip did not match.\n"); | |
884 | ||
885 | if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
886 | hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
887 | UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) | |
888 | log_err("HZ->u with skip did not match.\n"); | |
889 | ||
890 | if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
891 | isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", | |
892 | UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) | |
893 | log_err("iscii->u with skip did not match.\n"); | |
894 | ||
895 | if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), | |
896 | LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", | |
897 | UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) | |
898 | log_err("LMBCS->u with skip did not match.\n"); | |
899 | ||
900 | } | |
73c04bcf A |
901 | #endif |
902 | ||
b75a7d8f A |
903 | log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); |
904 | { | |
905 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
906 | 0xe0, 0x80, 0x61,}; | |
907 | UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; | |
908 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; | |
909 | ||
910 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
911 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
912 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
913 | log_err("utf8->u with skip did not match.\n");; | |
914 | } | |
915 | ||
916 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); | |
917 | { | |
918 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
919 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; | |
920 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
921 | ||
922 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
923 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
924 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
925 | log_err("scsu->u with skip did not match.\n"); | |
926 | } | |
927 | ||
928 | log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
929 | { | |
930 | const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
931 | 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ | |
932 | 0x24, 0x1E, 0x52, /* 3 */ | |
933 | 0xB2, /* 6 */ | |
934 | 0x20, /* 7 */ | |
935 | 0x40, 0x07, /* 8 - wrong trail byte */ | |
936 | 0xB3, /* 10 */ | |
937 | 0xB1, /* 11 */ | |
938 | 0xD0, 0x20, /* 12 - wrong trail byte */ | |
939 | 0x0D, /* 14 */ | |
940 | 0x0A, /* 15 */ | |
941 | 0x20, /* 16 */ | |
942 | 0x00, /* 17 */ | |
943 | 0xD0, 0x6C, /* 18 */ | |
944 | 0xB6, /* 20 */ | |
945 | 0xD8, 0xA5, /* 21 */ | |
946 | 0x20, /* 23 */ | |
947 | 0x68, /* 24 */ | |
948 | 0x59, /* 25 */ | |
949 | 0xF9, 0x28, /* 26 */ | |
950 | 0x6D, /* 28 */ | |
951 | 0x20, /* 29 */ | |
952 | 0x73, /* 30 */ | |
953 | 0xE0, 0x2D, /* 31 */ | |
954 | 0xDE, 0x43, /* 33 */ | |
955 | 0xD0, 0x33, /* 35 */ | |
956 | 0x20, /* 37 */ | |
957 | 0xFA, 0x83, /* 38 */ | |
958 | 0x25, 0x01, /* 40 */ | |
959 | 0xFB, 0x16, 0x87, /* 42 */ | |
960 | 0x4B, 0x16, /* 45 */ | |
961 | 0x20, /* 47 */ | |
962 | 0xE6, 0xBD, /* 48 */ | |
963 | 0xEB, 0x5B, /* 50 */ | |
964 | 0x4B, 0xCC, /* 52 */ | |
965 | 0xF9, 0xA2, /* 54 */ | |
966 | 0xFC, 0x10, 0x3E, /* 56 */ | |
967 | 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ | |
968 | 0x20, /* 63 */ | |
969 | 0xFC, 0x03, 0xAC, /* 64 */ | |
970 | 0xFF, /* 67 - FF just resets the state without encoding anything */ | |
971 | 0x01, /* 68 */ | |
972 | 0xDE, 0x83, /* 69 */ | |
973 | 0x20, /* 71 */ | |
974 | 0x09 /* 72 */ | |
975 | }; | |
976 | UChar expected[]={ | |
977 | 0xFEFF, 0x0061, 0x0062, 0x0020, | |
978 | 0x0063, 0x0061, 0x000D, 0x000A, | |
979 | 0x0020, 0x0000, 0x00DF, 0x00E6, | |
980 | 0x0930, 0x0020, 0x0918, 0x0909, | |
981 | 0x3086, 0x304D, 0x0020, 0x3053, | |
982 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
983 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, | |
984 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
985 | 0xD800, 0xDC00, 0xD845, 0xDDDD, | |
986 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
987 | 0xDFFF, 0x0001, 0x0E40, 0x0020, | |
988 | 0x0009 | |
989 | }; | |
990 | int32_t offsets[]={ | |
991 | 0, 3, 6, 7, /* skip 8, */ | |
992 | 10, 11, /* skip 12, */ | |
993 | 14, 15, 16, 17, 18, | |
994 | 20, 21, 23, 24, 25, 26, 28, 29, | |
995 | 30, 31, 33, 35, 37, 38, | |
996 | 40, 42, 45, 47, 48, | |
997 | 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, | |
998 | 63, 64, /* trail */ 64, /* reset only 67, */ | |
999 | 68, 69, | |
1000 | 71, 72 | |
1001 | }; | |
1002 | ||
1003 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1004 | expected, ARRAY_LENGTH(expected), "BOCU-1", | |
1005 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1006 | ) { | |
1007 | log_err("BOCU-1->u with skip did not match.\n"); | |
1008 | } | |
1009 | } | |
1010 | ||
1011 | log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
1012 | { | |
1013 | const uint8_t sampleText[]={ | |
1014 | 0x61, /* 0 'a' */ | |
1015 | 0xc0, 0x80, /* 1 non-shortest form */ | |
1016 | 0xc4, 0xb5, /* 3 U+0135 */ | |
1017 | 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ | |
1018 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ | |
1019 | 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ | |
1020 | 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ | |
1021 | 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ | |
1022 | 0x62, /* 24 'b' */ | |
1023 | 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ | |
1024 | 0xed, 0xa0, /* 28 incomplete sequence */ | |
1025 | 0xd0, 0x80 /* 30 U+0400 */ | |
1026 | }; | |
1027 | UChar expected[]={ | |
1028 | 0x0061, | |
1029 | /* skip */ | |
1030 | 0x0135, | |
1031 | 0xd020, | |
1032 | 0xd801, 0xdc01, | |
1033 | 0xe000, | |
1034 | 0xdc01, | |
1035 | /* skip */ | |
1036 | 0x0062, | |
1037 | 0xd801, | |
1038 | 0x0400 | |
1039 | }; | |
1040 | int32_t offsets[]={ | |
1041 | 0, | |
1042 | /* skip 1, */ | |
1043 | 3, | |
1044 | 5, | |
1045 | 8, 11, | |
1046 | 14, | |
1047 | 17, | |
1048 | /* skip 20, 20, */ | |
1049 | 24, | |
1050 | 25, | |
1051 | /* skip 28 */ | |
1052 | 30 | |
1053 | }; | |
1054 | ||
1055 | /* without offsets */ | |
1056 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1057 | expected, ARRAY_LENGTH(expected), "CESU-8", | |
1058 | UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) | |
1059 | ) { | |
1060 | log_err("CESU-8->u with skip did not match.\n"); | |
1061 | } | |
1062 | ||
1063 | /* with offsets */ | |
1064 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1065 | expected, ARRAY_LENGTH(expected), "CESU-8", | |
1066 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1067 | ) { | |
1068 | log_err("CESU-8->u with skip did not match.\n"); | |
1069 | } | |
1070 | } | |
1071 | } | |
1072 | ||
1073 | static void TestStop(int32_t inputsize, int32_t outputsize) | |
1074 | { | |
1075 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1076 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1077 | ||
1078 | static const uint8_t expstopIBM_949[]= { | |
1079 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; | |
1080 | ||
1081 | static const uint8_t expstopIBM_943[] = { | |
1082 | 0x9f, 0xaf, 0x9f, 0xb1}; | |
1083 | ||
1084 | static const uint8_t expstopIBM_930[] = { | |
1085 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; | |
1086 | ||
1087 | static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; | |
1088 | static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1089 | static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1090 | ||
1091 | ||
1092 | static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; | |
1093 | static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; | |
1094 | static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; | |
1095 | ||
1096 | static const int32_t fromIBM949Offs [] = { 0, 1, 3}; | |
1097 | static const int32_t fromIBM943Offs [] = { 0, 2}; | |
1098 | static const int32_t fromIBM930Offs [] = { 1, 3}; | |
1099 | ||
1100 | gInBufferSize = inputsize; | |
1101 | gOutBufferSize = outputsize; | |
73c04bcf | 1102 | |
b75a7d8f | 1103 | /*From Unicode*/ |
73c04bcf A |
1104 | |
1105 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1106 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1107 | expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", | |
1108 | UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) | |
1109 | log_err("u-> ibm-949 with stop did not match.\n"); | |
1110 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1111 | expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", | |
1112 | UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) | |
1113 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1114 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1115 | expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", | |
1116 | UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) | |
1117 | log_err("u-> ibm-930 with stop did not match.\n"); | |
1118 | ||
1119 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); | |
1120 | { | |
1121 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1122 | static const uint8_t toIBM943[]= { 0x61,}; | |
1123 | static const int32_t offset[]= {0,} ; | |
1124 | ||
1125 | /*EUC_JP*/ | |
1126 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1127 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; | |
1128 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; | |
1129 | ||
1130 | /*EUC_TW*/ | |
1131 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1132 | static const uint8_t to_euc_tw[]={ | |
1133 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; | |
1134 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; | |
1135 | ||
1136 | /*ISO-2022-JP*/ | |
1137 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; | |
1138 | static const uint8_t to_iso_2022_jp[]={ | |
1139 | 0x41, | |
1140 | ||
1141 | }; | |
1142 | static const int32_t from_iso_2022_jpOffs [] ={0,}; | |
1143 | ||
1144 | /*ISO-2022-cn*/ | |
1145 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1146 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 | 1147 | 0x41, |
b75a7d8f A |
1148 | |
1149 | }; | |
1150 | static const int32_t from_iso_2022_cnOffs [] ={ | |
1151 | 0,0, | |
1152 | 2,2, | |
1153 | }; | |
1154 | ||
1155 | /*ISO-2022-kr*/ | |
1156 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
1157 | static const uint8_t to_iso_2022_kr[]={ | |
1158 | 0x1b, 0x24, 0x29, 0x43, | |
1159 | 0x41, | |
1160 | 0x0e, 0x25, 0x50, | |
1161 | }; | |
1162 | static const int32_t from_iso_2022_krOffs [] ={ | |
1163 | -1,-1,-1,-1, | |
1164 | 0, | |
1165 | 1,1,1, | |
1166 | }; | |
1167 | ||
1168 | /* HZ encoding */ | |
1169 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1170 | ||
1171 | static const uint8_t to_hz[]={ | |
1172 | 0x7e, 0x7d, 0x41, | |
1173 | 0x7e, 0x7b, 0x26, 0x30, | |
1174 | ||
1175 | }; | |
1176 | static const int32_t from_hzOffs [] ={ | |
1177 | 0, 0,0, | |
1178 | 1,1,1,1, | |
1179 | }; | |
1180 | ||
1181 | /*ISCII*/ | |
1182 | static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1183 | static const uint8_t to_iscii[]={ | |
1184 | 0x41, | |
1185 | }; | |
1186 | static const int32_t from_isciiOffs [] ={ | |
1187 | 0, | |
1188 | }; | |
1189 | ||
1190 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
1191 | toIBM943, sizeof(toIBM943), "ibm-943", | |
1192 | UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) | |
1193 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1194 | ||
1195 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
1196 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
1197 | UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) | |
1198 | log_err("u-> euc-jp with stop did not match.\n"); | |
1199 | ||
1200 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
1201 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1202 | UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1203 | log_err("u-> euc-tw with stop did not match.\n"); | |
1204 | ||
1205 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
1206 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1207 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1208 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1209 | ||
1210 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
1211 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1212 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1213 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1214 | ||
1215 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
1216 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
1217 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) | |
1218 | log_err("u-> iso-2022-cn with stop did not match.\n"); | |
1219 | ||
1220 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
1221 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
1222 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) | |
1223 | log_err("u-> iso-2022-kr with stop did not match.\n"); | |
1224 | ||
1225 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
1226 | to_hz, sizeof(to_hz), "HZ", | |
1227 | UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) | |
1228 | log_err("u-> HZ with stop did not match.\n");\ | |
1229 | ||
1230 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
1231 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
1232 | UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) | |
1233 | log_err("u-> iscii with stop did not match.\n"); | |
1234 | ||
1235 | ||
1236 | } | |
73c04bcf A |
1237 | #endif |
1238 | ||
b75a7d8f A |
1239 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); |
1240 | { | |
1241 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1242 | ||
1243 | static const uint8_t to_SCSU[]={ | |
1244 | 0x41, | |
1245 | ||
1246 | }; | |
1247 | int32_t from_SCSUOffs [] ={ | |
1248 | 0, | |
1249 | ||
1250 | }; | |
1251 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1252 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
1253 | UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) | |
1254 | log_err("u-> SCSU with skip did not match.\n"); | |
1255 | ||
1256 | } | |
73c04bcf | 1257 | |
b75a7d8f | 1258 | /*to Unicode*/ |
73c04bcf A |
1259 | |
1260 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1261 | if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), |
1262 | IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", | |
1263 | UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) | |
1264 | log_err("ibm-949->u with stop did not match.\n"); | |
1265 | if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), | |
1266 | IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", | |
1267 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) | |
1268 | log_err("ibm-943->u with stop did not match.\n"); | |
1269 | if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), | |
1270 | IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", | |
1271 | UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) | |
1272 | log_err("ibm-930->u with stop did not match.\n"); | |
1273 | ||
1274 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); | |
1275 | { | |
1276 | ||
1277 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1278 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1279 | }; | |
1280 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; | |
1281 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; | |
1282 | ||
1283 | ||
1284 | /*EUC-JP*/ | |
1285 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1286 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1287 | 0x8e, 0xe0, | |
1288 | }; | |
1289 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; | |
1290 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; | |
1291 | ||
1292 | /*EUC_TW*/ | |
1293 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1294 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1295 | 0xe6, 0xca, 0x8a, | |
1296 | }; | |
1297 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; | |
1298 | int32_t from_euc_twOffs [] ={ 0, 1, 3}; | |
1299 | ||
1300 | ||
1301 | ||
1302 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
1303 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1304 | UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1305 | log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); | |
1306 | ||
1307 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1308 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1309 | UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) | |
1310 | log_err("euc-jp->u with stop did not match.\n"); | |
1311 | ||
1312 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1313 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
1314 | UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1315 | log_err("euc-tw->u with stop did not match.\n"); | |
1316 | } | |
73c04bcf A |
1317 | #endif |
1318 | ||
b75a7d8f A |
1319 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); |
1320 | { | |
1321 | static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1322 | 0xe0, 0x80, 0x61,}; | |
1323 | static const UChar expected1[] = { 0x0031, 0x4e8c,}; | |
1324 | static const int32_t offsets1[] = { 0x0000, 0x0001}; | |
1325 | ||
1326 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1327 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1328 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1329 | log_err("utf8->u with stop did not match.\n");; | |
1330 | } | |
1331 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); | |
1332 | { | |
1333 | static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; | |
1334 | static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; | |
1335 | static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; | |
1336 | ||
1337 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1338 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1339 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1340 | log_err("scsu->u with stop did not match.\n");; | |
1341 | } | |
1342 | ||
1343 | } | |
1344 | ||
1345 | static void TestSub(int32_t inputsize, int32_t outputsize) | |
1346 | { | |
1347 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1348 | static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1349 | ||
1350 | static const uint8_t expsubIBM_949[] = | |
1351 | { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; | |
1352 | ||
1353 | static const uint8_t expsubIBM_943[] = { | |
1354 | 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; | |
1355 | ||
1356 | static const uint8_t expsubIBM_930[] = { | |
1357 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; | |
1358 | ||
1359 | static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; | |
1360 | static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1361 | static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1362 | ||
1363 | static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1364 | static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; | |
1365 | static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; | |
1366 | ||
1367 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; | |
1368 | static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; | |
1369 | static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; | |
1370 | ||
1371 | gInBufferSize = inputsize; | |
1372 | gOutBufferSize = outputsize; | |
1373 | ||
1374 | /*from unicode*/ | |
73c04bcf A |
1375 | |
1376 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1377 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1378 | expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", | |
1379 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) | |
1380 | log_err("u-> ibm-949 with subst did not match.\n"); | |
1381 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1382 | expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", | |
1383 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) | |
1384 | log_err("u-> ibm-943 with subst did not match.\n"); | |
1385 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1386 | expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", | |
1387 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) | |
1388 | log_err("u-> ibm-930 with subst did not match.\n"); | |
1389 | ||
1390 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1391 | { | |
1392 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1393 | static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; | |
1394 | static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; | |
1395 | ||
1396 | ||
1397 | /* EUC_JP*/ | |
1398 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1399 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1400 | 0xf4, 0xfe, 0xf4, 0xfe, | |
1401 | 0x61, 0x8e, 0xe0, | |
1402 | }; | |
1403 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; | |
1404 | ||
1405 | /*EUC_TW*/ | |
1406 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1407 | static const uint8_t to_euc_tw[]={ | |
1408 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1409 | 0xfd, 0xfe, 0xfd, 0xfe, | |
1410 | 0x61, 0xe6, 0xca, 0x8a, | |
1411 | }; | |
1412 | ||
b75a7d8f A |
1413 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; |
1414 | ||
1415 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
1416 | toIBM943, sizeof(toIBM943), "ibm-943", | |
1417 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) | |
1418 | log_err("u-> ibm-943 with substitute did not match.\n"); | |
1419 | ||
1420 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
1421 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
1422 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) | |
1423 | log_err("u-> euc-jp with substitute did not match.\n"); | |
1424 | ||
1425 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
1426 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1427 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1428 | log_err("u-> euc-tw with substitute did not match.\n"); | |
b75a7d8f | 1429 | } |
73c04bcf | 1430 | #endif |
b75a7d8f A |
1431 | |
1432 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1433 | { | |
1434 | UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1435 | ||
1436 | const uint8_t to_SCSU[]={ | |
1437 | 0x41, | |
1438 | 0x0e, 0xff,0xfd, | |
1439 | 0x42 | |
1440 | ||
1441 | ||
1442 | }; | |
1443 | int32_t from_SCSUOffs [] ={ | |
1444 | 0, | |
1445 | 1,1,1, | |
1446 | 2, | |
1447 | ||
1448 | }; | |
1449 | const uint8_t to_SCSU_1[]={ | |
1450 | 0x41, | |
1451 | ||
1452 | }; | |
1453 | int32_t from_SCSUOffs_1 [] ={ | |
1454 | 0, | |
1455 | ||
1456 | }; | |
1457 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1458 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
1459 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) | |
1460 | log_err("u-> SCSU with substitute did not match.\n"); | |
1461 | ||
1462 | if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1463 | to_SCSU_1, sizeof(to_SCSU_1), "SCSU", | |
1464 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
1465 | log_err("u-> SCSU with substitute did not match.\n"); | |
1466 | } | |
1467 | ||
1468 | log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1469 | { | |
1470 | static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; | |
1471 | static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, | |
1472 | 0xf0, 0x90, 0x90, 0x81, | |
1473 | 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, | |
1474 | 0xef, 0xbf, 0xbf, 0x61, | |
1475 | ||
1476 | }; | |
1477 | static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; | |
1478 | if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), | |
1479 | expectedUTF8, sizeof(expectedUTF8), "utf8", | |
1480 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { | |
1481 | log_err("u-> utf8 with stop did not match.\n"); | |
1482 | } | |
1483 | } | |
1484 | ||
1485 | log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1486 | { | |
1487 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1488 | ||
1489 | static const uint8_t out[]={ | |
1490 | #if U_IS_BIG_ENDIAN | |
1491 | 0xfe, 0xff, | |
1492 | 0x00, 0x41, | |
1493 | 0xfe, 0xff | |
1494 | #else | |
1495 | 0xff, 0xfe, | |
1496 | 0x41, 0x00, | |
1497 | 0xff, 0xfe | |
1498 | #endif | |
1499 | }; | |
1500 | static const int32_t offsets[]={ | |
1501 | -1, -1, 0, 0, 1, 1 | |
1502 | }; | |
1503 | ||
1504 | if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1505 | out, sizeof(out), "UTF-16", | |
1506 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1507 | ) { | |
1508 | log_err("u->UTF-16 with substitute did not match.\n"); | |
1509 | } | |
1510 | } | |
1511 | ||
1512 | log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1513 | { | |
1514 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1515 | ||
1516 | static const uint8_t out[]={ | |
1517 | #if U_IS_BIG_ENDIAN | |
1518 | 0x00, 0x00, 0xfe, 0xff, | |
1519 | 0x00, 0x00, 0x00, 0x41, | |
1520 | 0x00, 0x00, 0xfe, 0xff | |
1521 | #else | |
1522 | 0xff, 0xfe, 0x00, 0x00, | |
1523 | 0x41, 0x00, 0x00, 0x00, | |
1524 | 0xff, 0xfe, 0x00, 0x00 | |
1525 | #endif | |
1526 | }; | |
1527 | static const int32_t offsets[]={ | |
1528 | -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 | |
1529 | }; | |
1530 | ||
1531 | if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1532 | out, sizeof(out), "UTF-32", | |
1533 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1534 | ) { | |
1535 | log_err("u->UTF-32 with substitute did not match.\n"); | |
1536 | } | |
1537 | } | |
1538 | ||
1539 | /*to unicode*/ | |
73c04bcf A |
1540 | |
1541 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1542 | if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), |
1543 | IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", | |
1544 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) | |
1545 | log_err("ibm-949->u with substitute did not match.\n"); | |
1546 | if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), | |
1547 | IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", | |
1548 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) | |
1549 | log_err("ibm-943->u with substitute did not match.\n"); | |
1550 | if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), | |
1551 | IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", | |
1552 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) | |
1553 | log_err("ibm-930->u with substitute did not match.\n"); | |
1554 | ||
1555 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1556 | { | |
1557 | ||
1558 | const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1559 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1560 | }; | |
1561 | UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 | |
1562 | }; | |
1563 | int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; | |
1564 | ||
1565 | ||
1566 | /* EUC_JP*/ | |
1567 | const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1568 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1569 | 0x8e, 0xe0, 0x8a | |
1570 | }; | |
1571 | UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; | |
1572 | int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; | |
1573 | ||
1574 | /*EUC_TW*/ | |
1575 | const uint8_t sampleTxt_euc_tw[]={ | |
1576 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1577 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1578 | 0xe6, 0xca, 0x8a, | |
1579 | }; | |
1580 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; | |
1581 | int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; | |
1582 | ||
1583 | ||
1584 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
1585 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1586 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1587 | log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); | |
1588 | ||
1589 | ||
1590 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1591 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1592 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) | |
1593 | log_err("euc-jp->u with substitute did not match.\n"); | |
1594 | ||
1595 | ||
1596 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1597 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
1598 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1599 | log_err("euc-tw->u with substitute did not match.\n"); | |
1600 | ||
1601 | ||
1602 | if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1603 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1604 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) | |
1605 | log_err("euc-jp->u with substitute did not match.\n"); | |
b75a7d8f | 1606 | } |
73c04bcf A |
1607 | #endif |
1608 | ||
b75a7d8f A |
1609 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); |
1610 | { | |
1611 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1612 | 0xe0, 0x80, 0x61,}; | |
1613 | UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; | |
1614 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; | |
1615 | ||
1616 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1617 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1618 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1619 | log_err("utf8->u with substitute did not match.\n");; | |
1620 | } | |
1621 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1622 | { | |
1623 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
1624 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; | |
1625 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
1626 | ||
1627 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1628 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1629 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1630 | log_err("scsu->u with stop did not match.\n");; | |
1631 | } | |
1632 | ||
73c04bcf | 1633 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
1634 | log_verbose("Testing ibm-930 subchar/subchar1\n"); |
1635 | { | |
1636 | static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; | |
1637 | static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; | |
1638 | static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1639 | ||
1640 | static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; | |
1641 | static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; | |
1642 | static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; | |
1643 | ||
1644 | if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", | |
1645 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1646 | ) { | |
1647 | log_err("u->ibm-930 subchar/subchar1 did not match.\n"); | |
1648 | } | |
1649 | ||
1650 | if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", | |
1651 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1652 | ) { | |
1653 | log_err("ibm-930->u subchar/subchar1 did not match.\n"); | |
1654 | } | |
1655 | } | |
1656 | ||
1657 | log_verbose("Testing GB 18030 with substitute callbacks\n"); | |
1658 | { | |
b75a7d8f A |
1659 | static const UChar u2[]={ |
1660 | 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; | |
1661 | static const uint8_t gb2[]={ | |
1662 | 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; | |
1663 | static const int32_t offsets2[]={ | |
1664 | 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; | |
1665 | ||
b75a7d8f A |
1666 | if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", |
1667 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1668 | ) { | |
1669 | log_err("gb18030->u with substitute did not match.\n"); | |
1670 | } | |
1671 | } | |
73c04bcf | 1672 | #endif |
b75a7d8f A |
1673 | |
1674 | log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); | |
1675 | { | |
1676 | static const uint8_t utf7[]={ | |
1677 | /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ | |
1678 | 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e | |
1679 | }; | |
1680 | static const UChar unicode[]={ | |
1681 | 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd | |
1682 | }; | |
1683 | static const int32_t offsets[]={ | |
1684 | 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 | |
1685 | }; | |
1686 | ||
1687 | if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", | |
1688 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1689 | ) { | |
1690 | log_err("UTF-7->u with substitute did not match.\n"); | |
1691 | } | |
1692 | } | |
1693 | ||
b75a7d8f A |
1694 | log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); |
1695 | { | |
1696 | static const uint8_t | |
1697 | in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, | |
1698 | in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, | |
1699 | in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; | |
1700 | ||
1701 | static const UChar | |
1702 | out1[]={ 0x4e00, 0xfeff }, | |
1703 | out2[]={ 0x004e, 0xfffe }, | |
1704 | out3[]={ 0xfefd, 0x4e00, 0xfeff }; | |
1705 | ||
1706 | static const int32_t | |
1707 | offsets1[]={ 2, 4 }, | |
1708 | offsets2[]={ 2, 4 }, | |
1709 | offsets3[]={ 0, 2, 4 }; | |
1710 | ||
1711 | if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", | |
1712 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1713 | ) { | |
1714 | log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); | |
1715 | } | |
1716 | ||
1717 | if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", | |
1718 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1719 | ) { | |
1720 | log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); | |
1721 | } | |
1722 | ||
1723 | if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", | |
1724 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1725 | ) { | |
1726 | log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); | |
1727 | } | |
1728 | } | |
1729 | ||
1730 | log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); | |
1731 | { | |
1732 | static const uint8_t | |
1733 | in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, | |
1734 | in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, | |
1735 | in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, | |
1736 | in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; | |
1737 | ||
1738 | static const UChar | |
1739 | out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff }, | |
1740 | out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe }, | |
374ca955 | 1741 | out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd }, |
b75a7d8f A |
1742 | out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 }; |
1743 | ||
1744 | static const int32_t | |
1745 | offsets1[]={ 4, 4, 8 }, | |
1746 | offsets2[]={ 4, 4, 8 }, | |
1747 | offsets3[]={ 0, 4, 4, 8, 12 }, | |
1748 | offsets4[]={ 0, 0, 4, 8 }; | |
1749 | ||
1750 | if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", | |
1751 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1752 | ) { | |
1753 | log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); | |
1754 | } | |
1755 | ||
1756 | if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", | |
1757 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1758 | ) { | |
1759 | log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); | |
1760 | } | |
1761 | ||
1762 | if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", | |
1763 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1764 | ) { | |
1765 | log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); | |
1766 | } | |
1767 | ||
1768 | if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", | |
1769 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) | |
1770 | ) { | |
1771 | log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); | |
1772 | } | |
1773 | } | |
1774 | } | |
1775 | ||
1776 | static void TestSubWithValue(int32_t inputsize, int32_t outputsize) | |
1777 | { | |
1778 | UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1779 | UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1780 | ||
1781 | const uint8_t expsubwvalIBM_949[]= { | |
1782 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, | |
1783 | 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; | |
1784 | ||
1785 | const uint8_t expsubwvalIBM_943[]= { | |
1786 | 0x9f, 0xaf, 0x9f, 0xb1, | |
1787 | 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; | |
1788 | ||
1789 | const uint8_t expsubwvalIBM_930[] = { | |
1790 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; | |
1791 | ||
1792 | int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; | |
1793 | int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; | |
1794 | int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ | |
1795 | ||
1796 | gInBufferSize = inputsize; | |
1797 | gOutBufferSize = outputsize; | |
1798 | ||
1799 | /*from Unicode*/ | |
73c04bcf A |
1800 | |
1801 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1802 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1803 | expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", | |
1804 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) | |
1805 | log_err("u-> ibm-949 with subst with value did not match.\n"); | |
1806 | ||
1807 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1808 | expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", | |
1809 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) | |
1810 | log_err("u-> ibm-943 with sub with value did not match.\n"); | |
1811 | ||
1812 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1813 | expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", | |
1814 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) | |
1815 | log_err("u-> ibm-930 with subst with value did not match.\n"); | |
1816 | ||
1817 | ||
1818 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); | |
1819 | { | |
1820 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1821 | static const uint8_t toIBM943[]= { 0x61, | |
1822 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1823 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1824 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1825 | 0x61 }; | |
1826 | static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; | |
1827 | ||
1828 | ||
1829 | /* EUC_JP*/ | |
1830 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; | |
1831 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1832 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1833 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1834 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1835 | 0x61, 0x8e, 0xe0, | |
1836 | }; | |
1837 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, | |
1838 | 3, 3, 3, 3, 3, 3, | |
1839 | 3, 3, 3, 3, 3, 3, | |
1840 | 5, 5, 5, 5, 5, 5, | |
1841 | 6, 7, 7, | |
1842 | }; | |
1843 | ||
1844 | /*EUC_TW*/ | |
1845 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1846 | static const uint8_t to_euc_tw[]={ | |
1847 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1848 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1849 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1850 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1851 | 0x61, 0xe6, 0xca, 0x8a, | |
1852 | }; | |
1853 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, | |
1854 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, | |
1855 | 6, 7, 7, 8, | |
1856 | }; | |
1857 | /*ISO-2022-JP*/ | |
b75a7d8f A |
1858 | static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; |
1859 | static const uint8_t to_iso_2022_jp1[]={ | |
1860 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1861 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1862 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1863 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1864 | 0x42, | |
1865 | }; | |
1866 | ||
1867 | static const int32_t from_iso_2022_jpOffs1 [] ={ | |
1868 | 0,0,0,0,0, | |
1869 | 1,1,1,1,1,1,1,1,1, | |
1870 | 2,2,2,2,2, | |
1871 | 3,3,3,3,3,3,3,3,3, | |
1872 | 4, | |
1873 | }; | |
1874 | /* surrogate pair*/ | |
1875 | static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; | |
1876 | static const uint8_t to_iso_2022_jp2[]={ | |
1877 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1878 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1879 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1880 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1881 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1882 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1883 | 0x42, | |
1884 | }; | |
1885 | static const int32_t from_iso_2022_jpOffs2 [] ={ | |
1886 | 0,0,0,0,0, | |
1887 | 1,1,1,1,1,1,1,1,1, | |
1888 | 1,1,1,1,1,1, | |
1889 | 3,3,3,3,3, | |
1890 | 4,4,4,4,4,4,4,4,4, | |
1891 | 4,4,4,4,4,4, | |
1892 | 6, | |
1893 | }; | |
1894 | ||
1895 | /*ISO-2022-cn*/ | |
1896 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1897 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 A |
1898 | 0x41, |
1899 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, | |
b75a7d8f A |
1900 | 0x42, |
1901 | }; | |
1902 | static const int32_t from_iso_2022_cnOffs [] ={ | |
374ca955 A |
1903 | 0, |
1904 | 1,1,1,1,1,1, | |
b75a7d8f A |
1905 | 2, |
1906 | }; | |
b75a7d8f A |
1907 | |
1908 | static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; | |
1909 | ||
1910 | static const uint8_t to_iso_2022_cn4[]={ | |
1911 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
1912 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1913 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 1914 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
1915 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
1916 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1917 | 0x42, | |
1918 | }; | |
1919 | static const int32_t from_iso_2022_cnOffs4 [] ={ | |
1920 | 0,0,0,0,0,0,0, | |
1921 | 1,1,1,1,1,1,1, | |
1922 | 1,1,1,1,1,1, | |
374ca955 | 1923 | 3,3,3, |
b75a7d8f A |
1924 | 4,4,4,4,4,4,4, |
1925 | 4,4,4,4,4,4, | |
1926 | 6 | |
1927 | ||
1928 | }; | |
1929 | ||
1930 | /*ISO-2022-kr*/ | |
1931 | static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
1932 | static const uint8_t to_iso_2022_kr2[]={ | |
1933 | 0x1b, 0x24, 0x29, 0x43, | |
1934 | 0x41, | |
1935 | 0x0e, 0x25, 0x50, | |
1936 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1937 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1938 | 0x0e, 0x25, 0x50, | |
1939 | 0x0f, 0x42, | |
1940 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1941 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1942 | 0x43 | |
1943 | }; | |
1944 | static const int32_t from_iso_2022_krOffs2 [] ={ | |
1945 | -1,-1,-1,-1, | |
1946 | 0, | |
1947 | 1,1,1, | |
1948 | 2,2,2,2,2,2,2, | |
1949 | 2,2,2,2,2,2, | |
1950 | 4,4,4, | |
1951 | 5,5, | |
1952 | 6,6,6,6,6,6, | |
1953 | 6,6,6,6,6,6, | |
1954 | 8, | |
1955 | }; | |
1956 | ||
1957 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; | |
1958 | static const uint8_t to_iso_2022_kr[]={ | |
1959 | 0x1b, 0x24, 0x29, 0x43, | |
1960 | 0x41, | |
1961 | 0x0e, 0x25, 0x50, | |
1962 | 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1963 | 0x0e, 0x25, 0x50, | |
1964 | 0x0f, 0x42, | |
1965 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1966 | 0x43 | |
1967 | }; | |
1968 | ||
1969 | ||
1970 | static const int32_t from_iso_2022_krOffs [] ={ | |
1971 | -1,-1,-1,-1, | |
1972 | 0, | |
1973 | 1,1,1, | |
1974 | 2,2,2,2,2,2,2, | |
1975 | 3,3,3, | |
1976 | 4,4, | |
1977 | 5,5,5,5,5,5, | |
1978 | 6, | |
1979 | }; | |
1980 | /* HZ encoding */ | |
1981 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1982 | ||
1983 | static const uint8_t to_hz[]={ | |
1984 | 0x7e, 0x7d, 0x41, | |
1985 | 0x7e, 0x7b, 0x26, 0x30, | |
1986 | 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ | |
1987 | 0x7e, 0x7b, 0x26, 0x30, | |
1988 | 0x7e, 0x7d, 0x42, | |
1989 | ||
1990 | }; | |
1991 | static const int32_t from_hzOffs [] ={ | |
1992 | 0,0,0, | |
1993 | 1,1,1,1, | |
1994 | 2,2,2,2,2,2,2,2, | |
1995 | 3,3,3,3, | |
1996 | 4,4,4 | |
1997 | }; | |
1998 | ||
1999 | static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
2000 | static const uint8_t to_hz2[]={ | |
2001 | 0x7e, 0x7d, 0x41, | |
2002 | 0x7e, 0x7b, 0x26, 0x30, | |
2003 | 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2004 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2005 | 0x7e, 0x7b, 0x26, 0x30, | |
2006 | 0x7e, 0x7d, 0x42, | |
2007 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2008 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2009 | 0x43 | |
2010 | }; | |
2011 | static const int32_t from_hzOffs2 [] ={ | |
2012 | 0,0,0, | |
2013 | 1,1,1,1, | |
2014 | 2,2,2,2,2,2,2,2, | |
2015 | 2,2,2,2,2,2, | |
2016 | 4,4,4,4, | |
2017 | 5,5,5, | |
2018 | 6,6,6,6,6,6, | |
2019 | 6,6,6,6,6,6, | |
2020 | 8, | |
2021 | }; | |
2022 | ||
2023 | /*ISCII*/ | |
b75a7d8f A |
2024 | static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; |
2025 | static const uint8_t to_iscii[]={ | |
2026 | 0x41, | |
2027 | 0xef, 0x42, 0xa1, | |
2028 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2029 | 0xa2, | |
2030 | 0x42, | |
2031 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2032 | 0x43 | |
2033 | }; | |
2034 | ||
2035 | ||
2036 | static const int32_t from_isciiOffs [] ={ | |
2037 | 0, | |
2038 | 1,1,1, | |
2039 | 2,2,2,2,2,2, | |
2040 | 3, | |
2041 | 4, | |
2042 | 5,5,5,5,5,5, | |
2043 | 6, | |
2044 | }; | |
2045 | ||
2046 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
2047 | toIBM943, sizeof(toIBM943), "ibm-943", | |
2048 | UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) | |
2049 | log_err("u-> ibm-943 with subst with value did not match.\n"); | |
2050 | ||
2051 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
2052 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
2053 | UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) | |
2054 | log_err("u-> euc-jp with subst with value did not match.\n"); | |
2055 | ||
2056 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
2057 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
2058 | UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) | |
2059 | log_err("u-> euc-tw with subst with value did not match.\n"); | |
2060 | ||
b75a7d8f A |
2061 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), |
2062 | to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2063 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2064 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2065 | ||
2066 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), | |
2067 | to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2068 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2069 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2070 | ||
2071 | if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
2072 | to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
2073 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) | |
2074 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2075 | /*ESCAPE OPTIONS*/ | |
2076 | { | |
2077 | /* surrogate pair*/ | |
2078 | static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; | |
2079 | static const uint8_t to_iso_2022_jp3_v2[]={ | |
2080 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
2081 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2082 | ||
2083 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
2084 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2085 | ||
2086 | 0x42, | |
2087 | 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, | |
2088 | }; | |
2089 | ||
2090 | static const int32_t from_iso_2022_jpOffs3_v2 [] ={ | |
2091 | 0,0,0,0,0, | |
2092 | 1,1,1,1,1,1,1,1,1,1,1,1, | |
2093 | ||
2094 | 3,3,3,3,3, | |
2095 | 4,4,4,4,4,4,4,4,4,4,4,4, | |
2096 | ||
2097 | 6, | |
2098 | 7,7,7,7,7,7,7,7,7 | |
2099 | }; | |
2100 | ||
2101 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), | |
2102 | to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", | |
2103 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2104 | log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2105 | } | |
b75a7d8f A |
2106 | { |
2107 | static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2108 | static const uint8_t to_iso_2022_cn5_v2[]={ | |
2109 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2110 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, | |
2111 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 2112 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2113 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, |
2114 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2115 | 0x42, | |
374ca955 | 2116 | 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, |
b75a7d8f A |
2117 | }; |
2118 | static const int32_t from_iso_2022_cnOffs5_v2 [] ={ | |
2119 | 0,0,0,0,0,0,0, | |
2120 | 1,1,1,1,1,1,1, | |
2121 | 1,1,1,1,1,1, | |
374ca955 | 2122 | 3,3,3, |
b75a7d8f A |
2123 | 4,4,4,4,4,4,4, |
2124 | 4,4,4,4,4,4, | |
2125 | 6, | |
374ca955 | 2126 | 7,7,7,7,7,7 |
b75a7d8f A |
2127 | }; |
2128 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), | |
2129 | to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", | |
2130 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) | |
2131 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); | |
2132 | ||
2133 | } | |
2134 | { | |
2135 | static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2136 | static const uint8_t to_iso_2022_cn6_v2[]={ | |
2137 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2138 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, | |
374ca955 | 2139 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2140 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, |
2141 | 0x42, | |
374ca955 | 2142 | 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d |
b75a7d8f A |
2143 | }; |
2144 | static const int32_t from_iso_2022_cnOffs6_v2 [] ={ | |
2145 | 0, 0, 0, 0, 0, 0, 0, | |
2146 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
374ca955 | 2147 | 3, 3, 3, |
b75a7d8f A |
2148 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
2149 | 6, | |
374ca955 | 2150 | 7, 7, 7, 7, 7, 7, 7, 7, |
b75a7d8f A |
2151 | }; |
2152 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), | |
2153 | to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", | |
2154 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) | |
2155 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); | |
2156 | ||
2157 | } | |
2158 | { | |
2159 | static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2160 | static const uint8_t to_iso_2022_cn7_v2[]={ | |
2161 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2162 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 2163 | 0x0e, 0x21, 0x22, |
b75a7d8f | 2164 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
374ca955 | 2165 | 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, |
b75a7d8f A |
2166 | }; |
2167 | static const int32_t from_iso_2022_cnOffs7_v2 [] ={ | |
2168 | 0, 0, 0, 0, 0, 0, 0, | |
2169 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
374ca955 | 2170 | 3, 3, 3, |
b75a7d8f A |
2171 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
2172 | 6, | |
374ca955 | 2173 | 7, 7, 7, 7, 7, 7, |
b75a7d8f A |
2174 | }; |
2175 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), | |
2176 | to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", | |
2177 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) | |
2178 | log_err("u-> iso-2022-cn with sub & K did not match.\n"); | |
2179 | ||
46f4442e A |
2180 | } |
2181 | { | |
2182 | static const UChar iso_2022_cn_inputText8[]={ | |
2183 | 0x3000, | |
2184 | 0xD84D, 0xDC56, | |
2185 | 0x3001, | |
2186 | 0xD84D, 0xDC56, | |
2187 | 0xDBFF, 0xDFFF, | |
2188 | 0x0042, | |
2189 | 0x0902}; | |
2190 | static const uint8_t to_iso_2022_cn8_v2[]={ | |
2191 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2192 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2193 | 0x0e, 0x21, 0x22, | |
2194 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2195 | 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, | |
2196 | 0x42, | |
2197 | 0x5c, 0x39, 0x30, 0x32, 0x20 | |
2198 | }; | |
2199 | static const int32_t from_iso_2022_cnOffs8_v2 [] ={ | |
2200 | 0, 0, 0, 0, 0, 0, 0, | |
2201 | 1, 1, 1, 1, 1, 1, 1, 1, | |
2202 | 3, 3, 3, | |
2203 | 4, 4, 4, 4, 4, 4, 4, 4, | |
2204 | 6, 6, 6, 6, 6, 6, 6, 6, | |
2205 | 8, | |
2206 | 9, 9, 9, 9, 9 | |
2207 | }; | |
2208 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), | |
2209 | to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", | |
2210 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) | |
2211 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); | |
2212 | ||
b75a7d8f A |
2213 | } |
2214 | { | |
2215 | static const uint8_t to_iso_2022_cn4_v3[]={ | |
2216 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2217 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, | |
374ca955 | 2218 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2219 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, |
2220 | 0x42 | |
2221 | }; | |
2222 | ||
2223 | ||
2224 | static const int32_t from_iso_2022_cnOffs4_v3 [] ={ | |
2225 | 0,0,0,0,0,0,0, | |
2226 | 1,1,1,1,1,1,1,1,1,1,1, | |
2227 | ||
374ca955 | 2228 | 3,3,3, |
b75a7d8f A |
2229 | 4,4,4,4,4,4,4,4,4,4,4, |
2230 | ||
2231 | 6 | |
2232 | ||
2233 | }; | |
2234 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), | |
2235 | to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", | |
2236 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2237 | { | |
2238 | log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); | |
2239 | } | |
2240 | } | |
2241 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
2242 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
2243 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) | |
2244 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2245 | ||
b75a7d8f A |
2246 | if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), |
2247 | to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", | |
2248 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) | |
2249 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2250 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
2251 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
2252 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) | |
2253 | log_err("u-> iso_2022_kr with subst with value did not match.\n"); | |
2254 | if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), | |
2255 | to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", | |
2256 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) | |
2257 | log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); | |
2258 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
2259 | to_hz, sizeof(to_hz), "HZ", | |
2260 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) | |
2261 | log_err("u-> hz with subst with value did not match.\n"); | |
2262 | if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), | |
2263 | to_hz2, sizeof(to_hz2), "HZ", | |
2264 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) | |
2265 | log_err("u-> hz with subst with value did not match.\n"); | |
2266 | ||
2267 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
2268 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
2269 | UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) | |
2270 | log_err("u-> iscii with subst with value did not match.\n"); | |
b75a7d8f | 2271 | } |
73c04bcf | 2272 | #endif |
b75a7d8f A |
2273 | |
2274 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); | |
2275 | /*to Unicode*/ | |
2276 | { | |
73c04bcf | 2277 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2278 | static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, |
2279 | 0x81, 0xad, /*unassigned*/ | |
2280 | 0x89, 0xd3 }; | |
2281 | static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, | |
2282 | 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, | |
2283 | 0x7B87}; | |
2284 | static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; | |
2285 | ||
2286 | /* EUC_JP*/ | |
2287 | static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
2288 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
2289 | 0x8e, 0xe0, | |
2290 | }; | |
2291 | static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, | |
2292 | 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, | |
2293 | 0x00a2 }; | |
2294 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, | |
2295 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
2296 | 9, | |
2297 | }; | |
2298 | ||
2299 | /*EUC_TW*/ | |
2300 | static const uint8_t sampleTxt_euc_tw[]={ | |
2301 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
2302 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
2303 | 0xe6, 0xca, 0x8a, | |
2304 | }; | |
2305 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, | |
2306 | 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, | |
2307 | 0x8706, 0x8a, }; | |
2308 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, | |
2309 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, | |
2310 | 11, 13}; | |
2311 | ||
2312 | /*iso-2022-jp*/ | |
2313 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
2314 | 0x1b, 0x28, 0x42, 0x41, | |
2315 | 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ | |
2316 | 0x1b, 0x28, 0x42, 0x42, | |
2317 | ||
2318 | }; | |
2319 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 }; | |
2320 | static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; | |
2321 | ||
2322 | /*iso-2022-cn*/ | |
2323 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
2324 | 0x0f, 0x41, 0x44, | |
2325 | 0x1B, 0x24, 0x29, 0x47, | |
2326 | 0x0E, 0x40, 0x6c, /*unassigned*/ | |
2327 | 0x0f, 0x42, | |
2328 | ||
2329 | }; | |
2330 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; | |
2331 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; | |
2332 | ||
2333 | /*iso-2022-kr*/ | |
2334 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
2335 | 0x1b, 0x24, 0x29, 0x43, | |
2336 | 0x41, | |
2337 | 0x0E, 0x7f, 0x1E, | |
2338 | 0x0e, 0x25, 0x50, | |
2339 | 0x0f, 0x51, | |
2340 | 0x42, 0x43, | |
2341 | ||
2342 | }; | |
2343 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; | |
2344 | static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; | |
2345 | ||
2346 | /*hz*/ | |
2347 | static const uint8_t sampleTxt_hz[]={ | |
2348 | 0x41, | |
2349 | 0x7e, 0x7b, 0x26, 0x30, | |
2350 | 0x7f, 0x1E, /*unassigned*/ | |
2351 | 0x26, 0x30, | |
2352 | 0x7e, 0x7d, 0x42, | |
2353 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
2354 | 0x7e, 0x7d, 0x42, | |
2355 | }; | |
2356 | static const UChar hztoUnicode[]={ | |
2357 | 0x41, | |
2358 | 0x03a0, | |
2359 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2360 | 0x03A0, | |
2361 | 0x42, | |
2362 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2363 | 0x42,}; | |
2364 | ||
2365 | static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; | |
2366 | ||
2367 | ||
2368 | /*iscii*/ | |
2369 | static const uint8_t sampleTxt_iscii[]={ | |
2370 | 0x41, | |
2371 | 0x30, | |
2372 | 0xEB, /*unassigned*/ | |
2373 | 0xa3, | |
2374 | 0x42, | |
2375 | 0xEC, /*unassigned*/ | |
2376 | 0x42, | |
2377 | }; | |
2378 | static const UChar isciitoUnicode[]={ | |
2379 | 0x41, | |
2380 | 0x30, | |
2381 | 0x25, 0x58, 0x45, 0x42, | |
2382 | 0x0903, | |
2383 | 0x42, | |
2384 | 0x25, 0x58, 0x45, 0x43, | |
2385 | 0x42,}; | |
2386 | ||
2387 | static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; | |
73c04bcf | 2388 | #endif |
b75a7d8f | 2389 | |
b75a7d8f A |
2390 | /*UTF8*/ |
2391 | static const uint8_t sampleTxtUTF8[]={ | |
2392 | 0x20, 0x64, 0x50, | |
2393 | 0xC2, 0x7E, /* truncated char */ | |
2394 | 0x20, | |
2395 | 0xE0, 0xB5, 0x7E, /* truncated char */ | |
2396 | 0x40, | |
2397 | }; | |
2398 | static const UChar UTF8ToUnicode[]={ | |
2399 | 0x0020, 0x0064, 0x0050, | |
2400 | 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ | |
2401 | 0x0020, | |
2402 | 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, | |
2403 | 0x0040 | |
2404 | }; | |
2405 | static const int32_t fromUTF8[] = { | |
2406 | 0, 1, 2, | |
2407 | 3, 3, 3, 3, 4, | |
2408 | 5, | |
2409 | 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2410 | 9 | |
2411 | }; | |
2412 | static const UChar UTF8ToUnicodeXML_DEC[]={ | |
2413 | 0x0020, 0x0064, 0x0050, | |
2414 | 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ | |
2415 | 0x0020, | |
2416 | 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, | |
2417 | 0x0040 | |
2418 | }; | |
2419 | static const int32_t fromUTF8XML_DEC[] = { | |
2420 | 0, 1, 2, | |
2421 | 3, 3, 3, 3, 3, 3, 4, | |
2422 | 5, | |
2423 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2424 | 9 | |
2425 | }; | |
2426 | ||
73c04bcf A |
2427 | |
2428 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
2429 | if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), |
2430 | IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", | |
2431 | UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) | |
2432 | log_err("ibm-943->u with substitute with value did not match.\n"); | |
2433 | ||
2434 | if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), | |
2435 | EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp", | |
2436 | UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) | |
2437 | log_err("euc-jp->u with substitute with value did not match.\n"); | |
2438 | ||
2439 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
2440 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
2441 | UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) | |
2442 | log_err("euc-tw->u with substitute with value did not match.\n"); | |
2443 | ||
2444 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2445 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2446 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) | |
2447 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2448 | ||
2449 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2450 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2451 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) | |
2452 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2453 | ||
2454 | {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ | |
2455 | { | |
2456 | static const UChar iso_2022_jptoUnicodeDec[]={ | |
2457 | 0x0041, | |
2458 | 0x0026, 0x0023, 0x0034, 0x0032, 0x003b, | |
2459 | 0x0026, 0x0023, 0x0036, 0x0038, 0x003b, | |
2460 | 0x0042 }; | |
2461 | static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; | |
2462 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2463 | iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2464 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2465 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2466 | } | |
2467 | { | |
2468 | static const UChar iso_2022_jptoUnicodeHex[]={ | |
2469 | 0x0041, | |
2470 | 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b, | |
2471 | 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b, | |
2472 | 0x0042 }; | |
2473 | static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; | |
2474 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2475 | iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2476 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) | |
2477 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); | |
2478 | } | |
2479 | { | |
2480 | static const UChar iso_2022_jptoUnicodeC[]={ | |
2481 | 0x0041, | |
2482 | 0x005C, 0x0078, 0x0032, 0x0041, | |
2483 | 0x005C, 0x0078, 0x0034, 0x0034, | |
2484 | 0x0042 }; | |
2485 | int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; | |
2486 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2487 | iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2488 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2489 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); | |
2490 | } | |
2491 | } | |
2492 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), | |
2493 | iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", | |
2494 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) | |
2495 | log_err("iso-2022-cn->u with substitute with value did not match.\n"); | |
2496 | ||
2497 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), | |
2498 | iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", | |
2499 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) | |
2500 | log_err("iso-2022-kr->u with substitute with value did not match.\n"); | |
2501 | ||
2502 | if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
2503 | hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
2504 | UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) | |
2505 | log_err("hz->u with substitute with value did not match.\n"); | |
2506 | ||
2507 | if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
2508 | isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", | |
2509 | UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) | |
2510 | log_err("ISCII ->u with substitute with value did not match.\n"); | |
73c04bcf A |
2511 | #endif |
2512 | ||
b75a7d8f A |
2513 | if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), |
2514 | UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", | |
2515 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) | |
2516 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2517 | if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), | |
2518 | UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", | |
2519 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) | |
2520 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2521 | } | |
2522 | } | |
2523 | ||
73c04bcf | 2524 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2525 | static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) |
2526 | { | |
2527 | static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
2528 | static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
2529 | static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; | |
2530 | ||
2531 | ||
2532 | static const uint8_t text943[] = { | |
fd0068a8 A |
2533 | 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; |
2534 | static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
2535 | static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
b75a7d8f A |
2536 | static const UChar toUnicode943stop[]= { 0x304b}; |
2537 | ||
fd0068a8 A |
2538 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; |
2539 | static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; | |
b75a7d8f A |
2540 | static const int32_t fromIBM943Offsstop[] = { 0}; |
2541 | ||
2542 | gInBufferSize = inputsize; | |
2543 | gOutBufferSize = outputsize; | |
2544 | /*checking with a legal value*/ | |
2545 | if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]), | |
2546 | templegal949, sizeof(templegal949), "ibm-949", | |
2547 | UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) | |
2548 | log_err("u-> ibm-949 with skip did not match.\n"); | |
2549 | ||
2550 | /*checking illegal value for ibm-943 with substitute*/ | |
2551 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2552 | toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", | |
2553 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2554 | log_err("ibm-943->u with subst did not match.\n"); | |
2555 | /*checking illegal value for ibm-943 with skip */ | |
2556 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2557 | toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943", | |
2558 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) | |
2559 | log_err("ibm-943->u with skip did not match.\n"); | |
2560 | ||
2561 | /*checking illegal value for ibm-943 with stop */ | |
2562 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2563 | toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943", | |
2564 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) | |
2565 | log_err("ibm-943->u with stop did not match.\n"); | |
2566 | ||
2567 | } | |
2568 | ||
2569 | static void TestSingleByte(int32_t inputsize, int32_t outputsize) | |
2570 | { | |
2571 | static const uint8_t sampleText[] = { | |
2572 | 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, | |
fd0068a8 A |
2573 | 0xff, 0x32, 0x33}; |
2574 | static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; | |
2575 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; | |
b75a7d8f A |
2576 | /*checking illegal value for ibm-943 with substitute*/ |
2577 | gInBufferSize = inputsize; | |
2578 | gOutBufferSize = outputsize; | |
2579 | ||
2580 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
2581 | toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", | |
2582 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2583 | log_err("ibm-943->u with subst did not match.\n"); | |
2584 | } | |
2585 | ||
2586 | static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) | |
2587 | { | |
2588 | /*EBCDIC_STATEFUL*/ | |
2589 | static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; | |
2590 | static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; | |
2591 | static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; | |
2592 | /* s SO doubl SI sng s SO fe fe SI s */ | |
2593 | ||
2594 | /*EBCDIC_STATEFUL with subChar=3f*/ | |
2595 | static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; | |
2596 | static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; | |
2597 | static const char mySubChar[]={ 0x3f}; | |
2598 | ||
2599 | gInBufferSize = inputsize; | |
2600 | gOutBufferSize = outputsize; | |
2601 | ||
2602 | if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), | |
2603 | toIBM930, sizeof(toIBM930), "ibm-930", | |
2604 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) | |
2605 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); | |
2606 | ||
2607 | if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), | |
2608 | toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", | |
2609 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) | |
2610 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); | |
2611 | } | |
73c04bcf | 2612 | #endif |
b75a7d8f A |
2613 | |
2614 | UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
2615 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
2616 | const char *mySubChar, int8_t len) | |
2617 | { | |
2618 | ||
2619 | ||
2620 | UErrorCode status = U_ZERO_ERROR; | |
2621 | UConverter *conv = 0; | |
73c04bcf | 2622 | char junkout[NEW_MAX_BUFFER]; /* FIX */ |
b75a7d8f A |
2623 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
2624 | const UChar *src; | |
73c04bcf A |
2625 | char *end; |
2626 | char *targ; | |
b75a7d8f A |
2627 | int32_t *offs; |
2628 | int i; | |
2629 | int32_t realBufferSize; | |
73c04bcf | 2630 | char *realBufferEnd; |
b75a7d8f A |
2631 | const UChar *realSourceEnd; |
2632 | const UChar *sourceLimit; | |
2633 | UBool checkOffsets = TRUE; | |
2634 | UBool doFlush; | |
2635 | char junk[9999]; | |
2636 | char offset_str[9999]; | |
73c04bcf | 2637 | char *p; |
b75a7d8f A |
2638 | UConverterFromUCallback oldAction = NULL; |
2639 | const void* oldContext = NULL; | |
2640 | ||
2641 | ||
2642 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
73c04bcf | 2643 | junkout[i] = (char)0xF0; |
b75a7d8f A |
2644 | for(i=0;i<NEW_MAX_BUFFER;i++) |
2645 | junokout[i] = 0xFF; | |
2646 | setNuConvTestName(codepage, "FROM"); | |
2647 | ||
2648 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
2649 | gOutBufferSize); | |
2650 | ||
2651 | conv = ucnv_open(codepage, &status); | |
2652 | if(U_FAILURE(status)) | |
2653 | { | |
2654 | log_data_err("Couldn't open converter %s\n",codepage); | |
2655 | return TRUE; | |
2656 | } | |
2657 | ||
2658 | log_verbose("Converter opened..\n"); | |
2659 | ||
2660 | /*----setting the callback routine----*/ | |
2661 | ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2662 | if (U_FAILURE(status)) | |
2663 | { | |
2664 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2665 | } | |
2666 | /*------------------------*/ | |
2667 | /*setting the subChar*/ | |
2668 | if(mySubChar != NULL){ | |
2669 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2670 | if (U_FAILURE(status)) { | |
2671 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2672 | } | |
2673 | } | |
2674 | /*------------*/ | |
2675 | ||
2676 | src = source; | |
2677 | targ = junkout; | |
2678 | offs = junokout; | |
2679 | ||
2680 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2681 | realBufferEnd = junkout + realBufferSize; | |
2682 | realSourceEnd = source + sourceLen; | |
2683 | ||
2684 | if ( gOutBufferSize != realBufferSize ) | |
2685 | checkOffsets = FALSE; | |
2686 | ||
2687 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2688 | checkOffsets = FALSE; | |
2689 | ||
2690 | do | |
2691 | { | |
2692 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
2693 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
2694 | ||
2695 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
2696 | ||
2697 | if(targ == realBufferEnd) | |
2698 | { | |
2699 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
2700 | return FALSE; | |
2701 | } | |
2702 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
2703 | ||
2704 | ||
2705 | status = U_ZERO_ERROR; | |
2706 | ||
2707 | ucnv_fromUnicode (conv, | |
2708 | (char **)&targ, | |
2709 | (const char *)end, | |
2710 | &src, | |
2711 | sourceLimit, | |
2712 | checkOffsets ? offs : NULL, | |
2713 | doFlush, /* flush if we're at the end of the input data */ | |
2714 | &status); | |
2715 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
2716 | ||
2717 | ||
2718 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2719 | UChar errChars[50]; /* should be sufficient */ | |
2720 | int8_t errLen = 50; | |
2721 | UErrorCode err = U_ZERO_ERROR; | |
2722 | const UChar* limit= NULL; | |
2723 | const UChar* start= NULL; | |
2724 | ucnv_getInvalidUChars(conv,errChars, &errLen, &err); | |
2725 | if(U_FAILURE(err)){ | |
2726 | log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); | |
2727 | } | |
2728 | /* src points to limit of invalid chars */ | |
2729 | limit = src; | |
2730 | /* length of in invalid chars should be equal to returned length*/ | |
2731 | start = src - errLen; | |
2732 | if(u_strncmp(errChars,start,errLen)!=0){ | |
2733 | log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); | |
2734 | } | |
2735 | } | |
2736 | /* allow failure codes for the stop callback */ | |
2737 | if(U_FAILURE(status) && | |
2738 | (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) | |
2739 | { | |
2740 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2741 | return FALSE; | |
2742 | } | |
2743 | ||
2744 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
2745 | sourceLen, targ-junkout); | |
2746 | if(VERBOSITY) | |
2747 | { | |
2748 | ||
2749 | junk[0] = 0; | |
2750 | offset_str[0] = 0; | |
2751 | for(p = junkout;p<targ;p++) | |
2752 | { | |
2753 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
2754 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
2755 | } | |
2756 | ||
2757 | log_verbose(junk); | |
2758 | printSeq(expect, expectLen); | |
2759 | if ( checkOffsets ) | |
2760 | { | |
2761 | log_verbose("\nOffsets:"); | |
2762 | log_verbose(offset_str); | |
2763 | } | |
2764 | log_verbose("\n"); | |
2765 | } | |
2766 | ucnv_close(conv); | |
2767 | ||
2768 | ||
2769 | if(expectLen != targ-junkout) | |
2770 | { | |
2771 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
2772 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
73c04bcf | 2773 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
2774 | printSeqErr(expect, expectLen); |
2775 | return FALSE; | |
2776 | } | |
2777 | ||
2778 | if (checkOffsets && (expectOffsets != 0) ) | |
2779 | { | |
2780 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
2781 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
2782 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2783 | log_err("Got Output : "); | |
73c04bcf | 2784 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
2785 | log_err("Got Offsets: "); |
2786 | for(p=junkout;p<targ;p++) | |
2787 | log_err("%d,", junokout[p-junkout]); | |
2788 | log_err("\n"); | |
2789 | log_err("Expected Offsets: "); | |
2790 | for(i=0; i<(targ-junkout); i++) | |
2791 | log_err("%d,", expectOffsets[i]); | |
2792 | log_err("\n"); | |
2793 | return FALSE; | |
2794 | } | |
2795 | } | |
2796 | ||
2797 | if(!memcmp(junkout, expect, expectLen)) | |
2798 | { | |
2799 | log_verbose("String matches! %s\n", gNuConvTestName); | |
2800 | return TRUE; | |
2801 | } | |
2802 | else | |
2803 | { | |
2804 | log_err("String does not match. %s\n", gNuConvTestName); | |
2805 | log_err("source: "); | |
2806 | printUSeqErr(source, sourceLen); | |
2807 | log_err("Got: "); | |
73c04bcf | 2808 | printSeqErr((const uint8_t *)junkout, expectLen); |
b75a7d8f A |
2809 | log_err("Expected: "); |
2810 | printSeqErr(expect, expectLen); | |
2811 | return FALSE; | |
2812 | } | |
2813 | } | |
2814 | ||
2815 | UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
2816 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
2817 | const char *mySubChar, int8_t len) | |
2818 | { | |
2819 | UErrorCode status = U_ZERO_ERROR; | |
2820 | UConverter *conv = 0; | |
2821 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2822 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
73c04bcf A |
2823 | const char *src; |
2824 | const char *realSourceEnd; | |
2825 | const char *srcLimit; | |
b75a7d8f A |
2826 | UChar *targ; |
2827 | UChar *end; | |
2828 | int32_t *offs; | |
2829 | int i; | |
2830 | UBool checkOffsets = TRUE; | |
2831 | char junk[9999]; | |
2832 | char offset_str[9999]; | |
2833 | UChar *p; | |
2834 | UConverterToUCallback oldAction = NULL; | |
2835 | const void* oldContext = NULL; | |
2836 | ||
2837 | int32_t realBufferSize; | |
2838 | UChar *realBufferEnd; | |
2839 | ||
2840 | ||
2841 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2842 | junkout[i] = 0xFFFE; | |
2843 | ||
2844 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2845 | junokout[i] = -1; | |
2846 | ||
2847 | setNuConvTestName(codepage, "TO"); | |
2848 | ||
2849 | log_verbose("\n========= %s\n", gNuConvTestName); | |
2850 | ||
2851 | conv = ucnv_open(codepage, &status); | |
2852 | if(U_FAILURE(status)) | |
2853 | { | |
2854 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
2855 | return TRUE; | |
2856 | } | |
2857 | ||
2858 | log_verbose("Converter opened..\n"); | |
2859 | ||
73c04bcf | 2860 | src = (const char *)source; |
b75a7d8f A |
2861 | targ = junkout; |
2862 | offs = junokout; | |
2863 | ||
2864 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2865 | realBufferEnd = junkout + realBufferSize; | |
2866 | realSourceEnd = src + sourcelen; | |
2867 | /*----setting the callback routine----*/ | |
2868 | ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2869 | if (U_FAILURE(status)) | |
2870 | { | |
2871 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2872 | } | |
2873 | /*-------------------------------------*/ | |
2874 | /*setting the subChar*/ | |
2875 | if(mySubChar != NULL){ | |
2876 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2877 | if (U_FAILURE(status)) { | |
2878 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2879 | } | |
2880 | } | |
2881 | /*------------*/ | |
2882 | ||
2883 | ||
2884 | if ( gOutBufferSize != realBufferSize ) | |
2885 | checkOffsets = FALSE; | |
2886 | ||
2887 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2888 | checkOffsets = FALSE; | |
2889 | ||
2890 | do | |
2891 | { | |
2892 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
2893 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
2894 | ||
2895 | if(targ == realBufferEnd) | |
2896 | { | |
2897 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
2898 | return FALSE; | |
2899 | } | |
2900 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
2901 | ||
2902 | ||
2903 | ||
2904 | status = U_ZERO_ERROR; | |
2905 | ||
2906 | ucnv_toUnicode (conv, | |
2907 | &targ, | |
2908 | end, | |
2909 | (const char **)&src, | |
2910 | (const char *)srcLimit, | |
2911 | checkOffsets ? offs : NULL, | |
2912 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
2913 | &status); | |
2914 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
2915 | ||
2916 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2917 | char errChars[50]; /* should be sufficient */ | |
2918 | int8_t errLen = 50; | |
2919 | UErrorCode err = U_ZERO_ERROR; | |
73c04bcf A |
2920 | const char* limit= NULL; |
2921 | const char* start= NULL; | |
b75a7d8f A |
2922 | ucnv_getInvalidChars(conv,errChars, &errLen, &err); |
2923 | if(U_FAILURE(err)){ | |
2924 | log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); | |
2925 | } | |
2926 | /* src points to limit of invalid chars */ | |
2927 | limit = src; | |
2928 | /* length of in invalid chars should be equal to returned length*/ | |
2929 | start = src - errLen; | |
73c04bcf | 2930 | if(uprv_strncmp(errChars,start,errLen)!=0){ |
b75a7d8f A |
2931 | log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); |
2932 | } | |
2933 | } | |
2934 | /* allow failure codes for the stop callback */ | |
2935 | if(U_FAILURE(status) && | |
2936 | (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) | |
2937 | { | |
2938 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2939 | return FALSE; | |
2940 | } | |
2941 | ||
2942 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
2943 | sourcelen, targ-junkout); | |
2944 | if(VERBOSITY) | |
2945 | { | |
2946 | ||
2947 | junk[0] = 0; | |
2948 | offset_str[0] = 0; | |
2949 | ||
2950 | for(p = junkout;p<targ;p++) | |
2951 | { | |
2952 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
2953 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
2954 | } | |
2955 | ||
2956 | log_verbose(junk); | |
2957 | printUSeq(expect, expectlen); | |
2958 | if ( checkOffsets ) | |
2959 | { | |
2960 | log_verbose("\nOffsets:"); | |
2961 | log_verbose(offset_str); | |
2962 | } | |
2963 | log_verbose("\n"); | |
2964 | } | |
2965 | ucnv_close(conv); | |
2966 | ||
2967 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
2968 | ||
2969 | if (checkOffsets && (expectOffsets != 0)) | |
2970 | { | |
2971 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
2972 | { | |
2973 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2974 | log_err("Got offsets: "); | |
2975 | for(p=junkout;p<targ;p++) | |
2976 | log_err(" %2d,", junokout[p-junkout]); | |
2977 | log_err("\n"); | |
2978 | log_err("Expected offsets: "); | |
2979 | for(i=0; i<(targ-junkout); i++) | |
2980 | log_err(" %2d,", expectOffsets[i]); | |
2981 | log_err("\n"); | |
2982 | log_err("Got output: "); | |
2983 | for(i=0; i<(targ-junkout); i++) | |
2984 | log_err("0x%04x,", junkout[i]); | |
2985 | log_err("\n"); | |
2986 | log_err("From source: "); | |
73c04bcf | 2987 | for(i=0; i<(src-(const char *)source); i++) |
b75a7d8f A |
2988 | log_err(" 0x%02x,", (unsigned char)source[i]); |
2989 | log_err("\n"); | |
2990 | } | |
2991 | } | |
2992 | ||
2993 | if(!memcmp(junkout, expect, expectlen*2)) | |
2994 | { | |
2995 | log_verbose("Matches!\n"); | |
2996 | return TRUE; | |
2997 | } | |
2998 | else | |
2999 | { | |
3000 | log_err("String does not match. %s\n", gNuConvTestName); | |
3001 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3002 | log_err("Got: "); | |
3003 | printUSeqErr(junkout, expectlen); | |
3004 | log_err("Expected: "); | |
3005 | printUSeqErr(expect, expectlen); | |
3006 | log_err("\n"); | |
3007 | return FALSE; | |
3008 | } | |
3009 | } | |
3010 | ||
3011 | UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
3012 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
3013 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3014 | { | |
3015 | ||
3016 | ||
3017 | UErrorCode status = U_ZERO_ERROR; | |
3018 | UConverter *conv = 0; | |
73c04bcf | 3019 | char junkout[NEW_MAX_BUFFER]; /* FIX */ |
b75a7d8f A |
3020 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
3021 | const UChar *src; | |
73c04bcf A |
3022 | char *end; |
3023 | char *targ; | |
b75a7d8f A |
3024 | int32_t *offs; |
3025 | int i; | |
3026 | int32_t realBufferSize; | |
73c04bcf | 3027 | char *realBufferEnd; |
b75a7d8f A |
3028 | const UChar *realSourceEnd; |
3029 | const UChar *sourceLimit; | |
3030 | UBool checkOffsets = TRUE; | |
3031 | UBool doFlush; | |
3032 | char junk[9999]; | |
3033 | char offset_str[9999]; | |
73c04bcf | 3034 | char *p; |
b75a7d8f A |
3035 | UConverterFromUCallback oldAction = NULL; |
3036 | const void* oldContext = NULL; | |
3037 | ||
3038 | ||
3039 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
73c04bcf | 3040 | junkout[i] = (char)0xF0; |
b75a7d8f A |
3041 | for(i=0;i<NEW_MAX_BUFFER;i++) |
3042 | junokout[i] = 0xFF; | |
3043 | setNuConvTestName(codepage, "FROM"); | |
3044 | ||
3045 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
3046 | gOutBufferSize); | |
3047 | ||
3048 | conv = ucnv_open(codepage, &status); | |
3049 | if(U_FAILURE(status)) | |
3050 | { | |
3051 | log_data_err("Couldn't open converter %s\n",codepage); | |
3052 | return TRUE; /* Because the err has already been logged. */ | |
3053 | } | |
3054 | ||
3055 | log_verbose("Converter opened..\n"); | |
3056 | ||
3057 | /*----setting the callback routine----*/ | |
3058 | ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3059 | if (U_FAILURE(status)) | |
3060 | { | |
3061 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3062 | } | |
3063 | /*------------------------*/ | |
3064 | /*setting the subChar*/ | |
3065 | if(mySubChar != NULL){ | |
3066 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3067 | if (U_FAILURE(status)) { | |
3068 | log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); | |
3069 | } | |
3070 | } | |
3071 | /*------------*/ | |
3072 | ||
3073 | src = source; | |
3074 | targ = junkout; | |
3075 | offs = junokout; | |
3076 | ||
3077 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3078 | realBufferEnd = junkout + realBufferSize; | |
3079 | realSourceEnd = source + sourceLen; | |
3080 | ||
3081 | if ( gOutBufferSize != realBufferSize ) | |
3082 | checkOffsets = FALSE; | |
3083 | ||
3084 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3085 | checkOffsets = FALSE; | |
3086 | ||
3087 | do | |
3088 | { | |
3089 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
3090 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
3091 | ||
3092 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
3093 | ||
3094 | if(targ == realBufferEnd) | |
3095 | { | |
3096 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
3097 | return FALSE; | |
3098 | } | |
3099 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
3100 | ||
3101 | ||
3102 | status = U_ZERO_ERROR; | |
3103 | ||
3104 | ucnv_fromUnicode (conv, | |
3105 | (char **)&targ, | |
3106 | (const char *)end, | |
3107 | &src, | |
3108 | sourceLimit, | |
3109 | checkOffsets ? offs : NULL, | |
3110 | doFlush, /* flush if we're at the end of the input data */ | |
3111 | &status); | |
3112 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
3113 | ||
3114 | /* allow failure codes for the stop callback */ | |
3115 | if(U_FAILURE(status) && status != expectedError) | |
3116 | { | |
3117 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3118 | return FALSE; | |
3119 | } | |
3120 | ||
3121 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
3122 | sourceLen, targ-junkout); | |
3123 | if(VERBOSITY) | |
3124 | { | |
3125 | ||
3126 | junk[0] = 0; | |
3127 | offset_str[0] = 0; | |
3128 | for(p = junkout;p<targ;p++) | |
3129 | { | |
3130 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
3131 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
3132 | } | |
3133 | ||
3134 | log_verbose(junk); | |
3135 | printSeq(expect, expectLen); | |
3136 | if ( checkOffsets ) | |
3137 | { | |
3138 | log_verbose("\nOffsets:"); | |
3139 | log_verbose(offset_str); | |
3140 | } | |
3141 | log_verbose("\n"); | |
3142 | } | |
3143 | ucnv_close(conv); | |
3144 | ||
3145 | ||
3146 | if(expectLen != targ-junkout) | |
3147 | { | |
3148 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
3149 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
73c04bcf | 3150 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
3151 | printSeqErr(expect, expectLen); |
3152 | return FALSE; | |
3153 | } | |
3154 | ||
3155 | if (checkOffsets && (expectOffsets != 0) ) | |
3156 | { | |
3157 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
3158 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
3159 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3160 | log_err("Got Output : "); | |
73c04bcf | 3161 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
3162 | log_err("Got Offsets: "); |
3163 | for(p=junkout;p<targ;p++) | |
3164 | log_err("%d,", junokout[p-junkout]); | |
3165 | log_err("\n"); | |
3166 | log_err("Expected Offsets: "); | |
3167 | for(i=0; i<(targ-junkout); i++) | |
3168 | log_err("%d,", expectOffsets[i]); | |
3169 | log_err("\n"); | |
3170 | return FALSE; | |
3171 | } | |
3172 | } | |
3173 | ||
3174 | if(!memcmp(junkout, expect, expectLen)) | |
3175 | { | |
3176 | log_verbose("String matches! %s\n", gNuConvTestName); | |
3177 | return TRUE; | |
3178 | } | |
3179 | else | |
3180 | { | |
3181 | log_err("String does not match. %s\n", gNuConvTestName); | |
3182 | log_err("source: "); | |
3183 | printUSeqErr(source, sourceLen); | |
3184 | log_err("Got: "); | |
73c04bcf | 3185 | printSeqErr((const uint8_t *)junkout, expectLen); |
b75a7d8f A |
3186 | log_err("Expected: "); |
3187 | printSeqErr(expect, expectLen); | |
3188 | return FALSE; | |
3189 | } | |
3190 | } | |
3191 | UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
3192 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
3193 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3194 | { | |
3195 | UErrorCode status = U_ZERO_ERROR; | |
3196 | UConverter *conv = 0; | |
3197 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3198 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
73c04bcf A |
3199 | const char *src; |
3200 | const char *realSourceEnd; | |
3201 | const char *srcLimit; | |
b75a7d8f A |
3202 | UChar *targ; |
3203 | UChar *end; | |
3204 | int32_t *offs; | |
3205 | int i; | |
3206 | UBool checkOffsets = TRUE; | |
3207 | char junk[9999]; | |
3208 | char offset_str[9999]; | |
3209 | UChar *p; | |
3210 | UConverterToUCallback oldAction = NULL; | |
3211 | const void* oldContext = NULL; | |
3212 | ||
3213 | int32_t realBufferSize; | |
3214 | UChar *realBufferEnd; | |
3215 | ||
3216 | ||
3217 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3218 | junkout[i] = 0xFFFE; | |
3219 | ||
3220 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3221 | junokout[i] = -1; | |
3222 | ||
3223 | setNuConvTestName(codepage, "TO"); | |
3224 | ||
3225 | log_verbose("\n========= %s\n", gNuConvTestName); | |
3226 | ||
3227 | conv = ucnv_open(codepage, &status); | |
3228 | if(U_FAILURE(status)) | |
3229 | { | |
3230 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
3231 | return TRUE; | |
3232 | } | |
3233 | ||
3234 | log_verbose("Converter opened..\n"); | |
3235 | ||
73c04bcf | 3236 | src = (const char *)source; |
b75a7d8f A |
3237 | targ = junkout; |
3238 | offs = junokout; | |
3239 | ||
3240 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3241 | realBufferEnd = junkout + realBufferSize; | |
3242 | realSourceEnd = src + sourcelen; | |
3243 | /*----setting the callback routine----*/ | |
3244 | ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3245 | if (U_FAILURE(status)) | |
3246 | { | |
3247 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3248 | } | |
3249 | /*-------------------------------------*/ | |
3250 | /*setting the subChar*/ | |
3251 | if(mySubChar != NULL){ | |
3252 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3253 | if (U_FAILURE(status)) { | |
3254 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3255 | } | |
3256 | } | |
3257 | /*------------*/ | |
3258 | ||
3259 | ||
3260 | if ( gOutBufferSize != realBufferSize ) | |
3261 | checkOffsets = FALSE; | |
3262 | ||
3263 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3264 | checkOffsets = FALSE; | |
3265 | ||
3266 | do | |
3267 | { | |
3268 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
3269 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
3270 | ||
3271 | if(targ == realBufferEnd) | |
3272 | { | |
3273 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
3274 | return FALSE; | |
3275 | } | |
3276 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
3277 | ||
3278 | ||
3279 | ||
3280 | status = U_ZERO_ERROR; | |
3281 | ||
3282 | ucnv_toUnicode (conv, | |
3283 | &targ, | |
3284 | end, | |
3285 | (const char **)&src, | |
3286 | (const char *)srcLimit, | |
3287 | checkOffsets ? offs : NULL, | |
3288 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
3289 | &status); | |
3290 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
3291 | ||
3292 | /* allow failure codes for the stop callback */ | |
3293 | if(U_FAILURE(status) && status!=expectedError) | |
3294 | { | |
3295 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3296 | return FALSE; | |
3297 | } | |
3298 | ||
3299 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
3300 | sourcelen, targ-junkout); | |
3301 | if(VERBOSITY) | |
3302 | { | |
3303 | ||
3304 | junk[0] = 0; | |
3305 | offset_str[0] = 0; | |
3306 | ||
3307 | for(p = junkout;p<targ;p++) | |
3308 | { | |
3309 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
3310 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
3311 | } | |
3312 | ||
3313 | log_verbose(junk); | |
3314 | printUSeq(expect, expectlen); | |
3315 | if ( checkOffsets ) | |
3316 | { | |
3317 | log_verbose("\nOffsets:"); | |
3318 | log_verbose(offset_str); | |
3319 | } | |
3320 | log_verbose("\n"); | |
3321 | } | |
3322 | ucnv_close(conv); | |
3323 | ||
3324 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
3325 | ||
3326 | if (checkOffsets && (expectOffsets != 0)) | |
3327 | { | |
3328 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
3329 | { | |
3330 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3331 | log_err("Got offsets: "); | |
3332 | for(p=junkout;p<targ;p++) | |
3333 | log_err(" %2d,", junokout[p-junkout]); | |
3334 | log_err("\n"); | |
3335 | log_err("Expected offsets: "); | |
3336 | for(i=0; i<(targ-junkout); i++) | |
3337 | log_err(" %2d,", expectOffsets[i]); | |
3338 | log_err("\n"); | |
3339 | log_err("Got output: "); | |
3340 | for(i=0; i<(targ-junkout); i++) | |
3341 | log_err("0x%04x,", junkout[i]); | |
3342 | log_err("\n"); | |
3343 | log_err("From source: "); | |
73c04bcf | 3344 | for(i=0; i<(src-(const char *)source); i++) |
b75a7d8f A |
3345 | log_err(" 0x%02x,", (unsigned char)source[i]); |
3346 | log_err("\n"); | |
3347 | } | |
3348 | } | |
3349 | ||
3350 | if(!memcmp(junkout, expect, expectlen*2)) | |
3351 | { | |
3352 | log_verbose("Matches!\n"); | |
3353 | return TRUE; | |
3354 | } | |
3355 | else | |
3356 | { | |
3357 | log_err("String does not match. %s\n", gNuConvTestName); | |
3358 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3359 | log_err("Got: "); | |
3360 | printUSeqErr(junkout, expectlen); | |
3361 | log_err("Expected: "); | |
3362 | printUSeqErr(expect, expectlen); | |
3363 | log_err("\n"); | |
3364 | return FALSE; | |
3365 | } | |
3366 | } | |
73c04bcf A |
3367 | |
3368 | static void TestCallBackFailure(void) { | |
3369 | UErrorCode status = U_USELESS_COLLATOR_ERROR; | |
3370 | ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); | |
3371 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3372 | log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); | |
3373 | } | |
3374 | ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); | |
3375 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3376 | log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3377 | } | |
3378 | ucnv_cbFromUWriteSub(NULL, -1, &status); | |
3379 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3380 | log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); | |
3381 | } | |
3382 | ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); | |
3383 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3384 | log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3385 | } | |
3386 | } |