]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
4388f060 | 3 | * Copyright (c) 1997-2011, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | /* | |
73c04bcf | 7 | ******************************************************************************** |
b75a7d8f A |
8 | * File NCCBTST.C |
9 | * | |
10 | * Modification History: | |
11 | * Name Description | |
12 | * Madhu Katragadda 7/21/1999 Testing error callback routines | |
73c04bcf | 13 | ******************************************************************************** |
b75a7d8f A |
14 | */ |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <string.h> | |
18 | #include <ctype.h> | |
19 | #include "cstring.h" | |
20 | #include "unicode/uloc.h" | |
21 | #include "unicode/ucnv.h" | |
22 | #include "unicode/ucnv_err.h" | |
23 | #include "cintltst.h" | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/ustring.h" | |
26 | #include "nccbtst.h" | |
73c04bcf | 27 | #include "unicode/ucnv_cb.h" |
4388f060 A |
28 | #include "unicode/utf16.h" |
29 | ||
b75a7d8f A |
/* Large buffer size handed to the test drivers as an input/output chunk size. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every argument and the whole expansion are parenthesized so that expression
 * arguments (e.g. "a & 1") keep their meaning. The arguments are still
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes selected by the currently running test; reported by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;

/* Label describing the conversion under test, filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
38 | ||
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed,
 * comma-separated list of two-digit uppercase hex values.
 * Nothing but the braces is printed when len is zero or negative.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
47 | ||
48 | static void printUSeq(const UChar* a, int len) | |
49 | { | |
50 | int i=0; | |
51 | log_verbose("{"); | |
52 | while (i<len) | |
53 | log_verbose(" 0x%04x, ", a[i++]); | |
54 | log_verbose("}\n"); | |
55 | } | |
56 | ||
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed,
 * comma-separated list of two-digit lowercase hex values.
 * Nothing but the braces is printed when len is zero or negative.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
65 | ||
66 | static void printUSeqErr(const UChar* a, int len) | |
67 | { | |
68 | int i=0; | |
69 | fprintf(stderr, "{"); | |
70 | while (i<len) | |
71 | fprintf(stderr, "0x%04x, ", a[i++]); | |
72 | fprintf(stderr,"}\n"); | |
73 | } | |
74 | ||
75 | static void setNuConvTestName(const char *codepage, const char *direction) | |
76 | { | |
77 | sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", | |
78 | codepage, | |
79 | direction, | |
374ca955 A |
80 | (int)gInBufferSize, |
81 | (int)gOutBufferSize); | |
b75a7d8f A |
82 | } |
83 | ||
84 | ||
73c04bcf A |
85 | static void TestCallBackFailure(void); |
86 | ||
b75a7d8f A |
87 | void addTestConvertErrorCallBack(TestNode** root); |
88 | ||
89 | void addTestConvertErrorCallBack(TestNode** root) | |
90 | { | |
91 | addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); | |
92 | addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); | |
93 | addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); | |
94 | addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); | |
73c04bcf A |
95 | |
96 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
97 | addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); |
98 | addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); | |
73c04bcf A |
99 | #endif |
100 | ||
101 | addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); | |
b75a7d8f A |
102 | } |
103 | ||
104 | static void TestSkipCallBack() | |
105 | { | |
106 | TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
107 | TestSkip(1,NEW_MAX_BUFFER); | |
108 | TestSkip(1,1); | |
109 | TestSkip(NEW_MAX_BUFFER, 1); | |
110 | } | |
111 | ||
112 | static void TestStopCallBack() | |
113 | { | |
114 | TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
115 | TestStop(1,NEW_MAX_BUFFER); | |
116 | TestStop(1,1); | |
117 | TestStop(NEW_MAX_BUFFER, 1); | |
118 | } | |
119 | ||
120 | static void TestSubCallBack() | |
121 | { | |
122 | TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
123 | TestSub(1,NEW_MAX_BUFFER); | |
124 | TestSub(1,1); | |
125 | TestSub(NEW_MAX_BUFFER, 1); | |
73c04bcf A |
126 | |
127 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
128 | TestEBCDIC_STATEFUL_Sub(1, 1); |
129 | TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); | |
130 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); | |
131 | TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
73c04bcf | 132 | #endif |
b75a7d8f A |
133 | } |
134 | ||
135 | static void TestSubWithValueCallBack() | |
136 | { | |
137 | TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
138 | TestSubWithValue(1,NEW_MAX_BUFFER); | |
139 | TestSubWithValue(1,1); | |
140 | TestSubWithValue(NEW_MAX_BUFFER, 1); | |
141 | } | |
142 | ||
73c04bcf | 143 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
144 | static void TestLegalAndOtherCallBack() |
145 | { | |
146 | TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
147 | TestLegalAndOthers(1,NEW_MAX_BUFFER); | |
148 | TestLegalAndOthers(1,1); | |
149 | TestLegalAndOthers(NEW_MAX_BUFFER, 1); | |
150 | } | |
151 | ||
152 | static void TestSingleByteCallBack() | |
153 | { | |
154 | TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
155 | TestSingleByte(1,NEW_MAX_BUFFER); | |
156 | TestSingleByte(1,1); | |
157 | TestSingleByte(NEW_MAX_BUFFER, 1); | |
158 | } | |
73c04bcf | 159 | #endif |
b75a7d8f A |
160 | |
161 | static void TestSkip(int32_t inputsize, int32_t outputsize) | |
162 | { | |
163 | static const uint8_t expskipIBM_949[]= { | |
164 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
165 | ||
166 | static const uint8_t expskipIBM_943[] = { | |
167 | 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; | |
168 | ||
169 | static const uint8_t expskipIBM_930[] = { | |
170 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; | |
171 | ||
172 | gInBufferSize = inputsize; | |
173 | gOutBufferSize = outputsize; | |
174 | ||
175 | /*From Unicode*/ | |
176 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); | |
177 | ||
73c04bcf | 178 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
179 | { |
180 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
181 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
182 | ||
183 | static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; | |
184 | static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; | |
b75a7d8f A |
185 | |
186 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
187 | expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", | |
188 | UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) | |
189 | log_err("u-> ibm-949 with skip did not match.\n"); | |
190 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
191 | expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", | |
192 | UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) | |
193 | log_err("u-> ibm-943 with skip did not match.\n"); | |
b75a7d8f A |
194 | } |
195 | ||
196 | { | |
197 | static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; | |
198 | static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; | |
199 | static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; | |
200 | ||
201 | /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ | |
202 | if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, | |
203 | fromUBytes, sizeof(fromUBytes), | |
204 | "ibm-930", | |
205 | UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, | |
206 | NULL, 0) | |
207 | ) { | |
208 | log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); | |
209 | } | |
210 | } | |
73c04bcf | 211 | #endif |
b75a7d8f A |
212 | |
213 | { | |
214 | static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
215 | static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; | |
216 | static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; | |
217 | ||
218 | static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; | |
219 | static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; | |
220 | static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; | |
221 | ||
222 | /* US-ASCII */ | |
223 | if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, | |
224 | usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
225 | "US-ASCII", | |
226 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
227 | NULL, 0) | |
228 | ) { | |
229 | log_err("u->US-ASCII with skip did not match.\n"); | |
230 | } | |
231 | ||
73c04bcf | 232 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
233 | /* SBCS NLTC codepage 367 for US-ASCII */ |
234 | if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, | |
235 | usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
236 | "ibm-367", | |
237 | UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, | |
238 | NULL, 0) | |
239 | ) { | |
240 | log_err("u->ibm-367 with skip did not match.\n"); | |
241 | } | |
73c04bcf | 242 | #endif |
b75a7d8f A |
243 | |
244 | /* ISO-Latin-1 */ | |
245 | if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, | |
246 | latin1FromUBytes, sizeof(latin1FromUBytes), | |
247 | "LATIN_1", | |
248 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
249 | NULL, 0) | |
250 | ) { | |
251 | log_err("u->LATIN_1 with skip did not match.\n"); | |
252 | } | |
253 | ||
73c04bcf | 254 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
255 | /* windows-1252 */ |
256 | if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, | |
257 | latin1FromUBytes, sizeof(latin1FromUBytes), | |
258 | "windows-1252", | |
259 | UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, | |
260 | NULL, 0) | |
261 | ) { | |
262 | log_err("u->windows-1252 with skip did not match.\n"); | |
263 | } | |
264 | } | |
265 | ||
266 | { | |
267 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
268 | static const uint8_t toIBM943[]= { 0x61, 0x61 }; | |
269 | static const int32_t offset[]= {0, 4}; | |
270 | ||
271 | /* EUC_JP*/ | |
272 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
273 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
274 | 0x61, 0x8e, 0xe0, | |
275 | }; | |
276 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; | |
277 | ||
278 | /*EUC_TW*/ | |
279 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
280 | static const uint8_t to_euc_tw[]={ | |
281 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
282 | 0x61, 0xe6, 0xca, 0x8a, | |
283 | }; | |
284 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; | |
285 | ||
286 | /*ISO-2022-JP*/ | |
287 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; | |
288 | static const uint8_t to_iso_2022_jp[]={ | |
289 | 0x41, | |
290 | 0x42, | |
291 | ||
292 | }; | |
293 | static const int32_t from_iso_2022_jpOffs [] ={0,2}; | |
294 | ||
b75a7d8f A |
295 | /*ISO-2022-JP*/ |
296 | UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
297 | static const uint8_t to_iso_2022_jp2[]={ | |
298 | 0x41, | |
299 | 0x43, | |
300 | ||
301 | }; | |
302 | static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; | |
303 | ||
304 | /*ISO-2022-cn*/ | |
305 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
306 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 | 307 | 0x41, 0x42 |
b75a7d8f A |
308 | }; |
309 | static const int32_t from_iso_2022_cnOffs [] ={ | |
374ca955 | 310 | 0, 2 |
b75a7d8f A |
311 | }; |
312 | ||
313 | /*ISO-2022-CN*/ | |
314 | static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
315 | static const uint8_t to_iso_2022_cn1[]={ | |
374ca955 | 316 | 0x41, 0x43 |
b75a7d8f A |
317 | |
318 | }; | |
374ca955 | 319 | static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; |
b75a7d8f A |
320 | |
321 | /*ISO-2022-kr*/ | |
322 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
323 | static const uint8_t to_iso_2022_kr[]={ | |
324 | 0x1b, 0x24, 0x29, 0x43, | |
325 | 0x41, | |
326 | 0x0e, 0x25, 0x50, | |
327 | 0x25, 0x50, | |
328 | 0x0f, 0x42, | |
329 | }; | |
330 | static const int32_t from_iso_2022_krOffs [] ={ | |
331 | -1,-1,-1,-1, | |
332 | 0, | |
333 | 1,1,1, | |
334 | 3,3, | |
335 | 4,4 | |
336 | }; | |
337 | ||
338 | /*ISO-2022-kr*/ | |
339 | static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
340 | static const uint8_t to_iso_2022_kr1[]={ | |
341 | 0x1b, 0x24, 0x29, 0x43, | |
342 | 0x41, | |
343 | 0x0e, 0x25, 0x50, | |
344 | 0x25, 0x50, | |
345 | ||
346 | }; | |
347 | static const int32_t from_iso_2022_krOffs1 [] ={ | |
348 | -1,-1,-1,-1, | |
349 | 0, | |
350 | 1,1,1, | |
351 | 3,3, | |
352 | ||
353 | }; | |
354 | /* HZ encoding */ | |
355 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
356 | ||
357 | static const uint8_t to_hz[]={ | |
358 | 0x7e, 0x7d, 0x41, | |
359 | 0x7e, 0x7b, 0x26, 0x30, | |
360 | 0x26, 0x30, | |
361 | 0x7e, 0x7d, 0x42, | |
362 | ||
363 | }; | |
364 | static const int32_t from_hzOffs [] ={ | |
365 | 0,0,0, | |
366 | 1,1,1,1, | |
367 | 3,3, | |
368 | 4,4,4,4 | |
369 | }; | |
370 | ||
371 | static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
372 | ||
373 | static const uint8_t to_hz1[]={ | |
374 | 0x7e, 0x7d, 0x41, | |
375 | 0x7e, 0x7b, 0x26, 0x30, | |
376 | 0x26, 0x30, | |
377 | ||
378 | ||
379 | }; | |
380 | static const int32_t from_hzOffs1 [] ={ | |
381 | 0,0,0, | |
382 | 1,1,1,1, | |
383 | 3,3, | |
384 | ||
385 | }; | |
386 | ||
73c04bcf | 387 | #endif |
b75a7d8f A |
388 | |
389 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
390 | ||
391 | static const uint8_t to_SCSU[]={ | |
392 | 0x41, | |
393 | 0x42 | |
394 | ||
395 | ||
396 | }; | |
397 | static const int32_t from_SCSUOffs [] ={ | |
398 | 0, | |
399 | 2, | |
400 | ||
401 | }; | |
73c04bcf A |
402 | |
403 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
404 | /* ISCII */ |
405 | static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; | |
406 | static const uint8_t to_iscii[]={ | |
407 | 0x41, | |
408 | 0x42, | |
409 | }; | |
410 | static const int32_t from_isciiOffs [] ={ | |
411 | 0,2, | |
412 | ||
413 | }; | |
414 | /*ISCII*/ | |
415 | static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; | |
416 | static const uint8_t to_iscii1[]={ | |
417 | 0x44, | |
418 | 0x43, | |
419 | ||
420 | }; | |
421 | static const int32_t from_isciiOffs1 [] ={0,2}; | |
422 | ||
423 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
424 | toIBM943, sizeof(toIBM943), "ibm-943", | |
425 | UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) | |
426 | log_err("u-> ibm-943 with skip did not match.\n"); | |
427 | ||
428 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
429 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
430 | UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) | |
431 | log_err("u-> euc-jp with skip did not match.\n"); | |
432 | ||
433 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
434 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
435 | UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) | |
436 | log_err("u-> euc-tw with skip did not match.\n"); | |
437 | ||
438 | /*iso_2022_jp*/ | |
439 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
440 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
441 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) | |
442 | log_err("u-> iso-2022-jp with skip did not match.\n"); | |
443 | ||
b75a7d8f A |
444 | /* with context */ |
445 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
446 | to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
447 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
448 | log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
449 | ||
450 | /*iso_2022_cn*/ | |
451 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
452 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
453 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) | |
454 | log_err("u-> iso-2022-cn with skip did not match.\n"); | |
455 | /*with context*/ | |
456 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), | |
457 | to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", | |
458 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
459 | log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
460 | ||
461 | /*iso_2022_kr*/ | |
462 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
463 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
464 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) | |
465 | log_err("u-> iso-2022-kr with skip did not match.\n"); | |
466 | /*with context*/ | |
467 | if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), | |
468 | to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", | |
469 | UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
470 | log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
471 | ||
472 | /*hz*/ | |
473 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
474 | to_hz, sizeof(to_hz), "HZ", | |
475 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) | |
476 | log_err("u-> HZ with skip did not match.\n"); | |
477 | /*with context*/ | |
478 | if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), | |
479 | to_hz1, sizeof(to_hz1), "hz", | |
480 | UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
73c04bcf A |
481 | log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); |
482 | #endif | |
b75a7d8f A |
483 | |
484 | /*SCSU*/ | |
485 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
486 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
487 | UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) | |
488 | log_err("u-> SCSU with skip did not match.\n"); | |
489 | ||
73c04bcf | 490 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
491 | /*ISCII*/ |
492 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
493 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
494 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) | |
495 | log_err("u-> iscii with skip did not match.\n"); | |
496 | /*with context*/ | |
497 | if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), | |
498 | to_iscii1, sizeof(to_iscii1), "ISCII,version=0", | |
499 | UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
500 | log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); | |
73c04bcf | 501 | #endif |
b75a7d8f A |
502 | } |
503 | ||
504 | log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
505 | { | |
506 | static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
507 | 0xFB, 0xEE, 0x28, /* from source offset 0 */ | |
508 | 0x24, 0x1E, 0x52, | |
509 | 0xB2, | |
510 | 0x20, | |
511 | 0xB3, | |
512 | 0xB1, | |
513 | 0x0D, | |
514 | 0x0A, | |
515 | ||
516 | 0x20, /* from 8 */ | |
517 | 0x00, | |
518 | 0xD0, 0x6C, | |
519 | 0xB6, | |
520 | 0xD8, 0xA5, | |
521 | 0x20, | |
522 | 0x68, | |
523 | 0x59, | |
524 | ||
525 | 0xF9, 0x28, /* from 16 */ | |
526 | 0x6D, | |
527 | 0x20, | |
528 | 0x73, | |
529 | 0xE0, 0x2D, | |
530 | 0xDE, 0x43, | |
531 | 0xD0, 0x33, | |
532 | 0x20, | |
533 | ||
534 | 0xFA, 0x83, /* from 24 */ | |
535 | 0x25, 0x01, | |
536 | 0xFB, 0x16, 0x87, | |
537 | 0x4B, 0x16, | |
538 | 0x20, | |
539 | 0xE6, 0xBD, | |
540 | 0xEB, 0x5B, | |
541 | 0x4B, 0xCC, | |
542 | ||
543 | 0xF9, 0xA2, /* from 32 */ | |
544 | 0xFC, 0x10, 0x3E, | |
545 | 0xFE, 0x16, 0x3A, 0x8C, | |
546 | 0x20, | |
547 | 0xFC, 0x03, 0xAC, | |
548 | ||
549 | 0x01, /* from 41 */ | |
550 | 0xDE, 0x83, | |
551 | 0x20, | |
552 | 0x09 | |
553 | }; | |
554 | static const UChar expected[]={ | |
555 | 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ | |
556 | 0x0063, 0x0061, 0x000D, 0x000A, | |
557 | ||
558 | 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ | |
559 | 0x0930, 0x0020, 0x0918, 0x0909, | |
560 | ||
561 | 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ | |
562 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
563 | ||
564 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ | |
565 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
566 | ||
567 | 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ | |
568 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
569 | ||
570 | 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ | |
571 | 0x0009 | |
572 | }; | |
573 | static const int32_t offsets[]={ | |
574 | 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, | |
575 | 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, | |
576 | 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, | |
577 | 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, | |
578 | 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, | |
579 | 41, 42, 42, 43, 44 | |
580 | }; | |
581 | ||
582 | /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ | |
583 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
584 | sampleText, sizeof(sampleText), | |
585 | "BOCU-1", | |
586 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
587 | ) { | |
588 | log_err("u->BOCU-1 with skip did not match.\n"); | |
589 | } | |
590 | } | |
591 | ||
592 | log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
593 | { | |
594 | const uint8_t sampleText[]={ | |
595 | 0x61, /* 'a' */ | |
596 | 0xc4, 0xb5, /* U+0135 */ | |
597 | 0xed, 0x80, 0xa0, /* Hangul U+d020 */ | |
598 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ | |
599 | 0xee, 0x80, 0x80, /* PUA U+e000 */ | |
600 | 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ | |
601 | 0x62, /* 'b' */ | |
602 | 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ | |
603 | 0xd0, 0x80 /* U+0400 */ | |
604 | }; | |
605 | UChar expected[]={ | |
606 | 0x0061, | |
607 | 0x0135, | |
608 | 0xd020, | |
609 | 0xd801, 0xdc01, | |
610 | 0xe000, | |
611 | 0xdc01, | |
612 | 0x0062, | |
613 | 0xd801, | |
614 | 0x0400 | |
615 | }; | |
616 | int32_t offsets[]={ | |
617 | 0, | |
618 | 1, 1, | |
619 | 2, 2, 2, | |
620 | 3, 3, 3, 4, 4, 4, | |
621 | 5, 5, 5, | |
622 | 6, 6, 6, | |
623 | 7, | |
624 | 8, 8, 8, | |
625 | 9, 9 | |
626 | }; | |
627 | ||
628 | /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ | |
629 | ||
630 | /* without offsets */ | |
631 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
632 | sampleText, sizeof(sampleText), | |
633 | "CESU-8", | |
634 | UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) | |
635 | ) { | |
636 | log_err("u->CESU-8 with skip did not match.\n"); | |
637 | } | |
638 | ||
639 | /* with offsets */ | |
640 | if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
641 | sampleText, sizeof(sampleText), | |
642 | "CESU-8", | |
643 | UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
644 | ) { | |
645 | log_err("u->CESU-8 with skip did not match.\n"); | |
646 | } | |
647 | } | |
648 | ||
649 | /*to Unicode*/ | |
650 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); | |
651 | ||
73c04bcf | 652 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
653 | { |
654 | ||
655 | static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
656 | static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
657 | static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
658 | ||
659 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; | |
660 | static const int32_t fromIBM943Offs [] = { 0, 2, 4}; | |
661 | static const int32_t fromIBM930Offs [] = { 1, 3, 5}; | |
662 | ||
663 | if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), | |
664 | IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", | |
665 | UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) | |
666 | log_err("ibm-949->u with skip did not match.\n"); | |
667 | if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), | |
668 | IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", | |
669 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) | |
670 | log_err("ibm-943->u with skip did not match.\n"); | |
671 | ||
672 | ||
673 | if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), | |
674 | IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", | |
675 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) | |
676 | log_err("ibm-930->u with skip did not match.\n"); | |
677 | ||
678 | ||
679 | if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), | |
680 | IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", | |
681 | UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
682 | log_err("ibm-930->u with skip did not match.\n"); | |
683 | } | |
73c04bcf | 684 | #endif |
b75a7d8f A |
685 | |
686 | { | |
687 | static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; | |
688 | static const UChar usasciiToU[] = { 0x61, 0x31 }; | |
689 | static const int32_t usasciiToUOffsets[] = { 0, 2 }; | |
690 | ||
691 | static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; | |
692 | static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; | |
693 | static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; | |
694 | ||
695 | /* US-ASCII */ | |
696 | if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
697 | usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
698 | "US-ASCII", | |
699 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
700 | NULL, 0) | |
701 | ) { | |
702 | log_err("US-ASCII->u with skip did not match.\n"); | |
703 | } | |
704 | ||
73c04bcf | 705 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
706 | /* SBCS NLTC codepage 367 for US-ASCII */ |
707 | if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
708 | usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
709 | "ibm-367", | |
710 | UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
711 | NULL, 0) | |
712 | ) { | |
713 | log_err("ibm-367->u with skip did not match.\n"); | |
714 | } | |
73c04bcf | 715 | #endif |
b75a7d8f A |
716 | |
717 | /* ISO-Latin-1 */ | |
718 | if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
719 | latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
720 | "LATIN_1", | |
721 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
722 | NULL, 0) | |
723 | ) { | |
724 | log_err("LATIN_1->u with skip did not match.\n"); | |
725 | } | |
726 | ||
73c04bcf | 727 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
728 | /* windows-1252 */ |
729 | if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
730 | latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
731 | "windows-1252", | |
732 | UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
733 | NULL, 0) | |
734 | ) { | |
735 | log_err("windows-1252->u with skip did not match.\n"); | |
736 | } | |
73c04bcf | 737 | #endif |
b75a7d8f A |
738 | } |
739 | ||
73c04bcf | 740 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
741 | { |
742 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
743 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
744 | }; | |
745 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 | |
746 | }; | |
747 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; | |
748 | ||
749 | ||
750 | /* euc-jp*/ | |
751 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
752 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
753 | 0x8e, 0xe0, | |
754 | }; | |
755 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; | |
756 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; | |
757 | ||
758 | /*EUC_TW*/ | |
759 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
760 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
761 | 0xe6, 0xca, 0x8a, | |
762 | }; | |
763 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; | |
764 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; | |
765 | /*iso-2022-jp*/ | |
766 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
767 | 0x41, | |
768 | 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ | |
769 | 0x1b, 0x28, 0x42, 0x42, | |
770 | ||
771 | }; | |
772 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; | |
773 | static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; | |
774 | ||
775 | /*iso-2022-cn*/ | |
776 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
777 | 0x0f, 0x41, 0x44, | |
778 | 0x1B, 0x24, 0x29, 0x47, | |
779 | 0x0E, 0x40, 0x6f, /*unassigned*/ | |
780 | 0x0f, 0x42, | |
781 | ||
782 | }; | |
783 | ||
784 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; | |
785 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; | |
786 | ||
787 | /*iso-2022-kr*/ | |
788 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
789 | 0x1b, 0x24, 0x29, 0x43, | |
790 | 0x41, | |
791 | 0x0E, 0x7f, 0x1E, | |
792 | 0x0e, 0x25, 0x50, | |
793 | 0x0f, 0x51, | |
794 | 0x42, 0x43, | |
795 | ||
796 | }; | |
797 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; | |
798 | static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; | |
799 | ||
800 | /*hz*/ | |
801 | static const uint8_t sampleTxt_hz[]={ | |
802 | 0x41, | |
803 | 0x7e, 0x7b, 0x26, 0x30, | |
804 | 0x7f, 0x1E, /*unassigned*/ | |
805 | 0x26, 0x30, | |
806 | 0x7e, 0x7d, 0x42, | |
807 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
808 | 0x7e, 0x7d, 0x42, | |
809 | }; | |
810 | static const UChar hztoUnicode[]={ | |
811 | 0x41, | |
812 | 0x03a0, | |
813 | 0x03A0, | |
814 | 0x42, | |
815 | 0x42,}; | |
816 | ||
817 | static const int32_t from_hzOffs [] ={0,3,7,11,18, }; | |
818 | ||
819 | /*ISCII*/ | |
820 | static const uint8_t sampleTxt_iscii[]={ | |
821 | 0x41, | |
822 | 0xa1, | |
823 | 0xEB, /*unassigned*/ | |
824 | 0x26, | |
825 | 0x30, | |
826 | 0xa2, | |
827 | 0xEC, /*unassigned*/ | |
828 | 0x42, | |
829 | }; | |
830 | static const UChar isciitoUnicode[]={ | |
831 | 0x41, | |
832 | 0x0901, | |
833 | 0x26, | |
834 | 0x30, | |
835 | 0x0902, | |
836 | 0x42, | |
837 | }; | |
838 | ||
839 | static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; | |
840 | ||
841 | /*LMBCS*/ | |
842 | static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, | |
843 | 0x12, 0x92, 0xa0, /*unassigned*/ | |
844 | 0x12, 0x92, 0xA1, | |
845 | }; | |
846 | static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; | |
847 | static const int32_t fromLMBCS[] = {0, 6}; | |
848 | ||
849 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
850 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
851 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
852 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
853 | ||
854 | if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
855 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
856 | UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
857 | log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
858 | ||
859 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
860 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
861 | UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) | |
862 | log_err("euc-jp->u with skip did not match.\n"); | |
863 | ||
864 | ||
865 | ||
866 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
867 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
868 | UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) | |
869 | log_err("euc-tw->u with skip did not match.\n"); | |
870 | ||
871 | ||
872 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
873 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
874 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) | |
875 | log_err("iso-2022-jp->u with skip did not match.\n"); | |
876 | ||
877 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), | |
878 | iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", | |
879 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) | |
880 | log_err("iso-2022-cn->u with skip did not match.\n"); | |
881 | ||
882 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), | |
883 | iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", | |
884 | UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) | |
885 | log_err("iso-2022-kr->u with skip did not match.\n"); | |
886 | ||
887 | if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
888 | hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
889 | UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) | |
890 | log_err("HZ->u with skip did not match.\n"); | |
891 | ||
892 | if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
893 | isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", | |
894 | UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) | |
895 | log_err("iscii->u with skip did not match.\n"); | |
896 | ||
897 | if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), | |
898 | LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", | |
899 | UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) | |
900 | log_err("LMBCS->u with skip did not match.\n"); | |
901 | ||
902 | } | |
73c04bcf A |
903 | #endif |
904 | ||
b75a7d8f A |
905 | log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); |
906 | { | |
907 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
908 | 0xe0, 0x80, 0x61,}; | |
909 | UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; | |
910 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; | |
911 | ||
912 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
913 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
914 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
915 | log_err("utf8->u with skip did not match.\n");; | |
916 | } | |
917 | ||
918 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); | |
919 | { | |
920 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
921 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; | |
922 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
923 | ||
924 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
925 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
926 | UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
927 | log_err("scsu->u with skip did not match.\n"); | |
928 | } | |
929 | ||
930 | log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
931 | { | |
932 | const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ | |
933 | 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ | |
934 | 0x24, 0x1E, 0x52, /* 3 */ | |
935 | 0xB2, /* 6 */ | |
936 | 0x20, /* 7 */ | |
937 | 0x40, 0x07, /* 8 - wrong trail byte */ | |
938 | 0xB3, /* 10 */ | |
939 | 0xB1, /* 11 */ | |
940 | 0xD0, 0x20, /* 12 - wrong trail byte */ | |
941 | 0x0D, /* 14 */ | |
942 | 0x0A, /* 15 */ | |
943 | 0x20, /* 16 */ | |
944 | 0x00, /* 17 */ | |
945 | 0xD0, 0x6C, /* 18 */ | |
946 | 0xB6, /* 20 */ | |
947 | 0xD8, 0xA5, /* 21 */ | |
948 | 0x20, /* 23 */ | |
949 | 0x68, /* 24 */ | |
950 | 0x59, /* 25 */ | |
951 | 0xF9, 0x28, /* 26 */ | |
952 | 0x6D, /* 28 */ | |
953 | 0x20, /* 29 */ | |
954 | 0x73, /* 30 */ | |
955 | 0xE0, 0x2D, /* 31 */ | |
956 | 0xDE, 0x43, /* 33 */ | |
957 | 0xD0, 0x33, /* 35 */ | |
958 | 0x20, /* 37 */ | |
959 | 0xFA, 0x83, /* 38 */ | |
960 | 0x25, 0x01, /* 40 */ | |
961 | 0xFB, 0x16, 0x87, /* 42 */ | |
962 | 0x4B, 0x16, /* 45 */ | |
963 | 0x20, /* 47 */ | |
964 | 0xE6, 0xBD, /* 48 */ | |
965 | 0xEB, 0x5B, /* 50 */ | |
966 | 0x4B, 0xCC, /* 52 */ | |
967 | 0xF9, 0xA2, /* 54 */ | |
968 | 0xFC, 0x10, 0x3E, /* 56 */ | |
969 | 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ | |
970 | 0x20, /* 63 */ | |
971 | 0xFC, 0x03, 0xAC, /* 64 */ | |
972 | 0xFF, /* 67 - FF just resets the state without encoding anything */ | |
973 | 0x01, /* 68 */ | |
974 | 0xDE, 0x83, /* 69 */ | |
975 | 0x20, /* 71 */ | |
976 | 0x09 /* 72 */ | |
977 | }; | |
978 | UChar expected[]={ | |
979 | 0xFEFF, 0x0061, 0x0062, 0x0020, | |
980 | 0x0063, 0x0061, 0x000D, 0x000A, | |
981 | 0x0020, 0x0000, 0x00DF, 0x00E6, | |
982 | 0x0930, 0x0020, 0x0918, 0x0909, | |
983 | 0x3086, 0x304D, 0x0020, 0x3053, | |
984 | 0x4000, 0x4E00, 0x7777, 0x0020, | |
985 | 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, | |
986 | 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
987 | 0xD800, 0xDC00, 0xD845, 0xDDDD, | |
988 | 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
989 | 0xDFFF, 0x0001, 0x0E40, 0x0020, | |
990 | 0x0009 | |
991 | }; | |
992 | int32_t offsets[]={ | |
993 | 0, 3, 6, 7, /* skip 8, */ | |
994 | 10, 11, /* skip 12, */ | |
995 | 14, 15, 16, 17, 18, | |
996 | 20, 21, 23, 24, 25, 26, 28, 29, | |
997 | 30, 31, 33, 35, 37, 38, | |
998 | 40, 42, 45, 47, 48, | |
999 | 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, | |
1000 | 63, 64, /* trail */ 64, /* reset only 67, */ | |
1001 | 68, 69, | |
1002 | 71, 72 | |
1003 | }; | |
1004 | ||
1005 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1006 | expected, ARRAY_LENGTH(expected), "BOCU-1", | |
1007 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1008 | ) { | |
1009 | log_err("BOCU-1->u with skip did not match.\n"); | |
1010 | } | |
1011 | } | |
1012 | ||
1013 | log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
1014 | { | |
1015 | const uint8_t sampleText[]={ | |
1016 | 0x61, /* 0 'a' */ | |
1017 | 0xc0, 0x80, /* 1 non-shortest form */ | |
1018 | 0xc4, 0xb5, /* 3 U+0135 */ | |
1019 | 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ | |
1020 | 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ | |
1021 | 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ | |
1022 | 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ | |
1023 | 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ | |
1024 | 0x62, /* 24 'b' */ | |
1025 | 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ | |
1026 | 0xed, 0xa0, /* 28 incomplete sequence */ | |
1027 | 0xd0, 0x80 /* 30 U+0400 */ | |
1028 | }; | |
1029 | UChar expected[]={ | |
1030 | 0x0061, | |
1031 | /* skip */ | |
1032 | 0x0135, | |
1033 | 0xd020, | |
1034 | 0xd801, 0xdc01, | |
1035 | 0xe000, | |
1036 | 0xdc01, | |
1037 | /* skip */ | |
1038 | 0x0062, | |
1039 | 0xd801, | |
1040 | 0x0400 | |
1041 | }; | |
1042 | int32_t offsets[]={ | |
1043 | 0, | |
1044 | /* skip 1, */ | |
1045 | 3, | |
1046 | 5, | |
1047 | 8, 11, | |
1048 | 14, | |
1049 | 17, | |
1050 | /* skip 20, 20, */ | |
1051 | 24, | |
1052 | 25, | |
1053 | /* skip 28 */ | |
1054 | 30 | |
1055 | }; | |
1056 | ||
1057 | /* without offsets */ | |
1058 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1059 | expected, ARRAY_LENGTH(expected), "CESU-8", | |
1060 | UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) | |
1061 | ) { | |
1062 | log_err("CESU-8->u with skip did not match.\n"); | |
1063 | } | |
1064 | ||
1065 | /* with offsets */ | |
1066 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1067 | expected, ARRAY_LENGTH(expected), "CESU-8", | |
1068 | UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1069 | ) { | |
1070 | log_err("CESU-8->u with skip did not match.\n"); | |
1071 | } | |
1072 | } | |
1073 | } | |
1074 | ||
1075 | static void TestStop(int32_t inputsize, int32_t outputsize) | |
1076 | { | |
1077 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1078 | static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1079 | ||
1080 | static const uint8_t expstopIBM_949[]= { | |
1081 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; | |
1082 | ||
1083 | static const uint8_t expstopIBM_943[] = { | |
1084 | 0x9f, 0xaf, 0x9f, 0xb1}; | |
1085 | ||
1086 | static const uint8_t expstopIBM_930[] = { | |
1087 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; | |
1088 | ||
1089 | static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; | |
1090 | static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1091 | static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1092 | ||
1093 | ||
1094 | static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; | |
1095 | static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; | |
1096 | static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; | |
1097 | ||
1098 | static const int32_t fromIBM949Offs [] = { 0, 1, 3}; | |
1099 | static const int32_t fromIBM943Offs [] = { 0, 2}; | |
1100 | static const int32_t fromIBM930Offs [] = { 1, 3}; | |
1101 | ||
1102 | gInBufferSize = inputsize; | |
1103 | gOutBufferSize = outputsize; | |
73c04bcf | 1104 | |
b75a7d8f | 1105 | /*From Unicode*/ |
73c04bcf A |
1106 | |
1107 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1108 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1109 | expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", | |
1110 | UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) | |
1111 | log_err("u-> ibm-949 with stop did not match.\n"); | |
1112 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1113 | expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", | |
1114 | UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) | |
1115 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1116 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1117 | expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", | |
1118 | UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) | |
1119 | log_err("u-> ibm-930 with stop did not match.\n"); | |
1120 | ||
1121 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); | |
1122 | { | |
1123 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1124 | static const uint8_t toIBM943[]= { 0x61,}; | |
1125 | static const int32_t offset[]= {0,} ; | |
1126 | ||
1127 | /*EUC_JP*/ | |
1128 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1129 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; | |
1130 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; | |
1131 | ||
1132 | /*EUC_TW*/ | |
1133 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1134 | static const uint8_t to_euc_tw[]={ | |
1135 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; | |
1136 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; | |
1137 | ||
1138 | /*ISO-2022-JP*/ | |
1139 | static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; | |
1140 | static const uint8_t to_iso_2022_jp[]={ | |
1141 | 0x41, | |
1142 | ||
1143 | }; | |
1144 | static const int32_t from_iso_2022_jpOffs [] ={0,}; | |
1145 | ||
1146 | /*ISO-2022-cn*/ | |
1147 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1148 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 | 1149 | 0x41, |
b75a7d8f A |
1150 | |
1151 | }; | |
1152 | static const int32_t from_iso_2022_cnOffs [] ={ | |
1153 | 0,0, | |
1154 | 2,2, | |
1155 | }; | |
1156 | ||
1157 | /*ISO-2022-kr*/ | |
1158 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; | |
1159 | static const uint8_t to_iso_2022_kr[]={ | |
1160 | 0x1b, 0x24, 0x29, 0x43, | |
1161 | 0x41, | |
1162 | 0x0e, 0x25, 0x50, | |
1163 | }; | |
1164 | static const int32_t from_iso_2022_krOffs [] ={ | |
1165 | -1,-1,-1,-1, | |
1166 | 0, | |
1167 | 1,1,1, | |
1168 | }; | |
1169 | ||
1170 | /* HZ encoding */ | |
1171 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1172 | ||
1173 | static const uint8_t to_hz[]={ | |
1174 | 0x7e, 0x7d, 0x41, | |
1175 | 0x7e, 0x7b, 0x26, 0x30, | |
1176 | ||
1177 | }; | |
1178 | static const int32_t from_hzOffs [] ={ | |
1179 | 0, 0,0, | |
1180 | 1,1,1,1, | |
1181 | }; | |
1182 | ||
1183 | /*ISCII*/ | |
1184 | static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1185 | static const uint8_t to_iscii[]={ | |
1186 | 0x41, | |
1187 | }; | |
1188 | static const int32_t from_isciiOffs [] ={ | |
1189 | 0, | |
1190 | }; | |
1191 | ||
1192 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
1193 | toIBM943, sizeof(toIBM943), "ibm-943", | |
1194 | UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) | |
1195 | log_err("u-> ibm-943 with stop did not match.\n"); | |
1196 | ||
1197 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
1198 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
1199 | UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) | |
1200 | log_err("u-> euc-jp with stop did not match.\n"); | |
1201 | ||
1202 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
1203 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1204 | UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1205 | log_err("u-> euc-tw with stop did not match.\n"); | |
1206 | ||
1207 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
1208 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1209 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1210 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1211 | ||
1212 | if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), | |
1213 | to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1214 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1215 | log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1216 | ||
1217 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
1218 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
1219 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) | |
1220 | log_err("u-> iso-2022-cn with stop did not match.\n"); | |
1221 | ||
1222 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
1223 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
1224 | UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) | |
1225 | log_err("u-> iso-2022-kr with stop did not match.\n"); | |
1226 | ||
1227 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
1228 | to_hz, sizeof(to_hz), "HZ", | |
1229 | UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) | |
1230 | log_err("u-> HZ with stop did not match.\n");\ | |
1231 | ||
1232 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
1233 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
1234 | UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) | |
1235 | log_err("u-> iscii with stop did not match.\n"); | |
1236 | ||
1237 | ||
1238 | } | |
73c04bcf A |
1239 | #endif |
1240 | ||
b75a7d8f A |
1241 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); |
1242 | { | |
1243 | static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1244 | ||
1245 | static const uint8_t to_SCSU[]={ | |
1246 | 0x41, | |
1247 | ||
1248 | }; | |
1249 | int32_t from_SCSUOffs [] ={ | |
1250 | 0, | |
1251 | ||
1252 | }; | |
1253 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1254 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
1255 | UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) | |
1256 | log_err("u-> SCSU with skip did not match.\n"); | |
1257 | ||
1258 | } | |
73c04bcf | 1259 | |
b75a7d8f | 1260 | /*to Unicode*/ |
73c04bcf A |
1261 | |
1262 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1263 | if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), |
1264 | IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", | |
1265 | UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) | |
1266 | log_err("ibm-949->u with stop did not match.\n"); | |
1267 | if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), | |
1268 | IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", | |
1269 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) | |
1270 | log_err("ibm-943->u with stop did not match.\n"); | |
1271 | if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), | |
1272 | IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", | |
1273 | UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) | |
1274 | log_err("ibm-930->u with stop did not match.\n"); | |
1275 | ||
1276 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); | |
1277 | { | |
1278 | ||
1279 | static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1280 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1281 | }; | |
1282 | static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; | |
1283 | static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; | |
1284 | ||
1285 | ||
1286 | /*EUC-JP*/ | |
1287 | static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1288 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1289 | 0x8e, 0xe0, | |
1290 | }; | |
1291 | static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; | |
1292 | static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; | |
1293 | ||
1294 | /*EUC_TW*/ | |
1295 | static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1296 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1297 | 0xe6, 0xca, 0x8a, | |
1298 | }; | |
1299 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; | |
1300 | int32_t from_euc_twOffs [] ={ 0, 1, 3}; | |
1301 | ||
1302 | ||
1303 | ||
1304 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
1305 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1306 | UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1307 | log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); | |
1308 | ||
1309 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1310 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1311 | UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) | |
1312 | log_err("euc-jp->u with stop did not match.\n"); | |
1313 | ||
1314 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1315 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
1316 | UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1317 | log_err("euc-tw->u with stop did not match.\n"); | |
1318 | } | |
73c04bcf A |
1319 | #endif |
1320 | ||
b75a7d8f A |
1321 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); |
1322 | { | |
1323 | static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1324 | 0xe0, 0x80, 0x61,}; | |
1325 | static const UChar expected1[] = { 0x0031, 0x4e8c,}; | |
1326 | static const int32_t offsets1[] = { 0x0000, 0x0001}; | |
1327 | ||
1328 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1329 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1330 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1331 | log_err("utf8->u with stop did not match.\n");; | |
1332 | } | |
1333 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); | |
1334 | { | |
1335 | static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; | |
1336 | static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; | |
1337 | static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; | |
1338 | ||
1339 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1340 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1341 | UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1342 | log_err("scsu->u with stop did not match.\n");; | |
1343 | } | |
1344 | ||
1345 | } | |
1346 | ||
1347 | static void TestSub(int32_t inputsize, int32_t outputsize) | |
1348 | { | |
1349 | static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1350 | static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1351 | ||
1352 | static const uint8_t expsubIBM_949[] = | |
1353 | { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; | |
1354 | ||
1355 | static const uint8_t expsubIBM_943[] = { | |
1356 | 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; | |
1357 | ||
1358 | static const uint8_t expsubIBM_930[] = { | |
1359 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; | |
1360 | ||
1361 | static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; | |
1362 | static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1363 | static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1364 | ||
1365 | static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1366 | static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; | |
1367 | static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; | |
1368 | ||
1369 | static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; | |
1370 | static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; | |
1371 | static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; | |
1372 | ||
1373 | gInBufferSize = inputsize; | |
1374 | gOutBufferSize = outputsize; | |
1375 | ||
1376 | /*from unicode*/ | |
73c04bcf A |
1377 | |
1378 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1379 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1380 | expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", | |
1381 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) | |
1382 | log_err("u-> ibm-949 with subst did not match.\n"); | |
1383 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1384 | expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", | |
1385 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) | |
1386 | log_err("u-> ibm-943 with subst did not match.\n"); | |
1387 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1388 | expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", | |
1389 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) | |
1390 | log_err("u-> ibm-930 with subst did not match.\n"); | |
1391 | ||
1392 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1393 | { | |
1394 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1395 | static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; | |
1396 | static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; | |
1397 | ||
1398 | ||
1399 | /* EUC_JP*/ | |
1400 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1401 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1402 | 0xf4, 0xfe, 0xf4, 0xfe, | |
1403 | 0x61, 0x8e, 0xe0, | |
1404 | }; | |
1405 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; | |
1406 | ||
1407 | /*EUC_TW*/ | |
1408 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1409 | static const uint8_t to_euc_tw[]={ | |
1410 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1411 | 0xfd, 0xfe, 0xfd, 0xfe, | |
1412 | 0x61, 0xe6, 0xca, 0x8a, | |
1413 | }; | |
1414 | ||
b75a7d8f A |
1415 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; |
1416 | ||
1417 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
1418 | toIBM943, sizeof(toIBM943), "ibm-943", | |
1419 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) | |
1420 | log_err("u-> ibm-943 with substitute did not match.\n"); | |
1421 | ||
1422 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
1423 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
1424 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) | |
1425 | log_err("u-> euc-jp with substitute did not match.\n"); | |
1426 | ||
1427 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
1428 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1429 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1430 | log_err("u-> euc-tw with substitute did not match.\n"); | |
b75a7d8f | 1431 | } |
73c04bcf | 1432 | #endif |
b75a7d8f A |
1433 | |
1434 | log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1435 | { | |
1436 | UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1437 | ||
1438 | const uint8_t to_SCSU[]={ | |
1439 | 0x41, | |
1440 | 0x0e, 0xff,0xfd, | |
1441 | 0x42 | |
1442 | ||
1443 | ||
1444 | }; | |
1445 | int32_t from_SCSUOffs [] ={ | |
1446 | 0, | |
1447 | 1,1,1, | |
1448 | 2, | |
1449 | ||
1450 | }; | |
1451 | const uint8_t to_SCSU_1[]={ | |
1452 | 0x41, | |
1453 | ||
1454 | }; | |
1455 | int32_t from_SCSUOffs_1 [] ={ | |
1456 | 0, | |
1457 | ||
1458 | }; | |
1459 | if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1460 | to_SCSU, sizeof(to_SCSU), "SCSU", | |
1461 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) | |
1462 | log_err("u-> SCSU with substitute did not match.\n"); | |
1463 | ||
1464 | if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), | |
1465 | to_SCSU_1, sizeof(to_SCSU_1), "SCSU", | |
1466 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) | |
1467 | log_err("u-> SCSU with substitute did not match.\n"); | |
1468 | } | |
1469 | ||
1470 | log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1471 | { | |
1472 | static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; | |
1473 | static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, | |
1474 | 0xf0, 0x90, 0x90, 0x81, | |
1475 | 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, | |
1476 | 0xef, 0xbf, 0xbf, 0x61, | |
1477 | ||
1478 | }; | |
1479 | static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; | |
1480 | if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), | |
1481 | expectedUTF8, sizeof(expectedUTF8), "utf8", | |
1482 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { | |
1483 | log_err("u-> utf8 with stop did not match.\n"); | |
1484 | } | |
1485 | } | |
1486 | ||
1487 | log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1488 | { | |
1489 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1490 | ||
1491 | static const uint8_t out[]={ | |
1492 | #if U_IS_BIG_ENDIAN | |
1493 | 0xfe, 0xff, | |
1494 | 0x00, 0x41, | |
1495 | 0xfe, 0xff | |
1496 | #else | |
1497 | 0xff, 0xfe, | |
1498 | 0x41, 0x00, | |
1499 | 0xff, 0xfe | |
1500 | #endif | |
1501 | }; | |
1502 | static const int32_t offsets[]={ | |
1503 | -1, -1, 0, 0, 1, 1 | |
1504 | }; | |
1505 | ||
1506 | if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1507 | out, sizeof(out), "UTF-16", | |
1508 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1509 | ) { | |
1510 | log_err("u->UTF-16 with substitute did not match.\n"); | |
1511 | } | |
1512 | } | |
1513 | ||
1514 | log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); | |
1515 | { | |
1516 | static const UChar in[]={ 0x0041, 0xfeff }; | |
1517 | ||
1518 | static const uint8_t out[]={ | |
1519 | #if U_IS_BIG_ENDIAN | |
1520 | 0x00, 0x00, 0xfe, 0xff, | |
1521 | 0x00, 0x00, 0x00, 0x41, | |
1522 | 0x00, 0x00, 0xfe, 0xff | |
1523 | #else | |
1524 | 0xff, 0xfe, 0x00, 0x00, | |
1525 | 0x41, 0x00, 0x00, 0x00, | |
1526 | 0xff, 0xfe, 0x00, 0x00 | |
1527 | #endif | |
1528 | }; | |
1529 | static const int32_t offsets[]={ | |
1530 | -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 | |
1531 | }; | |
1532 | ||
1533 | if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1534 | out, sizeof(out), "UTF-32", | |
1535 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1536 | ) { | |
1537 | log_err("u->UTF-32 with substitute did not match.\n"); | |
1538 | } | |
1539 | } | |
1540 | ||
1541 | /*to unicode*/ | |
73c04bcf A |
1542 | |
1543 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1544 | if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), |
1545 | IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", | |
1546 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) | |
1547 | log_err("ibm-949->u with substitute did not match.\n"); | |
1548 | if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), | |
1549 | IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", | |
1550 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) | |
1551 | log_err("ibm-943->u with substitute did not match.\n"); | |
1552 | if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), | |
1553 | IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", | |
1554 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) | |
1555 | log_err("ibm-930->u with substitute did not match.\n"); | |
1556 | ||
1557 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1558 | { | |
1559 | ||
1560 | const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1561 | 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1562 | }; | |
1563 | UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 | |
1564 | }; | |
1565 | int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; | |
1566 | ||
1567 | ||
1568 | /* EUC_JP*/ | |
1569 | const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1570 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1571 | 0x8e, 0xe0, 0x8a | |
1572 | }; | |
1573 | UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; | |
1574 | int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; | |
1575 | ||
1576 | /*EUC_TW*/ | |
1577 | const uint8_t sampleTxt_euc_tw[]={ | |
1578 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1579 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1580 | 0xe6, 0xca, 0x8a, | |
1581 | }; | |
1582 | UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; | |
1583 | int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; | |
1584 | ||
1585 | ||
1586 | if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), | |
1587 | EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1588 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1589 | log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); | |
1590 | ||
1591 | ||
1592 | if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1593 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1594 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) | |
1595 | log_err("euc-jp->u with substitute did not match.\n"); | |
1596 | ||
1597 | ||
1598 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1599 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
1600 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1601 | log_err("euc-tw->u with substitute did not match.\n"); | |
1602 | ||
1603 | ||
1604 | if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1605 | euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp", | |
1606 | UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) | |
1607 | log_err("euc-jp->u with substitute did not match.\n"); | |
b75a7d8f | 1608 | } |
73c04bcf A |
1609 | #endif |
1610 | ||
b75a7d8f A |
1611 | log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); |
1612 | { | |
1613 | const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1614 | 0xe0, 0x80, 0x61,}; | |
1615 | UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; | |
1616 | int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; | |
1617 | ||
1618 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1619 | expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1620 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1621 | log_err("utf8->u with substitute did not match.\n");; | |
1622 | } | |
1623 | log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1624 | { | |
1625 | const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
1626 | UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; | |
1627 | int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
1628 | ||
1629 | if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1630 | expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1631 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1632 | log_err("scsu->u with stop did not match.\n");; | |
1633 | } | |
1634 | ||
73c04bcf | 1635 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
1636 | log_verbose("Testing ibm-930 subchar/subchar1\n"); |
1637 | { | |
1638 | static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; | |
1639 | static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; | |
1640 | static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1641 | ||
1642 | static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; | |
1643 | static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; | |
1644 | static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; | |
1645 | ||
1646 | if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", | |
1647 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1648 | ) { | |
1649 | log_err("u->ibm-930 subchar/subchar1 did not match.\n"); | |
1650 | } | |
1651 | ||
1652 | if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", | |
1653 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1654 | ) { | |
1655 | log_err("ibm-930->u subchar/subchar1 did not match.\n"); | |
1656 | } | |
1657 | } | |
1658 | ||
1659 | log_verbose("Testing GB 18030 with substitute callbacks\n"); | |
1660 | { | |
b75a7d8f A |
1661 | static const UChar u2[]={ |
1662 | 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; | |
1663 | static const uint8_t gb2[]={ | |
1664 | 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; | |
1665 | static const int32_t offsets2[]={ | |
1666 | 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; | |
1667 | ||
b75a7d8f A |
1668 | if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", |
1669 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1670 | ) { | |
1671 | log_err("gb18030->u with substitute did not match.\n"); | |
1672 | } | |
1673 | } | |
73c04bcf | 1674 | #endif |
b75a7d8f A |
1675 | |
1676 | log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); | |
1677 | { | |
1678 | static const uint8_t utf7[]={ | |
729e4ab9 A |
1679 | /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ |
1680 | 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e | |
b75a7d8f A |
1681 | }; |
1682 | static const UChar unicode[]={ | |
729e4ab9 | 1683 | 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e |
b75a7d8f A |
1684 | }; |
1685 | static const int32_t offsets[]={ | |
729e4ab9 | 1686 | 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 |
b75a7d8f A |
1687 | }; |
1688 | ||
1689 | if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", | |
1690 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) | |
1691 | ) { | |
1692 | log_err("UTF-7->u with substitute did not match.\n"); | |
1693 | } | |
1694 | } | |
1695 | ||
b75a7d8f A |
1696 | log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); |
1697 | { | |
1698 | static const uint8_t | |
1699 | in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, | |
1700 | in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, | |
1701 | in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; | |
1702 | ||
1703 | static const UChar | |
1704 | out1[]={ 0x4e00, 0xfeff }, | |
1705 | out2[]={ 0x004e, 0xfffe }, | |
1706 | out3[]={ 0xfefd, 0x4e00, 0xfeff }; | |
1707 | ||
1708 | static const int32_t | |
1709 | offsets1[]={ 2, 4 }, | |
1710 | offsets2[]={ 2, 4 }, | |
1711 | offsets3[]={ 0, 2, 4 }; | |
1712 | ||
1713 | if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", | |
1714 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1715 | ) { | |
1716 | log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); | |
1717 | } | |
1718 | ||
1719 | if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", | |
1720 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1721 | ) { | |
1722 | log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); | |
1723 | } | |
1724 | ||
1725 | if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", | |
1726 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1727 | ) { | |
1728 | log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); | |
1729 | } | |
1730 | } | |
1731 | ||
1732 | log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); | |
1733 | { | |
1734 | static const uint8_t | |
1735 | in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, | |
1736 | in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, | |
1737 | in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, | |
1738 | in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; | |
1739 | ||
1740 | static const UChar | |
4388f060 A |
1741 | out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, |
1742 | out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, | |
1743 | out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, | |
1744 | out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; | |
b75a7d8f A |
1745 | |
1746 | static const int32_t | |
1747 | offsets1[]={ 4, 4, 8 }, | |
1748 | offsets2[]={ 4, 4, 8 }, | |
1749 | offsets3[]={ 0, 4, 4, 8, 12 }, | |
1750 | offsets4[]={ 0, 0, 4, 8 }; | |
1751 | ||
1752 | if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", | |
1753 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) | |
1754 | ) { | |
1755 | log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); | |
1756 | } | |
1757 | ||
1758 | if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", | |
1759 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) | |
1760 | ) { | |
1761 | log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); | |
1762 | } | |
1763 | ||
1764 | if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", | |
1765 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) | |
1766 | ) { | |
1767 | log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); | |
1768 | } | |
1769 | ||
1770 | if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", | |
1771 | UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) | |
1772 | ) { | |
1773 | log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); | |
1774 | } | |
1775 | } | |
1776 | } | |
1777 | ||
1778 | static void TestSubWithValue(int32_t inputsize, int32_t outputsize) | |
1779 | { | |
1780 | UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1781 | UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1782 | ||
1783 | const uint8_t expsubwvalIBM_949[]= { | |
1784 | 0x00, 0xb0, 0xa1, 0xb0, 0xa2, | |
1785 | 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; | |
1786 | ||
1787 | const uint8_t expsubwvalIBM_943[]= { | |
1788 | 0x9f, 0xaf, 0x9f, 0xb1, | |
1789 | 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; | |
1790 | ||
1791 | const uint8_t expsubwvalIBM_930[] = { | |
1792 | 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; | |
1793 | ||
1794 | int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; | |
1795 | int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; | |
1796 | int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ | |
1797 | ||
1798 | gInBufferSize = inputsize; | |
1799 | gOutBufferSize = outputsize; | |
1800 | ||
1801 | /*from Unicode*/ | |
73c04bcf A |
1802 | |
1803 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
1804 | if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), |
1805 | expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", | |
1806 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) | |
1807 | log_err("u-> ibm-949 with subst with value did not match.\n"); | |
1808 | ||
1809 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1810 | expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", | |
1811 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) | |
1812 | log_err("u-> ibm-943 with sub with value did not match.\n"); | |
1813 | ||
1814 | if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), | |
1815 | expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", | |
1816 | UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) | |
1817 | log_err("u-> ibm-930 with subst with value did not match.\n"); | |
1818 | ||
1819 | ||
1820 | log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); | |
1821 | { | |
1822 | static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; | |
1823 | static const uint8_t toIBM943[]= { 0x61, | |
1824 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1825 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1826 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1827 | 0x61 }; | |
1828 | static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; | |
1829 | ||
1830 | ||
1831 | /* EUC_JP*/ | |
1832 | static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; | |
1833 | static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1834 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1835 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1836 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1837 | 0x61, 0x8e, 0xe0, | |
1838 | }; | |
1839 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, | |
1840 | 3, 3, 3, 3, 3, 3, | |
1841 | 3, 3, 3, 3, 3, 3, | |
1842 | 5, 5, 5, 5, 5, 5, | |
1843 | 6, 7, 7, | |
1844 | }; | |
1845 | ||
1846 | /*EUC_TW*/ | |
1847 | static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1848 | static const uint8_t to_euc_tw[]={ | |
1849 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1850 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1851 | 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1852 | 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1853 | 0x61, 0xe6, 0xca, 0x8a, | |
1854 | }; | |
1855 | static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, | |
1856 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, | |
1857 | 6, 7, 7, 8, | |
1858 | }; | |
1859 | /*ISO-2022-JP*/ | |
b75a7d8f A |
1860 | static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; |
1861 | static const uint8_t to_iso_2022_jp1[]={ | |
1862 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1863 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1864 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1865 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1866 | 0x42, | |
1867 | }; | |
1868 | ||
1869 | static const int32_t from_iso_2022_jpOffs1 [] ={ | |
1870 | 0,0,0,0,0, | |
1871 | 1,1,1,1,1,1,1,1,1, | |
1872 | 2,2,2,2,2, | |
1873 | 3,3,3,3,3,3,3,3,3, | |
1874 | 4, | |
1875 | }; | |
1876 | /* surrogate pair*/ | |
1877 | static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; | |
1878 | static const uint8_t to_iso_2022_jp2[]={ | |
1879 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1880 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1881 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1882 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1883 | 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1884 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1885 | 0x42, | |
1886 | }; | |
1887 | static const int32_t from_iso_2022_jpOffs2 [] ={ | |
1888 | 0,0,0,0,0, | |
1889 | 1,1,1,1,1,1,1,1,1, | |
1890 | 1,1,1,1,1,1, | |
1891 | 3,3,3,3,3, | |
1892 | 4,4,4,4,4,4,4,4,4, | |
1893 | 4,4,4,4,4,4, | |
1894 | 6, | |
1895 | }; | |
1896 | ||
1897 | /*ISO-2022-cn*/ | |
1898 | static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1899 | static const uint8_t to_iso_2022_cn[]={ | |
374ca955 A |
1900 | 0x41, |
1901 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, | |
b75a7d8f A |
1902 | 0x42, |
1903 | }; | |
1904 | static const int32_t from_iso_2022_cnOffs [] ={ | |
374ca955 A |
1905 | 0, |
1906 | 1,1,1,1,1,1, | |
b75a7d8f A |
1907 | 2, |
1908 | }; | |
b75a7d8f A |
1909 | |
1910 | static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; | |
1911 | ||
1912 | static const uint8_t to_iso_2022_cn4[]={ | |
1913 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
1914 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1915 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 1916 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
1917 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
1918 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1919 | 0x42, | |
1920 | }; | |
1921 | static const int32_t from_iso_2022_cnOffs4 [] ={ | |
1922 | 0,0,0,0,0,0,0, | |
1923 | 1,1,1,1,1,1,1, | |
1924 | 1,1,1,1,1,1, | |
374ca955 | 1925 | 3,3,3, |
b75a7d8f A |
1926 | 4,4,4,4,4,4,4, |
1927 | 4,4,4,4,4,4, | |
1928 | 6 | |
1929 | ||
1930 | }; | |
1931 | ||
1932 | /*ISO-2022-kr*/ | |
1933 | static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
1934 | static const uint8_t to_iso_2022_kr2[]={ | |
1935 | 0x1b, 0x24, 0x29, 0x43, | |
1936 | 0x41, | |
1937 | 0x0e, 0x25, 0x50, | |
1938 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1939 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1940 | 0x0e, 0x25, 0x50, | |
1941 | 0x0f, 0x42, | |
1942 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1943 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1944 | 0x43 | |
1945 | }; | |
1946 | static const int32_t from_iso_2022_krOffs2 [] ={ | |
1947 | -1,-1,-1,-1, | |
1948 | 0, | |
1949 | 1,1,1, | |
1950 | 2,2,2,2,2,2,2, | |
1951 | 2,2,2,2,2,2, | |
1952 | 4,4,4, | |
1953 | 5,5, | |
1954 | 6,6,6,6,6,6, | |
1955 | 6,6,6,6,6,6, | |
1956 | 8, | |
1957 | }; | |
1958 | ||
1959 | static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; | |
1960 | static const uint8_t to_iso_2022_kr[]={ | |
1961 | 0x1b, 0x24, 0x29, 0x43, | |
1962 | 0x41, | |
1963 | 0x0e, 0x25, 0x50, | |
1964 | 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1965 | 0x0e, 0x25, 0x50, | |
1966 | 0x0f, 0x42, | |
1967 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1968 | 0x43 | |
1969 | }; | |
1970 | ||
1971 | ||
1972 | static const int32_t from_iso_2022_krOffs [] ={ | |
1973 | -1,-1,-1,-1, | |
1974 | 0, | |
1975 | 1,1,1, | |
1976 | 2,2,2,2,2,2,2, | |
1977 | 3,3,3, | |
1978 | 4,4, | |
1979 | 5,5,5,5,5,5, | |
1980 | 6, | |
1981 | }; | |
1982 | /* HZ encoding */ | |
1983 | static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; | |
1984 | ||
1985 | static const uint8_t to_hz[]={ | |
1986 | 0x7e, 0x7d, 0x41, | |
1987 | 0x7e, 0x7b, 0x26, 0x30, | |
1988 | 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ | |
1989 | 0x7e, 0x7b, 0x26, 0x30, | |
1990 | 0x7e, 0x7d, 0x42, | |
1991 | ||
1992 | }; | |
1993 | static const int32_t from_hzOffs [] ={ | |
1994 | 0,0,0, | |
1995 | 1,1,1,1, | |
1996 | 2,2,2,2,2,2,2,2, | |
1997 | 3,3,3,3, | |
1998 | 4,4,4 | |
1999 | }; | |
2000 | ||
2001 | static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
2002 | static const uint8_t to_hz2[]={ | |
2003 | 0x7e, 0x7d, 0x41, | |
2004 | 0x7e, 0x7b, 0x26, 0x30, | |
2005 | 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2006 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2007 | 0x7e, 0x7b, 0x26, 0x30, | |
2008 | 0x7e, 0x7d, 0x42, | |
2009 | 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2010 | 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2011 | 0x43 | |
2012 | }; | |
2013 | static const int32_t from_hzOffs2 [] ={ | |
2014 | 0,0,0, | |
2015 | 1,1,1,1, | |
2016 | 2,2,2,2,2,2,2,2, | |
2017 | 2,2,2,2,2,2, | |
2018 | 4,4,4,4, | |
2019 | 5,5,5, | |
2020 | 6,6,6,6,6,6, | |
2021 | 6,6,6,6,6,6, | |
2022 | 8, | |
2023 | }; | |
2024 | ||
2025 | /*ISCII*/ | |
b75a7d8f A |
2026 | static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; |
2027 | static const uint8_t to_iscii[]={ | |
2028 | 0x41, | |
2029 | 0xef, 0x42, 0xa1, | |
2030 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2031 | 0xa2, | |
2032 | 0x42, | |
2033 | 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2034 | 0x43 | |
2035 | }; | |
2036 | ||
2037 | ||
2038 | static const int32_t from_isciiOffs [] ={ | |
2039 | 0, | |
2040 | 1,1,1, | |
2041 | 2,2,2,2,2,2, | |
2042 | 3, | |
2043 | 4, | |
2044 | 5,5,5,5,5,5, | |
2045 | 6, | |
2046 | }; | |
2047 | ||
2048 | if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), | |
2049 | toIBM943, sizeof(toIBM943), "ibm-943", | |
2050 | UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) | |
2051 | log_err("u-> ibm-943 with subst with value did not match.\n"); | |
2052 | ||
2053 | if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), | |
2054 | to_euc_jp, sizeof(to_euc_jp), "euc-jp", | |
2055 | UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) | |
2056 | log_err("u-> euc-jp with subst with value did not match.\n"); | |
2057 | ||
2058 | if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), | |
2059 | to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
2060 | UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) | |
2061 | log_err("u-> euc-tw with subst with value did not match.\n"); | |
2062 | ||
b75a7d8f A |
2063 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), |
2064 | to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2065 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2066 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2067 | ||
2068 | if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), | |
2069 | to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2070 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2071 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2072 | ||
2073 | if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
2074 | to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
2075 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) | |
2076 | log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2077 | /*ESCAPE OPTIONS*/ | |
2078 | { | |
2079 | /* surrogate pair*/ | |
2080 | static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; | |
2081 | static const uint8_t to_iso_2022_jp3_v2[]={ | |
2082 | 0x1b, 0x24, 0x42, 0x21, 0x21, | |
2083 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2084 | ||
2085 | 0x1b, 0x24, 0x42, 0x21, 0x22, | |
2086 | 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, | |
2087 | ||
2088 | 0x42, | |
2089 | 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, | |
2090 | }; | |
2091 | ||
2092 | static const int32_t from_iso_2022_jpOffs3_v2 [] ={ | |
2093 | 0,0,0,0,0, | |
2094 | 1,1,1,1,1,1,1,1,1,1,1,1, | |
2095 | ||
2096 | 3,3,3,3,3, | |
2097 | 4,4,4,4,4,4,4,4,4,4,4,4, | |
2098 | ||
2099 | 6, | |
2100 | 7,7,7,7,7,7,7,7,7 | |
2101 | }; | |
2102 | ||
2103 | if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), | |
2104 | to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", | |
2105 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2106 | log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2107 | } | |
b75a7d8f A |
2108 | { |
2109 | static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2110 | static const uint8_t to_iso_2022_cn5_v2[]={ | |
2111 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2112 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, | |
2113 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 2114 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2115 | 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, |
2116 | 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2117 | 0x42, | |
374ca955 | 2118 | 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, |
b75a7d8f A |
2119 | }; |
2120 | static const int32_t from_iso_2022_cnOffs5_v2 [] ={ | |
2121 | 0,0,0,0,0,0,0, | |
2122 | 1,1,1,1,1,1,1, | |
2123 | 1,1,1,1,1,1, | |
374ca955 | 2124 | 3,3,3, |
b75a7d8f A |
2125 | 4,4,4,4,4,4,4, |
2126 | 4,4,4,4,4,4, | |
2127 | 6, | |
374ca955 | 2128 | 7,7,7,7,7,7 |
b75a7d8f A |
2129 | }; |
2130 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), | |
2131 | to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", | |
2132 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) | |
2133 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); | |
2134 | ||
2135 | } | |
2136 | { | |
2137 | static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2138 | static const uint8_t to_iso_2022_cn6_v2[]={ | |
2139 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2140 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, | |
374ca955 | 2141 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2142 | 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, |
2143 | 0x42, | |
374ca955 | 2144 | 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d |
b75a7d8f A |
2145 | }; |
2146 | static const int32_t from_iso_2022_cnOffs6_v2 [] ={ | |
2147 | 0, 0, 0, 0, 0, 0, 0, | |
2148 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
374ca955 | 2149 | 3, 3, 3, |
b75a7d8f A |
2150 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
2151 | 6, | |
374ca955 | 2152 | 7, 7, 7, 7, 7, 7, 7, 7, |
b75a7d8f A |
2153 | }; |
2154 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), | |
2155 | to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", | |
2156 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) | |
2157 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); | |
2158 | ||
2159 | } | |
2160 | { | |
2161 | static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2162 | static const uint8_t to_iso_2022_cn7_v2[]={ | |
2163 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2164 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
374ca955 | 2165 | 0x0e, 0x21, 0x22, |
b75a7d8f | 2166 | 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
374ca955 | 2167 | 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, |
b75a7d8f A |
2168 | }; |
2169 | static const int32_t from_iso_2022_cnOffs7_v2 [] ={ | |
2170 | 0, 0, 0, 0, 0, 0, 0, | |
2171 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
374ca955 | 2172 | 3, 3, 3, |
b75a7d8f A |
2173 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
2174 | 6, | |
374ca955 | 2175 | 7, 7, 7, 7, 7, 7, |
b75a7d8f A |
2176 | }; |
2177 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), | |
2178 | to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", | |
2179 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) | |
2180 | log_err("u-> iso-2022-cn with sub & K did not match.\n"); | |
2181 | ||
46f4442e A |
2182 | } |
2183 | { | |
2184 | static const UChar iso_2022_cn_inputText8[]={ | |
2185 | 0x3000, | |
2186 | 0xD84D, 0xDC56, | |
2187 | 0x3001, | |
2188 | 0xD84D, 0xDC56, | |
2189 | 0xDBFF, 0xDFFF, | |
2190 | 0x0042, | |
2191 | 0x0902}; | |
2192 | static const uint8_t to_iso_2022_cn8_v2[]={ | |
2193 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2194 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2195 | 0x0e, 0x21, 0x22, | |
2196 | 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, | |
2197 | 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, | |
2198 | 0x42, | |
2199 | 0x5c, 0x39, 0x30, 0x32, 0x20 | |
2200 | }; | |
2201 | static const int32_t from_iso_2022_cnOffs8_v2 [] ={ | |
2202 | 0, 0, 0, 0, 0, 0, 0, | |
2203 | 1, 1, 1, 1, 1, 1, 1, 1, | |
2204 | 3, 3, 3, | |
2205 | 4, 4, 4, 4, 4, 4, 4, 4, | |
2206 | 6, 6, 6, 6, 6, 6, 6, 6, | |
2207 | 8, | |
2208 | 9, 9, 9, 9, 9 | |
2209 | }; | |
2210 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), | |
2211 | to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", | |
2212 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) | |
2213 | log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); | |
2214 | ||
b75a7d8f A |
2215 | } |
2216 | { | |
2217 | static const uint8_t to_iso_2022_cn4_v3[]={ | |
2218 | 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, | |
2219 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, | |
374ca955 | 2220 | 0x0e, 0x21, 0x22, |
b75a7d8f A |
2221 | 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, |
2222 | 0x42 | |
2223 | }; | |
2224 | ||
2225 | ||
2226 | static const int32_t from_iso_2022_cnOffs4_v3 [] ={ | |
2227 | 0,0,0,0,0,0,0, | |
2228 | 1,1,1,1,1,1,1,1,1,1,1, | |
2229 | ||
374ca955 | 2230 | 3,3,3, |
b75a7d8f A |
2231 | 4,4,4,4,4,4,4,4,4,4,4, |
2232 | ||
2233 | 6 | |
2234 | ||
2235 | }; | |
2236 | if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), | |
2237 | to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", | |
2238 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2239 | { | |
2240 | log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); | |
2241 | } | |
2242 | } | |
2243 | if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), | |
2244 | to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
2245 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) | |
2246 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2247 | ||
b75a7d8f A |
2248 | if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), |
2249 | to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", | |
2250 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) | |
2251 | log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2252 | if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), | |
2253 | to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
2254 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) | |
2255 | log_err("u-> iso_2022_kr with subst with value did not match.\n"); | |
2256 | if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), | |
2257 | to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", | |
2258 | UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) | |
2259 | log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); | |
2260 | if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), | |
2261 | to_hz, sizeof(to_hz), "HZ", | |
2262 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) | |
2263 | log_err("u-> hz with subst with value did not match.\n"); | |
2264 | if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), | |
2265 | to_hz2, sizeof(to_hz2), "HZ", | |
2266 | UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) | |
2267 | log_err("u-> hz with subst with value did not match.\n"); | |
2268 | ||
2269 | if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), | |
2270 | to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
2271 | UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) | |
2272 | log_err("u-> iscii with subst with value did not match.\n"); | |
b75a7d8f | 2273 | } |
73c04bcf | 2274 | #endif |
b75a7d8f A |
2275 | |
2276 | log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); | |
2277 | /*to Unicode*/ | |
2278 | { | |
73c04bcf | 2279 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2280 | static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, |
2281 | 0x81, 0xad, /*unassigned*/ | |
2282 | 0x89, 0xd3 }; | |
2283 | static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, | |
2284 | 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, | |
2285 | 0x7B87}; | |
2286 | static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; | |
2287 | ||
2288 | /* EUC_JP*/ | |
2289 | static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
2290 | 0x8f, 0xda, 0xa1, /*unassigned*/ | |
2291 | 0x8e, 0xe0, | |
2292 | }; | |
2293 | static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, | |
2294 | 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, | |
2295 | 0x00a2 }; | |
2296 | static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, | |
2297 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
2298 | 9, | |
2299 | }; | |
2300 | ||
2301 | /*EUC_TW*/ | |
2302 | static const uint8_t sampleTxt_euc_tw[]={ | |
2303 | 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
2304 | 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
2305 | 0xe6, 0xca, 0x8a, | |
2306 | }; | |
2307 | static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, | |
2308 | 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, | |
2309 | 0x8706, 0x8a, }; | |
2310 | static const int32_t from_euc_twOffs [] ={ 0, 1, 3, | |
2311 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, | |
2312 | 11, 13}; | |
2313 | ||
2314 | /*iso-2022-jp*/ | |
2315 | static const uint8_t sampleTxt_iso_2022_jp[]={ | |
2316 | 0x1b, 0x28, 0x42, 0x41, | |
2317 | 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ | |
2318 | 0x1b, 0x28, 0x42, 0x42, | |
2319 | ||
2320 | }; | |
2321 | static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 }; | |
2322 | static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; | |
2323 | ||
2324 | /*iso-2022-cn*/ | |
2325 | static const uint8_t sampleTxt_iso_2022_cn[]={ | |
2326 | 0x0f, 0x41, 0x44, | |
2327 | 0x1B, 0x24, 0x29, 0x47, | |
2328 | 0x0E, 0x40, 0x6c, /*unassigned*/ | |
2329 | 0x0f, 0x42, | |
2330 | ||
2331 | }; | |
2332 | static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; | |
2333 | static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; | |
2334 | ||
2335 | /*iso-2022-kr*/ | |
2336 | static const uint8_t sampleTxt_iso_2022_kr[]={ | |
2337 | 0x1b, 0x24, 0x29, 0x43, | |
2338 | 0x41, | |
2339 | 0x0E, 0x7f, 0x1E, | |
2340 | 0x0e, 0x25, 0x50, | |
2341 | 0x0f, 0x51, | |
2342 | 0x42, 0x43, | |
2343 | ||
2344 | }; | |
2345 | static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; | |
2346 | static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; | |
2347 | ||
2348 | /*hz*/ | |
2349 | static const uint8_t sampleTxt_hz[]={ | |
2350 | 0x41, | |
2351 | 0x7e, 0x7b, 0x26, 0x30, | |
2352 | 0x7f, 0x1E, /*unassigned*/ | |
2353 | 0x26, 0x30, | |
2354 | 0x7e, 0x7d, 0x42, | |
2355 | 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
2356 | 0x7e, 0x7d, 0x42, | |
2357 | }; | |
2358 | static const UChar hztoUnicode[]={ | |
2359 | 0x41, | |
2360 | 0x03a0, | |
2361 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2362 | 0x03A0, | |
2363 | 0x42, | |
2364 | 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2365 | 0x42,}; | |
2366 | ||
2367 | static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; | |
2368 | ||
2369 | ||
2370 | /*iscii*/ | |
2371 | static const uint8_t sampleTxt_iscii[]={ | |
2372 | 0x41, | |
2373 | 0x30, | |
2374 | 0xEB, /*unassigned*/ | |
2375 | 0xa3, | |
2376 | 0x42, | |
2377 | 0xEC, /*unassigned*/ | |
2378 | 0x42, | |
2379 | }; | |
2380 | static const UChar isciitoUnicode[]={ | |
2381 | 0x41, | |
2382 | 0x30, | |
2383 | 0x25, 0x58, 0x45, 0x42, | |
2384 | 0x0903, | |
2385 | 0x42, | |
2386 | 0x25, 0x58, 0x45, 0x43, | |
2387 | 0x42,}; | |
2388 | ||
2389 | static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; | |
73c04bcf | 2390 | #endif |
b75a7d8f | 2391 | |
b75a7d8f A |
2392 | /*UTF8*/ |
2393 | static const uint8_t sampleTxtUTF8[]={ | |
2394 | 0x20, 0x64, 0x50, | |
2395 | 0xC2, 0x7E, /* truncated char */ | |
2396 | 0x20, | |
2397 | 0xE0, 0xB5, 0x7E, /* truncated char */ | |
2398 | 0x40, | |
2399 | }; | |
2400 | static const UChar UTF8ToUnicode[]={ | |
2401 | 0x0020, 0x0064, 0x0050, | |
2402 | 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ | |
2403 | 0x0020, | |
2404 | 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, | |
2405 | 0x0040 | |
2406 | }; | |
2407 | static const int32_t fromUTF8[] = { | |
2408 | 0, 1, 2, | |
2409 | 3, 3, 3, 3, 4, | |
2410 | 5, | |
2411 | 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2412 | 9 | |
2413 | }; | |
2414 | static const UChar UTF8ToUnicodeXML_DEC[]={ | |
2415 | 0x0020, 0x0064, 0x0050, | |
2416 | 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ | |
2417 | 0x0020, | |
2418 | 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, | |
2419 | 0x0040 | |
2420 | }; | |
2421 | static const int32_t fromUTF8XML_DEC[] = { | |
2422 | 0, 1, 2, | |
2423 | 3, 3, 3, 3, 3, 3, 4, | |
2424 | 5, | |
2425 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2426 | 9 | |
2427 | }; | |
2428 | ||
73c04bcf A |
2429 | |
2430 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
2431 | if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), |
2432 | IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", | |
2433 | UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) | |
2434 | log_err("ibm-943->u with substitute with value did not match.\n"); | |
2435 | ||
2436 | if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), | |
2437 | EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp", | |
2438 | UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) | |
2439 | log_err("euc-jp->u with substitute with value did not match.\n"); | |
2440 | ||
2441 | if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
2442 | euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", | |
2443 | UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) | |
2444 | log_err("euc-tw->u with substitute with value did not match.\n"); | |
2445 | ||
2446 | if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2447 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2448 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) | |
2449 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2450 | ||
2451 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2452 | iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2453 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) | |
2454 | log_err("iso-2022-jp->u with substitute with value did not match.\n"); | |
2455 | ||
2456 | {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ | |
2457 | { | |
2458 | static const UChar iso_2022_jptoUnicodeDec[]={ | |
2459 | 0x0041, | |
2460 | 0x0026, 0x0023, 0x0034, 0x0032, 0x003b, | |
2461 | 0x0026, 0x0023, 0x0036, 0x0038, 0x003b, | |
2462 | 0x0042 }; | |
2463 | static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; | |
2464 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2465 | iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2466 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2467 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); | |
2468 | } | |
2469 | { | |
2470 | static const UChar iso_2022_jptoUnicodeHex[]={ | |
2471 | 0x0041, | |
2472 | 0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b, | |
2473 | 0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b, | |
2474 | 0x0042 }; | |
2475 | static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; | |
2476 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2477 | iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2478 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) | |
2479 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); | |
2480 | } | |
2481 | { | |
2482 | static const UChar iso_2022_jptoUnicodeC[]={ | |
2483 | 0x0041, | |
2484 | 0x005C, 0x0078, 0x0032, 0x0041, | |
2485 | 0x005C, 0x0078, 0x0034, 0x0034, | |
2486 | 0x0042 }; | |
2487 | int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; | |
2488 | if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), | |
2489 | iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2490 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) | |
2491 | log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); | |
2492 | } | |
2493 | } | |
2494 | if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), | |
2495 | iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", | |
2496 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) | |
2497 | log_err("iso-2022-cn->u with substitute with value did not match.\n"); | |
2498 | ||
2499 | if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), | |
2500 | iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", | |
2501 | UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) | |
2502 | log_err("iso-2022-kr->u with substitute with value did not match.\n"); | |
2503 | ||
2504 | if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
2505 | hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
2506 | UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) | |
2507 | log_err("hz->u with substitute with value did not match.\n"); | |
2508 | ||
2509 | if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
2510 | isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", | |
2511 | UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) | |
2512 | log_err("ISCII ->u with substitute with value did not match.\n"); | |
73c04bcf A |
2513 | #endif |
2514 | ||
b75a7d8f A |
2515 | if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), |
2516 | UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", | |
2517 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) | |
2518 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2519 | if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), | |
2520 | UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", | |
2521 | UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) | |
2522 | log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); | |
2523 | } | |
2524 | } | |
2525 | ||
73c04bcf | 2526 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2527 | static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) |
2528 | { | |
2529 | static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
2530 | static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
2531 | static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; | |
2532 | ||
2533 | ||
2534 | static const uint8_t text943[] = { | |
fd0068a8 A |
2535 | 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; |
2536 | static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
2537 | static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; | |
b75a7d8f A |
2538 | static const UChar toUnicode943stop[]= { 0x304b}; |
2539 | ||
fd0068a8 A |
2540 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; |
2541 | static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; | |
b75a7d8f A |
2542 | static const int32_t fromIBM943Offsstop[] = { 0}; |
2543 | ||
2544 | gInBufferSize = inputsize; | |
2545 | gOutBufferSize = outputsize; | |
2546 | /*checking with a legal value*/ | |
2547 | if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]), | |
2548 | templegal949, sizeof(templegal949), "ibm-949", | |
2549 | UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) | |
2550 | log_err("u-> ibm-949 with skip did not match.\n"); | |
2551 | ||
2552 | /*checking illegal value for ibm-943 with substitute*/ | |
2553 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2554 | toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", | |
2555 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2556 | log_err("ibm-943->u with subst did not match.\n"); | |
2557 | /*checking illegal value for ibm-943 with skip */ | |
2558 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2559 | toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943", | |
2560 | UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) | |
2561 | log_err("ibm-943->u with skip did not match.\n"); | |
2562 | ||
2563 | /*checking illegal value for ibm-943 with stop */ | |
2564 | if(!testConvertToUnicode(text943, sizeof(text943), | |
2565 | toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943", | |
2566 | UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) | |
2567 | log_err("ibm-943->u with stop did not match.\n"); | |
2568 | ||
2569 | } | |
2570 | ||
2571 | static void TestSingleByte(int32_t inputsize, int32_t outputsize) | |
2572 | { | |
2573 | static const uint8_t sampleText[] = { | |
2574 | 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, | |
fd0068a8 A |
2575 | 0xff, 0x32, 0x33}; |
2576 | static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; | |
2577 | static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; | |
b75a7d8f A |
2578 | /*checking illegal value for ibm-943 with substitute*/ |
2579 | gInBufferSize = inputsize; | |
2580 | gOutBufferSize = outputsize; | |
2581 | ||
2582 | if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
2583 | toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", | |
2584 | UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2585 | log_err("ibm-943->u with subst did not match.\n"); | |
2586 | } | |
2587 | ||
2588 | static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) | |
2589 | { | |
2590 | /*EBCDIC_STATEFUL*/ | |
2591 | static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; | |
2592 | static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; | |
2593 | static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; | |
2594 | /* s SO doubl SI sng s SO fe fe SI s */ | |
2595 | ||
2596 | /*EBCDIC_STATEFUL with subChar=3f*/ | |
2597 | static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; | |
2598 | static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; | |
2599 | static const char mySubChar[]={ 0x3f}; | |
2600 | ||
2601 | gInBufferSize = inputsize; | |
2602 | gOutBufferSize = outputsize; | |
2603 | ||
2604 | if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), | |
2605 | toIBM930, sizeof(toIBM930), "ibm-930", | |
2606 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) | |
2607 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); | |
2608 | ||
2609 | if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), | |
2610 | toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", | |
2611 | UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) | |
2612 | log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); | |
2613 | } | |
73c04bcf | 2614 | #endif |
b75a7d8f A |
2615 | |
2616 | UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
2617 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
2618 | const char *mySubChar, int8_t len) | |
2619 | { | |
2620 | ||
2621 | ||
2622 | UErrorCode status = U_ZERO_ERROR; | |
2623 | UConverter *conv = 0; | |
73c04bcf | 2624 | char junkout[NEW_MAX_BUFFER]; /* FIX */ |
b75a7d8f A |
2625 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
2626 | const UChar *src; | |
73c04bcf A |
2627 | char *end; |
2628 | char *targ; | |
b75a7d8f A |
2629 | int32_t *offs; |
2630 | int i; | |
2631 | int32_t realBufferSize; | |
73c04bcf | 2632 | char *realBufferEnd; |
b75a7d8f A |
2633 | const UChar *realSourceEnd; |
2634 | const UChar *sourceLimit; | |
2635 | UBool checkOffsets = TRUE; | |
2636 | UBool doFlush; | |
2637 | char junk[9999]; | |
2638 | char offset_str[9999]; | |
73c04bcf | 2639 | char *p; |
b75a7d8f A |
2640 | UConverterFromUCallback oldAction = NULL; |
2641 | const void* oldContext = NULL; | |
2642 | ||
2643 | ||
2644 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
73c04bcf | 2645 | junkout[i] = (char)0xF0; |
b75a7d8f A |
2646 | for(i=0;i<NEW_MAX_BUFFER;i++) |
2647 | junokout[i] = 0xFF; | |
2648 | setNuConvTestName(codepage, "FROM"); | |
2649 | ||
2650 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
2651 | gOutBufferSize); | |
2652 | ||
2653 | conv = ucnv_open(codepage, &status); | |
2654 | if(U_FAILURE(status)) | |
2655 | { | |
2656 | log_data_err("Couldn't open converter %s\n",codepage); | |
2657 | return TRUE; | |
2658 | } | |
2659 | ||
2660 | log_verbose("Converter opened..\n"); | |
2661 | ||
2662 | /*----setting the callback routine----*/ | |
2663 | ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2664 | if (U_FAILURE(status)) | |
2665 | { | |
2666 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2667 | } | |
2668 | /*------------------------*/ | |
2669 | /*setting the subChar*/ | |
2670 | if(mySubChar != NULL){ | |
2671 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2672 | if (U_FAILURE(status)) { | |
2673 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2674 | } | |
2675 | } | |
2676 | /*------------*/ | |
2677 | ||
2678 | src = source; | |
2679 | targ = junkout; | |
2680 | offs = junokout; | |
2681 | ||
2682 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2683 | realBufferEnd = junkout + realBufferSize; | |
2684 | realSourceEnd = source + sourceLen; | |
2685 | ||
2686 | if ( gOutBufferSize != realBufferSize ) | |
2687 | checkOffsets = FALSE; | |
2688 | ||
2689 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2690 | checkOffsets = FALSE; | |
2691 | ||
2692 | do | |
2693 | { | |
2694 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
2695 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
2696 | ||
2697 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
2698 | ||
2699 | if(targ == realBufferEnd) | |
2700 | { | |
2701 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
2702 | return FALSE; | |
2703 | } | |
2704 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
2705 | ||
2706 | ||
2707 | status = U_ZERO_ERROR; | |
2708 | ||
2709 | ucnv_fromUnicode (conv, | |
2710 | (char **)&targ, | |
2711 | (const char *)end, | |
2712 | &src, | |
2713 | sourceLimit, | |
2714 | checkOffsets ? offs : NULL, | |
2715 | doFlush, /* flush if we're at the end of the input data */ | |
2716 | &status); | |
2717 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
2718 | ||
2719 | ||
2720 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2721 | UChar errChars[50]; /* should be sufficient */ | |
2722 | int8_t errLen = 50; | |
2723 | UErrorCode err = U_ZERO_ERROR; | |
2724 | const UChar* limit= NULL; | |
2725 | const UChar* start= NULL; | |
2726 | ucnv_getInvalidUChars(conv,errChars, &errLen, &err); | |
2727 | if(U_FAILURE(err)){ | |
2728 | log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); | |
2729 | } | |
2730 | /* src points to limit of invalid chars */ | |
2731 | limit = src; | |
2732 | /* length of in invalid chars should be equal to returned length*/ | |
2733 | start = src - errLen; | |
2734 | if(u_strncmp(errChars,start,errLen)!=0){ | |
2735 | log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); | |
2736 | } | |
2737 | } | |
2738 | /* allow failure codes for the stop callback */ | |
2739 | if(U_FAILURE(status) && | |
2740 | (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) | |
2741 | { | |
2742 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2743 | return FALSE; | |
2744 | } | |
2745 | ||
2746 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
2747 | sourceLen, targ-junkout); | |
729e4ab9 | 2748 | if(getTestOption(VERBOSITY_OPTION)) |
b75a7d8f A |
2749 | { |
2750 | ||
2751 | junk[0] = 0; | |
2752 | offset_str[0] = 0; | |
2753 | for(p = junkout;p<targ;p++) | |
2754 | { | |
2755 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
2756 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
2757 | } | |
2758 | ||
2759 | log_verbose(junk); | |
2760 | printSeq(expect, expectLen); | |
2761 | if ( checkOffsets ) | |
2762 | { | |
2763 | log_verbose("\nOffsets:"); | |
2764 | log_verbose(offset_str); | |
2765 | } | |
2766 | log_verbose("\n"); | |
2767 | } | |
2768 | ucnv_close(conv); | |
2769 | ||
2770 | ||
2771 | if(expectLen != targ-junkout) | |
2772 | { | |
2773 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
2774 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
73c04bcf | 2775 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
2776 | printSeqErr(expect, expectLen); |
2777 | return FALSE; | |
2778 | } | |
2779 | ||
2780 | if (checkOffsets && (expectOffsets != 0) ) | |
2781 | { | |
2782 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
2783 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
2784 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2785 | log_err("Got Output : "); | |
73c04bcf | 2786 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
2787 | log_err("Got Offsets: "); |
2788 | for(p=junkout;p<targ;p++) | |
2789 | log_err("%d,", junokout[p-junkout]); | |
2790 | log_err("\n"); | |
2791 | log_err("Expected Offsets: "); | |
2792 | for(i=0; i<(targ-junkout); i++) | |
2793 | log_err("%d,", expectOffsets[i]); | |
2794 | log_err("\n"); | |
2795 | return FALSE; | |
2796 | } | |
2797 | } | |
2798 | ||
2799 | if(!memcmp(junkout, expect, expectLen)) | |
2800 | { | |
2801 | log_verbose("String matches! %s\n", gNuConvTestName); | |
2802 | return TRUE; | |
2803 | } | |
2804 | else | |
2805 | { | |
2806 | log_err("String does not match. %s\n", gNuConvTestName); | |
2807 | log_err("source: "); | |
2808 | printUSeqErr(source, sourceLen); | |
2809 | log_err("Got: "); | |
73c04bcf | 2810 | printSeqErr((const uint8_t *)junkout, expectLen); |
b75a7d8f A |
2811 | log_err("Expected: "); |
2812 | printSeqErr(expect, expectLen); | |
2813 | return FALSE; | |
2814 | } | |
2815 | } | |
2816 | ||
2817 | UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
2818 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
2819 | const char *mySubChar, int8_t len) | |
2820 | { | |
2821 | UErrorCode status = U_ZERO_ERROR; | |
2822 | UConverter *conv = 0; | |
2823 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2824 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
73c04bcf A |
2825 | const char *src; |
2826 | const char *realSourceEnd; | |
2827 | const char *srcLimit; | |
b75a7d8f A |
2828 | UChar *targ; |
2829 | UChar *end; | |
2830 | int32_t *offs; | |
2831 | int i; | |
2832 | UBool checkOffsets = TRUE; | |
2833 | char junk[9999]; | |
2834 | char offset_str[9999]; | |
2835 | UChar *p; | |
2836 | UConverterToUCallback oldAction = NULL; | |
2837 | const void* oldContext = NULL; | |
2838 | ||
2839 | int32_t realBufferSize; | |
2840 | UChar *realBufferEnd; | |
2841 | ||
2842 | ||
2843 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2844 | junkout[i] = 0xFFFE; | |
2845 | ||
2846 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
2847 | junokout[i] = -1; | |
2848 | ||
2849 | setNuConvTestName(codepage, "TO"); | |
2850 | ||
2851 | log_verbose("\n========= %s\n", gNuConvTestName); | |
2852 | ||
2853 | conv = ucnv_open(codepage, &status); | |
2854 | if(U_FAILURE(status)) | |
2855 | { | |
2856 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
2857 | return TRUE; | |
2858 | } | |
2859 | ||
2860 | log_verbose("Converter opened..\n"); | |
2861 | ||
73c04bcf | 2862 | src = (const char *)source; |
b75a7d8f A |
2863 | targ = junkout; |
2864 | offs = junokout; | |
2865 | ||
2866 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2867 | realBufferEnd = junkout + realBufferSize; | |
2868 | realSourceEnd = src + sourcelen; | |
2869 | /*----setting the callback routine----*/ | |
2870 | ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); | |
2871 | if (U_FAILURE(status)) | |
2872 | { | |
2873 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2874 | } | |
2875 | /*-------------------------------------*/ | |
2876 | /*setting the subChar*/ | |
2877 | if(mySubChar != NULL){ | |
2878 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2879 | if (U_FAILURE(status)) { | |
2880 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
2881 | } | |
2882 | } | |
2883 | /*------------*/ | |
2884 | ||
2885 | ||
2886 | if ( gOutBufferSize != realBufferSize ) | |
2887 | checkOffsets = FALSE; | |
2888 | ||
2889 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
2890 | checkOffsets = FALSE; | |
2891 | ||
2892 | do | |
2893 | { | |
2894 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
2895 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
2896 | ||
2897 | if(targ == realBufferEnd) | |
2898 | { | |
2899 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
2900 | return FALSE; | |
2901 | } | |
2902 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
2903 | ||
2904 | ||
2905 | ||
2906 | status = U_ZERO_ERROR; | |
2907 | ||
2908 | ucnv_toUnicode (conv, | |
2909 | &targ, | |
2910 | end, | |
2911 | (const char **)&src, | |
2912 | (const char *)srcLimit, | |
2913 | checkOffsets ? offs : NULL, | |
2914 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
2915 | &status); | |
2916 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
2917 | ||
2918 | if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2919 | char errChars[50]; /* should be sufficient */ | |
2920 | int8_t errLen = 50; | |
2921 | UErrorCode err = U_ZERO_ERROR; | |
73c04bcf A |
2922 | const char* limit= NULL; |
2923 | const char* start= NULL; | |
b75a7d8f A |
2924 | ucnv_getInvalidChars(conv,errChars, &errLen, &err); |
2925 | if(U_FAILURE(err)){ | |
2926 | log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); | |
2927 | } | |
2928 | /* src points to limit of invalid chars */ | |
2929 | limit = src; | |
2930 | /* length of in invalid chars should be equal to returned length*/ | |
2931 | start = src - errLen; | |
73c04bcf | 2932 | if(uprv_strncmp(errChars,start,errLen)!=0){ |
b75a7d8f A |
2933 | log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); |
2934 | } | |
2935 | } | |
2936 | /* allow failure codes for the stop callback */ | |
2937 | if(U_FAILURE(status) && | |
2938 | (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) | |
2939 | { | |
2940 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
2941 | return FALSE; | |
2942 | } | |
2943 | ||
2944 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
2945 | sourcelen, targ-junkout); | |
729e4ab9 | 2946 | if(getTestOption(VERBOSITY_OPTION)) |
b75a7d8f A |
2947 | { |
2948 | ||
2949 | junk[0] = 0; | |
2950 | offset_str[0] = 0; | |
2951 | ||
2952 | for(p = junkout;p<targ;p++) | |
2953 | { | |
2954 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
2955 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
2956 | } | |
2957 | ||
2958 | log_verbose(junk); | |
2959 | printUSeq(expect, expectlen); | |
2960 | if ( checkOffsets ) | |
2961 | { | |
2962 | log_verbose("\nOffsets:"); | |
2963 | log_verbose(offset_str); | |
2964 | } | |
2965 | log_verbose("\n"); | |
2966 | } | |
2967 | ucnv_close(conv); | |
2968 | ||
2969 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
2970 | ||
2971 | if (checkOffsets && (expectOffsets != 0)) | |
2972 | { | |
2973 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
2974 | { | |
2975 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
2976 | log_err("Got offsets: "); | |
2977 | for(p=junkout;p<targ;p++) | |
2978 | log_err(" %2d,", junokout[p-junkout]); | |
2979 | log_err("\n"); | |
2980 | log_err("Expected offsets: "); | |
2981 | for(i=0; i<(targ-junkout); i++) | |
2982 | log_err(" %2d,", expectOffsets[i]); | |
2983 | log_err("\n"); | |
2984 | log_err("Got output: "); | |
2985 | for(i=0; i<(targ-junkout); i++) | |
2986 | log_err("0x%04x,", junkout[i]); | |
2987 | log_err("\n"); | |
2988 | log_err("From source: "); | |
73c04bcf | 2989 | for(i=0; i<(src-(const char *)source); i++) |
b75a7d8f A |
2990 | log_err(" 0x%02x,", (unsigned char)source[i]); |
2991 | log_err("\n"); | |
2992 | } | |
2993 | } | |
2994 | ||
2995 | if(!memcmp(junkout, expect, expectlen*2)) | |
2996 | { | |
2997 | log_verbose("Matches!\n"); | |
2998 | return TRUE; | |
2999 | } | |
3000 | else | |
3001 | { | |
3002 | log_err("String does not match. %s\n", gNuConvTestName); | |
3003 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3004 | log_err("Got: "); | |
3005 | printUSeqErr(junkout, expectlen); | |
3006 | log_err("Expected: "); | |
3007 | printUSeqErr(expect, expectlen); | |
3008 | log_err("\n"); | |
3009 | return FALSE; | |
3010 | } | |
3011 | } | |
3012 | ||
3013 | UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
3014 | const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, | |
3015 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3016 | { | |
3017 | ||
3018 | ||
3019 | UErrorCode status = U_ZERO_ERROR; | |
3020 | UConverter *conv = 0; | |
73c04bcf | 3021 | char junkout[NEW_MAX_BUFFER]; /* FIX */ |
b75a7d8f A |
3022 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
3023 | const UChar *src; | |
73c04bcf A |
3024 | char *end; |
3025 | char *targ; | |
b75a7d8f A |
3026 | int32_t *offs; |
3027 | int i; | |
3028 | int32_t realBufferSize; | |
73c04bcf | 3029 | char *realBufferEnd; |
b75a7d8f A |
3030 | const UChar *realSourceEnd; |
3031 | const UChar *sourceLimit; | |
3032 | UBool checkOffsets = TRUE; | |
3033 | UBool doFlush; | |
3034 | char junk[9999]; | |
3035 | char offset_str[9999]; | |
73c04bcf | 3036 | char *p; |
b75a7d8f A |
3037 | UConverterFromUCallback oldAction = NULL; |
3038 | const void* oldContext = NULL; | |
3039 | ||
3040 | ||
3041 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
73c04bcf | 3042 | junkout[i] = (char)0xF0; |
b75a7d8f A |
3043 | for(i=0;i<NEW_MAX_BUFFER;i++) |
3044 | junokout[i] = 0xFF; | |
3045 | setNuConvTestName(codepage, "FROM"); | |
3046 | ||
3047 | log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, | |
3048 | gOutBufferSize); | |
3049 | ||
3050 | conv = ucnv_open(codepage, &status); | |
3051 | if(U_FAILURE(status)) | |
3052 | { | |
3053 | log_data_err("Couldn't open converter %s\n",codepage); | |
3054 | return TRUE; /* Because the err has already been logged. */ | |
3055 | } | |
3056 | ||
3057 | log_verbose("Converter opened..\n"); | |
3058 | ||
3059 | /*----setting the callback routine----*/ | |
3060 | ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3061 | if (U_FAILURE(status)) | |
3062 | { | |
3063 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3064 | } | |
3065 | /*------------------------*/ | |
3066 | /*setting the subChar*/ | |
3067 | if(mySubChar != NULL){ | |
3068 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3069 | if (U_FAILURE(status)) { | |
3070 | log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); | |
3071 | } | |
3072 | } | |
3073 | /*------------*/ | |
3074 | ||
3075 | src = source; | |
3076 | targ = junkout; | |
3077 | offs = junokout; | |
3078 | ||
3079 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3080 | realBufferEnd = junkout + realBufferSize; | |
3081 | realSourceEnd = source + sourceLen; | |
3082 | ||
3083 | if ( gOutBufferSize != realBufferSize ) | |
3084 | checkOffsets = FALSE; | |
3085 | ||
3086 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3087 | checkOffsets = FALSE; | |
3088 | ||
3089 | do | |
3090 | { | |
3091 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
3092 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
3093 | ||
3094 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
3095 | ||
3096 | if(targ == realBufferEnd) | |
3097 | { | |
3098 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
3099 | return FALSE; | |
3100 | } | |
3101 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
3102 | ||
3103 | ||
3104 | status = U_ZERO_ERROR; | |
3105 | ||
3106 | ucnv_fromUnicode (conv, | |
3107 | (char **)&targ, | |
3108 | (const char *)end, | |
3109 | &src, | |
3110 | sourceLimit, | |
3111 | checkOffsets ? offs : NULL, | |
3112 | doFlush, /* flush if we're at the end of the input data */ | |
3113 | &status); | |
3114 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); | |
3115 | ||
3116 | /* allow failure codes for the stop callback */ | |
3117 | if(U_FAILURE(status) && status != expectedError) | |
3118 | { | |
3119 | log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3120 | return FALSE; | |
3121 | } | |
3122 | ||
3123 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
3124 | sourceLen, targ-junkout); | |
729e4ab9 | 3125 | if(getTestOption(VERBOSITY_OPTION)) |
b75a7d8f A |
3126 | { |
3127 | ||
3128 | junk[0] = 0; | |
3129 | offset_str[0] = 0; | |
3130 | for(p = junkout;p<targ;p++) | |
3131 | { | |
3132 | sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
3133 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); | |
3134 | } | |
3135 | ||
3136 | log_verbose(junk); | |
3137 | printSeq(expect, expectLen); | |
3138 | if ( checkOffsets ) | |
3139 | { | |
3140 | log_verbose("\nOffsets:"); | |
3141 | log_verbose(offset_str); | |
3142 | } | |
3143 | log_verbose("\n"); | |
3144 | } | |
3145 | ucnv_close(conv); | |
3146 | ||
3147 | ||
3148 | if(expectLen != targ-junkout) | |
3149 | { | |
3150 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
3151 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
73c04bcf | 3152 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
3153 | printSeqErr(expect, expectLen); |
3154 | return FALSE; | |
3155 | } | |
3156 | ||
3157 | if (checkOffsets && (expectOffsets != 0) ) | |
3158 | { | |
3159 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
3160 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
3161 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3162 | log_err("Got Output : "); | |
73c04bcf | 3163 | printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
b75a7d8f A |
3164 | log_err("Got Offsets: "); |
3165 | for(p=junkout;p<targ;p++) | |
3166 | log_err("%d,", junokout[p-junkout]); | |
3167 | log_err("\n"); | |
3168 | log_err("Expected Offsets: "); | |
3169 | for(i=0; i<(targ-junkout); i++) | |
3170 | log_err("%d,", expectOffsets[i]); | |
3171 | log_err("\n"); | |
3172 | return FALSE; | |
3173 | } | |
3174 | } | |
3175 | ||
3176 | if(!memcmp(junkout, expect, expectLen)) | |
3177 | { | |
3178 | log_verbose("String matches! %s\n", gNuConvTestName); | |
3179 | return TRUE; | |
3180 | } | |
3181 | else | |
3182 | { | |
3183 | log_err("String does not match. %s\n", gNuConvTestName); | |
3184 | log_err("source: "); | |
3185 | printUSeqErr(source, sourceLen); | |
3186 | log_err("Got: "); | |
73c04bcf | 3187 | printSeqErr((const uint8_t *)junkout, expectLen); |
b75a7d8f A |
3188 | log_err("Expected: "); |
3189 | printSeqErr(expect, expectLen); | |
3190 | return FALSE; | |
3191 | } | |
3192 | } | |
3193 | UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
3194 | const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, | |
3195 | const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) | |
3196 | { | |
3197 | UErrorCode status = U_ZERO_ERROR; | |
3198 | UConverter *conv = 0; | |
3199 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3200 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
73c04bcf A |
3201 | const char *src; |
3202 | const char *realSourceEnd; | |
3203 | const char *srcLimit; | |
b75a7d8f A |
3204 | UChar *targ; |
3205 | UChar *end; | |
3206 | int32_t *offs; | |
3207 | int i; | |
3208 | UBool checkOffsets = TRUE; | |
3209 | char junk[9999]; | |
3210 | char offset_str[9999]; | |
3211 | UChar *p; | |
3212 | UConverterToUCallback oldAction = NULL; | |
3213 | const void* oldContext = NULL; | |
3214 | ||
3215 | int32_t realBufferSize; | |
3216 | UChar *realBufferEnd; | |
3217 | ||
3218 | ||
3219 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3220 | junkout[i] = 0xFFFE; | |
3221 | ||
3222 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
3223 | junokout[i] = -1; | |
3224 | ||
3225 | setNuConvTestName(codepage, "TO"); | |
3226 | ||
3227 | log_verbose("\n========= %s\n", gNuConvTestName); | |
3228 | ||
3229 | conv = ucnv_open(codepage, &status); | |
3230 | if(U_FAILURE(status)) | |
3231 | { | |
3232 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
3233 | return TRUE; | |
3234 | } | |
3235 | ||
3236 | log_verbose("Converter opened..\n"); | |
3237 | ||
73c04bcf | 3238 | src = (const char *)source; |
b75a7d8f A |
3239 | targ = junkout; |
3240 | offs = junokout; | |
3241 | ||
3242 | realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3243 | realBufferEnd = junkout + realBufferSize; | |
3244 | realSourceEnd = src + sourcelen; | |
3245 | /*----setting the callback routine----*/ | |
3246 | ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); | |
3247 | if (U_FAILURE(status)) | |
3248 | { | |
3249 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3250 | } | |
3251 | /*-------------------------------------*/ | |
3252 | /*setting the subChar*/ | |
3253 | if(mySubChar != NULL){ | |
3254 | ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3255 | if (U_FAILURE(status)) { | |
3256 | log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); | |
3257 | } | |
3258 | } | |
3259 | /*------------*/ | |
3260 | ||
3261 | ||
3262 | if ( gOutBufferSize != realBufferSize ) | |
3263 | checkOffsets = FALSE; | |
3264 | ||
3265 | if( gInBufferSize != NEW_MAX_BUFFER ) | |
3266 | checkOffsets = FALSE; | |
3267 | ||
3268 | do | |
3269 | { | |
3270 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
3271 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
3272 | ||
3273 | if(targ == realBufferEnd) | |
3274 | { | |
3275 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
3276 | return FALSE; | |
3277 | } | |
3278 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
3279 | ||
3280 | ||
3281 | ||
3282 | status = U_ZERO_ERROR; | |
3283 | ||
3284 | ucnv_toUnicode (conv, | |
3285 | &targ, | |
3286 | end, | |
3287 | (const char **)&src, | |
3288 | (const char *)srcLimit, | |
3289 | checkOffsets ? offs : NULL, | |
3290 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ | |
3291 | &status); | |
3292 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
3293 | ||
3294 | /* allow failure codes for the stop callback */ | |
3295 | if(U_FAILURE(status) && status!=expectedError) | |
3296 | { | |
3297 | log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); | |
3298 | return FALSE; | |
3299 | } | |
3300 | ||
3301 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
3302 | sourcelen, targ-junkout); | |
729e4ab9 | 3303 | if(getTestOption(VERBOSITY_OPTION)) |
b75a7d8f A |
3304 | { |
3305 | ||
3306 | junk[0] = 0; | |
3307 | offset_str[0] = 0; | |
3308 | ||
3309 | for(p = junkout;p<targ;p++) | |
3310 | { | |
3311 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); | |
3312 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); | |
3313 | } | |
3314 | ||
3315 | log_verbose(junk); | |
3316 | printUSeq(expect, expectlen); | |
3317 | if ( checkOffsets ) | |
3318 | { | |
3319 | log_verbose("\nOffsets:"); | |
3320 | log_verbose(offset_str); | |
3321 | } | |
3322 | log_verbose("\n"); | |
3323 | } | |
3324 | ucnv_close(conv); | |
3325 | ||
3326 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
3327 | ||
3328 | if (checkOffsets && (expectOffsets != 0)) | |
3329 | { | |
3330 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
3331 | { | |
3332 | log_err("did not get the expected offsets while %s \n", gNuConvTestName); | |
3333 | log_err("Got offsets: "); | |
3334 | for(p=junkout;p<targ;p++) | |
3335 | log_err(" %2d,", junokout[p-junkout]); | |
3336 | log_err("\n"); | |
3337 | log_err("Expected offsets: "); | |
3338 | for(i=0; i<(targ-junkout); i++) | |
3339 | log_err(" %2d,", expectOffsets[i]); | |
3340 | log_err("\n"); | |
3341 | log_err("Got output: "); | |
3342 | for(i=0; i<(targ-junkout); i++) | |
3343 | log_err("0x%04x,", junkout[i]); | |
3344 | log_err("\n"); | |
3345 | log_err("From source: "); | |
73c04bcf | 3346 | for(i=0; i<(src-(const char *)source); i++) |
b75a7d8f A |
3347 | log_err(" 0x%02x,", (unsigned char)source[i]); |
3348 | log_err("\n"); | |
3349 | } | |
3350 | } | |
3351 | ||
3352 | if(!memcmp(junkout, expect, expectlen*2)) | |
3353 | { | |
3354 | log_verbose("Matches!\n"); | |
3355 | return TRUE; | |
3356 | } | |
3357 | else | |
3358 | { | |
3359 | log_err("String does not match. %s\n", gNuConvTestName); | |
3360 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
3361 | log_err("Got: "); | |
3362 | printUSeqErr(junkout, expectlen); | |
3363 | log_err("Expected: "); | |
3364 | printUSeqErr(expect, expectlen); | |
3365 | log_err("\n"); | |
3366 | return FALSE; | |
3367 | } | |
3368 | } | |
73c04bcf A |
3369 | |
3370 | static void TestCallBackFailure(void) { | |
3371 | UErrorCode status = U_USELESS_COLLATOR_ERROR; | |
3372 | ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); | |
3373 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3374 | log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); | |
3375 | } | |
3376 | ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); | |
3377 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3378 | log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3379 | } | |
3380 | ucnv_cbFromUWriteSub(NULL, -1, &status); | |
3381 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3382 | log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); | |
3383 | } | |
3384 | ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); | |
3385 | if (status != U_USELESS_COLLATOR_ERROR) { | |
3386 | log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); | |
3387 | } | |
3388 | } |