]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/ncnvtst.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / ncnvtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File NCNVTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage
13 *********************************************************************************
14 */
15 #include <stdio.h>
16 #include "cmemory.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ucnv.h"
19 #include "unicode/utypes.h"
20 #include "unicode/ustring.h"
21 #include "unicode/uset.h"
22 #include "cintltst.h"
23
#define MAX_LENGTH 999

/* Unicode code point limit and the boundaries of the surrogate range. */
#define UNICODE_LIMIT 0x10FFFF
#define SURROGATE_HIGH_START 0xD800
#define SURROGATE_LOW_END 0xDFFF

/* Buffer sizes and the label text that setNuConvTestName() reports. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
static char gNuConvTestName[1024];

/*
 * Smaller of two values. Every use of an argument is parenthesized so that
 * operands containing low-precedence operators (e.g. a|b) expand correctly.
 * The arguments are still evaluated more than once, so avoid side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))

/*
 * Number of elements of a real array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so it can be used inside any expression.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
36
37 static void printSeq(const unsigned char* a, int len);
38 static void printSeqErr(const unsigned char* a, int len);
39 static void printUSeq(const UChar* a, int len);
40 static void printUSeqErr(const UChar* a, int len);
41 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
42 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
43 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
44 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
45
46 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
47 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
48 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
49 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
50
51 static void setNuConvTestName(const char *codepage, const char *direction)
52 {
53 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
54 codepage,
55 direction,
56 gInBufferSize,
57 gOutBufferSize);
58 }
59
60
61 static void TestSurrogateBehaviour(void);
62 static void TestErrorBehaviour(void);
63 static void TestToUnicodeErrorBehaviour(void);
64 static void TestGetNextErrorBehaviour(void);
65 static void TestRegressionUTF8(void);
66 static void TestRegressionUTF32(void);
67 static void TestAvailableConverters(void);
68 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/
69 static void TestResetBehaviour(void);
70 static void TestTruncated(void);
71 static void TestUnicodeSet(void);
72
73 static void TestWithBufferSize(int32_t osize, int32_t isize);
74
75
/*
 * Sends a byte sequence to log_verbose() in the form
 * "{0xNN 0xNN ... }", preceded and followed by a newline.
 *
 * a   - bytes to print
 * len - number of bytes to print
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X ", a[idx]);
    }
    log_verbose("}\n");
}
84
85 static void printUSeq(const UChar* a, int len)
86 {
87 int i=0;
88 log_verbose("\n{");
89 while (i<len)
90 log_verbose("%0x04X ", a[i++]);
91 log_verbose("}\n");
92 }
93
/*
 * Writes a byte sequence to stderr in the form "{0xNN 0xNN ... }",
 * preceded and followed by a newline.
 *
 * a   - bytes to print
 * len - number of bytes to print
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02X ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
101
102 static void printUSeqErr(const UChar* a, int len)
103 {
104 int i=0;
105 fprintf(stderr, "\n{");
106 while (i<len)
107 fprintf(stderr, "0x%04X ", a[i++]);
108 fprintf(stderr,"}\n");
109 }
110
111 void addExtraTests(TestNode** root);
112
113 void addExtraTests(TestNode** root)
114 {
115 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour");
116 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour");
117 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
118 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour");
119 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters");
120 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer");
121 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour");
122 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8");
123 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32");
124 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated");
125 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet");
126 }
127
128 /*test surrogate behaviour*/
129 static void TestSurrogateBehaviour(){
130 log_verbose("Testing for SBCS and LATIN_1\n");
131 {
132 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
133 const uint8_t expected[] = {0x31, 0x1a, 0x32};
134 /*SBCS*/
135 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
136 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
137 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
138
139 /*LATIN_1*/
140 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
141 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
142 log_err("u-> LATIN_1 not match.\n");
143
144 }
145 log_verbose("Testing for DBCS and MBCS\n");
146 {
147 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4};
148 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
149 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
150
151 /*DBCS*/
152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
153 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
154 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
155 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
156 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
157 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
158 /*MBCS*/
159 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
160 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
161 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
162 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
163 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
164 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
165 }
166 log_verbose("Testing for ISO-2022-jp\n");
167 {
168 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
169
170 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
171 0x31,0x1A, 0x32};
172
173
174 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
175
176 /*iso-2022-jp*/
177 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
178 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
179 log_err("u-> not match.\n");
180 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
181 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
182 log_err("u-> not match.\n");
183 }
184 log_verbose("Testing for ISO-2022-cn\n");
185 {
186 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
187
188 static const uint8_t expected[] = {
189 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
190 0x36, 0x21,
191 0x0F, 0x31,
192 0x1A,
193 0x0f, 0x32
194 };
195
196
197
198 static const int32_t offsets[] = {
199 0, 0, 0, 0, 0, 0, 0,
200 1, 1,
201 2, 2,
202 3,
203 5, 5, };
204
205 /*iso-2022-CN*/
206 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
207 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
208 log_err("u-> not match.\n");
209 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
210 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
211 log_err("u-> not match.\n");
212 }
213 log_verbose("Testing for ISO-2022-kr\n");
214 {
215 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
216
217 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
218 0x0E, 0x6C, 0x69,
219 0x0f, 0x1A,
220 0x0e, 0x6F, 0x4B,
221 0x0F, 0x31,
222 0x1A,
223 0x32 };
224
225 static const int32_t offsets[] = {-1, -1, -1, -1,
226 0, 0, 0,
227 1, 1,
228 3, 3, 3,
229 4, 4,
230 5,
231 7,
232 };
233
234 /*iso-2022-kr*/
235 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
236 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
237 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
238 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
239 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
240 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
241 }
242 log_verbose("Testing for HZ\n");
243 {
244 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
245
246 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
247 0x7E, 0x7D, 0x1A,
248 0x7E, 0x7B, 0x36, 0x21,
249 0x7E, 0x7D, 0x31,
250 0x1A,
251 0x32 };
252
253
254 static const int32_t offsets[] = {0,0,0,0,
255 1,1,1,
256 3,3,3,3,
257 4,4,4,
258 5,
259 7,};
260
261 /*hz*/
262 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
263 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
264 log_err("u-> not match.\n");
265 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
266 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
267 log_err("u-> not match.\n");
268 }
269 /*UTF-8*/
270 log_verbose("Testing for UTF8\n");
271 {
272 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
273 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
274 0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
275 0x04, 0x06 };
276 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
277 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
278
279
280 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
281 /*UTF-8*/
282 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
283 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
284 log_err("u-> UTF8 with offsets and flush true did not match.\n");
285 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
286 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
287 log_err("u-> UTF8 with offsets and flush true did not match.\n");
288 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
289 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
290 log_err("u-> UTF8 with offsets and flush true did not match.\n");
291 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
292 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
293 log_err("u-> UTF8 with offsets and flush true did not match.\n");
294
295 if(!convertToU(expected, sizeof(expected),
296 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
297 log_err("UTF8 -> did not match.\n");
298 if(!convertToU(expected, sizeof(expected),
299 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
300 log_err("UTF8 -> did not match.\n");
301 if(!convertToU(expected, sizeof(expected),
302 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
303 log_err("UTF8 -> did not match.\n");
304 if(!convertToU(expected, sizeof(expected),
305 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
306 log_err("UTF8 -> did not match.\n");
307
308 }
309
310
311
312 }
313
314 /*test various error behaviours*/
315 static void TestErrorBehaviour(){
316 log_verbose("Testing for SBCS and LATIN_1\n");
317 {
318 static const UChar sampleText[] = { 0x0031, 0xd801};
319 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032};
320 static const uint8_t expected[] = { 0x31};
321 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32};
322
323 /*SBCS*/
324 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
325 expected, sizeof(expected), "ibm-920", 0, TRUE, U_TRUNCATED_CHAR_FOUND))
326 log_err("u-> ibm-920 [UCNV_SBCS] \n");
327 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
328 expected, sizeof(expected), "ibm-920", 0, FALSE, U_ZERO_ERROR))
329 log_err("u-> ibm-920 [UCNV_SBCS] \n");
330 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
331 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
332 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
333
334
335 /*LATIN_1*/
336 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
337 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_TRUNCATED_CHAR_FOUND))
338 log_err("u-> LATIN_1 is supposed to fail\n");
339 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
340 expected, sizeof(expected), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
341 log_err("u-> LATIN_1 is supposed to fail\n");
342
343 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
344 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
345 log_err("u-> LATIN_1 did not match\n");
346 }
347
348
349 log_verbose("Testing for DBCS and MBCS\n");
350 {
351 static const UChar sampleText[] = { 0x00a1, 0xd801};
352 static const uint8_t expected[] = { 0xa2, 0xae};
353 static const int32_t offsets[] = { 0x00, 0x00, 0x01, 0x01};
354
355 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
356 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
357 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
358
359 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
360 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
361 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02};
362
363 static const UChar sampleText4MBCS[] = { 0x0061, 0x00a6, 0xdc01};
364 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
365 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
366
367
368
369
370
371 /*DBCS*/
372 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
373 expected, sizeof(expected), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND))
374 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
375 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
376 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
377 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
378
379 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
380 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_TRUNCATED_CHAR_FOUND))
381 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
382 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
383 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR))
384 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
385
386
387 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
388 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
389 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
390 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
391 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
392 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
393
394 /*MBCS*/
395 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
396 expected, sizeof(expected), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND))
397 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
398 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
399 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
400 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
401
402 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
403 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
404 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
405 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
406 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
407 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
408 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
409 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
410 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
411
412 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
413 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
414 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
415 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
416 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
417 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
418
419 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
420 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
421 log_err("u-> euc-jp [UCNV_MBCS] \n");
422 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
423 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
424 log_err("u-> euc-jp [UCNV_MBCS] \n");
425 }
426 /*iso-2022-jp*/
427 log_verbose("Testing for iso-2022-jp\n");
428 {
429 static const UChar sampleText[] = { 0x0031, 0xd801};
430 static const uint8_t expected[] = { 0x31};
431 static const int32_t offsets[] = { 0x00};
432
433 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032};
434 static const uint8_t expected2[] = { 0x31,0x1A,0x32};
435 static const int32_t offsets2[] = { 0x00,0x01,0x02};
436
437 static const UChar sampleText3MBCS[] = { 0x3000, 0x0050, 0xdc01,0x3001};
438 static const uint8_t expected3MBCS[] = { 0x1B, 0x24, 0x42, 0x21, 0x21, 0x1B, 0x28, 0x42, 0x50, 0x1A, 0x1B, 0x24, 0x42, 0x21, 0x22,};
439 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03,};
440
441 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
442 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
443 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
444 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
445 expected, sizeof(expected), "iso-2022-jp", offsets, TRUE, U_TRUNCATED_CHAR_FOUND))
446 log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
447 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
448 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
449 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
450
451 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
452 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
453 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
454 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
455 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
456 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
457 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
458 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
459 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
460
461 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
462 expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, TRUE, U_ZERO_ERROR))
463 log_err("u->iso-2022-jp [UCNV_MBCS] \n");
464 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
465 expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, FALSE, U_ZERO_ERROR))
466 log_err("u-> iso-2022-jp[UCNV_MBCS] \n");
467
468 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
469 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
470 log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
471 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
472 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
473 log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
474 }
475 /*iso-2022-cn*/
476 log_verbose("Testing for iso-2022-cn\n");
477 {
478 static const UChar sampleText[] = { 0x0031, 0xd801};
479 static const uint8_t expected[] = { 0x0f, 0x31};
480 static const int32_t offsets[] = { 0x00, 0x00};
481
482 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032};
483 static const uint8_t expected2[] = { 0x0f, 0x31, 0x1A,0x32};
484 static const int32_t offsets2[] = { 0x00, 0x00, 0x01,0x02};
485
486 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
487 static const uint8_t expected3MBCS[] = {0x0f, 0x51, 0x50, 0x1A};
488 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x01, 0x02 };
489
490 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
491 static const uint8_t expected4MBCS[] = { 0x0f, 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
492 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
493 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
494 expected, sizeof(expected), "iso-2022-cn", offsets, TRUE, U_TRUNCATED_CHAR_FOUND))
495 log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
496 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
497 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
498 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
499
500 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
501 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
502 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
503 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
504 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
505 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
506 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
507 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
508 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
509
510 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
511 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
512 log_err("u->iso-2022-cn [UCNV_MBCS] \n");
513 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
514 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
515 log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
516
517 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
518 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
519 log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
520 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
521 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
522 log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
523 }
524 /*iso-2022-kr*/
525 log_verbose("Testing for iso-2022-kr\n");
526 {
527 static const UChar sampleText[] = { 0x0031, 0xd801};
528 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
529 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00};
530
531 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032};
532 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
533 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02};
534
535 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
536 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A };
537 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 };
538
539 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01,0x4e00};
540 static const uint8_t expected4MBCS[] = { 0x1b, 0x24, 0x29, 0x43,
541 0x61,
542 0x0e, 0x6c, 0x69,
543 0x0f, 0x1a,
544 0x0e, 0x6c, 0x69,};
545 static const int32_t offsets4MBCS[] = { -1, -1, -1, -1, 0x00, 0x01 ,0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03 };
546 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
547 expected, sizeof(expected), "iso-2022-kr", offsets, TRUE, U_TRUNCATED_CHAR_FOUND))
548 log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
549 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
550 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
551 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
552
553 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
554 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
555 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
556 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
557 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
558 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
559 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
560 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
561 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
562
563 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
564 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
565 log_err("u->iso-2022-kr [UCNV_MBCS] \n");
566 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
567 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
568 log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
569
570 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
571 expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, TRUE, U_ZERO_ERROR))
572 log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
573 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
574 expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, FALSE, U_ZERO_ERROR))
575 log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
576 }
577
578 /*HZ*/
579 log_verbose("Testing for HZ\n");
580 {
581 static const UChar sampleText[] = { 0x0031, 0xd801};
582 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
583 static const int32_t offsets[] = { 0x00, 0x00, 0x00};
584
585 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032};
586 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 };
587 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 };
588
589 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
590 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A };
591 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02};
592
593 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
594 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
595 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
596 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
597 expected, sizeof(expected), "HZ", offsets, TRUE, U_TRUNCATED_CHAR_FOUND))
598 log_err("u-> HZ [UCNV_MBCS] \n");
599 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
600 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
601 log_err("u-> ibm-1363 [UCNV_MBCS] \n");
602
603 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
604 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
605 log_err("u->HZ[UCNV_DBCS] did not match\n");
606 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
607 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
608 log_err("u-> HZ [UCNV_DBCS] did not match\n");
609 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
610 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
611 log_err("u-> HZ [UCNV_DBCS] did not match\n");
612
613 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
614 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
615 log_err("u->HZ [UCNV_MBCS] \n");
616 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
617 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
618 log_err("u-> HZ[UCNV_MBCS] \n");
619
620 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
621 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
622 log_err("u-> HZ [UCNV_MBCS] \n");
623 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
624 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
625 log_err("u-> HZ [UCNV_MBCS] \n");
626 }
627
628
629 }
630
631 /*test different convertToUnicode error behaviours*/
632 static void TestToUnicodeErrorBehaviour()
633 {
634 log_verbose("Testing error conditions for DBCS\n");
635 {
636 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
637 const UChar expected[] = { 0x00a1 };
638
639 uint8_t sampleText2[] = { 0xa2, 0xae, 0xa2};
640 const UChar expected2[] = { 0x00a1 };
641
642 if(!convertToU(sampleText, sizeof(sampleText),
643 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR ))
644 log_err("DBCS (ibm-1363)->Unicode did not match.\n");
645 if(!convertToU(sampleText, sizeof(sampleText),
646 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR ))
647 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n");
648
649 if(!convertToU(sampleText2, sizeof(sampleText2),
650 expected2, sizeof(expected2)/sizeof(expected2[0]), "ibm-1363", 0, TRUE, U_TRUNCATED_CHAR_FOUND ))
651 log_err("DBCS (ibm-1363)->Unicode with TRUNCATED CHARACTER did not match.\n");
652
653
654 }
655 log_verbose("Testing error conditions for SBCS\n");
656 {
657 uint8_t sampleText[] = { 0xa2, 0xFF};
658 const UChar expected[] = { 0x00c2 };
659
660 /* uint8_t sampleText2[] = { 0xa2, 0x70 };
661 const UChar expected2[] = { 0x0073 };*/
662
663 if(!convertToU(sampleText, sizeof(sampleText),
664 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
665 log_err("SBCS (ibm-1051)->Unicode did not match.\n");
666 if(!convertToU(sampleText, sizeof(sampleText),
667 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
668 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n");
669
670 }
671 log_verbose("Testing error conditions for UTF8\n");
672 {
673 const uint8_t sampleText[] = { 0x31, 0xe4, 0xba, 0x8c, 0xe4, 0xb8 };
674 UChar expectedUTF8[] = { 0x0031, 0x4e8c};
675 int32_t offsets[] = { 0x0000, 0x0001};
676
677 const uint8_t sampleText2[] = { 0x31, 0xff, 0xe4, 0xba, 0x8c,
678 0xe0, 0x80, 0x61};
679 UChar expected2UTF8[] = { 0x0031, 0xfffd, 0x4e8c, 0xfffd, 0x0061};
680 int32_t offsets2[] = { 0x0000, 0x0001, 0x0002, 0x0005, 0x0007};
681
682 const uint8_t sampleText3[] = { 0x31, 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
683 0x61};
684 UChar expected3UTF8[] = { 0x0031, 0xfffd, 0x0061};
685 int32_t offsets3[] = { 0x0000, 0x0001, 0x0006};
686
687 if(!convertToU(sampleText, sizeof(sampleText),
688 expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", 0, TRUE, U_TRUNCATED_CHAR_FOUND ))
689 log_err("utf-8->Unicode did not match.\n");
690 if(!convertToU(sampleText, sizeof(sampleText),
691 expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", 0, FALSE, U_ZERO_ERROR ))
692 log_err("utf-8->Unicode did not match.\n");
693 if(!convertToU(sampleText, sizeof(sampleText),
694 expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", offsets, TRUE, U_TRUNCATED_CHAR_FOUND ))
695 log_err("utf-8->Unicode did not match.\n");
696 if(!convertToU(sampleText, sizeof(sampleText),
697 expectedUTF8, sizeof(expectedUTF8)/sizeof(expectedUTF8[0]), "utf-8", offsets, FALSE, U_ZERO_ERROR ))
698 log_err("utf-8->Unicode did not match.\n");
699
700 if(!convertToU(sampleText2, sizeof(sampleText2),
701 expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", 0, TRUE, U_ZERO_ERROR ))
702 log_err("utf-8->Unicode did not match.\n");
703 if(!convertToU(sampleText2, sizeof(sampleText2),
704 expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", 0, FALSE, U_ZERO_ERROR ))
705 log_err("utf-8->Unicode did not match.\n");
706 if(!convertToU(sampleText2, sizeof(sampleText2),
707 expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", offsets2, TRUE, U_ZERO_ERROR ))
708 log_err("utf-8->Unicode did not match.\n");
709 if(!convertToU(sampleText2, sizeof(sampleText2),
710 expected2UTF8, sizeof(expected2UTF8)/sizeof(expected2UTF8[0]), "utf-8", offsets2, FALSE, U_ZERO_ERROR ))
711 log_err("utf-8->Unicode did not match.\n");
712
713 if(!convertToU(sampleText3, sizeof(sampleText3),
714 expected3UTF8, sizeof(expected3UTF8)/sizeof(expected3UTF8[0]), "utf-8", offsets3, TRUE, U_ZERO_ERROR ))
715 log_err("utf-8->Unicode did not match.\n");
716 if(!convertToU(sampleText3, sizeof(sampleText3),
717 expected3UTF8, sizeof(expected3UTF8)/sizeof(expected3UTF8[0]), "utf-8", offsets3, FALSE, U_ZERO_ERROR ))
718 log_err("utf-8->Unicode did not match with flush false.\n");
719
720 }
721
722 }
723
724 static void TestGetNextErrorBehaviour(){
725 /*Test for unassigned character*/
726 #define INPUT_SIZE 1
727 static const char input1[INPUT_SIZE]={ 0x70 };
728 const char* source=(const char*)input1;
729 UErrorCode err=U_ZERO_ERROR;
730 UChar32 c=0;
731 UConverter *cnv=ucnv_open("ibm-424", &err);
732 if(U_FAILURE(err)) {
733 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
734 return;
735 }
736 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
737 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
738 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c);
739 }
740 ucnv_close(cnv);
741 }
742
743 #define MAX_UTF16_LEN 2
744 #define MAX_UTF8_LEN 4
745
746 /*Regression test for utf8 converter*/
747 static void TestRegressionUTF8(){
748 UChar32 currCh = 0;
749 int32_t offset8;
750 int32_t offset16;
751 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
752 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
753
754 while (currCh <= UNICODE_LIMIT) {
755 offset16 = 0;
756 offset8 = 0;
757 while(currCh <= UNICODE_LIMIT
758 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
759 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
760 {
761 if (currCh == SURROGATE_HIGH_START) {
762 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
763 }
764 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
765 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
766 currCh++;
767 }
768 if(!convertFromU(standardForm, offset16,
769 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
770 log_err("Unicode->UTF8 did not match.\n");
771 }
772 if(!convertToU(utf8, offset8,
773 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
774 log_err("UTF8->Unicode did not match.\n");
775 }
776 }
777 free(standardForm);
778 free(utf8);
779 }
780
781 #define MAX_UTF32_LEN 1
782
783 static void TestRegressionUTF32(){
784 UChar32 currCh = 0;
785 int32_t offset32;
786 int32_t offset16;
787 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
788 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
789
790 while (currCh <= UNICODE_LIMIT) {
791 offset16 = 0;
792 offset32 = 0;
793 while(currCh <= UNICODE_LIMIT
794 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
795 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
796 {
797 if (currCh == SURROGATE_HIGH_START) {
798 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
799 }
800 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
801 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
802 currCh++;
803 }
804 if(!convertFromU(standardForm, offset16,
805 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
806 log_err("Unicode->UTF32 did not match.\n");
807 }
808 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
809 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
810 log_err("UTF32->Unicode did not match.\n");
811 }
812 }
813 free(standardForm);
814 free(utf32);
815 }
816
817 /*Walk through the available converters*/
818 static void TestAvailableConverters(){
819 UErrorCode status=U_ZERO_ERROR;
820 UConverter *conv=NULL;
821 int32_t i=0;
822 for(i=0; i < ucnv_countAvailable(); i++){
823 status=U_ZERO_ERROR;
824 conv=ucnv_open(ucnv_getAvailableName(i), &status);
825 if(U_FAILURE(status)){
826 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
827 ucnv_getAvailableName(i), myErrorName(status));
828 continue;
829 }
830 ucnv_close(conv);
831 }
832
833 }
834
835 static void TestFlushInternalBuffer(){
836 TestWithBufferSize(MAX_LENGTH, 1);
837 TestWithBufferSize(1, 1);
838 TestWithBufferSize(1, MAX_LENGTH);
839 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
840 }
841
842 static void TestWithBufferSize(int32_t insize, int32_t outsize){
843
844 gInBufferSize =insize;
845 gOutBufferSize = outsize;
846
847 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
848 {
849 UChar sampleText[] =
850 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
851 const uint8_t expectedUTF8[] =
852 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
853 int32_t toUTF8Offs[] =
854 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
855 /* int32_t fmUTF8Offs[] =
856 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
857
858 /*UTF-8*/
859 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
860 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
861 log_err("u-> UTF8 did not match.\n");
862 }
863
864 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
865 {
866 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
867 const uint8_t toIBM943[]= { 0x61,
868 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
869 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
870 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
871 0x61 };
872 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
873
874 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
875 toIBM943, sizeof(toIBM943), "ibm-943",
876 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
877 log_err("u-> ibm-943 with subst with value did not match.\n");
878 }
879
880 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
881 {
882 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
883 0xe0, 0x80, 0x61};
884 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
885 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
886
887 if(!testConvertToU(sampleText1, sizeof(sampleText1),
888 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
889 log_err("utf8->u with substitute did not match.\n");;
890 }
891
892
893 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
894 /*to Unicode*/
895 {
896 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
897 0x81, 0xad, /*unassigned*/
898 0x89, 0xd3 };
899 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
900 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
901 0x7B87};
902 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
903
904 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
905 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
906 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
907 log_err("ibm-943->u with substitute with value did not match.\n");
908
909 }
910
911 }
912
913 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
914 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
915 {
916
917 int32_t i=0;
918 uint8_t *p=0;
919 const UChar *src;
920 uint8_t buffer[MAX_LENGTH];
921 int32_t offsetBuffer[MAX_LENGTH];
922 int32_t *offs=0;
923 uint8_t *targ;
924 uint8_t *targetLimit;
925 UChar *sourceLimit=0;
926 UErrorCode status = U_ZERO_ERROR;
927 UConverter *conv = 0;
928 conv = ucnv_open(codepage, &status);
929 if(U_FAILURE(status))
930 {
931 log_data_err("Couldn't open converter %s\n",codepage);
932 return TRUE;
933 }
934 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
935
936 for(i=0; i<MAX_LENGTH; i++){
937 buffer[i]=0xF0;
938 offsetBuffer[i]=0xFF;
939 }
940
941 src=source;
942 sourceLimit=(UChar*)src+(sourceLen);
943 targ=buffer;
944 targetLimit=targ+MAX_LENGTH;
945 offs=offsetBuffer;
946 ucnv_fromUnicode (conv,
947 (char **)&targ,
948 (const char *)targetLimit,
949 &src,
950 sourceLimit,
951 expectOffsets ? offs : NULL,
952 doFlush,
953 &status);
954 ucnv_close(conv);
955 if(status != expectedStatus){
956 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
957 return FALSE;
958 }
959
960 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
961 sourceLen, targ-buffer);
962
963 if(expectLen != targ-buffer)
964 {
965 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
966 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
967 printSeqErr((const unsigned char *)buffer, targ-buffer);
968 printSeqErr((const unsigned char*)expect, expectLen);
969 return FALSE;
970 }
971
972 if(memcmp(buffer, expect, expectLen)){
973 log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
974 printf("\nGot:");
975 printSeqErr((const unsigned char *)buffer, expectLen);
976 printf("\nExpected:");
977 printSeqErr((const unsigned char *)expect, expectLen);
978 return FALSE;
979 }
980 else {
981 log_verbose("Matches!\n");
982 }
983
984 if (expectOffsets != 0){
985 log_verbose("comparing %d offsets..\n", targ-buffer);
986 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
987 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
988 printf("\nGot : ");
989 printSeqErr((const unsigned char*)buffer, targ-buffer);
990 for(p=buffer;p<targ;p++)
991 printf("%d, ", offsetBuffer[p-buffer]);
992 printf("\nExpected: ");
993 for(i=0; i< (targ-buffer); i++)
994 printf("%d,", expectOffsets[i]);
995 }
996 }
997
998 return TRUE;
999 }
1000
1001
1002 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1003 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1004 {
1005 UErrorCode status = U_ZERO_ERROR;
1006 UConverter *conv = 0;
1007 int32_t i=0;
1008 UChar *p=0;
1009 const uint8_t* src;
1010 UChar buffer[MAX_LENGTH];
1011 int32_t offsetBuffer[MAX_LENGTH];
1012 int32_t *offs=0;
1013 UChar *targ;
1014 UChar *targetLimit;
1015 uint8_t *sourceLimit=0;
1016
1017
1018
1019 conv = ucnv_open(codepage, &status);
1020 if(U_FAILURE(status))
1021 {
1022 log_data_err("Couldn't open converter %s\n",codepage);
1023 return TRUE;
1024 }
1025 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1026
1027
1028
1029 for(i=0; i<MAX_LENGTH; i++){
1030 buffer[i]=0xFFFE;
1031 offsetBuffer[i]=-1;
1032 }
1033
1034 src=source;
1035 sourceLimit=(uint8_t*)(src+(sourceLen));
1036 targ=buffer;
1037 targetLimit=targ+MAX_LENGTH;
1038 offs=offsetBuffer;
1039
1040
1041
1042 ucnv_toUnicode (conv,
1043 &targ,
1044 targetLimit,
1045 (const char **)&src,
1046 (const char *)sourceLimit,
1047 expectOffsets ? offs : NULL,
1048 doFlush,
1049 &status);
1050
1051 ucnv_close(conv);
1052 if(status != expectedStatus){
1053 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1054 return FALSE;
1055 }
1056 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1057 sourceLen, targ-buffer);
1058
1059
1060
1061
1062 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1063
1064 if (expectOffsets != 0) {
1065 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1066
1067 log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1068 printf("\nGot : ");
1069 for(p=buffer;p<targ;p++)
1070 printf("%d, ", offsetBuffer[p-buffer]);
1071 printf("\nExpected: ");
1072 for(i=0; i<(targ-buffer); i++)
1073 printf("%d, ", expectOffsets[i]);
1074 printf("\nGot result:");
1075 for(i=0; i<(targ-buffer); i++)
1076 printf("0x%04X,", buffer[i]);
1077 printf("\nFrom Input:");
1078 for(i=0; i<(src-source); i++)
1079 printf("0x%02X,", (unsigned char)source[i]);
1080 puts("\n");
1081 }
1082 }
1083 if(memcmp(buffer, expect, expectLen*2)){
1084 log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1085 printf("\nGot:");
1086 printUSeqErr(buffer, expectLen);
1087 printf("\nExpected:");
1088 printUSeqErr(expect, expectLen);
1089 return FALSE;
1090 }
1091 else {
1092 log_verbose("Matches!\n");
1093 }
1094
1095 return TRUE;
1096 }
1097
1098
1099 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
1100 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1101 {
1102 UErrorCode status = U_ZERO_ERROR;
1103 UConverter *conv = 0;
1104 uint8_t junkout[MAX_LENGTH]; /* FIX */
1105 int32_t junokout[MAX_LENGTH]; /* FIX */
1106 uint8_t *p;
1107 const UChar *src;
1108 uint8_t *end;
1109 uint8_t *targ;
1110 int32_t *offs;
1111 int i;
1112 int32_t realBufferSize;
1113 uint8_t *realBufferEnd;
1114 const UChar *realSourceEnd;
1115 const UChar *sourceLimit;
1116 UBool checkOffsets = TRUE;
1117 UBool doFlush;
1118
1119 UConverterFromUCallback oldAction = NULL;
1120 const void* oldContext = NULL;
1121
1122 for(i=0;i<MAX_LENGTH;i++)
1123 junkout[i] = 0xF0;
1124 for(i=0;i<MAX_LENGTH;i++)
1125 junokout[i] = 0xFF;
1126
1127 setNuConvTestName(codepage, "FROM");
1128
1129 log_verbose("\n========= %s\n", gNuConvTestName);
1130
1131 conv = ucnv_open(codepage, &status);
1132 if(U_FAILURE(status))
1133 {
1134 log_data_err("Couldn't open converter %s\n",codepage);
1135 return TRUE;
1136 }
1137
1138 log_verbose("Converter opened..\n");
1139 /*----setting the callback routine----*/
1140 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1141 if (U_FAILURE(status)) {
1142 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1143 }
1144 /*------------------------*/
1145
1146 src = source;
1147 targ = junkout;
1148 offs = junokout;
1149
1150 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1151 realBufferEnd = junkout + realBufferSize;
1152 realSourceEnd = source + sourceLen;
1153
1154 if ( gOutBufferSize != realBufferSize )
1155 checkOffsets = FALSE;
1156
1157 if( gInBufferSize != MAX_LENGTH )
1158 checkOffsets = FALSE;
1159
1160 do
1161 {
1162 end = nct_min(targ + gOutBufferSize, realBufferEnd);
1163 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1164
1165 doFlush = (UBool)(sourceLimit == realSourceEnd);
1166
1167 if(targ == realBufferEnd)
1168 {
1169 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1170 return FALSE;
1171 }
1172 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
1173
1174
1175 status = U_ZERO_ERROR;
1176 if(gInBufferSize ==999 && gOutBufferSize==999)
1177 doFlush = FALSE;
1178 ucnv_fromUnicode (conv,
1179 (char **)&targ,
1180 (const char *)end,
1181 &src,
1182 sourceLimit,
1183 offs,
1184 doFlush, /* flush if we're at the end of the input data */
1185 &status);
1186 if(testReset)
1187 ucnv_resetToUnicode(conv);
1188 if(gInBufferSize ==999 && gOutBufferSize==999)
1189 ucnv_resetToUnicode(conv);
1190
1191 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1192
1193 if(U_FAILURE(status)) {
1194 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1195 return FALSE;
1196 }
1197
1198 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1199 sourceLen, targ-junkout);
1200 if(VERBOSITY)
1201 {
1202 char junk[999];
1203 char offset_str[999];
1204 uint8_t *ptr;
1205
1206 junk[0] = 0;
1207 offset_str[0] = 0;
1208 for(ptr = junkout;ptr<targ;ptr++)
1209 {
1210 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1211 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1212 }
1213
1214 log_verbose(junk);
1215 printSeq((const unsigned char *)expect, expectLen);
1216 if ( checkOffsets )
1217 {
1218 log_verbose("\nOffsets:");
1219 log_verbose(offset_str);
1220 }
1221 log_verbose("\n");
1222 }
1223 ucnv_close(conv);
1224
1225
1226 if(expectLen != targ-junkout)
1227 {
1228 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1229 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1230 printf("\nGot:");
1231 printSeqErr((const unsigned char*)junkout, targ-junkout);
1232 printf("\nExpected:");
1233 printSeqErr((const unsigned char*)expect, expectLen);
1234 return FALSE;
1235 }
1236
1237 if (checkOffsets && (expectOffsets != 0) )
1238 {
1239 log_verbose("comparing %d offsets..\n", targ-junkout);
1240 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1241 log_err("did not get the expected offsets. %s", gNuConvTestName);
1242 log_err("Got : ");
1243 printSeqErr((const unsigned char*)junkout, targ-junkout);
1244 for(p=junkout;p<targ;p++)
1245 log_err("%d, ", junokout[p-junkout]);
1246 log_err("\nExpected: ");
1247 for(i=0; i<(targ-junkout); i++)
1248 log_err("%d,", expectOffsets[i]);
1249 }
1250 }
1251
1252 log_verbose("comparing..\n");
1253 if(!memcmp(junkout, expect, expectLen))
1254 {
1255 log_verbose("Matches!\n");
1256 return TRUE;
1257 }
1258 else
1259 {
1260 log_err("String does not match. %s\n", gNuConvTestName);
1261 printUSeqErr(source, sourceLen);
1262 printf("\nGot:");
1263 printSeqErr((const unsigned char *)junkout, expectLen);
1264 printf("\nExpected:");
1265 printSeqErr((const unsigned char *)expect, expectLen);
1266
1267 return FALSE;
1268 }
1269 }
1270
1271 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1272 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1273 {
1274 UErrorCode status = U_ZERO_ERROR;
1275 UConverter *conv = 0;
1276 UChar junkout[MAX_LENGTH]; /* FIX */
1277 int32_t junokout[MAX_LENGTH]; /* FIX */
1278 const uint8_t *src;
1279 const uint8_t *realSourceEnd;
1280 const uint8_t *srcLimit;
1281 UChar *p;
1282 UChar *targ;
1283 UChar *end;
1284 int32_t *offs;
1285 int i;
1286 UBool checkOffsets = TRUE;
1287 int32_t realBufferSize;
1288 UChar *realBufferEnd;
1289 UBool doFlush;
1290
1291 UConverterToUCallback oldAction = NULL;
1292 const void* oldContext = NULL;
1293
1294
1295 for(i=0;i<MAX_LENGTH;i++)
1296 junkout[i] = 0xFFFE;
1297
1298 for(i=0;i<MAX_LENGTH;i++)
1299 junokout[i] = -1;
1300
1301 setNuConvTestName(codepage, "TO");
1302
1303 log_verbose("\n========= %s\n", gNuConvTestName);
1304
1305 conv = ucnv_open(codepage, &status);
1306 if(U_FAILURE(status))
1307 {
1308 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1309 return TRUE;
1310 }
1311
1312 log_verbose("Converter opened..\n");
1313 /*----setting the callback routine----*/
1314 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1315 if (U_FAILURE(status)) {
1316 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1317 }
1318 /*-------------------------------------*/
1319
1320 src = source;
1321 targ = junkout;
1322 offs = junokout;
1323
1324 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1325 realBufferEnd = junkout + realBufferSize;
1326 realSourceEnd = src + sourcelen;
1327
1328 if ( gOutBufferSize != realBufferSize )
1329 checkOffsets = FALSE;
1330
1331 if( gInBufferSize != MAX_LENGTH )
1332 checkOffsets = FALSE;
1333
1334 do
1335 {
1336 end = nct_min( targ + gOutBufferSize, realBufferEnd);
1337 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1338
1339 if(targ == realBufferEnd)
1340 {
1341 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1342 return FALSE;
1343 }
1344 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1345
1346 /* oldTarg = targ; */
1347
1348 status = U_ZERO_ERROR;
1349 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
1350
1351 ucnv_toUnicode (conv,
1352 &targ,
1353 end,
1354 (const char **)&src,
1355 (const char *)srcLimit,
1356 offs,
1357 doFlush, /* flush if we're at the end of hte source data */
1358 &status);
1359 if(testReset)
1360 ucnv_resetFromUnicode(conv);
1361 if(gInBufferSize ==999 && gOutBufferSize==999)
1362 ucnv_resetToUnicode(conv);
1363 /* offs += (targ-oldTarg); */
1364
1365 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1366
1367 if(U_FAILURE(status))
1368 {
1369 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1370 return FALSE;
1371 }
1372
1373 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1374 sourcelen, targ-junkout);
1375 if(VERBOSITY)
1376 {
1377 char junk[999];
1378 char offset_str[999];
1379
1380 UChar *ptr;
1381
1382 junk[0] = 0;
1383 offset_str[0] = 0;
1384
1385 for(ptr = junkout;ptr<targ;ptr++)
1386 {
1387 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1388 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1389 }
1390
1391 log_verbose(junk);
1392
1393 if ( checkOffsets )
1394 {
1395 log_verbose("\nOffsets:");
1396 log_verbose(offset_str);
1397 }
1398 log_verbose("\n");
1399 }
1400 ucnv_close(conv);
1401
1402 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1403
1404 if (checkOffsets && (expectOffsets != 0))
1405 {
1406 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1407
1408 log_err("did not get the expected offsets. %s",gNuConvTestName);
1409 for(p=junkout;p<targ;p++)
1410 log_err("%d, ", junokout[p-junkout]);
1411 log_err("\nExpected: ");
1412 for(i=0; i<(targ-junkout); i++)
1413 log_err("%d,", expectOffsets[i]);
1414 log_err("");
1415 for(i=0; i<(targ-junkout); i++)
1416 log_err("%X,", junkout[i]);
1417 log_err("");
1418 for(i=0; i<(src-source); i++)
1419 log_err("%X,", (unsigned char)source[i]);
1420 }
1421 }
1422
1423 if(!memcmp(junkout, expect, expectlen*2))
1424 {
1425 log_verbose("Matches!\n");
1426 return TRUE;
1427 }
1428 else
1429 {
1430 log_err("String does not match. %s\n", gNuConvTestName);
1431 log_verbose("String does not match. %s\n", gNuConvTestName);
1432 printf("\nGot:");
1433 printUSeq(junkout, expectlen);
1434 printf("\nExpected:");
1435 printUSeq(expect, expectlen);
1436 return FALSE;
1437 }
1438 }
1439
1440
1441 static void TestResetBehaviour(void){
1442 log_verbose("Testing Reset for SBCS and LATIN_1\n");
1443 {
1444 static const UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
1445 static const uint8_t expected[] = {0x31, 0x1a, 0x32};
1446 static const int32_t offsets[] = { 0,1,3};
1447
1448 static const UChar sampleText1[] = {0x0031, 0x0033, 0x0034, 0x0032};
1449 static const uint8_t expected1[] = {0x31, 0x33,0x34, 0x32};
1450 static const int32_t offsets1[] = { 0,1,2,3};
1451
1452 /*SBCS*/
1453 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1454 expected, sizeof(expected), "ibm-920", UCNV_FROM_U_CALLBACK_SUBSTITUTE , offsets, TRUE))
1455 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
1456 if(!testConvertToU(expected1, sizeof(expected1),
1457 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-920",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1458 offsets1, TRUE))
1459 log_err("ibm -920 -> did not match.\n");
1460
1461 /*LATIN_1*/
1462 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1463 expected, sizeof(expected), "LATIN1", UCNV_FROM_U_CALLBACK_SUBSTITUTE , offsets, TRUE))
1464 log_err("u-> LATIN_1 not match.\n");
1465 if(!testConvertToU(expected1, sizeof(expected1),
1466 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "LATIN1",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1467 offsets1, TRUE))
1468 log_err("LATIN1 -> did not match.\n");
1469
1470
1471
1472 }
1473 log_verbose("Testing Reset for DBCS and MBCS\n");
1474 {
1475 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1476 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1477 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1478
1479
1480 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1481 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1482 static const int32_t offsets1[] = { 0,2,4,6};
1483
1484 /*DBCS*/
1485 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1486 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1487 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1488 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1489 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1490 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1491
1492 if(!testConvertToU(expected1, sizeof(expected1),
1493 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1494 offsets1, TRUE))
1495 log_err("ibm-1363 -> did not match.\n");
1496 /*MBCS*/
1497 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1498 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1499 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1500 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1501 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1502 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1503
1504 if(!testConvertToU(expected1, sizeof(expected1),
1505 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1506 offsets1, TRUE))
1507 log_err("ibm-1363 -> did not match.\n");
1508
1509 }
1510 log_verbose("Testing Reset for ISO-2022-jp\n");
1511 {
1512 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1513
1514 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1515 0x31,0x1A, 0x32};
1516
1517
1518 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1519
1520
1521 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1522 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1523 0x31,0x1A, 0x32};
1524 static const int32_t offsets1[] = { 3,5,10,11,12};
1525
1526 /*iso-2022-jp*/
1527 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1528 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1529 log_err("u-> not match.\n");
1530 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1531 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1532 log_err("u-> not match.\n");
1533
1534 if(!testConvertToU(expected1, sizeof(expected1),
1535 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1536 offsets1, TRUE))
1537 log_err("iso-2022-jp -> did not match.\n");
1538
1539 }
1540 log_verbose("Testing Reset for ISO-2022-cn\n");
1541 {
1542 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1543
1544 static const uint8_t expected[] = {
1545 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1546 0x36, 0x21,
1547 0x0f, 0x31,
1548 0x1A,
1549 0x0f, 0x32
1550 };
1551
1552
1553 static const int32_t offsets[] = {
1554 0, 0, 0, 0, 0, 0, 0,
1555 1, 1,
1556 2, 2,
1557 3,
1558 5, 5, };
1559
1560 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1561 static const uint8_t expected1[] = {
1562 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1563 0x36, 0x21,
1564 0x1B, 0x24, 0x29, 0x47, 0x1B, 0x4E, 0x24, 0x22,
1565 0x0f, 0x1A,
1566 0x32
1567 };
1568 static const int32_t offsets1[] = { 5,7,15,18,19};
1569
1570 /*iso-2022-CN*/
1571 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1572 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1573 log_err("u-> not match.\n");
1574 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1575 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1576 log_err("u-> not match.\n");
1577
1578 if(!testConvertToU(expected1, sizeof(expected1),
1579 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1580 offsets1, TRUE))
1581 log_err("iso-2022-cn -> did not match.\n");
1582 }
1583 log_verbose("Testing Reset for ISO-2022-kr\n");
1584 {
1585 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1586
1587 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1588 0x0E, 0x6C, 0x69,
1589 0x0f, 0x1A,
1590 0x0e, 0x6F, 0x4B,
1591 0x0F, 0x31,
1592 0x1A,
1593 0x32 };
1594
1595 static const int32_t offsets[] = {-1, -1, -1, -1,
1596 0, 0, 0,
1597 1, 1,
1598 3, 3, 3,
1599 4, 4,
1600 5,
1601 7,
1602 };
1603 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1604
1605 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1606 0x0E, 0x6C, 0x69,
1607 0x0f, 0x41,
1608 0x0e, 0x6F, 0x4B,
1609 0x0F, 0x31,
1610 0x42,
1611 0x32 };
1612
1613 static const int32_t offsets1[] = {
1614 5, 8, 10,
1615 13, 14, 15
1616
1617 };
1618 /*iso-2022-kr*/
1619 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1620 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1621 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1622 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1623 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1624 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1625 if(!testConvertToU(expected1, sizeof(expected1),
1626 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1627 offsets1, TRUE))
1628 log_err("iso-2022-kr -> did not match.\n");
1629 }
1630 log_verbose("Testing Reset for HZ\n");
1631 {
1632 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1633
1634 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1635 0x7E, 0x7D, 0x1A,
1636 0x7E, 0x7B, 0x36, 0x21,
1637 0x7E, 0x7D, 0x31,
1638 0x1A,
1639 0x32 };
1640
1641
1642 static const int32_t offsets[] = {0,0,0,0,
1643 1,1,1,
1644 3,3,3,3,
1645 4,4,4,
1646 5,
1647 7,};
1648 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1649
1650 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1651 0x7E, 0x7D, 0x35,
1652 0x7E, 0x7B, 0x36, 0x21,
1653 0x7E, 0x7D, 0x31,
1654 0x41,
1655 0x32 };
1656
1657
1658 static const int32_t offsets1[] = {2,6,9,13,14,15
1659 };
1660
1661 /*hz*/
1662 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1663 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1664 log_err("u-> not match.\n");
1665 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1666 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1667 log_err("u-> not match.\n");
1668 if(!testConvertToU(expected1, sizeof(expected1),
1669 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1670 offsets1, TRUE))
1671 log_err("hz -> did not match.\n");
1672 }
1673 /*UTF-8*/
1674 log_verbose("Testing for UTF8\n");
1675 {
1676 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1677 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1678 0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1679 0x04, 0x06 };
1680 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1681 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1682
1683
1684 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1685 /*UTF-8*/
1686 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1687 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1688 log_err("u-> UTF8 with offsets and flush true did not match.\n");
1689 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1690 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1691 log_err("u-> UTF8 with offsets and flush true did not match.\n");
1692 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1693 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1694 log_err("u-> UTF8 with offsets and flush true did not match.\n");
1695 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1696 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1697 log_err("u-> UTF8 with offsets and flush true did not match.\n");
1698 if(!testConvertToU(expected, sizeof(expected),
1699 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1700 log_err("UTF8 -> did not match.\n");
1701 if(!testConvertToU(expected, sizeof(expected),
1702 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1703 log_err("UTF8 -> did not match.\n");
1704 if(!testConvertToU(expected, sizeof(expected),
1705 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1706 log_err("UTF8 -> did not match.\n");
1707 if(!testConvertToU(expected, sizeof(expected),
1708 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1709 log_err("UTF8 -> did not match.\n");
1710
1711 }
1712
1713 }
1714
1715 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
1716 static void
1717 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1718 UConverter *cnv;
1719
1720 UChar buffer[2];
1721 UChar *target, *targetLimit;
1722 const char *source, *sourceLimit;
1723
1724 UErrorCode errorCode;
1725
1726 errorCode=U_ZERO_ERROR;
1727 cnv=ucnv_open(cnvName, &errorCode);
1728 if(U_FAILURE(errorCode)) {
1729 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1730 return;
1731 }
1732
1733 source=(const char *)bytes;
1734 sourceLimit=source+length;
1735 target=buffer;
1736 targetLimit=buffer+LENGTHOF(buffer);
1737
1738 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
1739 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
1740 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1741 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
1742 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1743 }
1744
1745 errorCode=U_ZERO_ERROR;
1746 source=sourceLimit;
1747 target=buffer;
1748 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1749 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1750 log_err("error TestTruncated(%s, 1b): no input, flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1751 cnvName, u_errorName(errorCode), (int)(target-buffer));
1752 }
1753
1754 /*
1755 * ### TODO: flush=TRUE resets; make sure this is well documented; question -
1756 * does it also delete ucnv_getInvalidChars()?
1757 * resetting logically should delete them, but then it is not possible to figure out which bytes are left in the converter.
1758 */
1759
1760 /* 2. input bytes with flush=TRUE */
1761 ucnv_resetToUnicode(cnv);
1762
1763 errorCode=U_ZERO_ERROR;
1764 source=(const char *)bytes;
1765 target=buffer;
1766 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1767 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1768 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1769 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1770 }
1771
1772
1773 ucnv_close(cnv);
1774 }
1775
/*
 * Run doTestTruncated() over a fixed table of converters, each paired with
 * an incomplete byte sequence.
 */
static void
TestTruncated() {
    typedef struct TruncatedCase {
        const char *cnvName;
        uint8_t bytes[8];   /* partial input bytes resulting in no output */
        int32_t length;     /* number of bytes[] entries that are used */
    } TruncatedCase;

    static const TruncatedCase cases[]={
        { "IMAP-mailbox-name",  { 0x26 },                   1 }, /* & */
        { "IMAP-mailbox-name",  { 0x26, 0x42 },             2 }, /* &B */
        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 },       3 }, /* &BB */
        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 },       3 }, /* &AA */

        { "UTF-7",              { 0x2b, 0x42 },             2 }, /* +B */
        { "UTF-8",              { 0xd1 },                   1 },

        { "UTF-16BE",           { 0x4e },                   1 },
        { "UTF-16LE",           { 0x4e },                   1 },
        { "UTF-16",             { 0x4e },                   1 },
        { "UTF-16",             { 0xff },                   1 },
        { "UTF-16",             { 0xfe, 0xff, 0x4e },       3 },

        { "UTF-32BE",           { 0, 0, 0x4e },             3 },
        { "UTF-32LE",           { 0x4e },                   1 },
        { "UTF-32",             { 0, 0, 0x4e },             3 },
        { "UTF-32",             { 0xff },                   1 },
        { "UTF-32",             { 0, 0, 0xfe, 0xff, 0 },    5 },

        { "SCSU",               { 0x0e, 0x4e },             2 }, /* SQU 0x4e */
        { "BOCU-1",             { 0xd5 },                   1 },

        { "Shift-JIS",          { 0xe0 },                   1 },

        { "ibm-939",            { 0x0e, 0x41 },             2 }  /* SO 0x41 */
    };

    const TruncatedCase *const casesEnd=cases+LENGTHOF(cases);
    const TruncatedCase *tc;

    for(tc=cases; tc!=casesEnd; ++tc) {
        doTestTruncated(tc->cnvName, tc->bytes, tc->length);
    }
}
1816
1817 typedef struct NameRange {
1818 const char *name;
1819 UChar32 start, end, start2, end2, notStart, notEnd;
1820 } NameRange;
1821
1822 static void
1823 TestUnicodeSet() {
1824 UErrorCode errorCode;
1825 UConverter *cnv;
1826 USet *set;
1827 const char *name;
1828 int32_t i, count;
1829
1830 static const char *const completeSetNames[]={
1831 "UTF-7",
1832 "UTF-8",
1833 "UTF-16",
1834 "UTF-16BE",
1835 "UTF-16LE",
1836 "UTF-32",
1837 "UTF-32BE",
1838 "UTF-32LE",
1839 "SCSU",
1840 "BOCU-1",
1841 "CESU-8",
1842 "gb18030",
1843 "IMAP-mailbox-name",
1844 "LMBCS-1",
1845 "LMBCS-2",
1846 "LMBCS-3",
1847 "LMBCS-4",
1848 "LMBCS-5",
1849 "LMBCS-6",
1850 "LMBCS-8",
1851 "LMBCS-11",
1852 "LMBCS-16",
1853 "LMBCS-17",
1854 "LMBCS-18",
1855 "LMBCS-19"
1856 };
1857
1858 static const NameRange nameRanges[]={
1859 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1860 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1861 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1862 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1863 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1864 { "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff },
1865 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1866 };
1867
1868 /* open an empty set */
1869 set=uset_open(1, 0);
1870
1871 count=ucnv_countAvailable();
1872 for(i=0; i<count; ++i) {
1873 errorCode=U_ZERO_ERROR;
1874 name=ucnv_getAvailableName(i);
1875 cnv=ucnv_open(name, &errorCode);
1876 if(U_FAILURE(errorCode)) {
1877 log_err("error: unable to open converter %s - %s\n",
1878 name, u_errorName(errorCode));
1879 continue;
1880 }
1881
1882 uset_clear(set);
1883 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1884 if(U_FAILURE(errorCode)) {
1885 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1886 name, u_errorName(errorCode));
1887 } else if(uset_size(set)==0) {
1888 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1889 }
1890
1891 ucnv_close(cnv);
1892 }
1893
1894 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1895 for(i=0; i<LENGTHOF(completeSetNames); ++i) {
1896 errorCode=U_ZERO_ERROR;
1897 name=completeSetNames[i];
1898 cnv=ucnv_open(name, &errorCode);
1899 if(U_FAILURE(errorCode)) {
1900 log_err("error: unable to open converter %s - %s\n",
1901 name, u_errorName(errorCode));
1902 continue;
1903 }
1904
1905 uset_clear(set);
1906 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1907 if(U_FAILURE(errorCode)) {
1908 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1909 name, u_errorName(errorCode));
1910 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
1911 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
1912 }
1913
1914 ucnv_close(cnv);
1915 }
1916
1917 /* test specific sets */
1918 for(i=0; i<LENGTHOF(nameRanges); ++i) {
1919 errorCode=U_ZERO_ERROR;
1920 name=nameRanges[i].name;
1921 cnv=ucnv_open(name, &errorCode);
1922 if(U_FAILURE(errorCode)) {
1923 log_data_err("error: unable to open converter %s - %s\n",
1924 name, u_errorName(errorCode));
1925 continue;
1926 }
1927
1928 uset_clear(set);
1929 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1930 if(U_FAILURE(errorCode)) {
1931 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1932 name, u_errorName(errorCode));
1933 } else if(
1934 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
1935 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
1936 ) {
1937 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
1938 } else if(nameRanges[i].notStart>=0) {
1939 /* simulate containsAny() with the C API */
1940 uset_complement(set);
1941 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
1942 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
1943 }
1944 }
1945
1946 ucnv_close(cnv);
1947 }
1948
1949 uset_close(set);
1950 }