]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /******************************************************************** | |
4 | * COPYRIGHT: | |
5 | * Copyright (c) 1997-2016, International Business Machines Corporation and | |
6 | * others. All Rights Reserved. | |
7 | ********************************************************************/ | |
8 | /******************************************************************************* | |
9 | * | |
10 | * File nucnvtst.c | |
11 | * | |
12 | * Modification History: | |
13 | * Name Description | |
14 | * Steven R. Loomis 7/8/1999 Adding input buffer test | |
15 | ******************************************************************************** | |
16 | */ | |
17 | #include <stdio.h> | |
18 | #include "cstring.h" | |
19 | #include "unicode/uloc.h" | |
20 | #include "unicode/ucnv.h" | |
21 | #include "unicode/ucnv_err.h" | |
22 | #include "unicode/ucnv_cb.h" | |
23 | #include "cintltst.h" | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/ustring.h" | |
26 | #include "unicode/ucol.h" | |
27 | #include "unicode/utf16.h" | |
28 | #include "cmemory.h" | |
29 | #include "nucnvtst.h" | |
30 | ||
31 | static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); | |
32 | static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); | |
33 | #if !UCONFIG_NO_COLLATION | |
34 | static void TestJitterbug981(void); | |
35 | #endif | |
36 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
37 | static void TestJitterbug1293(void); | |
38 | #endif | |
39 | static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; | |
40 | static void TestConverterTypesAndStarters(void); | |
41 | static void TestAmbiguous(void); | |
42 | static void TestSignatureDetection(void); | |
43 | static void TestUTF7(void); | |
44 | static void TestIMAP(void); | |
45 | static void TestUTF8(void); | |
46 | static void TestCESU8(void); | |
47 | static void TestUTF16(void); | |
48 | static void TestUTF16BE(void); | |
49 | static void TestUTF16LE(void); | |
50 | static void TestUTF32(void); | |
51 | static void TestUTF32BE(void); | |
52 | static void TestUTF32LE(void); | |
53 | static void TestLATIN1(void); | |
54 | ||
55 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
56 | static void TestSBCS(void); | |
57 | static void TestDBCS(void); | |
58 | static void TestMBCS(void); | |
59 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
60 | static void TestICCRunout(void); | |
61 | #endif | |
62 | ||
63 | #ifdef U_ENABLE_GENERIC_ISO_2022 | |
64 | static void TestISO_2022(void); | |
65 | #endif | |
66 | ||
67 | static void TestISO_2022_JP(void); | |
68 | static void TestISO_2022_JP_1(void); | |
69 | static void TestISO_2022_JP_2(void); | |
70 | static void TestISO_2022_KR(void); | |
71 | static void TestISO_2022_KR_1(void); | |
72 | static void TestISO_2022_CN(void); | |
73 | #if 0 | |
74 | /* | |
75 | * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
76 | */ | |
77 | static void TestISO_2022_CN_EXT(void); | |
78 | #endif | |
79 | static void TestJIS(void); | |
80 | static void TestHZ(void); | |
81 | #endif | |
82 | ||
83 | static void TestSCSU(void); | |
84 | ||
85 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
86 | static void TestEBCDIC_STATEFUL(void); | |
87 | static void TestGB18030(void); | |
88 | static void TestLMBCS(void); | |
89 | static void TestJitterbug255(void); | |
90 | static void TestEBCDICUS4XML(void); | |
91 | #if 0 | |
92 | /* | |
93 | * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
94 | */ | |
95 | static void TestJitterbug915(void); | |
96 | #endif | |
97 | static void TestISCII(void); | |
98 | ||
99 | static void TestCoverageMBCS(void); | |
100 | static void TestJitterbug2346(void); | |
101 | static void TestJitterbug2411(void); | |
102 | static void TestJB5275(void); | |
103 | static void TestJB5275_1(void); | |
104 | static void TestJitterbug6175(void); | |
105 | ||
106 | static void TestIsFixedWidth(void); | |
107 | #endif | |
108 | ||
109 | static void TestInBufSizes(void); | |
110 | ||
111 | static void TestRoundTrippingAllUTF(void); | |
112 | static void TestConv(const uint16_t in[], | |
113 | int len, | |
114 | const char* conv, | |
115 | const char* lang, | |
116 | char byteArr[], | |
117 | int byteArrLen); | |
118 | ||
119 | /* open a converter, using test data if it begins with '@' */ | |
120 | static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); | |
121 | ||
122 | ||
123 | #define NEW_MAX_BUFFER 999 | |
124 | ||
125 | static int32_t gInBufferSize = NEW_MAX_BUFFER; | |
126 | static int32_t gOutBufferSize = NEW_MAX_BUFFER; | |
127 | static char gNuConvTestName[1024]; | |
128 | ||
/*
 * Minimum of two values (numbers or pointers into the same array).
 * Both arguments and the whole expansion are parenthesized so that arbitrary
 * expressions can be passed. The selected argument is evaluated twice, so do
 * not pass expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
130 | ||
131 | static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) | |
132 | { | |
133 | if(cnv && cnv[0] == '@') { | |
134 | return ucnv_openPackage(loadTestData(err), cnv+1, err); | |
135 | } else { | |
136 | return ucnv_open(cnv, err); | |
137 | } | |
138 | } | |
139 | ||
/* Logs len bytes of a through log_verbose() as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148 | ||
149 | static void printUSeq(const UChar* a, int len) | |
150 | { | |
151 | int i=0; | |
152 | log_verbose("{U+"); | |
153 | while (i<len) log_verbose("0x%04x ", a[i++]); | |
154 | log_verbose("}\n"); | |
155 | } | |
156 | ||
/* Writes len bytes of a to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165 | ||
166 | static void printUSeqErr(const UChar* a, int len) | |
167 | { | |
168 | int i=0; | |
169 | fprintf(stderr, "{U+"); | |
170 | while (i<len) | |
171 | fprintf(stderr, "0x%04x ", a[i++]); | |
172 | fprintf(stderr,"}\n"); | |
173 | } | |
174 | ||
175 | static void | |
176 | TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) | |
177 | { | |
178 | const char* s0; | |
179 | const char* s=(char*)source; | |
180 | const int32_t *r=results; | |
181 | UErrorCode errorCode=U_ZERO_ERROR; | |
182 | UChar32 c; | |
183 | ||
184 | while(s<limit) { | |
185 | s0=s; | |
186 | c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
187 | if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
188 | break; /* no more significant input */ | |
189 | } else if(U_FAILURE(errorCode)) { | |
190 | log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); | |
191 | break; | |
192 | } else if( | |
193 | /* test the expected number of input bytes only if >=0 */ | |
194 | (*r>=0 && (int32_t)(s-s0)!=*r) || | |
195 | c!=*(r+1) | |
196 | ) { | |
197 | log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", | |
198 | message, c, (s-s0), *(r+1), *r); | |
199 | break; | |
200 | } | |
201 | r+=2; | |
202 | } | |
203 | } | |
204 | ||
205 | static void | |
206 | TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) | |
207 | { | |
208 | const char* s=(char*)source; | |
209 | UErrorCode errorCode=U_ZERO_ERROR; | |
210 | uint32_t c; | |
211 | c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
212 | if(errorCode != expected){ | |
213 | log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); | |
214 | } | |
215 | if(c != 0xFFFD && c != 0xffff){ | |
216 | log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); | |
217 | } | |
218 | ||
219 | } | |
220 | ||
221 | static void TestInBufSizes(void) | |
222 | { | |
223 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); | |
224 | #if 1 | |
225 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); | |
226 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); | |
227 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); | |
228 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); | |
229 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); | |
230 | TestNewConvertWithBufferSizes(1,1); | |
231 | TestNewConvertWithBufferSizes(2,3); | |
232 | TestNewConvertWithBufferSizes(3,2); | |
233 | #endif | |
234 | } | |
235 | ||
236 | static void TestOutBufSizes(void) | |
237 | { | |
238 | #if 1 | |
239 | TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); | |
240 | TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); | |
241 | TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); | |
242 | TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); | |
243 | TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); | |
244 | TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); | |
245 | ||
246 | #endif | |
247 | } | |
248 | ||
249 | ||
250 | void addTestNewConvert(TestNode** root) | |
251 | { | |
252 | #if !UCONFIG_NO_FILE_IO | |
253 | addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); | |
254 | addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); | |
255 | #endif | |
256 | addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); | |
257 | addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); | |
258 | addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); | |
259 | addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); | |
260 | addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); | |
261 | addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); | |
262 | ||
263 | /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ | |
264 | addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); | |
265 | addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); | |
266 | addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); | |
267 | addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); | |
268 | addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); | |
269 | addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); | |
270 | addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); | |
271 | ||
272 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
273 | addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); | |
274 | #endif | |
275 | ||
276 | addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); | |
277 | ||
278 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
279 | addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); | |
280 | #if !UCONFIG_NO_FILE_IO | |
281 | addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); | |
282 | addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); | |
283 | #endif | |
284 | addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); | |
285 | ||
286 | #ifdef U_ENABLE_GENERIC_ISO_2022 | |
287 | addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); | |
288 | #endif | |
289 | ||
290 | addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); | |
291 | addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); | |
292 | addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); | |
293 | addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); | |
294 | addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); | |
295 | addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); | |
296 | addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); | |
297 | /* | |
298 | * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
299 | addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); | |
300 | addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); | |
301 | */ | |
302 | addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); | |
303 | #endif | |
304 | ||
305 | addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); | |
306 | ||
307 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
308 | addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); | |
309 | addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); | |
310 | addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); | |
311 | addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); | |
312 | addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); | |
313 | addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); | |
314 | addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); | |
315 | #if !UCONFIG_NO_COLLATION | |
316 | addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); | |
317 | #endif | |
318 | ||
319 | addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); | |
320 | #endif | |
321 | ||
322 | ||
323 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
324 | addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); | |
325 | #endif | |
326 | ||
327 | addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); | |
328 | ||
329 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
330 | addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); | |
331 | addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); | |
332 | addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); | |
333 | ||
334 | addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); | |
335 | #endif | |
336 | } | |
337 | ||
338 | ||
339 | /* Note that this test already makes use of statics, so it's not really | |
340 | multithread safe. | |
341 | This convenience function lets us make the error messages actually useful. | |
342 | */ | |
343 | ||
344 | static void setNuConvTestName(const char *codepage, const char *direction) | |
345 | { | |
346 | sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", | |
347 | codepage, | |
348 | direction, | |
349 | (int)gInBufferSize, | |
350 | (int)gOutBufferSize); | |
351 | } | |
352 | ||
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* test was OK (value 0) */
    TC_OK,
    /* Match failed - err was printed (value 1) */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. (value 2) */
    TC_FAIL
} ETestConvertResult;
359 | ||
360 | /* Note: This function uses global variables and it will not do offset | |
361 | checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
362 | static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, | |
363 | const char *codepage, const int32_t *expectOffsets , UBool useFallback) | |
364 | { | |
365 | UErrorCode status = U_ZERO_ERROR; | |
366 | UConverter *conv = 0; | |
367 | char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
368 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
369 | char *p; | |
370 | const UChar *src; | |
371 | char *end; | |
372 | char *targ; | |
373 | int32_t *offs; | |
374 | int i; | |
375 | int32_t realBufferSize; | |
376 | char *realBufferEnd; | |
377 | const UChar *realSourceEnd; | |
378 | const UChar *sourceLimit; | |
379 | UBool checkOffsets = TRUE; | |
380 | UBool doFlush; | |
381 | ||
382 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
383 | junkout[i] = (char)0xF0; | |
384 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
385 | junokout[i] = 0xFF; | |
386 | ||
387 | setNuConvTestName(codepage, "FROM"); | |
388 | ||
389 | log_verbose("\n========= %s\n", gNuConvTestName); | |
390 | ||
391 | conv = my_ucnv_open(codepage, &status); | |
392 | ||
393 | if(U_FAILURE(status)) | |
394 | { | |
395 | log_data_err("Couldn't open converter %s\n",codepage); | |
396 | return TC_FAIL; | |
397 | } | |
398 | if(useFallback){ | |
399 | ucnv_setFallback(conv,useFallback); | |
400 | } | |
401 | ||
402 | log_verbose("Converter opened..\n"); | |
403 | ||
404 | src = source; | |
405 | targ = junkout; | |
406 | offs = junokout; | |
407 | ||
408 | realBufferSize = UPRV_LENGTHOF(junkout); | |
409 | realBufferEnd = junkout + realBufferSize; | |
410 | realSourceEnd = source + sourceLen; | |
411 | ||
412 | if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
413 | checkOffsets = FALSE; | |
414 | ||
415 | do | |
416 | { | |
417 | end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
418 | sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
419 | ||
420 | doFlush = (UBool)(sourceLimit == realSourceEnd); | |
421 | ||
422 | if(targ == realBufferEnd) { | |
423 | log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); | |
424 | return TC_FAIL; | |
425 | } | |
426 | log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
427 | ||
428 | ||
429 | status = U_ZERO_ERROR; | |
430 | ||
431 | ucnv_fromUnicode (conv, | |
432 | &targ, | |
433 | end, | |
434 | &src, | |
435 | sourceLimit, | |
436 | checkOffsets ? offs : NULL, | |
437 | doFlush, /* flush if we're at the end of the input data */ | |
438 | &status); | |
439 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); | |
440 | ||
441 | if(U_FAILURE(status)) { | |
442 | log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); | |
443 | return TC_FAIL; | |
444 | } | |
445 | ||
446 | log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
447 | sourceLen, targ-junkout); | |
448 | ||
449 | if(getTestOption(VERBOSITY_OPTION)) | |
450 | { | |
451 | char junk[9999]; | |
452 | char offset_str[9999]; | |
453 | char *ptr; | |
454 | ||
455 | junk[0] = 0; | |
456 | offset_str[0] = 0; | |
457 | for(ptr = junkout;ptr<targ;ptr++) { | |
458 | sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); | |
459 | sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); | |
460 | } | |
461 | ||
462 | log_verbose(junk); | |
463 | printSeq((const uint8_t *)expect, expectLen); | |
464 | if ( checkOffsets ) { | |
465 | log_verbose("\nOffsets:"); | |
466 | log_verbose(offset_str); | |
467 | } | |
468 | log_verbose("\n"); | |
469 | } | |
470 | ucnv_close(conv); | |
471 | ||
472 | if(expectLen != targ-junkout) { | |
473 | log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
474 | log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); | |
475 | fprintf(stderr, "Got:\n"); | |
476 | printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
477 | fprintf(stderr, "Expected:\n"); | |
478 | printSeqErr((const unsigned char*)expect, expectLen); | |
479 | return TC_MISMATCH; | |
480 | } | |
481 | ||
482 | if (checkOffsets && (expectOffsets != 0) ) { | |
483 | log_verbose("comparing %d offsets..\n", targ-junkout); | |
484 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
485 | log_err("did not get the expected offsets. %s\n", gNuConvTestName); | |
486 | printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
487 | log_err("\n"); | |
488 | log_err("Got : "); | |
489 | for(p=junkout;p<targ;p++) { | |
490 | log_err("%d,", junokout[p-junkout]); | |
491 | } | |
492 | log_err("\n"); | |
493 | log_err("Expected: "); | |
494 | for(i=0; i<(targ-junkout); i++) { | |
495 | log_err("%d,", expectOffsets[i]); | |
496 | } | |
497 | log_err("\n"); | |
498 | } | |
499 | } | |
500 | ||
501 | log_verbose("comparing..\n"); | |
502 | if(!memcmp(junkout, expect, expectLen)) { | |
503 | log_verbose("Matches!\n"); | |
504 | return TC_OK; | |
505 | } else { | |
506 | log_err("String does not match u->%s\n", gNuConvTestName); | |
507 | printUSeqErr(source, sourceLen); | |
508 | fprintf(stderr, "Got:\n"); | |
509 | printSeqErr((const unsigned char *)junkout, expectLen); | |
510 | fprintf(stderr, "Expected:\n"); | |
511 | printSeqErr((const unsigned char *)expect, expectLen); | |
512 | ||
513 | return TC_MISMATCH; | |
514 | } | |
515 | } | |
516 | ||
517 | /* Note: This function uses global variables and it will not do offset | |
518 | checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
519 | static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, | |
520 | const char *codepage, const int32_t *expectOffsets, UBool useFallback) | |
521 | { | |
522 | UErrorCode status = U_ZERO_ERROR; | |
523 | UConverter *conv = 0; | |
524 | UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
525 | int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
526 | const char *src; | |
527 | const char *realSourceEnd; | |
528 | const char *srcLimit; | |
529 | UChar *p; | |
530 | UChar *targ; | |
531 | UChar *end; | |
532 | int32_t *offs; | |
533 | int i; | |
534 | UBool checkOffsets = TRUE; | |
535 | ||
536 | int32_t realBufferSize; | |
537 | UChar *realBufferEnd; | |
538 | ||
539 | ||
540 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
541 | junkout[i] = 0xFFFE; | |
542 | ||
543 | for(i=0;i<NEW_MAX_BUFFER;i++) | |
544 | junokout[i] = -1; | |
545 | ||
546 | setNuConvTestName(codepage, "TO"); | |
547 | ||
548 | log_verbose("\n========= %s\n", gNuConvTestName); | |
549 | ||
550 | conv = my_ucnv_open(codepage, &status); | |
551 | ||
552 | if(U_FAILURE(status)) | |
553 | { | |
554 | log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
555 | return TC_FAIL; | |
556 | } | |
557 | if(useFallback){ | |
558 | ucnv_setFallback(conv,useFallback); | |
559 | } | |
560 | log_verbose("Converter opened..\n"); | |
561 | ||
562 | src = (const char *)source; | |
563 | targ = junkout; | |
564 | offs = junokout; | |
565 | ||
566 | realBufferSize = UPRV_LENGTHOF(junkout); | |
567 | realBufferEnd = junkout + realBufferSize; | |
568 | realSourceEnd = src + sourcelen; | |
569 | ||
570 | if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
571 | checkOffsets = FALSE; | |
572 | ||
573 | do | |
574 | { | |
575 | end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
576 | srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
577 | ||
578 | if(targ == realBufferEnd) | |
579 | { | |
580 | log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); | |
581 | return TC_FAIL; | |
582 | } | |
583 | log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
584 | ||
585 | /* oldTarg = targ; */ | |
586 | ||
587 | status = U_ZERO_ERROR; | |
588 | ||
589 | ucnv_toUnicode (conv, | |
590 | &targ, | |
591 | end, | |
592 | &src, | |
593 | srcLimit, | |
594 | checkOffsets ? offs : NULL, | |
595 | (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ | |
596 | &status); | |
597 | ||
598 | /* offs += (targ-oldTarg); */ | |
599 | ||
600 | } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
601 | ||
602 | if(U_FAILURE(status)) | |
603 | { | |
604 | log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); | |
605 | return TC_FAIL; | |
606 | } | |
607 | ||
608 | log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
609 | sourcelen, targ-junkout); | |
610 | if(getTestOption(VERBOSITY_OPTION)) | |
611 | { | |
612 | char junk[9999]; | |
613 | char offset_str[9999]; | |
614 | UChar *ptr; | |
615 | ||
616 | junk[0] = 0; | |
617 | offset_str[0] = 0; | |
618 | ||
619 | for(ptr = junkout;ptr<targ;ptr++) | |
620 | { | |
621 | sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); | |
622 | sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); | |
623 | } | |
624 | ||
625 | log_verbose(junk); | |
626 | printUSeq(expect, expectlen); | |
627 | if ( checkOffsets ) | |
628 | { | |
629 | log_verbose("\nOffsets:"); | |
630 | log_verbose(offset_str); | |
631 | } | |
632 | log_verbose("\n"); | |
633 | } | |
634 | ucnv_close(conv); | |
635 | ||
636 | log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
637 | ||
638 | if (checkOffsets && (expectOffsets != 0)) | |
639 | { | |
640 | if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ | |
641 | log_err("did not get the expected offsets. %s\n",gNuConvTestName); | |
642 | log_err("Got: "); | |
643 | for(p=junkout;p<targ;p++) { | |
644 | log_err("%d,", junokout[p-junkout]); | |
645 | } | |
646 | log_err("\n"); | |
647 | log_err("Expected: "); | |
648 | for(i=0; i<(targ-junkout); i++) { | |
649 | log_err("%d,", expectOffsets[i]); | |
650 | } | |
651 | log_err("\n"); | |
652 | log_err("output: "); | |
653 | for(i=0; i<(targ-junkout); i++) { | |
654 | log_err("%X,", junkout[i]); | |
655 | } | |
656 | log_err("\n"); | |
657 | log_err("input: "); | |
658 | for(i=0; i<(src-(const char *)source); i++) { | |
659 | log_err("%X,", (unsigned char)source[i]); | |
660 | } | |
661 | log_err("\n"); | |
662 | } | |
663 | } | |
664 | ||
665 | if(!memcmp(junkout, expect, expectlen*2)) | |
666 | { | |
667 | log_verbose("Matches!\n"); | |
668 | return TC_OK; | |
669 | } | |
670 | else | |
671 | { | |
672 | log_err("String does not match. %s\n", gNuConvTestName); | |
673 | log_verbose("String does not match. %s\n", gNuConvTestName); | |
674 | printf("\nGot:"); | |
675 | printUSeqErr(junkout, expectlen); | |
676 | printf("\nExpected:"); | |
677 | printUSeqErr(expect, expectlen); | |
678 | return TC_MISMATCH; | |
679 | } | |
680 | } | |
681 | ||
682 | ||
683 | static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) | |
684 | { | |
685 | /** test chars #1 */ | |
686 | /* 1 2 3 1Han 2Han 3Han . */ | |
687 | static const UChar sampleText[] = | |
688 | { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; | |
689 | static const UChar sampleTextRoundTripUnmappable[] = | |
690 | { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; | |
691 | ||
692 | ||
693 | static const uint8_t expectedUTF8[] = | |
694 | { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; | |
695 | static const int32_t toUTF8Offs[] = | |
696 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
697 | static const int32_t fmUTF8Offs[] = | |
698 | { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; | |
699 | ||
700 | #ifdef U_ENABLE_GENERIC_ISO_2022 | |
701 | /* Same as UTF8, but with ^[%B preceeding */ | |
702 | static const const uint8_t expectedISO2022[] = | |
703 | { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; | |
704 | static const int32_t toISO2022Offs[] = | |
705 | { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, | |
706 | 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ | |
707 | static const int32_t fmISO2022Offs[] = | |
708 | { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ | |
709 | #endif | |
710 | ||
711 | /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ | |
712 | static const uint8_t expectedIBM930[] = | |
713 | { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; | |
714 | static const int32_t toIBM930Offs[] = | |
715 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; | |
716 | static const int32_t fmIBM930Offs[] = | |
717 | { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; | |
718 | ||
719 | /* 1 2 3 0 h1 h2 h3 . MBCS*/ | |
720 | static const uint8_t expectedIBM943[] = | |
721 | { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; | |
722 | static const int32_t toIBM943Offs [] = | |
723 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; | |
724 | static const int32_t fmIBM943Offs[] = | |
725 | { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; | |
726 | ||
727 | /* 1 2 3 0 h1 h2 h3 . DBCS*/ | |
728 | static const uint8_t expectedIBM9027[] = | |
729 | { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; | |
730 | static const int32_t toIBM9027Offs [] = | |
731 | { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; | |
732 | ||
733 | /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
734 | static const uint8_t expectedIBM920[] = | |
735 | { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; | |
736 | static const int32_t toIBM920Offs [] = | |
737 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
738 | ||
739 | /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
740 | static const uint8_t expectedISO88593[] = | |
741 | { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
742 | static const int32_t toISO88593Offs[] = | |
743 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
744 | ||
745 | /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ | |
746 | static const uint8_t expectedLATIN1[] = | |
747 | { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
748 | static const int32_t toLATIN1Offs[] = | |
749 | { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
750 | ||
751 | ||
752 | /* etc */ | |
753 | static const uint8_t expectedUTF16BE[] = | |
754 | { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; | |
755 | static const int32_t toUTF16BEOffs[]= | |
756 | { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
757 | static const int32_t fmUTF16BEOffs[] = | |
758 | { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; | |
759 | ||
760 | static const uint8_t expectedUTF16LE[] = | |
761 | { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; | |
762 | static const int32_t toUTF16LEOffs[]= | |
763 | { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
764 | static const int32_t fmUTF16LEOffs[] = | |
765 | { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; | |
766 | ||
767 | static const uint8_t expectedUTF32BE[] = | |
768 | { 0x00, 0x00, 0x00, 0x31, | |
769 | 0x00, 0x00, 0x00, 0x32, | |
770 | 0x00, 0x00, 0x00, 0x33, | |
771 | 0x00, 0x00, 0x00, 0x00, | |
772 | 0x00, 0x00, 0x4e, 0x00, | |
773 | 0x00, 0x00, 0x4e, 0x8c, | |
774 | 0x00, 0x00, 0x4e, 0x09, | |
775 | 0x00, 0x00, 0x00, 0x2e, | |
776 | 0x00, 0x02, 0x00, 0x21 }; | |
777 | static const int32_t toUTF32BEOffs[]= | |
778 | { 0x00, 0x00, 0x00, 0x00, | |
779 | 0x01, 0x01, 0x01, 0x01, | |
780 | 0x02, 0x02, 0x02, 0x02, | |
781 | 0x03, 0x03, 0x03, 0x03, | |
782 | 0x04, 0x04, 0x04, 0x04, | |
783 | 0x05, 0x05, 0x05, 0x05, | |
784 | 0x06, 0x06, 0x06, 0x06, | |
785 | 0x07, 0x07, 0x07, 0x07, | |
786 | 0x08, 0x08, 0x08, 0x08, | |
787 | 0x08, 0x08, 0x08, 0x08 }; | |
788 | static const int32_t fmUTF32BEOffs[] = | |
789 | { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; | |
790 | ||
791 | static const uint8_t expectedUTF32LE[] = | |
792 | { 0x31, 0x00, 0x00, 0x00, | |
793 | 0x32, 0x00, 0x00, 0x00, | |
794 | 0x33, 0x00, 0x00, 0x00, | |
795 | 0x00, 0x00, 0x00, 0x00, | |
796 | 0x00, 0x4e, 0x00, 0x00, | |
797 | 0x8c, 0x4e, 0x00, 0x00, | |
798 | 0x09, 0x4e, 0x00, 0x00, | |
799 | 0x2e, 0x00, 0x00, 0x00, | |
800 | 0x21, 0x00, 0x02, 0x00 }; | |
801 | static const int32_t toUTF32LEOffs[]= | |
802 | { 0x00, 0x00, 0x00, 0x00, | |
803 | 0x01, 0x01, 0x01, 0x01, | |
804 | 0x02, 0x02, 0x02, 0x02, | |
805 | 0x03, 0x03, 0x03, 0x03, | |
806 | 0x04, 0x04, 0x04, 0x04, | |
807 | 0x05, 0x05, 0x05, 0x05, | |
808 | 0x06, 0x06, 0x06, 0x06, | |
809 | 0x07, 0x07, 0x07, 0x07, | |
810 | 0x08, 0x08, 0x08, 0x08, | |
811 | 0x08, 0x08, 0x08, 0x08 }; | |
812 | static const int32_t fmUTF32LEOffs[] = | |
813 | { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; | |
814 | ||
815 | ||
816 | ||
817 | ||
818 | /** Test chars #2 **/ | |
819 | ||
820 | /* Sahha [health], slashed h's */ | |
821 | static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; | |
822 | static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; | |
823 | ||
824 | /* LMBCS */ | |
825 | static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; | |
826 | static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; | |
827 | static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; | |
828 | static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; | |
829 | /*********************************** START OF CODE finally *************/ | |
830 | ||
831 | gInBufferSize = insize; | |
832 | gOutBufferSize = outsize; | |
833 | ||
834 | log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); | |
835 | ||
836 | ||
837 | /*UTF-8*/ | |
838 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
839 | expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); | |
840 | ||
841 | log_verbose("Test surrogate behaviour for UTF8\n"); | |
842 | { | |
843 | static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; | |
844 | static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, | |
845 | 0xf0, 0x90, 0x90, 0x81, | |
846 | 0xef, 0xbf, 0xbd | |
847 | }; | |
848 | static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; | |
849 | testConvertFromU(testinput, UPRV_LENGTHOF(testinput), | |
850 | expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); | |
851 | ||
852 | ||
853 | } | |
854 | ||
855 | #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
856 | /*ISO-2022*/ | |
857 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
858 | expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); | |
859 | #endif | |
860 | ||
861 | /*UTF16 LE*/ | |
862 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
863 | expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); | |
864 | /*UTF16 BE*/ | |
865 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
866 | expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); | |
867 | /*UTF32 LE*/ | |
868 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
869 | expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); | |
870 | /*UTF32 BE*/ | |
871 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
872 | expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); | |
873 | ||
874 | /*LATIN_1*/ | |
875 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
876 | expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); | |
877 | ||
878 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
879 | /*EBCDIC_STATEFUL*/ | |
880 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
881 | expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); | |
882 | ||
883 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
884 | expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); | |
885 | ||
886 | /*MBCS*/ | |
887 | ||
888 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
889 | expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); | |
890 | /*DBCS*/ | |
891 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
892 | expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); | |
893 | /*SBCS*/ | |
894 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
895 | expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); | |
896 | /*SBCS*/ | |
897 | testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), | |
898 | expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); | |
899 | #endif | |
900 | ||
901 | ||
902 | /****/ | |
903 | ||
904 | /*UTF-8*/ | |
905 | testConvertToU(expectedUTF8, sizeof(expectedUTF8), | |
906 | sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE); | |
907 | #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
908 | /*ISO-2022*/ | |
909 | testConvertToU(expectedISO2022, sizeof(expectedISO2022), | |
910 | sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE); | |
911 | #endif | |
912 | ||
913 | /*UTF16 LE*/ | |
914 | testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
915 | sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); | |
916 | /*UTF16 BE*/ | |
917 | testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), | |
918 | sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE); | |
919 | /*UTF32 LE*/ | |
920 | testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), | |
921 | sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE); | |
922 | /*UTF32 BE*/ | |
923 | testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), | |
924 | sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE); | |
925 | ||
926 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
927 | /*EBCDIC_STATEFUL*/ | |
928 | testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, | |
929 | UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE); | |
930 | /*MBCS*/ | |
931 | testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, | |
932 | UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE); | |
933 | #endif | |
934 | ||
935 | /* Try it again to make sure it still works */ | |
936 | testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
937 | sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); | |
938 | ||
939 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
940 | testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), | |
941 | malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE); | |
942 | ||
943 | testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars), | |
944 | expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); | |
945 | ||
946 | /*LMBCS*/ | |
947 | testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), | |
948 | expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); | |
949 | testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), | |
950 | LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE); | |
951 | #endif | |
952 | ||
953 | /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ | |
954 | { | |
955 | /* encode directly set D and set O */ | |
956 | static const uint8_t utf7[] = { | |
957 | /* | |
958 | Hi Mom -+Jjo--! | |
959 | A+ImIDkQ. | |
960 | +- | |
961 | +ZeVnLIqe- | |
962 | */ | |
963 | 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, | |
964 | 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
965 | 0x2b, 0x2d, | |
966 | 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
967 | }; | |
968 | static const UChar unicode[] = { | |
969 | /* | |
970 | Hi Mom -<WHITE SMILING FACE>-! | |
971 | A<NOT IDENTICAL TO><ALPHA>. | |
972 | + | |
973 | [Japanese word "nihongo"] | |
974 | */ | |
975 | 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
976 | 0x41, 0x2262, 0x0391, 0x2e, | |
977 | 0x2b, | |
978 | 0x65e5, 0x672c, 0x8a9e | |
979 | }; | |
980 | static const int32_t toUnicodeOffsets[] = { | |
981 | 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
982 | 15, 17, 19, 23, | |
983 | 24, | |
984 | 27, 29, 32 | |
985 | }; | |
986 | static const int32_t fromUnicodeOffsets[] = { | |
987 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
988 | 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
989 | 15, 15, | |
990 | 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
991 | }; | |
992 | ||
993 | /* same but escaping set O (the exclamation mark) */ | |
994 | static const uint8_t utf7Restricted[] = { | |
995 | /* | |
996 | Hi Mom -+Jjo--+ACE- | |
997 | A+ImIDkQ. | |
998 | +- | |
999 | +ZeVnLIqe- | |
1000 | */ | |
1001 | 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, | |
1002 | 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
1003 | 0x2b, 0x2d, | |
1004 | 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
1005 | }; | |
1006 | static const int32_t toUnicodeOffsetsR[] = { | |
1007 | 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, | |
1008 | 19, 21, 23, 27, | |
1009 | 28, | |
1010 | 31, 33, 36 | |
1011 | }; | |
1012 | static const int32_t fromUnicodeOffsetsR[] = { | |
1013 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, | |
1014 | 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
1015 | 15, 15, | |
1016 | 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
1017 | }; | |
1018 | ||
1019 | testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); | |
1020 | ||
1021 | testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE); | |
1022 | ||
1023 | testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); | |
1024 | ||
1025 | testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE); | |
1026 | } | |
1027 | ||
1028 | /* | |
1029 | * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, | |
1030 | * modified according to RFC 2060, | |
1031 | * and supplemented with the one example in RFC 2060 itself. | |
1032 | */ | |
1033 | { | |
1034 | static const uint8_t imap[] = { | |
1035 | /* Hi Mom -&Jjo--! | |
1036 | A&ImIDkQ-. | |
1037 | &- | |
1038 | &ZeVnLIqe- | |
1039 | \ | |
1040 | ~peter | |
1041 | ||
1042 | /&ZeVnLIqe- | |
1043 | /&U,BTFw- | |
1044 | */ | |
1045 | 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, | |
1046 | 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, | |
1047 | 0x26, 0x2d, | |
1048 | 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
1049 | 0x5c, | |
1050 | 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
1051 | 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
1052 | 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
1053 | 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d | |
1054 | }; | |
1055 | static const UChar unicode[] = { | |
1056 | /* Hi Mom -<WHITE SMILING FACE>-! | |
1057 | A<NOT IDENTICAL TO><ALPHA>. | |
1058 | & | |
1059 | [Japanese word "nihongo"] | |
1060 | \ | |
1061 | ~peter | |
1062 | ||
1063 | /<65e5, 672c, 8a9e> | |
1064 | /<53f0, 5317> | |
1065 | */ | |
1066 | 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
1067 | 0x41, 0x2262, 0x0391, 0x2e, | |
1068 | 0x26, | |
1069 | 0x65e5, 0x672c, 0x8a9e, | |
1070 | 0x5c, | |
1071 | 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
1072 | 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
1073 | 0x2f, 0x65e5, 0x672c, 0x8a9e, | |
1074 | 0x2f, 0x53f0, 0x5317 | |
1075 | }; | |
1076 | static const int32_t toUnicodeOffsets[] = { | |
1077 | 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
1078 | 15, 17, 19, 24, | |
1079 | 25, | |
1080 | 28, 30, 33, | |
1081 | 37, | |
1082 | 38, 39, 40, 41, 42, 43, | |
1083 | 44, 45, 46, 47, 48, | |
1084 | 49, 51, 53, 56, | |
1085 | 60, 62, 64 | |
1086 | }; | |
1087 | static const int32_t fromUnicodeOffsets[] = { | |
1088 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
1089 | 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, | |
1090 | 15, 15, | |
1091 | 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, | |
1092 | 19, | |
1093 | 20, 21, 22, 23, 24, 25, | |
1094 | 26, 27, 28, 29, 30, | |
1095 | 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, | |
1096 | 35, 36, 36, 36, 37, 37, 37, 37, 37 | |
1097 | }; | |
1098 | ||
1099 | testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); | |
1100 | ||
1101 | testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE); | |
1102 | } | |
1103 | ||
1104 | /* Test UTF-8 bad data handling*/ | |
1105 | { | |
1106 | static const uint8_t utf8[]={ | |
1107 | 0x61, | |
1108 | 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1109 | 0x00, | |
1110 | 0x62, | |
1111 | 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1112 | 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1113 | 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ | |
1114 | 0xdf, 0xbf, /* 7ff */ | |
1115 | 0xbf, /* truncated tail */ | |
1116 | 0xf4, 0x90, 0x80, 0x80, /* 110000 */ | |
1117 | 0x02 | |
1118 | }; | |
1119 | ||
1120 | static const uint16_t utf8Expected[]={ | |
1121 | 0x0061, | |
1122 | 0xfffd, 0xfffd, 0xfffd, 0xfffd, | |
1123 | 0x0000, | |
1124 | 0x0062, | |
1125 | 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, | |
1126 | 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, | |
1127 | 0xdbff, 0xdfff, | |
1128 | 0x07ff, | |
1129 | 0xfffd, | |
1130 | 0xfffd, 0xfffd, 0xfffd, 0xfffd, | |
1131 | 0x0002 | |
1132 | }; | |
1133 | ||
1134 | static const int32_t utf8Offsets[]={ | |
1135 | 0, | |
1136 | 1, 2, 3, 4, | |
1137 | 5, | |
1138 | 6, | |
1139 | 7, 8, 9, 10, 11, | |
1140 | 12, 13, 14, 15, 16, | |
1141 | 17, 17, | |
1142 | 21, | |
1143 | 23, | |
1144 | 24, 25, 26, 27, | |
1145 | 28 | |
1146 | }; | |
1147 | testConvertToU(utf8, sizeof(utf8), | |
1148 | utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE); | |
1149 | ||
1150 | } | |
1151 | ||
1152 | /* Test UTF-32BE bad data handling*/ | |
1153 | { | |
1154 | static const uint8_t utf32[]={ | |
1155 | 0x00, 0x00, 0x00, 0x61, | |
1156 | 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
1157 | 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
1158 | 0x00, 0x00, 0x00, 0x62, | |
1159 | 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
1160 | 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
1161 | 0x00, 0x00, 0x01, 0x62, | |
1162 | 0x00, 0x00, 0x02, 0x62 | |
1163 | }; | |
1164 | static const uint16_t utf32Expected[]={ | |
1165 | 0x0061, | |
1166 | 0xfffd, /* 0x110000 out of range */ | |
1167 | 0xDBFF, /* 0x10FFFF in range */ | |
1168 | 0xDFFF, | |
1169 | 0x0062, | |
1170 | 0xfffd, /* 0xffffffff out of range */ | |
1171 | 0xfffd, /* 0x7fffffff out of range */ | |
1172 | 0x0162, | |
1173 | 0x0262 | |
1174 | }; | |
1175 | static const int32_t utf32Offsets[]={ | |
1176 | 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
1177 | }; | |
1178 | static const uint8_t utf32ExpectedBack[]={ | |
1179 | 0x00, 0x00, 0x00, 0x61, | |
1180 | 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ | |
1181 | 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
1182 | 0x00, 0x00, 0x00, 0x62, | |
1183 | 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ | |
1184 | 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ | |
1185 | 0x00, 0x00, 0x01, 0x62, | |
1186 | 0x00, 0x00, 0x02, 0x62 | |
1187 | }; | |
1188 | static const int32_t utf32OffsetsBack[]={ | |
1189 | 0,0,0,0, | |
1190 | 1,1,1,1, | |
1191 | 2,2,2,2, | |
1192 | 4,4,4,4, | |
1193 | 5,5,5,5, | |
1194 | 6,6,6,6, | |
1195 | 7,7,7,7, | |
1196 | 8,8,8,8 | |
1197 | }; | |
1198 | ||
1199 | testConvertToU(utf32, sizeof(utf32), | |
1200 | utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE); | |
1201 | testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), | |
1202 | utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); | |
1203 | } | |
1204 | ||
1205 | /* Test UTF-32LE bad data handling*/ | |
1206 | { | |
1207 | static const uint8_t utf32[]={ | |
1208 | 0x61, 0x00, 0x00, 0x00, | |
1209 | 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
1210 | 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
1211 | 0x62, 0x00, 0x00, 0x00, | |
1212 | 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
1213 | 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
1214 | 0x62, 0x01, 0x00, 0x00, | |
1215 | 0x62, 0x02, 0x00, 0x00, | |
1216 | }; | |
1217 | ||
1218 | static const uint16_t utf32Expected[]={ | |
1219 | 0x0061, | |
1220 | 0xfffd, /* 0x110000 out of range */ | |
1221 | 0xDBFF, /* 0x10FFFF in range */ | |
1222 | 0xDFFF, | |
1223 | 0x0062, | |
1224 | 0xfffd, /* 0xffffffff out of range */ | |
1225 | 0xfffd, /* 0x7fffffff out of range */ | |
1226 | 0x0162, | |
1227 | 0x0262 | |
1228 | }; | |
1229 | static const int32_t utf32Offsets[]={ | |
1230 | 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
1231 | }; | |
1232 | static const uint8_t utf32ExpectedBack[]={ | |
1233 | 0x61, 0x00, 0x00, 0x00, | |
1234 | 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ | |
1235 | 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
1236 | 0x62, 0x00, 0x00, 0x00, | |
1237 | 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ | |
1238 | 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ | |
1239 | 0x62, 0x01, 0x00, 0x00, | |
1240 | 0x62, 0x02, 0x00, 0x00 | |
1241 | }; | |
1242 | static const int32_t utf32OffsetsBack[]={ | |
1243 | 0,0,0,0, | |
1244 | 1,1,1,1, | |
1245 | 2,2,2,2, | |
1246 | 4,4,4,4, | |
1247 | 5,5,5,5, | |
1248 | 6,6,6,6, | |
1249 | 7,7,7,7, | |
1250 | 8,8,8,8 | |
1251 | }; | |
1252 | testConvertToU(utf32, sizeof(utf32), | |
1253 | utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE ); | |
1254 | testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), | |
1255 | utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); | |
1256 | } | |
1257 | } | |
1258 | ||
1259 | static void TestCoverageMBCS(){ | |
1260 | #if 0 | |
1261 | UErrorCode status = U_ZERO_ERROR; | |
1262 | const char *directory = loadTestData(&status); | |
1263 | char* tdpath = NULL; | |
1264 | char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); | |
1265 | int len = strlen(directory); | |
1266 | char* index=NULL; | |
1267 | ||
1268 | tdpath = (char*) malloc(sizeof(char) * (len * 2)); | |
1269 | uprv_strcpy(saveDirectory,u_getDataDirectory()); | |
1270 | log_verbose("Retrieved data directory %s \n",saveDirectory); | |
1271 | uprv_strcpy(tdpath,directory); | |
1272 | index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); | |
1273 | ||
1274 | if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ | |
1275 | *(index+1)=0; | |
1276 | } | |
1277 | u_setDataDirectory(tdpath); | |
1278 | log_verbose("ICU data directory is set to: %s \n" ,tdpath); | |
1279 | #endif | |
1280 | ||
1281 | /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm | |
1282 | which is test file for MBCS conversion with single-byte codepage data.*/ | |
1283 | { | |
1284 | ||
1285 | /* MBCS with single byte codepage data test1.ucm*/ | |
1286 | const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; | |
1287 | const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; | |
1288 | int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; | |
1289 | ||
1290 | /*from Unicode*/ | |
1291 | testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), | |
1292 | expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); | |
1293 | } | |
1294 | ||
1295 | /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm | |
1296 | which is test file for MBCS conversion with three-byte codepage data.*/ | |
1297 | { | |
1298 | ||
1299 | /* MBCS with three byte codepage data test3.ucm*/ | |
1300 | const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
1301 | const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; | |
1302 | int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; | |
1303 | ||
1304 | const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; | |
1305 | const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
1306 | int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; | |
1307 | ||
1308 | /*from Unicode*/ | |
1309 | testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), | |
1310 | expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); | |
1311 | ||
1312 | /*to Unicode*/ | |
1313 | testConvertToU(test3input, sizeof(test3input), | |
1314 | expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE); | |
1315 | ||
1316 | } | |
1317 | ||
1318 | /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm | |
1319 | which is test file for MBCS conversion with four-byte codepage data.*/ | |
1320 | { | |
1321 | ||
1322 | /* MBCS with three byte codepage data test4.ucm*/ | |
1323 | static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
1324 | static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; | |
1325 | static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; | |
1326 | ||
1327 | static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; | |
1328 | static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
1329 | static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; | |
1330 | ||
1331 | /*from Unicode*/ | |
1332 | testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), | |
1333 | expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); | |
1334 | ||
1335 | /*to Unicode*/ | |
1336 | testConvertToU(test4input, sizeof(test4input), | |
1337 | expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE ); | |
1338 | ||
1339 | } | |
1340 | #if 0 | |
1341 | free(tdpath); | |
1342 | /* restore the original data directory */ | |
1343 | log_verbose("Setting the data directory to %s \n", saveDirectory); | |
1344 | u_setDataDirectory(saveDirectory); | |
1345 | free(saveDirectory); | |
1346 | #endif | |
1347 | ||
1348 | } | |
1349 | ||
1350 | static void TestConverterType(const char *convName, UConverterType convType) { | |
1351 | UConverter* myConverter; | |
1352 | UErrorCode err = U_ZERO_ERROR; | |
1353 | ||
1354 | myConverter = my_ucnv_open(convName, &err); | |
1355 | ||
1356 | if (U_FAILURE(err)) { | |
1357 | log_data_err("Failed to create an %s converter\n", convName); | |
1358 | return; | |
1359 | } | |
1360 | else | |
1361 | { | |
1362 | if (ucnv_getType(myConverter)!=convType) { | |
1363 | log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", | |
1364 | convName, convType); | |
1365 | } | |
1366 | else { | |
1367 | log_verbose("ucnv_getType %s ok\n", convName); | |
1368 | } | |
1369 | } | |
1370 | ucnv_close(myConverter); | |
1371 | } | |
1372 | ||
1373 | static void TestConverterTypesAndStarters() | |
1374 | { | |
1375 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1376 | UConverter* myConverter; | |
1377 | UErrorCode err = U_ZERO_ERROR; | |
1378 | UBool mystarters[256]; | |
1379 | ||
1380 | /* const UBool expectedKSCstarters[256] = { | |
1381 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1382 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1383 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1384 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1385 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1386 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1387 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1388 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1389 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1390 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1391 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1392 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1393 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1394 | FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1395 | FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1396 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1397 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1398 | TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1399 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1400 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1401 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1402 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1403 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1404 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1405 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1406 | TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ | |
1407 | ||
1408 | ||
1409 | log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); | |
1410 | ||
1411 | myConverter = ucnv_open("ksc", &err); | |
1412 | if (U_FAILURE(err)) { | |
1413 | log_data_err("Failed to create an ibm-ksc converter\n"); | |
1414 | return; | |
1415 | } | |
1416 | else | |
1417 | { | |
1418 | if (ucnv_getType(myConverter)!=UCNV_MBCS) | |
1419 | log_err("ucnv_getType Failed for ibm-949\n"); | |
1420 | else | |
1421 | log_verbose("ucnv_getType ibm-949 ok\n"); | |
1422 | ||
1423 | if(myConverter!=NULL) | |
1424 | ucnv_getStarters(myConverter, mystarters, &err); | |
1425 | ||
1426 | /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) | |
1427 | log_err("Failed ucnv_getStarters for ksc\n"); | |
1428 | else | |
1429 | log_verbose("ucnv_getStarters ok\n");*/ | |
1430 | ||
1431 | } | |
1432 | ucnv_close(myConverter); | |
1433 | ||
1434 | TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); | |
1435 | TestConverterType("ibm-878", UCNV_SBCS); | |
1436 | #endif | |
1437 | ||
1438 | TestConverterType("iso-8859-1", UCNV_LATIN_1); | |
1439 | ||
1440 | TestConverterType("ibm-1208", UCNV_UTF8); | |
1441 | ||
1442 | TestConverterType("utf-8", UCNV_UTF8); | |
1443 | TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); | |
1444 | TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); | |
1445 | TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); | |
1446 | TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); | |
1447 | ||
1448 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1449 | ||
1450 | #if defined(U_ENABLE_GENERIC_ISO_2022) | |
1451 | TestConverterType("iso-2022", UCNV_ISO_2022); | |
1452 | #endif | |
1453 | ||
1454 | TestConverterType("hz", UCNV_HZ); | |
1455 | #endif | |
1456 | ||
1457 | TestConverterType("scsu", UCNV_SCSU); | |
1458 | ||
1459 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1460 | TestConverterType("x-iscii-de", UCNV_ISCII); | |
1461 | #endif | |
1462 | ||
1463 | TestConverterType("ascii", UCNV_US_ASCII); | |
1464 | TestConverterType("utf-7", UCNV_UTF7); | |
1465 | TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); | |
1466 | TestConverterType("bocu-1", UCNV_BOCU1); | |
1467 | } | |
1468 | ||
1469 | static void | |
1470 | TestAmbiguousConverter(UConverter *cnv) { | |
1471 | static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; | |
1472 | UChar outUnicode[20]={ 0, 0, 0, 0 }; | |
1473 | ||
1474 | const char *s; | |
1475 | UChar *u; | |
1476 | UErrorCode errorCode; | |
1477 | UBool isAmbiguous; | |
1478 | ||
1479 | /* try to convert an 'a', a square bracket and a US-ASCII backslash */ | |
1480 | errorCode=U_ZERO_ERROR; | |
1481 | s=inBytes; | |
1482 | u=outUnicode; | |
1483 | ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); | |
1484 | if(U_FAILURE(errorCode)) { | |
1485 | /* we do not care about general failures in this test; the input may just not be mappable */ | |
1486 | return; | |
1487 | } | |
1488 | ||
1489 | if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { | |
1490 | /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ | |
1491 | /* There are some encodings that are partially ASCII based, | |
1492 | like the ISO-7 and GSM series of codepages, which we ignore. */ | |
1493 | return; | |
1494 | } | |
1495 | ||
1496 | isAmbiguous=ucnv_isAmbiguous(cnv); | |
1497 | ||
1498 | /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ | |
1499 | if((outUnicode[2]!=0x5c)!=isAmbiguous) { | |
1500 | log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", | |
1501 | ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); | |
1502 | return; | |
1503 | } | |
1504 | ||
1505 | if(outUnicode[2]!=0x5c) { | |
1506 | /* needs fixup, fix it */ | |
1507 | ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); | |
1508 | if(outUnicode[2]!=0x5c) { | |
1509 | /* the fix failed */ | |
1510 | log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); | |
1511 | return; | |
1512 | } | |
1513 | } | |
1514 | } | |
1515 | ||
1516 | static void TestAmbiguous() | |
1517 | { | |
1518 | UErrorCode status = U_ZERO_ERROR; | |
1519 | UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; | |
1520 | static const char target[] = { | |
1521 | /* "\\usr\\local\\share\\data\\icutest.txt" */ | |
1522 | 0x5c, 0x75, 0x73, 0x72, | |
1523 | 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, | |
1524 | 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, | |
1525 | 0x5c, 0x64, 0x61, 0x74, 0x61, | |
1526 | 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, | |
1527 | 0 | |
1528 | }; | |
1529 | UChar asciiResult[200], sjisResult[200]; | |
1530 | int32_t /*asciiLength = 0,*/ sjisLength = 0, i; | |
1531 | const char *name; | |
1532 | ||
1533 | /* enumerate all converters */ | |
1534 | status=U_ZERO_ERROR; | |
1535 | for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { | |
1536 | cnv=ucnv_open(name, &status); | |
1537 | if(U_SUCCESS(status)) { | |
1538 | TestAmbiguousConverter(cnv); | |
1539 | ucnv_close(cnv); | |
1540 | } else { | |
1541 | log_err("error: unable to open available converter \"%s\"\n", name); | |
1542 | status=U_ZERO_ERROR; | |
1543 | } | |
1544 | } | |
1545 | ||
1546 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1547 | sjis_cnv = ucnv_open("ibm-943", &status); | |
1548 | if (U_FAILURE(status)) | |
1549 | { | |
1550 | log_data_err("Failed to create a SJIS converter\n"); | |
1551 | return; | |
1552 | } | |
1553 | ascii_cnv = ucnv_open("LATIN-1", &status); | |
1554 | if (U_FAILURE(status)) | |
1555 | { | |
1556 | log_data_err("Failed to create a LATIN-1 converter\n"); | |
1557 | ucnv_close(sjis_cnv); | |
1558 | return; | |
1559 | } | |
1560 | /* convert target from SJIS to Unicode */ | |
1561 | sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status); | |
1562 | if (U_FAILURE(status)) | |
1563 | { | |
1564 | log_err("Failed to convert the SJIS string.\n"); | |
1565 | ucnv_close(sjis_cnv); | |
1566 | ucnv_close(ascii_cnv); | |
1567 | return; | |
1568 | } | |
1569 | /* convert target from Latin-1 to Unicode */ | |
1570 | /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status); | |
1571 | if (U_FAILURE(status)) | |
1572 | { | |
1573 | log_err("Failed to convert the Latin-1 string.\n"); | |
1574 | ucnv_close(sjis_cnv); | |
1575 | ucnv_close(ascii_cnv); | |
1576 | return; | |
1577 | } | |
1578 | if (!ucnv_isAmbiguous(sjis_cnv)) | |
1579 | { | |
1580 | log_err("SJIS converter should contain ambiguous character mappings.\n"); | |
1581 | ucnv_close(sjis_cnv); | |
1582 | ucnv_close(ascii_cnv); | |
1583 | return; | |
1584 | } | |
1585 | if (u_strcmp(sjisResult, asciiResult) == 0) | |
1586 | { | |
1587 | log_err("File separators for SJIS don't need to be fixed.\n"); | |
1588 | } | |
1589 | ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); | |
1590 | if (u_strcmp(sjisResult, asciiResult) != 0) | |
1591 | { | |
1592 | log_err("Fixing file separator for SJIS failed.\n"); | |
1593 | } | |
1594 | ucnv_close(sjis_cnv); | |
1595 | ucnv_close(ascii_cnv); | |
1596 | #endif | |
1597 | } | |
1598 | ||
1599 | static void | |
1600 | TestSignatureDetection(){ | |
1601 | /* with null terminated strings */ | |
1602 | { | |
1603 | static const char* data[] = { | |
1604 | "\xFE\xFF\x00\x00", /* UTF-16BE */ | |
1605 | "\xFF\xFE\x00\x00", /* UTF-16LE */ | |
1606 | "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
1607 | "\x0E\xFE\xFF\x00", /* SCSU */ | |
1608 | ||
1609 | "\xFE\xFF", /* UTF-16BE */ | |
1610 | "\xFF\xFE", /* UTF-16LE */ | |
1611 | "\xEF\xBB\xBF", /* UTF-8 */ | |
1612 | "\x0E\xFE\xFF", /* SCSU */ | |
1613 | ||
1614 | "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
1615 | "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
1616 | "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
1617 | "\x0E\xFE\xFF\x41", /* SCSU */ | |
1618 | ||
1619 | "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ | |
1620 | "\x2B\x2F\x76\x38\x41", /* UTF-7 */ | |
1621 | "\x2B\x2F\x76\x39\x41", /* UTF-7 */ | |
1622 | "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ | |
1623 | "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ | |
1624 | ||
1625 | "\xDD\x73\x66\x73" /* UTF-EBCDIC */ | |
1626 | }; | |
1627 | static const char* expected[] = { | |
1628 | "UTF-16BE", | |
1629 | "UTF-16LE", | |
1630 | "UTF-8", | |
1631 | "SCSU", | |
1632 | ||
1633 | "UTF-16BE", | |
1634 | "UTF-16LE", | |
1635 | "UTF-8", | |
1636 | "SCSU", | |
1637 | ||
1638 | "UTF-16BE", | |
1639 | "UTF-16LE", | |
1640 | "UTF-8", | |
1641 | "SCSU", | |
1642 | ||
1643 | "UTF-7", | |
1644 | "UTF-7", | |
1645 | "UTF-7", | |
1646 | "UTF-7", | |
1647 | "UTF-7", | |
1648 | "UTF-EBCDIC" | |
1649 | }; | |
1650 | static const int32_t expectedLength[] ={ | |
1651 | 2, | |
1652 | 2, | |
1653 | 3, | |
1654 | 3, | |
1655 | ||
1656 | 2, | |
1657 | 2, | |
1658 | 3, | |
1659 | 3, | |
1660 | ||
1661 | 2, | |
1662 | 2, | |
1663 | 3, | |
1664 | 3, | |
1665 | ||
1666 | 5, | |
1667 | 4, | |
1668 | 4, | |
1669 | 4, | |
1670 | 4, | |
1671 | 4 | |
1672 | }; | |
1673 | int i=0; | |
1674 | UErrorCode err; | |
1675 | int32_t signatureLength = -1; | |
1676 | const char* source = NULL; | |
1677 | const char* enc = NULL; | |
1678 | for( ; i<UPRV_LENGTHOF(data); i++){ | |
1679 | err = U_ZERO_ERROR; | |
1680 | source = data[i]; | |
1681 | enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); | |
1682 | if(U_FAILURE(err)){ | |
1683 | log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); | |
1684 | continue; | |
1685 | } | |
1686 | if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
1687 | log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
1688 | continue; | |
1689 | } | |
1690 | if(signatureLength != expectedLength[i]){ | |
1691 | log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); | |
1692 | } | |
1693 | } | |
1694 | } | |
1695 | { | |
1696 | static const char* data[] = { | |
1697 | "\xFE\xFF\x00", /* UTF-16BE */ | |
1698 | "\xFF\xFE\x00", /* UTF-16LE */ | |
1699 | "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
1700 | "\x0E\xFE\xFF\x00", /* SCSU */ | |
1701 | "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
1702 | "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
1703 | "\xFE\xFF", /* UTF-16BE */ | |
1704 | "\xFF\xFE", /* UTF-16LE */ | |
1705 | "\xEF\xBB\xBF", /* UTF-8 */ | |
1706 | "\x0E\xFE\xFF", /* SCSU */ | |
1707 | "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
1708 | "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
1709 | "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
1710 | "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
1711 | "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
1712 | "\x0E\xFE\xFF\x41", /* SCSU */ | |
1713 | "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ | |
1714 | "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ | |
1715 | "\xFB\xEE\x28", /* BOCU-1 */ | |
1716 | "\xFF\x41\x42" /* NULL */ | |
1717 | }; | |
1718 | static const int len[] = { | |
1719 | 3, | |
1720 | 3, | |
1721 | 4, | |
1722 | 4, | |
1723 | 4, | |
1724 | 4, | |
1725 | 2, | |
1726 | 2, | |
1727 | 3, | |
1728 | 3, | |
1729 | 4, | |
1730 | 4, | |
1731 | 4, | |
1732 | 4, | |
1733 | 4, | |
1734 | 4, | |
1735 | 5, | |
1736 | 5, | |
1737 | 3, | |
1738 | 3 | |
1739 | }; | |
1740 | ||
1741 | static const char* expected[] = { | |
1742 | "UTF-16BE", | |
1743 | "UTF-16LE", | |
1744 | "UTF-8", | |
1745 | "SCSU", | |
1746 | "UTF-32BE", | |
1747 | "UTF-32LE", | |
1748 | "UTF-16BE", | |
1749 | "UTF-16LE", | |
1750 | "UTF-8", | |
1751 | "SCSU", | |
1752 | "UTF-32BE", | |
1753 | "UTF-32LE", | |
1754 | "UTF-16BE", | |
1755 | "UTF-16LE", | |
1756 | "UTF-8", | |
1757 | "SCSU", | |
1758 | "UTF-32BE", | |
1759 | "UTF-32LE", | |
1760 | "BOCU-1", | |
1761 | NULL | |
1762 | }; | |
1763 | static const int32_t expectedLength[] ={ | |
1764 | 2, | |
1765 | 2, | |
1766 | 3, | |
1767 | 3, | |
1768 | 4, | |
1769 | 4, | |
1770 | 2, | |
1771 | 2, | |
1772 | 3, | |
1773 | 3, | |
1774 | 4, | |
1775 | 4, | |
1776 | 2, | |
1777 | 2, | |
1778 | 3, | |
1779 | 3, | |
1780 | 4, | |
1781 | 4, | |
1782 | 3, | |
1783 | 0 | |
1784 | }; | |
1785 | int i=0; | |
1786 | UErrorCode err; | |
1787 | int32_t signatureLength = -1; | |
1788 | int32_t sourceLength=-1; | |
1789 | const char* source = NULL; | |
1790 | const char* enc = NULL; | |
1791 | for( ; i<UPRV_LENGTHOF(data); i++){ | |
1792 | err = U_ZERO_ERROR; | |
1793 | source = data[i]; | |
1794 | sourceLength = len[i]; | |
1795 | enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); | |
1796 | if(U_FAILURE(err)){ | |
1797 | log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); | |
1798 | continue; | |
1799 | } | |
1800 | if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
1801 | if(expected[i] !=NULL){ | |
1802 | log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
1803 | continue; | |
1804 | } | |
1805 | } | |
1806 | if(signatureLength != expectedLength[i]){ | |
1807 | log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); | |
1808 | } | |
1809 | } | |
1810 | } | |
1811 | } | |
1812 | ||
1813 | static void TestUTF7() { | |
1814 | /* test input */ | |
1815 | static const uint8_t in[]={ | |
1816 | /* H - +Jjo- - ! +- +2AHcAQ */ | |
1817 | 0x48, | |
1818 | 0x2d, | |
1819 | 0x2b, 0x4a, 0x6a, 0x6f, | |
1820 | 0x2d, 0x2d, | |
1821 | 0x21, | |
1822 | 0x2b, 0x2d, | |
1823 | 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 | |
1824 | }; | |
1825 | ||
1826 | /* expected test results */ | |
1827 | static const int32_t results[]={ | |
1828 | /* number of bytes read, code point */ | |
1829 | 1, 0x48, | |
1830 | 1, 0x2d, | |
1831 | 4, 0x263a, /* <WHITE SMILING FACE> */ | |
1832 | 2, 0x2d, | |
1833 | 1, 0x21, | |
1834 | 2, 0x2b, | |
1835 | 7, 0x10401 | |
1836 | }; | |
1837 | ||
1838 | const char *cnvName; | |
1839 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
1840 | UErrorCode errorCode=U_ZERO_ERROR; | |
1841 | UConverter *cnv=ucnv_open("UTF-7", &errorCode); | |
1842 | if(U_FAILURE(errorCode)) { | |
1843 | log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); | |
1844 | return; | |
1845 | } | |
1846 | TestNextUChar(cnv, source, limit, results, "UTF-7"); | |
1847 | /* Test the condition when source >= sourceLimit */ | |
1848 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
1849 | cnvName = ucnv_getName(cnv, &errorCode); | |
1850 | if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { | |
1851 | log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); | |
1852 | } | |
1853 | ucnv_close(cnv); | |
1854 | } | |
1855 | ||
1856 | static void TestIMAP() { | |
1857 | /* test input */ | |
1858 | static const uint8_t in[]={ | |
1859 | /* H - &Jjo- - ! &- &2AHcAQ- \ */ | |
1860 | 0x48, | |
1861 | 0x2d, | |
1862 | 0x26, 0x4a, 0x6a, 0x6f, | |
1863 | 0x2d, 0x2d, | |
1864 | 0x21, | |
1865 | 0x26, 0x2d, | |
1866 | 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d | |
1867 | }; | |
1868 | ||
1869 | /* expected test results */ | |
1870 | static const int32_t results[]={ | |
1871 | /* number of bytes read, code point */ | |
1872 | 1, 0x48, | |
1873 | 1, 0x2d, | |
1874 | 4, 0x263a, /* <WHITE SMILING FACE> */ | |
1875 | 2, 0x2d, | |
1876 | 1, 0x21, | |
1877 | 2, 0x26, | |
1878 | 7, 0x10401 | |
1879 | }; | |
1880 | ||
1881 | const char *cnvName; | |
1882 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
1883 | UErrorCode errorCode=U_ZERO_ERROR; | |
1884 | UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); | |
1885 | if(U_FAILURE(errorCode)) { | |
1886 | log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); | |
1887 | return; | |
1888 | } | |
1889 | TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); | |
1890 | /* Test the condition when source >= sourceLimit */ | |
1891 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
1892 | cnvName = ucnv_getName(cnv, &errorCode); | |
1893 | if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { | |
1894 | log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); | |
1895 | } | |
1896 | ucnv_close(cnv); | |
1897 | } | |
1898 | ||
1899 | static void TestUTF8() { | |
1900 | /* test input */ | |
1901 | static const uint8_t in[]={ | |
1902 | 0x61, | |
1903 | 0xc2, 0x80, | |
1904 | 0xe0, 0xa0, 0x80, | |
1905 | 0xf0, 0x90, 0x80, 0x80, | |
1906 | 0xf4, 0x84, 0x8c, 0xa1, | |
1907 | 0xf0, 0x90, 0x90, 0x81 | |
1908 | }; | |
1909 | ||
1910 | /* expected test results */ | |
1911 | static const int32_t results[]={ | |
1912 | /* number of bytes read, code point */ | |
1913 | 1, 0x61, | |
1914 | 2, 0x80, | |
1915 | 3, 0x800, | |
1916 | 4, 0x10000, | |
1917 | 4, 0x104321, | |
1918 | 4, 0x10401 | |
1919 | }; | |
1920 | ||
1921 | /* error test input */ | |
1922 | static const uint8_t in2[]={ | |
1923 | 0x61, | |
1924 | 0xc0, 0x80, /* illegal non-shortest form */ | |
1925 | 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
1926 | 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
1927 | 0xc0, 0xc0, /* illegal trail byte */ | |
1928 | 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
1929 | 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
1930 | 0xfe, /* illegal byte altogether */ | |
1931 | 0x62 | |
1932 | }; | |
1933 | ||
1934 | /* expected error test results */ | |
1935 | static const int32_t results2[]={ | |
1936 | /* number of bytes read, code point */ | |
1937 | 1, 0x61, | |
1938 | 22, 0x62 | |
1939 | }; | |
1940 | ||
1941 | UConverterToUCallback cb; | |
1942 | const void *p; | |
1943 | ||
1944 | const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
1945 | UErrorCode errorCode=U_ZERO_ERROR; | |
1946 | UConverter *cnv=ucnv_open("UTF-8", &errorCode); | |
1947 | if(U_FAILURE(errorCode)) { | |
1948 | log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); | |
1949 | return; | |
1950 | } | |
1951 | TestNextUChar(cnv, source, limit, results, "UTF-8"); | |
1952 | /* Test the condition when source >= sourceLimit */ | |
1953 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
1954 | ||
1955 | /* test error behavior with a skip callback */ | |
1956 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); | |
1957 | source=(const char *)in2; | |
1958 | limit=(const char *)(in2+sizeof(in2)); | |
1959 | TestNextUChar(cnv, source, limit, results2, "UTF-8"); | |
1960 | ||
1961 | ucnv_close(cnv); | |
1962 | } | |
1963 | ||
1964 | static void TestCESU8() { | |
1965 | /* test input */ | |
1966 | static const uint8_t in[]={ | |
1967 | 0x61, | |
1968 | 0xc2, 0x80, | |
1969 | 0xe0, 0xa0, 0x80, | |
1970 | 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, | |
1971 | 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, | |
1972 | 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, | |
1973 | 0xef, 0xbf, 0xbc | |
1974 | }; | |
1975 | ||
1976 | /* expected test results */ | |
1977 | static const int32_t results[]={ | |
1978 | /* number of bytes read, code point */ | |
1979 | 1, 0x61, | |
1980 | 2, 0x80, | |
1981 | 3, 0x800, | |
1982 | 6, 0x10000, | |
1983 | 3, 0xdc01, | |
1984 | -1,0xd802, /* may read 3 or 6 bytes */ | |
1985 | -1,0x10ffff,/* may read 0 or 3 bytes */ | |
1986 | 3, 0xfffc | |
1987 | }; | |
1988 | ||
1989 | /* error test input */ | |
1990 | static const uint8_t in2[]={ | |
1991 | 0x61, | |
1992 | 0xc0, 0x80, /* illegal non-shortest form */ | |
1993 | 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
1994 | 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
1995 | 0xc0, 0xc0, /* illegal trail byte */ | |
1996 | 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ | |
1997 | 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ | |
1998 | 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ | |
1999 | 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
2000 | 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
2001 | 0xfe, /* illegal byte altogether */ | |
2002 | 0x62 | |
2003 | }; | |
2004 | ||
2005 | /* expected error test results */ | |
2006 | static const int32_t results2[]={ | |
2007 | /* number of bytes read, code point */ | |
2008 | 1, 0x61, | |
2009 | 34, 0x62 | |
2010 | }; | |
2011 | ||
2012 | UConverterToUCallback cb; | |
2013 | const void *p; | |
2014 | ||
2015 | const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
2016 | UErrorCode errorCode=U_ZERO_ERROR; | |
2017 | UConverter *cnv=ucnv_open("CESU-8", &errorCode); | |
2018 | if(U_FAILURE(errorCode)) { | |
2019 | log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); | |
2020 | return; | |
2021 | } | |
2022 | TestNextUChar(cnv, source, limit, results, "CESU-8"); | |
2023 | /* Test the condition when source >= sourceLimit */ | |
2024 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2025 | ||
2026 | /* test error behavior with a skip callback */ | |
2027 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); | |
2028 | source=(const char *)in2; | |
2029 | limit=(const char *)(in2+sizeof(in2)); | |
2030 | TestNextUChar(cnv, source, limit, results2, "CESU-8"); | |
2031 | ||
2032 | ucnv_close(cnv); | |
2033 | } | |
2034 | ||
2035 | static void TestUTF16() { | |
2036 | /* test input */ | |
2037 | static const uint8_t in1[]={ | |
2038 | 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff | |
2039 | }; | |
2040 | static const uint8_t in2[]={ | |
2041 | 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff | |
2042 | }; | |
2043 | static const uint8_t in3[]={ | |
2044 | 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 | |
2045 | }; | |
2046 | ||
2047 | /* expected test results */ | |
2048 | static const int32_t results1[]={ | |
2049 | /* number of bytes read, code point */ | |
2050 | 4, 0x4e00, | |
2051 | 2, 0xfeff | |
2052 | }; | |
2053 | static const int32_t results2[]={ | |
2054 | /* number of bytes read, code point */ | |
2055 | 4, 0x004e, | |
2056 | 2, 0xfffe | |
2057 | }; | |
2058 | static const int32_t results3[]={ | |
2059 | /* number of bytes read, code point */ | |
2060 | 2, 0xfefe, | |
2061 | 2, 0x4e00, | |
2062 | 2, 0xfeff, | |
2063 | 4, 0x20001 | |
2064 | }; | |
2065 | ||
2066 | const char *source, *limit; | |
2067 | ||
2068 | UErrorCode errorCode=U_ZERO_ERROR; | |
2069 | UConverter *cnv=ucnv_open("UTF-16", &errorCode); | |
2070 | if(U_FAILURE(errorCode)) { | |
2071 | log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); | |
2072 | return; | |
2073 | } | |
2074 | ||
2075 | source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
2076 | TestNextUChar(cnv, source, limit, results1, "UTF-16"); | |
2077 | ||
2078 | source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
2079 | ucnv_resetToUnicode(cnv); | |
2080 | TestNextUChar(cnv, source, limit, results2, "UTF-16"); | |
2081 | ||
2082 | source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
2083 | ucnv_resetToUnicode(cnv); | |
2084 | TestNextUChar(cnv, source, limit, results3, "UTF-16"); | |
2085 | ||
2086 | /* Test the condition when source >= sourceLimit */ | |
2087 | ucnv_resetToUnicode(cnv); | |
2088 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2089 | ||
2090 | ucnv_close(cnv); | |
2091 | } | |
2092 | ||
2093 | static void TestUTF16BE() { | |
2094 | /* test input */ | |
2095 | static const uint8_t in[]={ | |
2096 | 0x00, 0x61, | |
2097 | 0x00, 0xc0, | |
2098 | 0x00, 0x31, | |
2099 | 0x00, 0xf4, | |
2100 | 0xce, 0xfe, | |
2101 | 0xd8, 0x01, 0xdc, 0x01 | |
2102 | }; | |
2103 | ||
2104 | /* expected test results */ | |
2105 | static const int32_t results[]={ | |
2106 | /* number of bytes read, code point */ | |
2107 | 2, 0x61, | |
2108 | 2, 0xc0, | |
2109 | 2, 0x31, | |
2110 | 2, 0xf4, | |
2111 | 2, 0xcefe, | |
2112 | 4, 0x10401 | |
2113 | }; | |
2114 | ||
2115 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2116 | UErrorCode errorCode=U_ZERO_ERROR; | |
2117 | UConverter *cnv=ucnv_open("utf-16be", &errorCode); | |
2118 | if(U_FAILURE(errorCode)) { | |
2119 | log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); | |
2120 | return; | |
2121 | } | |
2122 | TestNextUChar(cnv, source, limit, results, "UTF-16BE"); | |
2123 | /* Test the condition when source >= sourceLimit */ | |
2124 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2125 | /*Test for the condition where there is an invalid character*/ | |
2126 | { | |
2127 | static const uint8_t source2[]={0x61}; | |
2128 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
2129 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
2130 | } | |
2131 | #if 0 | |
2132 | /* | |
2133 | * Test disabled because currently the UTF-16BE/LE converters are supposed | |
2134 | * to not set errors for unpaired surrogates. | |
2135 | * This may change with | |
2136 | * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
2137 | */ | |
2138 | ||
2139 | /*Test for the condition where there is a surrogate pair*/ | |
2140 | { | |
2141 | const uint8_t source2[]={0xd8, 0x01}; | |
2142 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
2143 | } | |
2144 | #endif | |
2145 | ucnv_close(cnv); | |
2146 | } | |
2147 | ||
2148 | static void | |
2149 | TestUTF16LE() { | |
2150 | /* test input */ | |
2151 | static const uint8_t in[]={ | |
2152 | 0x61, 0x00, | |
2153 | 0x31, 0x00, | |
2154 | 0x4e, 0x2e, | |
2155 | 0x4e, 0x00, | |
2156 | 0x01, 0xd8, 0x01, 0xdc | |
2157 | }; | |
2158 | ||
2159 | /* expected test results */ | |
2160 | static const int32_t results[]={ | |
2161 | /* number of bytes read, code point */ | |
2162 | 2, 0x61, | |
2163 | 2, 0x31, | |
2164 | 2, 0x2e4e, | |
2165 | 2, 0x4e, | |
2166 | 4, 0x10401 | |
2167 | }; | |
2168 | ||
2169 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2170 | UErrorCode errorCode=U_ZERO_ERROR; | |
2171 | UConverter *cnv=ucnv_open("utf-16le", &errorCode); | |
2172 | if(U_FAILURE(errorCode)) { | |
2173 | log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); | |
2174 | return; | |
2175 | } | |
2176 | TestNextUChar(cnv, source, limit, results, "UTF-16LE"); | |
2177 | /* Test the condition when source >= sourceLimit */ | |
2178 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2179 | /*Test for the condition where there is an invalid character*/ | |
2180 | { | |
2181 | static const uint8_t source2[]={0x61}; | |
2182 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
2183 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
2184 | } | |
2185 | #if 0 | |
2186 | /* | |
2187 | * Test disabled because currently the UTF-16BE/LE converters are supposed | |
2188 | * to not set errors for unpaired surrogates. | |
2189 | * This may change with | |
2190 | * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
2191 | */ | |
2192 | ||
2193 | /*Test for the condition where there is a surrogate character*/ | |
2194 | { | |
2195 | static const uint8_t source2[]={0x01, 0xd8}; | |
2196 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
2197 | } | |
2198 | #endif | |
2199 | ||
2200 | ucnv_close(cnv); | |
2201 | } | |
2202 | ||
2203 | static void TestUTF32() { | |
2204 | /* test input */ | |
2205 | static const uint8_t in1[]={ | |
2206 | 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff | |
2207 | }; | |
2208 | static const uint8_t in2[]={ | |
2209 | 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 | |
2210 | }; | |
2211 | static const uint8_t in3[]={ | |
2212 | 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 | |
2213 | }; | |
2214 | ||
2215 | /* expected test results */ | |
2216 | static const int32_t results1[]={ | |
2217 | /* number of bytes read, code point */ | |
2218 | 8, 0x100f00, | |
2219 | 4, 0xfeff | |
2220 | }; | |
2221 | static const int32_t results2[]={ | |
2222 | /* number of bytes read, code point */ | |
2223 | 8, 0x0f1000, | |
2224 | 4, 0xfffe | |
2225 | }; | |
2226 | static const int32_t results3[]={ | |
2227 | /* number of bytes read, code point */ | |
2228 | 4, 0xfefe, | |
2229 | 4, 0x100f00, | |
2230 | 4, 0xfffd, /* unmatched surrogate */ | |
2231 | 4, 0xfffd /* unmatched surrogate */ | |
2232 | }; | |
2233 | ||
2234 | const char *source, *limit; | |
2235 | ||
2236 | UErrorCode errorCode=U_ZERO_ERROR; | |
2237 | UConverter *cnv=ucnv_open("UTF-32", &errorCode); | |
2238 | if(U_FAILURE(errorCode)) { | |
2239 | log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); | |
2240 | return; | |
2241 | } | |
2242 | ||
2243 | source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
2244 | TestNextUChar(cnv, source, limit, results1, "UTF-32"); | |
2245 | ||
2246 | source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
2247 | ucnv_resetToUnicode(cnv); | |
2248 | TestNextUChar(cnv, source, limit, results2, "UTF-32"); | |
2249 | ||
2250 | source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
2251 | ucnv_resetToUnicode(cnv); | |
2252 | TestNextUChar(cnv, source, limit, results3, "UTF-32"); | |
2253 | ||
2254 | /* Test the condition when source >= sourceLimit */ | |
2255 | ucnv_resetToUnicode(cnv); | |
2256 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2257 | ||
2258 | ucnv_close(cnv); | |
2259 | } | |
2260 | ||
2261 | static void | |
2262 | TestUTF32BE() { | |
2263 | /* test input */ | |
2264 | static const uint8_t in[]={ | |
2265 | 0x00, 0x00, 0x00, 0x61, | |
2266 | 0x00, 0x00, 0x30, 0x61, | |
2267 | 0x00, 0x00, 0xdc, 0x00, | |
2268 | 0x00, 0x00, 0xd8, 0x00, | |
2269 | 0x00, 0x00, 0xdf, 0xff, | |
2270 | 0x00, 0x00, 0xff, 0xfe, | |
2271 | 0x00, 0x10, 0xab, 0xcd, | |
2272 | 0x00, 0x10, 0xff, 0xff | |
2273 | }; | |
2274 | ||
2275 | /* expected test results */ | |
2276 | static const int32_t results[]={ | |
2277 | /* number of bytes read, code point */ | |
2278 | 4, 0x61, | |
2279 | 4, 0x3061, | |
2280 | 4, 0xfffd, | |
2281 | 4, 0xfffd, | |
2282 | 4, 0xfffd, | |
2283 | 4, 0xfffe, | |
2284 | 4, 0x10abcd, | |
2285 | 4, 0x10ffff | |
2286 | }; | |
2287 | ||
2288 | /* error test input */ | |
2289 | static const uint8_t in2[]={ | |
2290 | 0x00, 0x00, 0x00, 0x61, | |
2291 | 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
2292 | 0x00, 0x00, 0x00, 0x62, | |
2293 | 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
2294 | 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
2295 | 0x00, 0x00, 0x01, 0x62, | |
2296 | 0x00, 0x00, 0x02, 0x62 | |
2297 | }; | |
2298 | ||
2299 | /* expected error test results */ | |
2300 | static const int32_t results2[]={ | |
2301 | /* number of bytes read, code point */ | |
2302 | 4, 0x61, | |
2303 | 8, 0x62, | |
2304 | 12, 0x162, | |
2305 | 4, 0x262 | |
2306 | }; | |
2307 | ||
2308 | UConverterToUCallback cb; | |
2309 | const void *p; | |
2310 | ||
2311 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2312 | UErrorCode errorCode=U_ZERO_ERROR; | |
2313 | UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); | |
2314 | if(U_FAILURE(errorCode)) { | |
2315 | log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); | |
2316 | return; | |
2317 | } | |
2318 | TestNextUChar(cnv, source, limit, results, "UTF-32BE"); | |
2319 | ||
2320 | /* Test the condition when source >= sourceLimit */ | |
2321 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2322 | ||
2323 | /* test error behavior with a skip callback */ | |
2324 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); | |
2325 | source=(const char *)in2; | |
2326 | limit=(const char *)(in2+sizeof(in2)); | |
2327 | TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); | |
2328 | ||
2329 | ucnv_close(cnv); | |
2330 | } | |
2331 | ||
2332 | static void | |
2333 | TestUTF32LE() { | |
2334 | /* test input */ | |
2335 | static const uint8_t in[]={ | |
2336 | 0x61, 0x00, 0x00, 0x00, | |
2337 | 0x61, 0x30, 0x00, 0x00, | |
2338 | 0x00, 0xdc, 0x00, 0x00, | |
2339 | 0x00, 0xd8, 0x00, 0x00, | |
2340 | 0xff, 0xdf, 0x00, 0x00, | |
2341 | 0xfe, 0xff, 0x00, 0x00, | |
2342 | 0xcd, 0xab, 0x10, 0x00, | |
2343 | 0xff, 0xff, 0x10, 0x00 | |
2344 | }; | |
2345 | ||
2346 | /* expected test results */ | |
2347 | static const int32_t results[]={ | |
2348 | /* number of bytes read, code point */ | |
2349 | 4, 0x61, | |
2350 | 4, 0x3061, | |
2351 | 4, 0xfffd, | |
2352 | 4, 0xfffd, | |
2353 | 4, 0xfffd, | |
2354 | 4, 0xfffe, | |
2355 | 4, 0x10abcd, | |
2356 | 4, 0x10ffff | |
2357 | }; | |
2358 | ||
2359 | /* error test input */ | |
2360 | static const uint8_t in2[]={ | |
2361 | 0x61, 0x00, 0x00, 0x00, | |
2362 | 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
2363 | 0x62, 0x00, 0x00, 0x00, | |
2364 | 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
2365 | 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
2366 | 0x62, 0x01, 0x00, 0x00, | |
2367 | 0x62, 0x02, 0x00, 0x00, | |
2368 | }; | |
2369 | ||
2370 | /* expected error test results */ | |
2371 | static const int32_t results2[]={ | |
2372 | /* number of bytes read, code point */ | |
2373 | 4, 0x61, | |
2374 | 8, 0x62, | |
2375 | 12, 0x162, | |
2376 | 4, 0x262, | |
2377 | }; | |
2378 | ||
2379 | UConverterToUCallback cb; | |
2380 | const void *p; | |
2381 | ||
2382 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2383 | UErrorCode errorCode=U_ZERO_ERROR; | |
2384 | UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); | |
2385 | if(U_FAILURE(errorCode)) { | |
2386 | log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); | |
2387 | return; | |
2388 | } | |
2389 | TestNextUChar(cnv, source, limit, results, "UTF-32LE"); | |
2390 | ||
2391 | /* Test the condition when source >= sourceLimit */ | |
2392 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2393 | ||
2394 | /* test error behavior with a skip callback */ | |
2395 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); | |
2396 | source=(const char *)in2; | |
2397 | limit=(const char *)(in2+sizeof(in2)); | |
2398 | TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); | |
2399 | ||
2400 | ucnv_close(cnv); | |
2401 | } | |
2402 | ||
2403 | static void | |
2404 | TestLATIN1() { | |
2405 | /* test input */ | |
2406 | static const uint8_t in[]={ | |
2407 | 0x61, | |
2408 | 0x31, | |
2409 | 0x32, | |
2410 | 0xc0, | |
2411 | 0xf0, | |
2412 | 0xf4, | |
2413 | }; | |
2414 | ||
2415 | /* expected test results */ | |
2416 | static const int32_t results[]={ | |
2417 | /* number of bytes read, code point */ | |
2418 | 1, 0x61, | |
2419 | 1, 0x31, | |
2420 | 1, 0x32, | |
2421 | 1, 0xc0, | |
2422 | 1, 0xf0, | |
2423 | 1, 0xf4, | |
2424 | }; | |
2425 | static const uint16_t in1[] = { | |
2426 | 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, | |
2427 | 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, | |
2428 | 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, | |
2429 | 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, | |
2430 | 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, | |
2431 | 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, | |
2432 | 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, | |
2433 | 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, | |
2434 | 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, | |
2435 | 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, | |
2436 | 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, | |
2437 | 0xcb, 0x82 | |
2438 | }; | |
2439 | static const uint8_t out1[] = { | |
2440 | 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, | |
2441 | 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, | |
2442 | 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, | |
2443 | 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, | |
2444 | 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, | |
2445 | 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, | |
2446 | 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, | |
2447 | 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, | |
2448 | 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, | |
2449 | 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, | |
2450 | 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, | |
2451 | 0xcb, 0x82 | |
2452 | }; | |
2453 | static const uint16_t in2[]={ | |
2454 | 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
2455 | 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
2456 | 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
2457 | 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
2458 | 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
2459 | 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
2460 | 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
2461 | 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
2462 | 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
2463 | 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
2464 | 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
2465 | 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
2466 | 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
2467 | 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
2468 | 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2469 | 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2470 | 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
2471 | 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
2472 | 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2473 | 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2474 | 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
2475 | 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
2476 | 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
2477 | 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
2478 | 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
2479 | 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
2480 | 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
2481 | 0x37, 0x20, 0x2A, 0x2F, | |
2482 | }; | |
2483 | static const unsigned char out2[]={ | |
2484 | 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
2485 | 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
2486 | 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
2487 | 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
2488 | 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
2489 | 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
2490 | 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
2491 | 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
2492 | 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
2493 | 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
2494 | 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
2495 | 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
2496 | 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
2497 | 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
2498 | 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2499 | 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2500 | 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
2501 | 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
2502 | 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2503 | 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2504 | 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
2505 | 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
2506 | 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
2507 | 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
2508 | 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
2509 | 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
2510 | 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
2511 | 0x37, 0x20, 0x2A, 0x2F, | |
2512 | }; | |
2513 | const char *source=(const char *)in; | |
2514 | const char *limit=(const char *)in+sizeof(in); | |
2515 | ||
2516 | UErrorCode errorCode=U_ZERO_ERROR; | |
2517 | UConverter *cnv=ucnv_open("LATIN_1", &errorCode); | |
2518 | if(U_FAILURE(errorCode)) { | |
2519 | log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); | |
2520 | return; | |
2521 | } | |
2522 | TestNextUChar(cnv, source, limit, results, "LATIN_1"); | |
2523 | /* Test the condition when source >= sourceLimit */ | |
2524 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2525 | TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); | |
2526 | TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); | |
2527 | ||
2528 | ucnv_close(cnv); | |
2529 | } | |
2530 | ||
2531 | static void | |
2532 | TestSBCS() { | |
2533 | /* test input */ | |
2534 | static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; | |
2535 | /* expected test results */ | |
2536 | static const int32_t results[]={ | |
2537 | /* number of bytes read, code point */ | |
2538 | 1, 0x61, | |
2539 | 1, 0xbf, | |
2540 | 1, 0xc4, | |
2541 | 1, 0x2021, | |
2542 | 1, 0xf8ff, | |
2543 | 1, 0x00d9 | |
2544 | }; | |
2545 | ||
2546 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2547 | UErrorCode errorCode=U_ZERO_ERROR; | |
2548 | UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); | |
2549 | if(U_FAILURE(errorCode)) { | |
2550 | log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); | |
2551 | return; | |
2552 | } | |
2553 | TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); | |
2554 | /* Test the condition when source >= sourceLimit */ | |
2555 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2556 | /*Test for Illegal character */ /* | |
2557 | { | |
2558 | static const uint8_t input1[]={ 0xA1 }; | |
2559 | const char* illegalsource=(const char*)input1; | |
2560 | TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); | |
2561 | } | |
2562 | */ | |
2563 | ucnv_close(cnv); | |
2564 | } | |
2565 | ||
2566 | static void | |
2567 | TestDBCS() { | |
2568 | /* test input */ | |
2569 | static const uint8_t in[]={ | |
2570 | 0x44, 0x6a, | |
2571 | 0xc4, 0x9c, | |
2572 | 0x7a, 0x74, | |
2573 | 0x46, 0xab, | |
2574 | 0x42, 0x5b, | |
2575 | ||
2576 | }; | |
2577 | ||
2578 | /* expected test results */ | |
2579 | static const int32_t results[]={ | |
2580 | /* number of bytes read, code point */ | |
2581 | 2, 0x00a7, | |
2582 | 2, 0xe1d2, | |
2583 | 2, 0x6962, | |
2584 | 2, 0xf842, | |
2585 | 2, 0xffe5, | |
2586 | }; | |
2587 | ||
2588 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2589 | UErrorCode errorCode=U_ZERO_ERROR; | |
2590 | ||
2591 | UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); | |
2592 | if(U_FAILURE(errorCode)) { | |
2593 | log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); | |
2594 | return; | |
2595 | } | |
2596 | TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); | |
2597 | /* Test the condition when source >= sourceLimit */ | |
2598 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2599 | /*Test for the condition where there is an invalid character*/ | |
2600 | { | |
2601 | static const uint8_t source2[]={0x1a, 0x1b}; | |
2602 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); | |
2603 | } | |
2604 | /*Test for the condition where we have a truncated char*/ | |
2605 | { | |
2606 | static const uint8_t source1[]={0xc4}; | |
2607 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
2608 | TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
2609 | } | |
2610 | ucnv_close(cnv); | |
2611 | } | |
2612 | ||
2613 | static void | |
2614 | TestMBCS() { | |
2615 | /* test input */ | |
2616 | static const uint8_t in[]={ | |
2617 | 0x01, | |
2618 | 0xa6, 0xa3, | |
2619 | 0x00, | |
2620 | 0xa6, 0xa1, | |
2621 | 0x08, | |
2622 | 0xc2, 0x76, | |
2623 | 0xc2, 0x78, | |
2624 | ||
2625 | }; | |
2626 | ||
2627 | /* expected test results */ | |
2628 | static const int32_t results[]={ | |
2629 | /* number of bytes read, code point */ | |
2630 | 1, 0x0001, | |
2631 | 2, 0x250c, | |
2632 | 1, 0x0000, | |
2633 | 2, 0x2500, | |
2634 | 1, 0x0008, | |
2635 | 2, 0xd60c, | |
2636 | 2, 0xd60e, | |
2637 | }; | |
2638 | ||
2639 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2640 | UErrorCode errorCode=U_ZERO_ERROR; | |
2641 | ||
2642 | UConverter *cnv=ucnv_open("ibm-1363", &errorCode); | |
2643 | if(U_FAILURE(errorCode)) { | |
2644 | log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); | |
2645 | return; | |
2646 | } | |
2647 | TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); | |
2648 | /* Test the condition when source >= sourceLimit */ | |
2649 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); | |
2650 | /*Test for the condition where there is an invalid character*/ | |
2651 | { | |
2652 | static const uint8_t source2[]={0xa1, 0x80}; | |
2653 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); | |
2654 | } | |
2655 | /*Test for the condition where we have a truncated char*/ | |
2656 | { | |
2657 | static const uint8_t source1[]={0xc4}; | |
2658 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
2659 | TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
2660 | } | |
2661 | ucnv_close(cnv); | |
2662 | ||
2663 | } | |
2664 | ||
2665 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
2666 | static void | |
2667 | TestICCRunout() { | |
2668 | /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ | |
2669 | ||
2670 | const char *cnvName = "ibm-1363"; | |
2671 | UErrorCode status = U_ZERO_ERROR; | |
2672 | const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; | |
2673 | /* UChar expectUData[] = { 0x00a1, 0x001a }; */ | |
2674 | const char *source = sourceData; | |
2675 | const char *sourceLim = sourceData+sizeof(sourceData); | |
2676 | UChar c1, c2, c3; | |
2677 | UConverter *cnv=ucnv_open(cnvName, &status); | |
2678 | if(U_FAILURE(status)) { | |
2679 | log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); | |
2680 | return; | |
2681 | } | |
2682 | ||
2683 | #if 0 | |
2684 | { | |
2685 | UChar targetBuf[256]; | |
2686 | UChar *target = targetBuf; | |
2687 | UChar *targetLim = target+256; | |
2688 | ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); | |
2689 | ||
2690 | log_info("After convert: target@%d, source@%d, status%s\n", | |
2691 | target-targetBuf, source-sourceData, u_errorName(status)); | |
2692 | ||
2693 | if(U_FAILURE(status)) { | |
2694 | log_err("Failed to convert: %s\n", u_errorName(status)); | |
2695 | } else { | |
2696 | ||
2697 | } | |
2698 | } | |
2699 | #endif | |
2700 | ||
2701 | c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2702 | log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); | |
2703 | ||
2704 | c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2705 | log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); | |
2706 | ||
2707 | c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2708 | log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); | |
2709 | ||
2710 | if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { | |
2711 | log_verbose("OK\n"); | |
2712 | } else { | |
2713 | log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); | |
2714 | } | |
2715 | ||
2716 | ucnv_close(cnv); | |
2717 | ||
2718 | } | |
2719 | #endif | |
2720 | ||
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * ucnv_getNextUChar() checks for the generic ISO_2022 converter (only built
 * when U_ENABLE_GENERIC_ISO_2022 is defined): regular input starting with an
 * escape sequence, sourceLimit < source, sourceLimit == source, a truncated
 * character and an illegal byte sequence.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when source >= sourceLimit.
     * The "limit before source" case is built as (source+1, source): both
     * pointers stay inside in[], whereas source-1 would point before the
     * start of the array, which C does not allow even without a dereference.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2778 | ||
2779 | static void | |
2780 | TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ | |
2781 | const UChar* uSource; | |
2782 | const UChar* uSourceLimit; | |
2783 | const char* cSource; | |
2784 | const char* cSourceLimit; | |
2785 | UChar *uTargetLimit =NULL; | |
2786 | UChar *uTarget; | |
2787 | char *cTarget; | |
2788 | const char *cTargetLimit; | |
2789 | char *cBuf; | |
2790 | UChar *uBuf; /*,*test;*/ | |
2791 | int32_t uBufSize = 120; | |
2792 | int len=0; | |
2793 | int i=2; | |
2794 | UErrorCode errorCode=U_ZERO_ERROR; | |
2795 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2796 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
2797 | ucnv_reset(cnv); | |
2798 | for(;--i>0; ){ | |
2799 | uSource = (UChar*) source; | |
2800 | uSourceLimit=(const UChar*)sourceLimit; | |
2801 | cTarget = cBuf; | |
2802 | uTarget = uBuf; | |
2803 | cSource = cBuf; | |
2804 | cTargetLimit = cBuf; | |
2805 | uTargetLimit = uBuf; | |
2806 | ||
2807 | do{ | |
2808 | ||
2809 | cTargetLimit = cTargetLimit+ i; | |
2810 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); | |
2811 | if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2812 | errorCode=U_ZERO_ERROR; | |
2813 | continue; | |
2814 | } | |
2815 | ||
2816 | if(U_FAILURE(errorCode)){ | |
2817 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
2818 | return; | |
2819 | } | |
2820 | ||
2821 | }while (uSource<uSourceLimit); | |
2822 | ||
2823 | cSourceLimit =cTarget; | |
2824 | do{ | |
2825 | uTargetLimit=uTargetLimit+i; | |
2826 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); | |
2827 | if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2828 | errorCode=U_ZERO_ERROR; | |
2829 | continue; | |
2830 | } | |
2831 | if(U_FAILURE(errorCode)){ | |
2832 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
2833 | return; | |
2834 | } | |
2835 | }while(cSource<cSourceLimit); | |
2836 | ||
2837 | uSource = source; | |
2838 | /*test =uBuf;*/ | |
2839 | for(len=0;len<(int)(source - sourceLimit);len++){ | |
2840 | if(uBuf[len]!=uSource[len]){ | |
2841 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; | |
2842 | } | |
2843 | } | |
2844 | } | |
2845 | free(uBuf); | |
2846 | free(cBuf); | |
2847 | } | |
2848 | /* Test for Jitterbug 778 */ | |
2849 | static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ | |
2850 | const UChar* uSource; | |
2851 | const UChar* uSourceLimit; | |
2852 | const char* cSource; | |
2853 | UChar *uTargetLimit =NULL; | |
2854 | UChar *uTarget; | |
2855 | char *cTarget; | |
2856 | const char *cTargetLimit; | |
2857 | char *cBuf; | |
2858 | UChar *uBuf,*test; | |
2859 | int32_t uBufSize = 120; | |
2860 | int numCharsInTarget=0; | |
2861 | UErrorCode errorCode=U_ZERO_ERROR; | |
2862 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2863 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
2864 | uSource = source; | |
2865 | uSourceLimit=sourceLimit; | |
2866 | cTarget = cBuf; | |
2867 | cTargetLimit = cBuf +uBufSize*5; | |
2868 | uTarget = uBuf; | |
2869 | uTargetLimit = uBuf+ uBufSize*5; | |
2870 | ucnv_reset(cnv); | |
2871 | numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); | |
2872 | if(U_FAILURE(errorCode)){ | |
2873 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
2874 | return; | |
2875 | } | |
2876 | cSource = cBuf; | |
2877 | test =uBuf; | |
2878 | ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); | |
2879 | if(U_FAILURE(errorCode)){ | |
2880 | log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); | |
2881 | return; | |
2882 | } | |
2883 | uSource = source; | |
2884 | while(uSource<uSourceLimit){ | |
2885 | if(*test!=*uSource){ | |
2886 | ||
2887 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
2888 | } | |
2889 | uSource++; | |
2890 | test++; | |
2891 | } | |
2892 | free(uBuf); | |
2893 | free(cBuf); | |
2894 | } | |
2895 | ||
2896 | static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ | |
2897 | const UChar* uSource; | |
2898 | const UChar* uSourceLimit; | |
2899 | const char* cSource; | |
2900 | const char* cSourceLimit; | |
2901 | UChar *uTargetLimit =NULL; | |
2902 | UChar *uTarget; | |
2903 | char *cTarget; | |
2904 | const char *cTargetLimit; | |
2905 | char *cBuf; | |
2906 | UChar *uBuf; /*,*test;*/ | |
2907 | int32_t uBufSize = 120; | |
2908 | int len=0; | |
2909 | int i=2; | |
2910 | const UChar *temp = sourceLimit; | |
2911 | UErrorCode errorCode=U_ZERO_ERROR; | |
2912 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2913 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
2914 | ||
2915 | ucnv_reset(cnv); | |
2916 | for(;--i>0;){ | |
2917 | uSource = (UChar*) source; | |
2918 | cTarget = cBuf; | |
2919 | uTarget = uBuf; | |
2920 | cSource = cBuf; | |
2921 | cTargetLimit = cBuf; | |
2922 | uTargetLimit = uBuf+uBufSize*5; | |
2923 | cTargetLimit = cTargetLimit+uBufSize*10; | |
2924 | uSourceLimit=uSource; | |
2925 | do{ | |
2926 | ||
2927 | if (uSourceLimit < sourceLimit) { | |
2928 | uSourceLimit = uSourceLimit+1; | |
2929 | } | |
2930 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); | |
2931 | if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2932 | errorCode=U_ZERO_ERROR; | |
2933 | continue; | |
2934 | } | |
2935 | ||
2936 | if(U_FAILURE(errorCode)){ | |
2937 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
2938 | return; | |
2939 | } | |
2940 | ||
2941 | }while (uSource<temp); | |
2942 | ||
2943 | cSourceLimit =cBuf; | |
2944 | do{ | |
2945 | if (cSourceLimit < cBuf + (cTarget - cBuf)) { | |
2946 | cSourceLimit = cSourceLimit+1; | |
2947 | } | |
2948 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); | |
2949 | if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2950 | errorCode=U_ZERO_ERROR; | |
2951 | continue; | |
2952 | } | |
2953 | if(U_FAILURE(errorCode)){ | |
2954 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
2955 | return; | |
2956 | } | |
2957 | }while(cSource<cTarget); | |
2958 | ||
2959 | uSource = source; | |
2960 | /*test =uBuf;*/ | |
2961 | for(;len<(int)(source - sourceLimit);len++){ | |
2962 | if(uBuf[len]!=uSource[len]){ | |
2963 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; | |
2964 | } | |
2965 | } | |
2966 | } | |
2967 | free(uBuf); | |
2968 | free(cBuf); | |
2969 | } | |
2970 | static void | |
2971 | TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, | |
2972 | const uint16_t results[], const char* message){ | |
2973 | /* const char* s0; */ | |
2974 | const char* s=(char*)source; | |
2975 | const uint16_t *r=results; | |
2976 | UErrorCode errorCode=U_ZERO_ERROR; | |
2977 | uint32_t c,exC; | |
2978 | ucnv_reset(cnv); | |
2979 | while(s<limit) { | |
2980 | /* s0=s; */ | |
2981 | c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
2982 | if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
2983 | break; /* no more significant input */ | |
2984 | } else if(U_FAILURE(errorCode)) { | |
2985 | log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); | |
2986 | break; | |
2987 | } else { | |
2988 | if(U16_IS_LEAD(*r)){ | |
2989 | int i =0, len = 2; | |
2990 | U16_NEXT(r, i, len, exC); | |
2991 | r++; | |
2992 | }else{ | |
2993 | exC = *r; | |
2994 | } | |
2995 | if(c!=(uint32_t)(exC)) | |
2996 | log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); | |
2997 | } | |
2998 | r++; | |
2999 | } | |
3000 | } | |
3001 | ||
3002 | static int TestJitterbug930(const char* enc){ | |
3003 | UErrorCode err = U_ZERO_ERROR; | |
3004 | UConverter*converter; | |
3005 | char out[80]; | |
3006 | char*target = out; | |
3007 | UChar in[4]; | |
3008 | const UChar*source = in; | |
3009 | int32_t off[80]; | |
3010 | int32_t* offsets = off; | |
3011 | int numOffWritten=0; | |
3012 | UBool flush = 0; | |
3013 | converter = my_ucnv_open(enc, &err); | |
3014 | ||
3015 | in[0] = 0x41; /* 0x4E00;*/ | |
3016 | in[1] = 0x4E01; | |
3017 | in[2] = 0x4E02; | |
3018 | in[3] = 0x4E03; | |
3019 | ||
3020 | memset(off, '*', sizeof(off)); | |
3021 | ||
3022 | ucnv_fromUnicode (converter, | |
3023 | &target, | |
3024 | target+2, | |
3025 | &source, | |
3026 | source+3, | |
3027 | offsets, | |
3028 | flush, | |
3029 | &err); | |
3030 | ||
3031 | /* writes three bytes into the output buffer: 41 1B 24 | |
3032 | * but offsets contains 0 1 1 | |
3033 | */ | |
3034 | while(*offsets< off[10]){ | |
3035 | numOffWritten++; | |
3036 | offsets++; | |
3037 | } | |
3038 | log_verbose("Testing Jitterbug 930 for encoding %s",enc); | |
3039 | if(numOffWritten!= (int)(target-out)){ | |
3040 | log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); | |
3041 | } | |
3042 | ||
3043 | err = U_ZERO_ERROR; | |
3044 | ||
3045 | memset(off,'*' , sizeof(off)); | |
3046 | ||
3047 | flush = 1; | |
3048 | offsets=off; | |
3049 | ucnv_fromUnicode (converter, | |
3050 | &target, | |
3051 | target+4, | |
3052 | &source, | |
3053 | source, | |
3054 | offsets, | |
3055 | flush, | |
3056 | &err); | |
3057 | numOffWritten=0; | |
3058 | while(*offsets< off[10]){ | |
3059 | numOffWritten++; | |
3060 | if(*offsets!= -1){ | |
3061 | log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; | |
3062 | } | |
3063 | offsets++; | |
3064 | } | |
3065 | ||
3066 | /* writes 42 43 7A into output buffer, | |
3067 | * offsets contains -1 -1 -1 | |
3068 | */ | |
3069 | ucnv_close(converter); | |
3070 | return 0; | |
3071 | } | |
3072 | ||
3073 | static void | |
3074 | TestHZ() { | |
3075 | /* test input */ | |
3076 | static const uint16_t in[]={ | |
3077 | 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, | |
3078 | 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, | |
3079 | 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, | |
3080 | 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, | |
3081 | 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, | |
3082 | 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, | |
3083 | 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, | |
3084 | 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, | |
3085 | 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, | |
3086 | 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, | |
3087 | 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, | |
3088 | 0x005A, 0x005B, 0x005C, 0x000A | |
3089 | }; | |
3090 | const UChar* uSource; | |
3091 | const UChar* uSourceLimit; | |
3092 | const char* cSource; | |
3093 | const char* cSourceLimit; | |
3094 | UChar *uTargetLimit =NULL; | |
3095 | UChar *uTarget; | |
3096 | char *cTarget; | |
3097 | const char *cTargetLimit; | |
3098 | char *cBuf = NULL; | |
3099 | UChar *uBuf = NULL; | |
3100 | UChar *test; | |
3101 | int32_t uBufSize = 120; | |
3102 | UErrorCode errorCode=U_ZERO_ERROR; | |
3103 | UConverter *cnv = NULL; | |
3104 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3105 | int32_t* myOff= offsets; | |
3106 | cnv=ucnv_open("HZ", &errorCode); | |
3107 | if(U_FAILURE(errorCode)) { | |
3108 | log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); | |
3109 | goto cleanup; | |
3110 | } | |
3111 | ||
3112 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3113 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3114 | uSource = (const UChar*)in; | |
3115 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
3116 | cTarget = cBuf; | |
3117 | cTargetLimit = cBuf +uBufSize*5; | |
3118 | uTarget = uBuf; | |
3119 | uTargetLimit = uBuf+ uBufSize*5; | |
3120 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
3121 | if(U_FAILURE(errorCode)){ | |
3122 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3123 | goto cleanup; | |
3124 | } | |
3125 | cSource = cBuf; | |
3126 | cSourceLimit =cTarget; | |
3127 | test =uBuf; | |
3128 | myOff=offsets; | |
3129 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
3130 | if(U_FAILURE(errorCode)){ | |
3131 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3132 | goto cleanup; | |
3133 | } | |
3134 | uSource = (const UChar*)in; | |
3135 | while(uSource<uSourceLimit){ | |
3136 | if(*test!=*uSource){ | |
3137 | ||
3138 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
3139 | } | |
3140 | uSource++; | |
3141 | test++; | |
3142 | } | |
3143 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); | |
3144 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3145 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3146 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3147 | TestJitterbug930("csISO2022JP"); | |
3148 | ||
3149 | cleanup: | |
3150 | ucnv_close(cnv); | |
3151 | free(offsets); | |
3152 | free(uBuf); | |
3153 | free(cBuf); | |
3154 | } | |
3155 | ||
3156 | static void | |
3157 | TestISCII(){ | |
3158 | /* test input */ | |
3159 | static const uint16_t in[]={ | |
3160 | /* test full range of Devanagari */ | |
3161 | 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, | |
3162 | 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, | |
3163 | 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, | |
3164 | 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, | |
3165 | 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, | |
3166 | 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, | |
3167 | 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, | |
3168 | 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, | |
3169 | 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, | |
3170 | 0x096D,0x096E,0x096F, | |
3171 | /* test Soft halant*/ | |
3172 | 0x0915,0x094d, 0x200D, | |
3173 | /* test explicit halant */ | |
3174 | 0x0915,0x094d, 0x200c, | |
3175 | /* test double danda */ | |
3176 | 0x965, | |
3177 | /* test ASCII */ | |
3178 | 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
3179 | 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
3180 | 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
3181 | /* tests from Lotus */ | |
3182 | 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, | |
3183 | 0x0930,0x094D,0x200D, | |
3184 | 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, | |
3185 | 0x0915,0x0921,0x002B,0x095F, | |
3186 | /* tamil range */ | |
3187 | 0x0B86, 0xB87, 0xB88, | |
3188 | /* telugu range */ | |
3189 | 0x0C05, 0x0C02, 0x0C03,0x0c31, | |
3190 | /* kannada range */ | |
3191 | 0x0C85, 0xC82, 0x0C83, | |
3192 | /* test Abbr sign and Anudatta */ | |
3193 | 0x0970, 0x952, | |
3194 | /* 0x0958, | |
3195 | 0x0959, | |
3196 | 0x095A, | |
3197 | 0x095B, | |
3198 | 0x095C, | |
3199 | 0x095D, | |
3200 | 0x095E, | |
3201 | 0x095F,*/ | |
3202 | 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, | |
3203 | 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, | |
3204 | 0x090C , | |
3205 | 0x0962, | |
3206 | 0x0961 /* Vocallic LL 0xa6, 0xE9 */, | |
3207 | 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, | |
3208 | 0x0950 /* OM Symbol 0xa1, 0xE9,*/, | |
3209 | 0x093D /* Avagraha 0xEA, 0xE9*/, | |
3210 | 0x0958, | |
3211 | 0x0959, | |
3212 | 0x095A, | |
3213 | 0x095B, | |
3214 | 0x095C, | |
3215 | 0x095D, | |
3216 | 0x095E, | |
3217 | 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 | |
3218 | }; | |
3219 | static const unsigned char byteArr[]={ | |
3220 | ||
3221 | 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, | |
3222 | 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, | |
3223 | 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, | |
3224 | 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, | |
3225 | 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, | |
3226 | 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, | |
3227 | 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, | |
3228 | 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, | |
3229 | 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, | |
3230 | 0xf8,0xf9,0xfa, | |
3231 | /* test soft halant */ | |
3232 | 0xb3, 0xE8, 0xE9, | |
3233 | /* test explicit halant */ | |
3234 | 0xb3, 0xE8, 0xE8, | |
3235 | /* test double danda */ | |
3236 | 0xea, 0xea, | |
3237 | /* test ASCII */ | |
3238 | 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
3239 | 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
3240 | 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
3241 | /* test ATR code */ | |
3242 | ||
3243 | /* tests from Lotus */ | |
3244 | 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, | |
3245 | 0xEF,0x42,0xCF,0xE8,0xD9, | |
3246 | 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, | |
3247 | 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, | |
3248 | /* tamil range */ | |
3249 | 0xEF, 0x44, 0xa5, 0xa6, 0xa7, | |
3250 | /* telugu range */ | |
3251 | 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, | |
3252 | /* kannada range */ | |
3253 | 0xEF, 0x48,0xa4, 0xa2, 0xa3, | |
3254 | /* anudatta and abbreviation sign */ | |
3255 | 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, | |
3256 | ||
3257 | ||
3258 | 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ | |
3259 | ||
3260 | 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ | |
3261 | ||
3262 | 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ | |
3263 | ||
3264 | 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ | |
3265 | ||
3266 | 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ | |
3267 | ||
3268 | 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ | |
3269 | ||
3270 | 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ | |
3271 | ||
3272 | 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ | |
3273 | ||
3274 | 0xB3, 0xE9, /* Ka + NUKTA */ | |
3275 | ||
3276 | 0xB4, 0xE9, /* Kha + NUKTA */ | |
3277 | ||
3278 | 0xB5, 0xE9, /* Ga + NUKTA */ | |
3279 | ||
3280 | 0xBA, 0xE9, | |
3281 | ||
3282 | 0xBF, 0xE9, | |
3283 | ||
3284 | 0xC0, 0xE9, | |
3285 | ||
3286 | 0xC9, 0xE9, | |
3287 | /* INV halant RA */ | |
3288 | 0xD9, 0xE8, 0xCF, | |
3289 | 0x00, 0x00A0, | |
3290 | /* just consume unhandled codepoints */ | |
3291 | 0xEF, 0x30, | |
3292 | ||
3293 | }; | |
3294 | testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE); | |
3295 | TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); | |
3296 | ||
3297 | } | |
3298 | ||
3299 | static void | |
3300 | TestISO_2022_JP() { | |
3301 | /* test input */ | |
3302 | static const uint16_t in[]={ | |
3303 | 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, | |
3304 | 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, | |
3305 | 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, | |
3306 | 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, | |
3307 | 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
3308 | 0x201D, 0x3014, 0x000D, 0x000A, | |
3309 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
3310 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
3311 | }; | |
3312 | const UChar* uSource; | |
3313 | const UChar* uSourceLimit; | |
3314 | const char* cSource; | |
3315 | const char* cSourceLimit; | |
3316 | UChar *uTargetLimit =NULL; | |
3317 | UChar *uTarget; | |
3318 | char *cTarget; | |
3319 | const char *cTargetLimit; | |
3320 | char *cBuf = NULL; | |
3321 | UChar *uBuf = NULL; | |
3322 | UChar *test; | |
3323 | int32_t uBufSize = 120; | |
3324 | UErrorCode errorCode=U_ZERO_ERROR; | |
3325 | UConverter *cnv = NULL; | |
3326 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3327 | int32_t* myOff= offsets; | |
3328 | cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
3329 | if(U_FAILURE(errorCode)) { | |
3330 | log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); | |
3331 | goto cleanup; | |
3332 | } | |
3333 | ||
3334 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3335 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3336 | uSource = (const UChar*)in; | |
3337 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
3338 | cTarget = cBuf; | |
3339 | cTargetLimit = cBuf +uBufSize*5; | |
3340 | uTarget = uBuf; | |
3341 | uTargetLimit = uBuf+ uBufSize*5; | |
3342 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
3343 | if(U_FAILURE(errorCode)){ | |
3344 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3345 | goto cleanup; | |
3346 | } | |
3347 | cSource = cBuf; | |
3348 | cSourceLimit =cTarget; | |
3349 | test =uBuf; | |
3350 | myOff=offsets; | |
3351 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
3352 | if(U_FAILURE(errorCode)){ | |
3353 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3354 | goto cleanup; | |
3355 | } | |
3356 | ||
3357 | uSource = (const UChar*)in; | |
3358 | while(uSource<uSourceLimit){ | |
3359 | if(*test!=*uSource){ | |
3360 | ||
3361 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
3362 | } | |
3363 | uSource++; | |
3364 | test++; | |
3365 | } | |
3366 | ||
3367 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3368 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3369 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); | |
3370 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3371 | TestJitterbug930("csISO2022JP"); | |
3372 | ||
3373 | cleanup: | |
3374 | ucnv_close(cnv); | |
3375 | free(uBuf); | |
3376 | free(cBuf); | |
3377 | free(offsets); | |
3378 | } | |
3379 | ||
3380 | static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ | |
3381 | const UChar* uSource; | |
3382 | const UChar* uSourceLimit; | |
3383 | const char* cSource; | |
3384 | const char* cSourceLimit; | |
3385 | UChar *uTargetLimit =NULL; | |
3386 | UChar *uTarget; | |
3387 | char *cTarget; | |
3388 | const char *cTargetLimit; | |
3389 | char *cBuf; | |
3390 | UChar *uBuf,*test; | |
3391 | int32_t uBufSize = 120*10; | |
3392 | UErrorCode errorCode=U_ZERO_ERROR; | |
3393 | UConverter *cnv; | |
3394 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); | |
3395 | int32_t* myOff= offsets; | |
3396 | cnv=my_ucnv_open(conv, &errorCode); | |
3397 | if(U_FAILURE(errorCode)) { | |
3398 | log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); | |
3399 | return; | |
3400 | } | |
3401 | ||
3402 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); | |
3403 | cBuf =(char*)malloc(uBufSize * sizeof(char)); | |
3404 | uSource = (const UChar*)in; | |
3405 | uSourceLimit=uSource+len; | |
3406 | cTarget = cBuf; | |
3407 | cTargetLimit = cBuf +uBufSize; | |
3408 | uTarget = uBuf; | |
3409 | uTargetLimit = uBuf+ uBufSize; | |
3410 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
3411 | if(U_FAILURE(errorCode)){ | |
3412 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3413 | return; | |
3414 | } | |
3415 | /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ | |
3416 | cSource = cBuf; | |
3417 | cSourceLimit =cTarget; | |
3418 | test =uBuf; | |
3419 | myOff=offsets; | |
3420 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
3421 | if(U_FAILURE(errorCode)){ | |
3422 | log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); | |
3423 | return; | |
3424 | } | |
3425 | ||
3426 | uSource = (const UChar*)in; | |
3427 | while(uSource<uSourceLimit){ | |
3428 | if(*test!=*uSource){ | |
3429 | log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; | |
3430 | } | |
3431 | uSource++; | |
3432 | test++; | |
3433 | } | |
3434 | TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); | |
3435 | TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); | |
3436 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); | |
3437 | if(byteArr && byteArrLen!=0){ | |
3438 | TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); | |
3439 | TestToAndFromUChars(in,(const UChar*)&in[len],cnv); | |
3440 | { | |
3441 | cSource = byteArr; | |
3442 | cSourceLimit = cSource+byteArrLen; | |
3443 | test=uBuf; | |
3444 | myOff = offsets; | |
3445 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
3446 | if(U_FAILURE(errorCode)){ | |
3447 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3448 | return; | |
3449 | } | |
3450 | ||
3451 | uSource = (const UChar*)in; | |
3452 | while(uSource<uSourceLimit){ | |
3453 | if(*test!=*uSource){ | |
3454 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
3455 | } | |
3456 | uSource++; | |
3457 | test++; | |
3458 | } | |
3459 | } | |
3460 | } | |
3461 | ||
3462 | ucnv_close(cnv); | |
3463 | free(uBuf); | |
3464 | free(cBuf); | |
3465 | free(offsets); | |
3466 | } | |
3467 | static UChar U_CALLCONV | |
3468 | _charAt(int32_t offset, void *context) { | |
3469 | return ((char*)context)[offset]; | |
3470 | } | |
3471 | ||
3472 | static int32_t | |
3473 | unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ | |
3474 | int32_t srcIndex=0; | |
3475 | int32_t dstIndex=0; | |
3476 | if(U_FAILURE(*status)){ | |
3477 | return 0; | |
3478 | } | |
3479 | if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ | |
3480 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
3481 | return 0; | |
3482 | } | |
3483 | if(srcLen==-1){ | |
3484 | srcLen = (int32_t)uprv_strlen(src); | |
3485 | } | |
3486 | ||
3487 | for (; srcIndex<srcLen; ) { | |
3488 | UChar32 c = src[srcIndex++]; | |
3489 | if (c == 0x005C /*'\\'*/) { | |
3490 | c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ | |
3491 | if (c == (UChar32)0xFFFFFFFF) { | |
3492 | *status=U_INVALID_CHAR_FOUND; /* return empty string */ | |
3493 | break; /* invalid escape sequence */ | |
3494 | } | |
3495 | } | |
3496 | if(dstIndex < dstLen){ | |
3497 | if(c>0xFFFF){ | |
3498 | dst[dstIndex++] = U16_LEAD(c); | |
3499 | if(dstIndex<dstLen){ | |
3500 | dst[dstIndex]=U16_TRAIL(c); | |
3501 | }else{ | |
3502 | *status=U_BUFFER_OVERFLOW_ERROR; | |
3503 | } | |
3504 | }else{ | |
3505 | dst[dstIndex]=(UChar)c; | |
3506 | } | |
3507 | ||
3508 | }else{ | |
3509 | *status = U_BUFFER_OVERFLOW_ERROR; | |
3510 | } | |
3511 | dstIndex++; /* for preflighting */ | |
3512 | } | |
3513 | return dstIndex; | |
3514 | } | |
3515 | ||
3516 | static void | |
3517 | TestFullRoundtrip(const char* cp){ | |
3518 | UChar usource[10] ={0}; | |
3519 | UChar nsrc[10] = {0}; | |
3520 | uint32_t i=1; | |
3521 | int len=0, ulen; | |
3522 | nsrc[0]=0x0061; | |
3523 | /* Test codepoint 0 */ | |
3524 | TestConv(usource,1,cp,"",NULL,0); | |
3525 | TestConv(usource,2,cp,"",NULL,0); | |
3526 | nsrc[2]=0x5555; | |
3527 | TestConv(nsrc,3,cp,"",NULL,0); | |
3528 | ||
3529 | for(;i<=0x10FFFF;i++){ | |
3530 | if(i==0xD800){ | |
3531 | i=0xDFFF; | |
3532 | continue; | |
3533 | } | |
3534 | if(i<=0xFFFF){ | |
3535 | usource[0] =(UChar) i; | |
3536 | len=1; | |
3537 | }else{ | |
3538 | usource[0]=U16_LEAD(i); | |
3539 | usource[1]=U16_TRAIL(i); | |
3540 | len=2; | |
3541 | } | |
3542 | ulen=len; | |
3543 | if(i==0x80) { | |
3544 | usource[2]=0; | |
3545 | } | |
3546 | /* Test only single code points */ | |
3547 | TestConv(usource,ulen,cp,"",NULL,0); | |
3548 | /* Test codepoint repeated twice */ | |
3549 | usource[ulen]=usource[0]; | |
3550 | usource[ulen+1]=usource[1]; | |
3551 | ulen+=len; | |
3552 | TestConv(usource,ulen,cp,"",NULL,0); | |
3553 | /* Test codepoint repeated 3 times */ | |
3554 | usource[ulen]=usource[0]; | |
3555 | usource[ulen+1]=usource[1]; | |
3556 | ulen+=len; | |
3557 | TestConv(usource,ulen,cp,"",NULL,0); | |
3558 | /* Test codepoint in between 2 codepoints */ | |
3559 | nsrc[1]=usource[0]; | |
3560 | nsrc[2]=usource[1]; | |
3561 | nsrc[len+1]=0x5555; | |
3562 | TestConv(nsrc,len+2,cp,"",NULL,0); | |
3563 | uprv_memset(usource,0,sizeof(UChar)*10); | |
3564 | } | |
3565 | } | |
3566 | ||
3567 | static void | |
3568 | TestRoundTrippingAllUTF(void){ | |
3569 | if(!getTestOption(QUICK_OPTION)){ | |
3570 | log_verbose("Running exhaustive round trip test for BOCU-1\n"); | |
3571 | TestFullRoundtrip("BOCU-1"); | |
3572 | log_verbose("Running exhaustive round trip test for SCSU\n"); | |
3573 | TestFullRoundtrip("SCSU"); | |
3574 | log_verbose("Running exhaustive round trip test for UTF-8\n"); | |
3575 | TestFullRoundtrip("UTF-8"); | |
3576 | log_verbose("Running exhaustive round trip test for CESU-8\n"); | |
3577 | TestFullRoundtrip("CESU-8"); | |
3578 | log_verbose("Running exhaustive round trip test for UTF-16BE\n"); | |
3579 | TestFullRoundtrip("UTF-16BE"); | |
3580 | log_verbose("Running exhaustive round trip test for UTF-16LE\n"); | |
3581 | TestFullRoundtrip("UTF-16LE"); | |
3582 | log_verbose("Running exhaustive round trip test for UTF-16\n"); | |
3583 | TestFullRoundtrip("UTF-16"); | |
3584 | log_verbose("Running exhaustive round trip test for UTF-32BE\n"); | |
3585 | TestFullRoundtrip("UTF-32BE"); | |
3586 | log_verbose("Running exhaustive round trip test for UTF-32LE\n"); | |
3587 | TestFullRoundtrip("UTF-32LE"); | |
3588 | log_verbose("Running exhaustive round trip test for UTF-32\n"); | |
3589 | TestFullRoundtrip("UTF-32"); | |
3590 | log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
3591 | TestFullRoundtrip("UTF-7"); | |
3592 | log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
3593 | TestFullRoundtrip("UTF-7,version=1"); | |
3594 | log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); | |
3595 | TestFullRoundtrip("IMAP-mailbox-name"); | |
3596 | /* | |
3597 | * | |
3598 | * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of | |
3599 | * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). | |
3600 | * The old mappings remain as fallbacks. | |
3601 | * This test may be reintroduced at a later time. | |
3602 | * | |
3603 | * 110118 - mow | |
3604 | */ | |
3605 | /* | |
3606 | log_verbose("Running exhaustive round trip test for GB18030\n"); | |
3607 | TestFullRoundtrip("GB18030"); | |
3608 | */ | |
3609 | } | |
3610 | } | |
3611 | ||
/*
 * SCSU converter test.
 *
 * First, every escaped string in fTestCases is expanded with unescape() and
 * round-tripped through TestConv() with the "SCSU" converter.  Then fixed
 * UTF-16 samples are passed to TestConv() together with reference SCSU byte
 * sequences (all-features, Japanese, German, Russian), and finally monkeyIn
 * is round-tripped without reference bytes.
 */
static void
TestSCSU() {

    /* UTF-16 text / reference SCSU bytes pairs handed to TestConv() below */
    static const uint16_t germanUTF16[]={
        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
    };

    static const uint8_t germanSCSU[]={
        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
    };

    static const uint16_t russianUTF16[]={
        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
    };

    static const uint8_t russianSCSU[]={
        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
    };

    static const uint16_t japaneseUTF16[]={
        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
    };

    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
    it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
    static const uint8_t japaneseSCSU[]={
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
        0xcb, 0x82
    };

    static const uint16_t allFeaturesUTF16[]={
        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
        0x01df, 0xf000, 0xdbff, 0xdfff
    };

    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
     * result here (34B vs. 35B)
     */
    static const uint8_t allFeaturesSCSU[]={
        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
        0xdf, 0x14, 0x80, 0x15, 0xff
    };
    /* Larger mixed sample; round-tripped below without reference bytes. */
    static const uint16_t monkeyIn[]={
        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
        /* test non-BMP code points */
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,


        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
    };
    /* Escaped test strings; each is expanded by unescape() before use. */
    static const char *fTestCases [] = {
        "\\ud800\\udc00", /* smallest surrogate*/
        "\\ud8ff\\udcff",
        "\\udBff\\udFff", /* largest surrogate pair*/
        "\\ud834\\udc00",
        "\\U0010FFFF",
        "Hello \\u9292 \\u9192 World!",
        "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
        "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",

        "\\u0648\\u06c8", /* catch missing reset*/
        "\\u0648\\u06c8",

        "\\u4444\\uE001", /* lowest quotable*/
        "\\u4444\\uf2FF", /* highest quotable*/
        "\\u4444\\uf188\\u4444",
        "\\u4444\\uf188\\uf288",
        "\\u4444\\uf188abc\\u0429\\uf288",
        "\\u9292\\u2222",
        "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
        "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
        "Hello World!123456",
        "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/

        "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
        "abc\\u4411d", /* uses SQU*/
        "abc\\u4411\\u4412d",/* uses SCU*/
        "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
        "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
        "\\u9292\\u2222",
        "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
        "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
        "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",

        "", /* empty input*/
        "\\u0000", /* smallest BMP character*/
        "\\uFFFF", /* largest BMP character*/

        /* regression tests*/
        "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
        "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
        "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
        "\\u0041\\u00df\\u0401\\u015f",
        "\\u9066\\u2123abc",
        "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
        "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
    };
    int i=0;
    for(;i<UPRV_LENGTHOF(fTestCases);i++){
        const char* cSrc = fTestCases[i];
        UErrorCode status = U_ZERO_ERROR;
        int32_t cSrcLen,srcLen;
        UChar* src;
        /* UConverter* cnv = ucnv_open("SCSU",&status); */
        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
        /* One UChar per input char plus one extra; the malloc result is not checked. */
        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
        /* srcLen becomes the unescaped length; `status` is not inspected afterwards. */
        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
        free(src);
    }
    /* Fixed samples with reference bytes; the all-features call is issued twice with identical arguments. */
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
}
3790 | ||
3791 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
3792 | static void TestJitterbug2346(){ | |
3793 | char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, | |
3794 | 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; | |
3795 | uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; | |
3796 | ||
3797 | UChar uTarget[500]={'\0'}; | |
3798 | UChar* utarget=uTarget; | |
3799 | UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
3800 | ||
3801 | char cTarget[500]={'\0'}; | |
3802 | char* ctarget=cTarget; | |
3803 | char* ctargetLimit=cTarget+sizeof(cTarget); | |
3804 | const char* csource=source; | |
3805 | UChar* temp = expected; | |
3806 | UErrorCode err=U_ZERO_ERROR; | |
3807 | ||
3808 | UConverter* conv =ucnv_open("ISO_2022_JP",&err); | |
3809 | if(U_FAILURE(err)) { | |
3810 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); | |
3811 | return; | |
3812 | } | |
3813 | ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); | |
3814 | if(U_FAILURE(err)) { | |
3815 | log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); | |
3816 | return; | |
3817 | } | |
3818 | utargetLimit=utarget; | |
3819 | utarget = uTarget; | |
3820 | while(utarget<utargetLimit){ | |
3821 | if(*temp!=*utarget){ | |
3822 | ||
3823 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; | |
3824 | } | |
3825 | utarget++; | |
3826 | temp++; | |
3827 | } | |
3828 | ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); | |
3829 | if(U_FAILURE(err)) { | |
3830 | log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); | |
3831 | return; | |
3832 | } | |
3833 | ctargetLimit=ctarget; | |
3834 | ctarget =cTarget; | |
3835 | ucnv_close(conv); | |
3836 | ||
3837 | ||
3838 | } | |
3839 | ||
3840 | static void | |
3841 | TestISO_2022_JP_1() { | |
3842 | /* test input */ | |
3843 | static const uint16_t in[]={ | |
3844 | 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, | |
3845 | 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, | |
3846 | 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, | |
3847 | 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, | |
3848 | 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, | |
3849 | 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
3850 | 0x201D, 0x000D, 0x000A, | |
3851 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
3852 | 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, | |
3853 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
3854 | 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, | |
3855 | 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, | |
3856 | 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A | |
3857 | }; | |
3858 | const UChar* uSource; | |
3859 | const UChar* uSourceLimit; | |
3860 | const char* cSource; | |
3861 | const char* cSourceLimit; | |
3862 | UChar *uTargetLimit =NULL; | |
3863 | UChar *uTarget; | |
3864 | char *cTarget; | |
3865 | const char *cTargetLimit; | |
3866 | char *cBuf; | |
3867 | UChar *uBuf,*test; | |
3868 | int32_t uBufSize = 120; | |
3869 | UErrorCode errorCode=U_ZERO_ERROR; | |
3870 | UConverter *cnv; | |
3871 | ||
3872 | cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
3873 | if(U_FAILURE(errorCode)) { | |
3874 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
3875 | return; | |
3876 | } | |
3877 | ||
3878 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3879 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3880 | uSource = (const UChar*)in; | |
3881 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
3882 | cTarget = cBuf; | |
3883 | cTargetLimit = cBuf +uBufSize*5; | |
3884 | uTarget = uBuf; | |
3885 | uTargetLimit = uBuf+ uBufSize*5; | |
3886 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); | |
3887 | if(U_FAILURE(errorCode)){ | |
3888 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3889 | return; | |
3890 | } | |
3891 | cSource = cBuf; | |
3892 | cSourceLimit =cTarget; | |
3893 | test =uBuf; | |
3894 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); | |
3895 | if(U_FAILURE(errorCode)){ | |
3896 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3897 | return; | |
3898 | } | |
3899 | uSource = (const UChar*)in; | |
3900 | while(uSource<uSourceLimit){ | |
3901 | if(*test!=*uSource){ | |
3902 | ||
3903 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
3904 | } | |
3905 | uSource++; | |
3906 | test++; | |
3907 | } | |
3908 | /*ucnv_close(cnv); | |
3909 | cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ | |
3910 | /*Test for the condition where there is an invalid character*/ | |
3911 | ucnv_reset(cnv); | |
3912 | { | |
3913 | static const uint8_t source2[]={0x0e,0x24,0x053}; | |
3914 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); | |
3915 | } | |
3916 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3917 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
3918 | ucnv_close(cnv); | |
3919 | free(uBuf); | |
3920 | free(cBuf); | |
3921 | } | |
3922 | ||
3923 | static void | |
3924 | TestISO_2022_JP_2() { | |
3925 | /* test input */ | |
3926 | static const uint16_t in[]={ | |
3927 | 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, | |
3928 | 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, | |
3929 | 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, | |
3930 | 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, | |
3931 | 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, | |
3932 | 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, | |
3933 | 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, | |
3934 | 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, | |
3935 | 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, | |
3936 | 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, | |
3937 | 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, | |
3938 | 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, | |
3939 | 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, | |
3940 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
3941 | 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, | |
3942 | 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, | |
3943 | 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, | |
3944 | 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, | |
3945 | 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A | |
3946 | }; | |
3947 | const UChar* uSource; | |
3948 | const UChar* uSourceLimit; | |
3949 | const char* cSource; | |
3950 | const char* cSourceLimit; | |
3951 | UChar *uTargetLimit =NULL; | |
3952 | UChar *uTarget; | |
3953 | char *cTarget; | |
3954 | const char *cTargetLimit; | |
3955 | char *cBuf = NULL; | |
3956 | UChar *uBuf = NULL; | |
3957 | UChar *test; | |
3958 | int32_t uBufSize = 120; | |
3959 | UErrorCode errorCode=U_ZERO_ERROR; | |
3960 | UConverter *cnv = NULL; | |
3961 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3962 | int32_t* myOff= offsets; | |
3963 | cnv=ucnv_open("ISO_2022_JP_2", &errorCode); | |
3964 | if(U_FAILURE(errorCode)) { | |
3965 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
3966 | goto cleanup; | |
3967 | } | |
3968 | ||
3969 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3970 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3971 | uSource = (const UChar*)in; | |
3972 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
3973 | cTarget = cBuf; | |
3974 | cTargetLimit = cBuf +uBufSize*5; | |
3975 | uTarget = uBuf; | |
3976 | uTargetLimit = uBuf+ uBufSize*5; | |
3977 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
3978 | if(U_FAILURE(errorCode)){ | |
3979 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3980 | goto cleanup; | |
3981 | } | |
3982 | cSource = cBuf; | |
3983 | cSourceLimit =cTarget; | |
3984 | test =uBuf; | |
3985 | myOff=offsets; | |
3986 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
3987 | if(U_FAILURE(errorCode)){ | |
3988 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
3989 | goto cleanup; | |
3990 | } | |
3991 | uSource = (const UChar*)in; | |
3992 | while(uSource<uSourceLimit){ | |
3993 | if(*test!=*uSource){ | |
3994 | ||
3995 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
3996 | } | |
3997 | uSource++; | |
3998 | test++; | |
3999 | } | |
4000 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4001 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4002 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4003 | /*Test for the condition where there is an invalid character*/ | |
4004 | ucnv_reset(cnv); | |
4005 | { | |
4006 | static const uint8_t source2[]={0x0e,0x24,0x053}; | |
4007 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); | |
4008 | } | |
4009 | ||
4010 | cleanup: | |
4011 | ucnv_close(cnv); | |
4012 | free(uBuf); | |
4013 | free(cBuf); | |
4014 | free(offsets); | |
4015 | } | |
4016 | ||
4017 | static void | |
4018 | TestISO_2022_KR() { | |
4019 | /* test input */ | |
4020 | static const uint16_t in[]={ | |
4021 | 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D | |
4022 | ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 | |
4023 | ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 | |
4024 | ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB | |
4025 | ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 | |
4026 | ,0x53E3,0x53E4,0x000A,0x000D}; | |
4027 | const UChar* uSource; | |
4028 | const UChar* uSourceLimit; | |
4029 | const char* cSource; | |
4030 | const char* cSourceLimit; | |
4031 | UChar *uTargetLimit =NULL; | |
4032 | UChar *uTarget; | |
4033 | char *cTarget; | |
4034 | const char *cTargetLimit; | |
4035 | char *cBuf = NULL; | |
4036 | UChar *uBuf = NULL; | |
4037 | UChar *test; | |
4038 | int32_t uBufSize = 120; | |
4039 | UErrorCode errorCode=U_ZERO_ERROR; | |
4040 | UConverter *cnv = NULL; | |
4041 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4042 | int32_t* myOff= offsets; | |
4043 | cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); | |
4044 | if(U_FAILURE(errorCode)) { | |
4045 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
4046 | goto cleanup; | |
4047 | } | |
4048 | ||
4049 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4050 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
4051 | uSource = (const UChar*)in; | |
4052 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
4053 | cTarget = cBuf; | |
4054 | cTargetLimit = cBuf +uBufSize*5; | |
4055 | uTarget = uBuf; | |
4056 | uTargetLimit = uBuf+ uBufSize*5; | |
4057 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
4058 | if(U_FAILURE(errorCode)){ | |
4059 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4060 | goto cleanup; | |
4061 | } | |
4062 | cSource = cBuf; | |
4063 | cSourceLimit =cTarget; | |
4064 | test =uBuf; | |
4065 | myOff=offsets; | |
4066 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
4067 | if(U_FAILURE(errorCode)){ | |
4068 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4069 | goto cleanup; | |
4070 | } | |
4071 | uSource = (const UChar*)in; | |
4072 | while(uSource<uSourceLimit){ | |
4073 | if(*test!=*uSource){ | |
4074 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
4075 | } | |
4076 | uSource++; | |
4077 | test++; | |
4078 | } | |
4079 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
4080 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4081 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4082 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4083 | TestJitterbug930("csISO2022KR"); | |
4084 | /*Test for the condition where there is an invalid character*/ | |
4085 | ucnv_reset(cnv); | |
4086 | { | |
4087 | static const uint8_t source2[]={0x1b,0x24,0x053}; | |
4088 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
4089 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
4090 | } | |
4091 | ||
4092 | cleanup: | |
4093 | ucnv_close(cnv); | |
4094 | free(uBuf); | |
4095 | free(cBuf); | |
4096 | free(offsets); | |
4097 | } | |
4098 | ||
4099 | static void | |
4100 | TestISO_2022_KR_1() { | |
4101 | /* test input */ | |
4102 | static const uint16_t in[]={ | |
4103 | 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D | |
4104 | ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 | |
4105 | ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 | |
4106 | ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB | |
4107 | ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 | |
4108 | ,0x53E3,0x53E4,0x000A,0x000D}; | |
4109 | const UChar* uSource; | |
4110 | const UChar* uSourceLimit; | |
4111 | const char* cSource; | |
4112 | const char* cSourceLimit; | |
4113 | UChar *uTargetLimit =NULL; | |
4114 | UChar *uTarget; | |
4115 | char *cTarget; | |
4116 | const char *cTargetLimit; | |
4117 | char *cBuf = NULL; | |
4118 | UChar *uBuf = NULL; | |
4119 | UChar *test; | |
4120 | int32_t uBufSize = 120; | |
4121 | UErrorCode errorCode=U_ZERO_ERROR; | |
4122 | UConverter *cnv = NULL; | |
4123 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4124 | int32_t* myOff= offsets; | |
4125 | cnv=ucnv_open("ibm-25546", &errorCode); | |
4126 | if(U_FAILURE(errorCode)) { | |
4127 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
4128 | goto cleanup; | |
4129 | } | |
4130 | ||
4131 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4132 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
4133 | uSource = (const UChar*)in; | |
4134 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
4135 | cTarget = cBuf; | |
4136 | cTargetLimit = cBuf +uBufSize*5; | |
4137 | uTarget = uBuf; | |
4138 | uTargetLimit = uBuf+ uBufSize*5; | |
4139 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
4140 | if(U_FAILURE(errorCode)){ | |
4141 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4142 | goto cleanup; | |
4143 | } | |
4144 | cSource = cBuf; | |
4145 | cSourceLimit =cTarget; | |
4146 | test =uBuf; | |
4147 | myOff=offsets; | |
4148 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
4149 | if(U_FAILURE(errorCode)){ | |
4150 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4151 | goto cleanup; | |
4152 | } | |
4153 | uSource = (const UChar*)in; | |
4154 | while(uSource<uSourceLimit){ | |
4155 | if(*test!=*uSource){ | |
4156 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
4157 | } | |
4158 | uSource++; | |
4159 | test++; | |
4160 | } | |
4161 | ucnv_reset(cnv); | |
4162 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
4163 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4164 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4165 | ucnv_reset(cnv); | |
4166 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4167 | /*Test for the condition where there is an invalid character*/ | |
4168 | ucnv_reset(cnv); | |
4169 | { | |
4170 | static const uint8_t source2[]={0x1b,0x24,0x053}; | |
4171 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
4172 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
4173 | } | |
4174 | ||
4175 | cleanup: | |
4176 | ucnv_close(cnv); | |
4177 | free(uBuf); | |
4178 | free(cBuf); | |
4179 | free(offsets); | |
4180 | } | |
4181 | ||
4182 | static void TestJitterbug2411(){ | |
4183 | static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" | |
4184 | "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; | |
4185 | UConverter* kr=NULL, *kr1=NULL; | |
4186 | UErrorCode errorCode = U_ZERO_ERROR; | |
4187 | UChar tgt[100]={'\0'}; | |
4188 | UChar* target = tgt; | |
4189 | UChar* targetLimit = target+100; | |
4190 | kr=ucnv_open("iso-2022-kr", &errorCode); | |
4191 | if(U_FAILURE(errorCode)) { | |
4192 | log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); | |
4193 | return; | |
4194 | } | |
4195 | ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); | |
4196 | if(U_FAILURE(errorCode)) { | |
4197 | log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); | |
4198 | return; | |
4199 | } | |
4200 | kr1 = ucnv_open("ibm-25546", &errorCode); | |
4201 | if(U_FAILURE(errorCode)) { | |
4202 | log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); | |
4203 | return; | |
4204 | } | |
4205 | target = tgt; | |
4206 | targetLimit = target+100; | |
4207 | ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); | |
4208 | ||
4209 | if(U_FAILURE(errorCode)) { | |
4210 | log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); | |
4211 | return; | |
4212 | } | |
4213 | ||
4214 | ucnv_close(kr); | |
4215 | ucnv_close(kr1); | |
4216 | ||
4217 | } | |
4218 | ||
4219 | static void | |
4220 | TestJIS(){ | |
4221 | /* From Unicode moved to testdata/conversion.txt */ | |
4222 | /*To Unicode*/ | |
4223 | { | |
4224 | static const uint8_t sampleTextJIS[] = { | |
4225 | 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ | |
4226 | 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
4227 | 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ | |
4228 | }; | |
4229 | static const uint16_t expectedISO2022JIS[] = { | |
4230 | 0x0041, 0x0042, | |
4231 | 0xFF81, 0xFF82, | |
4232 | 0x3000 | |
4233 | }; | |
4234 | static const int32_t toISO2022JISOffs[]={ | |
4235 | 3,4, | |
4236 | 8,9, | |
4237 | 16 | |
4238 | }; | |
4239 | ||
4240 | static const uint8_t sampleTextJIS7[] = { | |
4241 | 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ | |
4242 | 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
4243 | 0x1b,0x24,0x42,0x21,0x21, | |
4244 | 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ | |
4245 | 0x21,0x22, | |
4246 | 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ | |
4247 | }; | |
4248 | static const uint16_t expectedISO2022JIS7[] = { | |
4249 | 0x0041, 0x0042, | |
4250 | 0xFF81, 0xFF82, | |
4251 | 0x3000, | |
4252 | 0xFF81, 0xFF82, | |
4253 | 0x3001, | |
4254 | 0x3000 | |
4255 | }; | |
4256 | static const int32_t toISO2022JIS7Offs[]={ | |
4257 | 3,4, | |
4258 | 8,9, | |
4259 | 13,16, | |
4260 | 17, | |
4261 | 19,27 | |
4262 | }; | |
4263 | static const uint8_t sampleTextJIS8[] = { | |
4264 | 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ | |
4265 | 0xa1,0xc8,0xd9,/*Katakana Set*/ | |
4266 | 0x1b,0x28,0x42, | |
4267 | 0x41,0x42, | |
4268 | 0xb1,0xc3, /*Katakana Set*/ | |
4269 | 0x1b,0x24,0x42,0x21,0x21 | |
4270 | }; | |
4271 | static const uint16_t expectedISO2022JIS8[] = { | |
4272 | 0x0041, 0x0042, | |
4273 | 0xff61, 0xff88, 0xff99, | |
4274 | 0x0041, 0x0042, | |
4275 | 0xff71, 0xff83, | |
4276 | 0x3000 | |
4277 | }; | |
4278 | static const int32_t toISO2022JIS8Offs[]={ | |
4279 | 3, 4, 5, 6, | |
4280 | 7, 11, 12, 13, | |
4281 | 14, 18, | |
4282 | }; | |
4283 | ||
4284 | testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, | |
4285 | UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE); | |
4286 | testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, | |
4287 | UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE); | |
4288 | testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, | |
4289 | UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE); | |
4290 | } | |
4291 | ||
4292 | } | |
4293 | ||
4294 | ||
4295 | #if 0 | |
4296 | ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
4297 | ||
4298 | static void TestJitterbug915(){ | |
4299 | /* tests for roundtripping of the below sequence | |
4300 | \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / | |
4301 | \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / | |
4302 | \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / | |
4303 | \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / | |
4304 | \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / | |
4305 | \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / | |
4306 | \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / | |
4307 | */ | |
4308 | static const char cSource[]={ | |
4309 | 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
4310 | 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
4311 | 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
4312 | 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
4313 | 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
4314 | 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
4315 | 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, | |
4316 | 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
4317 | 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
4318 | 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
4319 | 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
4320 | 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
4321 | 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
4322 | 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
4323 | 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
4324 | 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
4325 | 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
4326 | 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
4327 | 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
4328 | 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
4329 | 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
4330 | 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
4331 | 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
4332 | 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
4333 | 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
4334 | 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
4335 | 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
4336 | 0x37, 0x20, 0x2A, 0x2F | |
4337 | }; | |
4338 | UChar uTarget[500]={'\0'}; | |
4339 | UChar* utarget=uTarget; | |
4340 | UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
4341 | ||
4342 | char cTarget[500]={'\0'}; | |
4343 | char* ctarget=cTarget; | |
4344 | char* ctargetLimit=cTarget+sizeof(cTarget); | |
4345 | const char* csource=cSource; | |
4346 | const char* tempSrc = cSource; | |
4347 | UErrorCode err=U_ZERO_ERROR; | |
4348 | ||
4349 | UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); | |
4350 | if(U_FAILURE(err)) { | |
4351 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); | |
4352 | return; | |
4353 | } | |
4354 | ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); | |
4355 | if(U_FAILURE(err)) { | |
4356 | log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); | |
4357 | return; | |
4358 | } | |
4359 | utargetLimit=utarget; | |
4360 | utarget = uTarget; | |
4361 | ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); | |
4362 | if(U_FAILURE(err)) { | |
4363 | log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); | |
4364 | return; | |
4365 | } | |
4366 | ctargetLimit=ctarget; | |
4367 | ctarget =cTarget; | |
4368 | while(ctarget<ctargetLimit){ | |
4369 | if(*ctarget != *tempSrc){ | |
4370 | log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; | |
4371 | } | |
4372 | ++ctarget; | |
4373 | ++tempSrc; | |
4374 | } | |
4375 | ||
4376 | ucnv_close(conv); | |
4377 | } | |
4378 | ||
4379 | static void | |
4380 | TestISO_2022_CN_EXT() { | |
4381 | /* test input */ | |
4382 | static const uint16_t in[]={ | |
4383 | /* test Non-BMP code points */ | |
4384 | 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, | |
4385 | 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, | |
4386 | 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, | |
4387 | 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, | |
4388 | 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, | |
4389 | 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, | |
4390 | 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, | |
4391 | 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, | |
4392 | 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, | |
4393 | 0xD869, 0xDED5, | |
4394 | ||
4395 | 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, | |
4396 | 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, | |
4397 | 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, | |
4398 | 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, | |
4399 | 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, | |
4400 | 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, | |
4401 | 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, | |
4402 | 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, | |
4403 | 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, | |
4404 | 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, | |
4405 | 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, | |
4406 | 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, | |
4407 | 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, | |
4408 | 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, | |
4409 | 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, | |
4410 | 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, | |
4411 | 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, | |
4412 | 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, | |
4413 | ||
4414 | 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A | |
4415 | ||
4416 | }; | |
4417 | ||
4418 | const UChar* uSource; | |
4419 | const UChar* uSourceLimit; | |
4420 | const char* cSource; | |
4421 | const char* cSourceLimit; | |
4422 | UChar *uTargetLimit =NULL; | |
4423 | UChar *uTarget; | |
4424 | char *cTarget; | |
4425 | const char *cTargetLimit; | |
4426 | char *cBuf = NULL; | |
4427 | UChar *uBuf = NULL; | |
4428 | UChar *test; | |
4429 | int32_t uBufSize = 180; | |
4430 | UErrorCode errorCode=U_ZERO_ERROR; | |
4431 | UConverter *cnv = NULL; | |
4432 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4433 | int32_t* myOff= offsets; | |
4434 | cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); | |
4435 | if(U_FAILURE(errorCode)) { | |
4436 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
4437 | goto cleanup; | |
4438 | } | |
4439 | ||
4440 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4441 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
4442 | uSource = (const UChar*)in; | |
4443 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
4444 | cTarget = cBuf; | |
4445 | cTargetLimit = cBuf +uBufSize*5; | |
4446 | uTarget = uBuf; | |
4447 | uTargetLimit = uBuf+ uBufSize*5; | |
4448 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
4449 | if(U_FAILURE(errorCode)){ | |
4450 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4451 | goto cleanup; | |
4452 | } | |
4453 | cSource = cBuf; | |
4454 | cSourceLimit =cTarget; | |
4455 | test =uBuf; | |
4456 | myOff=offsets; | |
4457 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
4458 | if(U_FAILURE(errorCode)){ | |
4459 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4460 | goto cleanup; | |
4461 | } | |
4462 | uSource = (const UChar*)in; | |
4463 | while(uSource<uSourceLimit){ | |
4464 | if(*test!=*uSource){ | |
4465 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
4466 | } | |
4467 | else{ | |
4468 | log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
4469 | } | |
4470 | uSource++; | |
4471 | test++; | |
4472 | } | |
4473 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4474 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4475 | /*Test for the condition where there is an invalid character*/ | |
4476 | ucnv_reset(cnv); | |
4477 | { | |
4478 | static const uint8_t source2[]={0x0e,0x24,0x053}; | |
4479 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); | |
4480 | } | |
4481 | ||
4482 | cleanup: | |
4483 | ucnv_close(cnv); | |
4484 | free(uBuf); | |
4485 | free(cBuf); | |
4486 | free(offsets); | |
4487 | } | |
4488 | #endif | |
4489 | ||
4490 | static void | |
4491 | TestISO_2022_CN() { | |
4492 | /* test input */ | |
4493 | static const uint16_t in[]={ | |
4494 | /* jitterbug 951 */ | |
4495 | 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, | |
4496 | 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, | |
4497 | 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, | |
4498 | 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, | |
4499 | 0x0020, 0x0045, 0x004e, 0x0044, | |
4500 | /**/ | |
4501 | 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, | |
4502 | 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, | |
4503 | 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, | |
4504 | 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, | |
4505 | 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, | |
4506 | 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, | |
4507 | 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, | |
4508 | 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, | |
4509 | 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, | |
4510 | 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, | |
4511 | 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, | |
4512 | 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, | |
4513 | 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, | |
4514 | 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, | |
4515 | 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, | |
4516 | 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, | |
4517 | 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, | |
4518 | ||
4519 | }; | |
4520 | const UChar* uSource; | |
4521 | const UChar* uSourceLimit; | |
4522 | const char* cSource; | |
4523 | const char* cSourceLimit; | |
4524 | UChar *uTargetLimit =NULL; | |
4525 | UChar *uTarget; | |
4526 | char *cTarget; | |
4527 | const char *cTargetLimit; | |
4528 | char *cBuf = NULL; | |
4529 | UChar *uBuf = NULL; | |
4530 | UChar *test; | |
4531 | int32_t uBufSize = 180; | |
4532 | UErrorCode errorCode=U_ZERO_ERROR; | |
4533 | UConverter *cnv = NULL; | |
4534 | int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4535 | int32_t* myOff= offsets; | |
4536 | cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); | |
4537 | if(U_FAILURE(errorCode)) { | |
4538 | log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); | |
4539 | goto cleanup; | |
4540 | } | |
4541 | ||
4542 | uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4543 | cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
4544 | uSource = (const UChar*)in; | |
4545 | uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); | |
4546 | cTarget = cBuf; | |
4547 | cTargetLimit = cBuf +uBufSize*5; | |
4548 | uTarget = uBuf; | |
4549 | uTargetLimit = uBuf+ uBufSize*5; | |
4550 | ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); | |
4551 | if(U_FAILURE(errorCode)){ | |
4552 | log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4553 | goto cleanup; | |
4554 | } | |
4555 | cSource = cBuf; | |
4556 | cSourceLimit =cTarget; | |
4557 | test =uBuf; | |
4558 | myOff=offsets; | |
4559 | ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); | |
4560 | if(U_FAILURE(errorCode)){ | |
4561 | log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); | |
4562 | goto cleanup; | |
4563 | } | |
4564 | uSource = (const UChar*)in; | |
4565 | while(uSource<uSourceLimit){ | |
4566 | if(*test!=*uSource){ | |
4567 | log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; | |
4568 | } | |
4569 | else{ | |
4570 | log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
4571 | } | |
4572 | uSource++; | |
4573 | test++; | |
4574 | } | |
4575 | TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); | |
4576 | TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4577 | TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4578 | TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); | |
4579 | TestJitterbug930("csISO2022CN"); | |
4580 | /*Test for the condition where there is an invalid character*/ | |
4581 | ucnv_reset(cnv); | |
4582 | { | |
4583 | static const uint8_t source2[]={0x0e,0x24,0x053}; | |
4584 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); | |
4585 | } | |
4586 | ||
4587 | cleanup: | |
4588 | ucnv_close(cnv); | |
4589 | free(uBuf); | |
4590 | free(cBuf); | |
4591 | free(offsets); | |
4592 | } | |
4593 | ||
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that
 * UConverterCallbackReason is UCNV_IRREGULAR.
 *
 * One table row: which converter to open and which bytes to feed it.
 */
typedef struct {
    const char *converterName;   /* name passed to ucnv_open(); NULL marks the end of a table */
    const char *inputText;       /* bytes to convert (not NUL-terminated) */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4600 | ||
4601 | /* Callback for TestJitterbug6175, should only get called for empty segment errors */ | |
4602 | static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, | |
4603 | int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { | |
4604 | // suppress compiler warnings about unused variables | |
4605 | (void)context; | |
4606 | (void)codeUnits; | |
4607 | (void)length; | |
4608 | if (reason > UCNV_IRREGULAR) { | |
4609 | return; | |
4610 | } | |
4611 | if (reason != UCNV_IRREGULAR) { | |
4612 | log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); | |
4613 | } | |
4614 | /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ | |
4615 | *err = U_ZERO_ERROR; | |
4616 | ucnv_cbToUWriteSub(toArgs,0,err); | |
4617 | } | |
4618 | ||
4619 | enum { kEmptySegmentToUCharsMax = 64 }; /* capacity, in UChars, of the scratch output buffer used by TestJitterbug6175 */ | |
4620 | static void TestJitterbug6175(void) { /* Runs each table entry through its converter with UCNV_TO_U_CALLBACK_EMPTYSEGMENT installed; failures are reported by that callback. */ | |
4621 | static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; /* escape sequence 1B 24 42 immediately followed by escape sequence 1B 28 42 */ | |
4622 | static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; /* 0x0E immediately followed by 0x0F */ | |
4623 | static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; /* 0x0E immediately followed by 0x0F */ | |
4624 | static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; /* 0x0E immediately followed by another escape sequence */ | |
4625 | static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; /* 7E 7B immediately followed by 7E 7D */ | |
4626 | static const EmptySegmentTest emptySegmentTests[] = { | |
4627 | /* converterName inputText inputTextLength */ | |
4628 | { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, | |
4629 | { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, | |
4630 | { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, | |
4631 | { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, | |
4632 | { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, | |
4633 | /* terminator: */ | |
4634 | { NULL, NULL, 0, } | |
4635 | }; | |
4636 | const EmptySegmentTest * testPtr; | |
4637 | for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { /* walk the table up to the NULL-named terminator */ | |
4638 | UErrorCode err = U_ZERO_ERROR; | |
4639 | UConverter * cnv = ucnv_open(testPtr->converterName, &err); | |
4640 | if (U_FAILURE(err)) { | |
4641 | log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); | |
4642 | return; /* the remaining table entries are not run either */ | |
4643 | } | |
4644 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); | |
4645 | if (U_FAILURE(err)) { | |
4646 | log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); | |
4647 | ucnv_close(cnv); | |
4648 | return; | |
4649 | } | |
4650 | { | |
4651 | UChar toUChars[kEmptySegmentToUCharsMax]; | |
4652 | UChar * toUCharsPtr = toUChars; | |
4653 | const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; | |
4654 | const char * inCharsPtr = testPtr->inputText; | |
4655 | const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; | |
4656 | ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); /* neither err nor the converted text is inspected afterwards; the callback does the checking */ | |
4657 | } | |
4658 | ucnv_close(cnv); | |
4659 | } | |
4660 | } | |
4661 | ||
4662 | static void | |
4663 | TestEBCDIC_STATEFUL() { /* Drives the ibm-930 converter through TestNextUChar/TestNextUCharError with two valid sequences and several boundary inputs. */ | |
4664 | /* test input */ | |
4665 | static const uint8_t in[]={ | |
4666 | 0x61, | |
4667 | 0x1a, | |
4668 | 0x0f, 0x4b, /* counted as one 2-byte read in results below */ | |
4669 | 0x42, | |
4670 | 0x40, | |
4671 | 0x36, | |
4672 | }; | |
4673 | | |
4674 | /* expected test results */ | |
4675 | static const int32_t results[]={ | |
4676 | /* pairs of: number of bytes read, code point -- one pair per call, in input order */ | |
4677 | 1, 0x002f, | |
4678 | 1, 0x0092, | |
4679 | 2, 0x002e, | |
4680 | 1, 0xff62, | |
4681 | 1, 0x0020, | |
4682 | 1, 0x0096, | |
4683 | | |
4684 | }; | |
4685 | static const uint8_t in2[]={ /* second input sequence, checked against results2 */ | |
4686 | 0x0f, | |
4687 | 0xa1, | |
4688 | 0x01 | |
4689 | }; | |
4690 | | |
4691 | /* expected test results */ | |
4692 | static const int32_t results2[]={ | |
4693 | /* pairs of: number of bytes read, code point */ | |
4694 | 2, 0x203E, | |
4695 | 1, 0x0001, | |
4696 | }; | |
4697 | | |
4698 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
4699 | UErrorCode errorCode=U_ZERO_ERROR; | |
4700 | UConverter *cnv=ucnv_open("ibm-930", &errorCode); | |
4701 | if(U_FAILURE(errorCode)) { | |
4702 | log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); | |
4703 | return; | |
4704 | } | |
4705 | TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); | |
4706 | ucnv_reset(cnv); /* the converter is reset before every sub-test */ | |
4707 | /* Test the condition when source >= sourceLimit */ | |
4708 | TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* empty range: source is also passed as the limit */ | |
4709 | ucnv_reset(cnv); | |
4710 | /* Test for the condition where source > sourceLimit after consuming the shift character */ | |
4711 | { | |
4712 | static const uint8_t source1[]={0x0f}; /* the input consists of the single byte 0x0f */ | |
4713 | TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); | |
4714 | } | |
4715 | /*Test for the condition where there is an invalid character*/ | |
4716 | ucnv_reset(cnv); | |
4717 | { | |
4718 | static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; | |
4719 | TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); /* NOTE(review): the expected code passed here is U_ZERO_ERROR although the label says "invalid" -- confirm intent */ | |
4720 | } | |
4721 | ucnv_reset(cnv); | |
4722 | source=(const char*)in2; | |
4723 | limit=(const char*)in2+sizeof(in2); | |
4724 | TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); | |
4725 | ucnv_close(cnv); | |
4726 | | |
4727 | } | |
4728 | ||
4729 | static void | |
4730 | TestGB18030() { /* Feeds a fixed gb18030 byte sequence to TestNextUChar and checks the bytes consumed and the code point of each step. */ | |
4731 | /* test input */ | |
4732 | static const uint8_t in[]={ /* one row per expected code point: 1-, 2- and 4-byte sequences */ | |
4733 | 0x24, | |
4734 | 0x7f, | |
4735 | 0x81, 0x30, 0x81, 0x30, | |
4736 | 0xa8, 0xbf, | |
4737 | 0xa2, 0xe3, | |
4738 | 0xd2, 0xbb, | |
4739 | 0x82, 0x35, 0x8f, 0x33, | |
4740 | 0x84, 0x31, 0xa4, 0x39, | |
4741 | 0x90, 0x30, 0x81, 0x30, | |
4742 | 0xe3, 0x32, 0x9a, 0x35 | |
4743 | #if 0 | |
4744 | /* | |
4745 | * Feature removed markus 2000-oct-26 | |
4746 | * Only some codepages must match surrogate pairs into supplementary code points - | |
4747 | * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . | |
4748 | * GB 18030 provides direct encodings for supplementary code points, therefore | |
4749 | * it must not combine two single-encoded surrogates into one code point. | |
4750 | */ | |
4751 | 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ | |
4752 | #endif | |
4753 | }; | |
4754 | | |
4755 | /* expected test results */ | |
4756 | static const int32_t results[]={ | |
4757 | /* pairs of: number of bytes read, code point -- row i matches row i of in[] */ | |
4758 | 1, 0x24, | |
4759 | 1, 0x7f, | |
4760 | 4, 0x80, | |
4761 | 2, 0x1f9, | |
4762 | 2, 0x20ac, | |
4763 | 2, 0x4e00, | |
4764 | 4, 0x9fa6, | |
4765 | 4, 0xffff, | |
4766 | 4, 0x10000, | |
4767 | 4, 0x10ffff | |
4768 | #if 0 | |
4769 | /* Feature removed. See comment above. */ | |
4770 | 8, 0x10000 | |
4771 | #endif | |
4772 | }; | |
4773 | | |
4774 | /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ | |
4775 | UErrorCode errorCode=U_ZERO_ERROR; | |
4776 | UConverter *cnv=ucnv_open("gb18030", &errorCode); | |
4777 | if(U_FAILURE(errorCode)) { | |
4778 | log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); | |
4779 | return; | |
4780 | } | |
4781 | TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); /* the limit is the end of the whole in[] array */ | |
4782 | ucnv_close(cnv); | |
4783 | } | |
4784 | ||
4785 | static void | |
4786 | TestLMBCS() { /* LMBCS converter tests: open/name/subst-char properties, offsets, getNextUChar, locale and group selection, 1-byte source buffers, and limit and surrogate error cases. */ | |
4787 | /* LMBCS-1 string */ | |
4788 | static const uint8_t pszLMBCS[]={ | |
4789 | 0x61, | |
4790 | 0x01, 0x29, | |
4791 | 0x81, | |
4792 | 0xA0, | |
4793 | 0x0F, 0x27, | |
4794 | 0x0F, 0x91, | |
4795 | 0x14, 0x0a, 0x74, | |
4796 | 0x14, 0xF6, 0x02, | |
4797 | 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ | |
4798 | 0x10, 0x88, 0xA0, | |
4799 | }; | |
4800 | | |
4801 | /* Unicode UChar32 equivalents */ | |
4802 | static const UChar32 pszUnicode32[]={ | |
4803 | /* code point */ | |
4804 | 0x00000061, | |
4805 | 0x00002013, | |
4806 | 0x000000FC, | |
4807 | 0x000000E1, | |
4808 | 0x00000007, | |
4809 | 0x00000091, | |
4810 | 0x00000a74, | |
4811 | 0x00000200, | |
4812 | 0x00023456, /* code point for surrogate pair */ | |
4813 | 0x00005516 | |
4814 | }; | |
4815 | | |
4816 | /* Unicode UChar equivalents */ | |
4817 | static const UChar pszUnicode[]={ | |
4818 | /* code unit */ | |
4819 | 0x0061, | |
4820 | 0x2013, | |
4821 | 0x00FC, | |
4822 | 0x00E1, | |
4823 | 0x0007, | |
4824 | 0x0091, | |
4825 | 0x0a74, | |
4826 | 0x0200, | |
4827 | 0xD84D, /* high (lead) surrogate */ | |
4828 | 0xDC56, /* low (trail) surrogate */ | |
4829 | 0x5516 | |
4830 | }; | |
4831 | | |
4832 | /* expected test results */ | |
4833 | static const int offsets32[]={ | |
4834 | /* byte offset in pszLMBCS at which each code point starts; the last entry is the total byte count */ | |
4835 | 0, | |
4836 | 1, | |
4837 | 3, | |
4838 | 4, | |
4839 | 5, | |
4840 | 7, | |
4841 | 9, | |
4842 | 12, | |
4843 | 15, | |
4844 | 21, | |
4845 | 24 | |
4846 | }; | |
4847 | | |
4848 | /* expected test results */ | |
4849 | static const int offsets[]={ | |
4850 | /* byte offset in pszLMBCS at which each UChar starts; the last entry is the total byte count */ | |
4851 | 0, | |
4852 | 1, | |
4853 | 3, | |
4854 | 4, | |
4855 | 5, | |
4856 | 7, | |
4857 | 9, | |
4858 | 12, | |
4859 | 15, | |
4860 | 18, | |
4861 | 21, | |
4862 | 24 | |
4863 | }; | |
4864 | | |
4865 | | |
4866 | UConverter *cnv; | |
4867 | | |
4868 | #define NAME_LMBCS_1 "LMBCS-1" | |
4869 | #define NAME_LMBCS_2 "LMBCS-2" | |
4870 | | |
4871 | | |
4872 | /* Some basic open/close/property tests on some LMBCS converters */ | |
4873 | { | |
4874 | | |
4875 | char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ | |
4876 | char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ | |
4877 | char get_subchars [1]; | |
4878 | const char * get_name; | |
4879 | UConverter *cnv1; | |
4880 | UConverter *cnv2; | |
4881 | | |
4882 | int8_t len = sizeof(get_subchars); | |
4883 | | |
4884 | UErrorCode errorCode=U_ZERO_ERROR; | |
4885 | | |
4886 | /* Open */ | |
4887 | cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); | |
4888 | if(U_FAILURE(errorCode)) { | |
4889 | log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); | |
4890 | return; | |
4891 | } | |
4892 | cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); | |
4893 | if(U_FAILURE(errorCode)) { | |
4894 | log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); | |
4895 | ucnv_close(cnv1); return; /* release the already-open LMBCS-1 converter before bailing out */ | |
4896 | } | |
4897 | | |
4898 | /* Name */ | |
4899 | get_name = ucnv_getName (cnv1, &errorCode); | |
4900 | if (strcmp(NAME_LMBCS_1,get_name)){ | |
4901 | log_err("Unexpected converter name: %s\n", get_name); | |
4902 | } | |
4903 | get_name = ucnv_getName (cnv2, &errorCode); | |
4904 | if (strcmp(NAME_LMBCS_2,get_name)){ | |
4905 | log_err("Unexpected converter name: %s\n", get_name); | |
4906 | } | |
4907 | | |
4908 | /* substitution chars */ | |
4909 | ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); | |
4910 | if(U_FAILURE(errorCode)) { | |
4911 | log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
4912 | } | |
4913 | if (len!=1){ | |
4914 | log_err("Unexpected length of sub chars\n"); | |
4915 | } | |
4916 | if (get_subchars[0] != expected_subchars[0]){ | |
4917 | log_err("Unexpected value of sub chars\n"); | |
4918 | } | |
4919 | ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); | |
4920 | if(U_FAILURE(errorCode)) { | |
4921 | log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); | |
4922 | } | |
4923 | ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); | |
4924 | if(U_FAILURE(errorCode)) { | |
4925 | log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
4926 | } | |
4927 | if (len!=1){ | |
4928 | log_err("Unexpected length of sub chars\n"); | |
4929 | } | |
4930 | if (get_subchars[0] != new_subchars[0]){ | |
4931 | log_err("Unexpected value of sub chars\n"); | |
4932 | } | |
4933 | ucnv_close(cnv1); | |
4934 | ucnv_close(cnv2); | |
4935 | | |
4936 | } | |
4937 | | |
4938 | /* LMBCS to Unicode - offsets */ | |
4939 | { | |
4940 | UErrorCode errorCode=U_ZERO_ERROR; | |
4941 | | |
4942 | const char * pSource = (const char *)pszLMBCS; | |
4943 | const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
4944 | | |
4945 | UChar Out [sizeof(pszUnicode) + 1]; | |
4946 | UChar * pOut = Out; | |
4947 | UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); | |
4948 | | |
4949 | int32_t off [sizeof(offsets)]; | |
4950 | | |
4951 | /* last 'offset' in expected results is just the final size. | |
4952 | (Makes other tests easier). Compensate here: */ | |
4953 | | |
4954 | off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS); | |
4955 | | |
4956 | | |
4957 | | |
4958 | cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ | |
4959 | if(U_FAILURE(errorCode)) { | |
4960 | log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); | |
4961 | return; | |
4962 | } | |
4963 | | |
4964 | | |
4965 | | |
4966 | ucnv_toUnicode (cnv, | |
4967 | &pOut, | |
4968 | OutLimit, | |
4969 | &pSource, | |
4970 | sourceLimit, | |
4971 | off, | |
4972 | TRUE, | |
4973 | &errorCode); | |
4974 | | |
4975 | | |
4976 | if (memcmp(off,offsets,sizeof(offsets))) | |
4977 | { | |
4978 | log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); | |
4979 | } | |
4980 | if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) | |
4981 | { | |
4982 | log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); | |
4983 | } | |
4984 | ucnv_close(cnv); | |
4985 | } | |
4986 | { | |
4987 | /* LMBCS to Unicode - getNextUChar */ | |
4988 | const char * sourceStart; | |
4989 | const char *source=(const char *)pszLMBCS; | |
4990 | const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); | |
4991 | const UChar32 *results= pszUnicode32; | |
4992 | const int *off = offsets32; | |
4993 | | |
4994 | UErrorCode errorCode=U_ZERO_ERROR; | |
4995 | UChar32 uniChar; | |
4996 | | |
4997 | cnv=ucnv_open("LMBCS-1", &errorCode); | |
4998 | if(U_FAILURE(errorCode)) { | |
4999 | log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); | |
5000 | return; | |
5001 | } | |
5002 | else | |
5003 | { | |
5004 | | |
5005 | while(source<limit) { | |
5006 | sourceStart=source; | |
5007 | uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); /* the limit is set to exactly the bytes of the next expected code point */ | |
5008 | if(U_FAILURE(errorCode)) { | |
5009 | log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); | |
5010 | break; | |
5011 | } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { | |
5012 | log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", | |
5013 | (unsigned long)uniChar, (int)(source-sourceStart), (unsigned long)*results, off[1] - off[0]); /* casts match the %lx and %d conversions; the expected byte count is off[1]-off[0] */ | |
5014 | break; | |
5015 | } | |
5016 | results++; | |
5017 | off++; | |
5018 | } | |
5019 | } | |
5020 | ucnv_close(cnv); | |
5021 | } | |
5022 | { /* test locale & optimization group operations: Unicode to LMBCS */ | |
5023 | | |
5024 | UErrorCode errorCode=U_ZERO_ERROR; | |
5025 | UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); | |
5026 | UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); | |
5027 | UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); | |
5028 | UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ | |
5029 | const UChar * pUniOut = uniString; | |
5030 | UChar * pUniIn = uniString; | |
5031 | uint8_t lmbcsString [4]; | |
5032 | const char * pLMBCSOut = (const char *)lmbcsString; | |
5033 | char * pLMBCSIn = (char *)lmbcsString; | |
5034 | | |
5035 | /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ | |
5036 | ucnv_fromUnicode (cnv16he, | |
5037 | &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)), | |
5038 | &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), | |
5039 | NULL, 1, &errorCode); | |
5040 | | |
5041 | if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) | |
5042 | { | |
5043 | log_err("LMBCS-16,locale=he gives unexpected translation\n"); | |
5044 | } | |
5045 | | |
5046 | pLMBCSIn= (char *)lmbcsString; | |
5047 | pUniOut = uniString; | |
5048 | ucnv_fromUnicode (cnv01us, | |
5049 | &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)), | |
5050 | &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), | |
5051 | NULL, 1, &errorCode); | |
5052 | | |
5053 | if (lmbcsString[0] != 0x9F) | |
5054 | { | |
5055 | log_err("LMBCS-1,locale=US gives unexpected translation\n"); | |
5056 | } | |
5057 | | |
5058 | /* single byte char from mbcs char set */ | |
5059 | lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ | |
5060 | pLMBCSOut = (const char *)lmbcsString; | |
5061 | pUniIn = uniString; | |
5062 | ucnv_toUnicode (cnv16jp, | |
5063 | &pUniIn, pUniIn + 1, | |
5064 | &pLMBCSOut, (pLMBCSOut + 1), | |
5065 | NULL, 1, &errorCode); | |
5066 | if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) | |
5067 | { | |
5068 | log_err("Unexpected results from LMBCS-16 single byte char\n"); | |
5069 | } | |
5070 | /* convert to group 1: should be 3 bytes */ | |
5071 | pLMBCSIn = (char *)lmbcsString; | |
5072 | pUniOut = uniString; | |
5073 | ucnv_fromUnicode (cnv01us, | |
5074 | &pLMBCSIn, (const char *)(pLMBCSIn + 3), | |
5075 | &pUniOut, pUniOut + 1, | |
5076 | NULL, 1, &errorCode); | |
5077 | if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 | |
5078 | || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) | |
5079 | { | |
5080 | log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); | |
5081 | } | |
5082 | pLMBCSOut = (const char *)lmbcsString; | |
5083 | pUniIn = uniString; | |
5084 | ucnv_toUnicode (cnv01us, | |
5085 | &pUniIn, pUniIn + 1, | |
5086 | &pLMBCSOut, (const char *)(pLMBCSOut + 3), | |
5087 | NULL, 1, &errorCode); | |
5088 | if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) | |
5089 | { | |
5090 | log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); | |
5091 | } | |
5092 | pLMBCSIn = (char *)lmbcsString; | |
5093 | pUniOut = uniString; | |
5094 | ucnv_fromUnicode (cnv16jp, | |
5095 | &pLMBCSIn, (const char *)(pLMBCSIn + 1), | |
5096 | &pUniOut, pUniOut + 1, | |
5097 | NULL, 1, &errorCode); | |
5098 | if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) | |
5099 | { | |
5100 | log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); | |
5101 | } | |
5102 | ucnv_close(cnv16he); | |
5103 | ucnv_close(cnv16jp); | |
5104 | ucnv_close(cnv01us); | |
5105 | } | |
5106 | { | |
5107 | /* Small source buffer testing, LMBCS -> Unicode */ | |
5108 | | |
5109 | UErrorCode errorCode=U_ZERO_ERROR; | |
5110 | | |
5111 | const char * pSource = (const char *)pszLMBCS; | |
5112 | const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
5113 | int codepointCount = 0; | |
5114 | | |
5115 | UChar Out [sizeof(pszUnicode) + 1]; | |
5116 | UChar * pOut = Out; | |
5117 | UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); | |
5118 | | |
5119 | | |
5120 | cnv = ucnv_open(NAME_LMBCS_1, &errorCode); /* this converter stays open for the nested block below and is closed at the end of the function */ | |
5121 | if(U_FAILURE(errorCode)) { | |
5122 | log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); | |
5123 | return; | |
5124 | } | |
5125 | | |
5126 | | |
5127 | while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) | |
5128 | { | |
5129 | ucnv_toUnicode (cnv, | |
5130 | &pOut, | |
5131 | OutLimit, | |
5132 | &pSource, | |
5133 | (pSource+1), /* claim that this is a 1- byte buffer */ | |
5134 | NULL, | |
5135 | FALSE, /* FALSE means there might be more chars in the next buffer */ | |
5136 | &errorCode); | |
5137 | | |
5138 | if (U_SUCCESS (errorCode)) | |
5139 | { | |
5140 | if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) | |
5141 | { | |
5142 | /* we are on to the next code point: check value */ | |
5143 | | |
5144 | if (Out[0] != pszUnicode[codepointCount]){ | |
5145 | log_err("LMBCS->Uni result %lx should have been %lx \n", | |
5146 | (unsigned long)Out[0], (unsigned long)pszUnicode[codepointCount]); /* casts match the %lx conversions */ | |
5147 | } | |
5148 | | |
5149 | pOut = Out; /* reset for accumulating next code point */ | |
5150 | codepointCount++; | |
5151 | } | |
5152 | } | |
5153 | else | |
5154 | { | |
5155 | log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); | |
5156 | } | |
5157 | } | |
5158 | { | |
5159 | /* limits & surrogate error testing */ | |
5160 | char LIn [sizeof(pszLMBCS)]; | |
5161 | const char * pLIn = LIn; | |
5162 | | |
5163 | char LOut [sizeof(pszLMBCS)]; | |
5164 | char * pLOut = LOut; | |
5165 | | |
5166 | UChar UOut [sizeof(pszUnicode)]; | |
5167 | UChar * pUOut = UOut; | |
5168 | | |
5169 | UChar UIn [sizeof(pszUnicode)]; | |
5170 | const UChar * pUIn = UIn; | |
5171 | | |
5172 | int32_t off [sizeof(offsets)]; | |
5173 | UChar32 uniChar; | |
5174 | | |
5175 | errorCode=U_ZERO_ERROR; | |
5176 | | |
5177 | /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ | |
5178 | pUIn++; | |
5179 | ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); | |
5180 | if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5181 | { | |
5182 | log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); | |
5183 | } | |
5184 | pUIn--; | |
5185 | | |
5186 | errorCode=U_ZERO_ERROR; | |
5187 | ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); | |
5188 | if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5189 | { | |
5190 | log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); | |
5191 | } | |
5192 | errorCode=U_ZERO_ERROR; | |
5193 | | |
5194 | uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); | |
5195 | if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5196 | { | |
5197 | log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); | |
5198 | } | |
5199 | errorCode=U_ZERO_ERROR; | |
5200 | | |
5201 | /* 0 byte source request - no error, no pointer movement */ | |
5202 | ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); | |
5203 | ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); | |
5204 | if(U_FAILURE(errorCode)) { | |
5205 | log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); | |
5206 | } | |
5207 | if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) | |
5208 | { | |
5209 | log_err("Unexpected pointer move in 0 byte source request \n"); | |
5210 | } | |
5211 | /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ | |
5212 | uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); | |
5213 | if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) | |
5214 | { | |
5215 | log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); | |
5216 | } | |
5217 | if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ | |
5218 | { | |
5219 | log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); | |
5220 | } | |
5221 | errorCode = U_ZERO_ERROR; | |
5222 | | |
5223 | /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ | |
5224 | | |
5225 | pUIn = pszUnicode; | |
5226 | ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode); | |
5227 | if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) | |
5228 | { | |
5229 | log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); | |
5230 | } | |
5231 | | |
5232 | errorCode = U_ZERO_ERROR; | |
5233 | | |
5234 | pLIn = (const char *)pszLMBCS; | |
5235 | ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); | |
5236 | if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) | |
5237 | { | |
5238 | log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); | |
5239 | } | |
5240 | | |
5241 | /* unpaired or chopped LMBCS surrogates */ | |
5242 | | |
5243 | /* OK high surrogate, Low surrogate is chopped */ | |
5244 | LIn [0] = (char)0x14; | |
5245 | LIn [1] = (char)0xD8; | |
5246 | LIn [2] = (char)0x01; | |
5247 | LIn [3] = (char)0x14; | |
5248 | LIn [4] = (char)0xDC; | |
5249 | pLIn = LIn; | |
5250 | errorCode = U_ZERO_ERROR; | |
5251 | pUOut = UOut; | |
5252 | | |
5253 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
5254 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5255 | if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) | |
5256 | { | |
5257 | log_err("Unexpected results on chopped low surrogate\n"); | |
5258 | } | |
5259 | | |
5260 | /* chopped at surrogate boundary */ | |
5261 | LIn [0] = (char)0x14; | |
5262 | LIn [1] = (char)0xD8; | |
5263 | LIn [2] = (char)0x01; | |
5264 | pLIn = LIn; | |
5265 | errorCode = U_ZERO_ERROR; | |
5266 | pUOut = UOut; | |
5267 | | |
5268 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); | |
5269 | if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) | |
5270 | { | |
5271 | log_err("Unexpected results on chopped at surrogate boundary \n"); | |
5272 | } | |
5273 | | |
5274 | /* unpaired surrogate plus valid Unichar */ | |
5275 | LIn [0] = (char)0x14; | |
5276 | LIn [1] = (char)0xD8; | |
5277 | LIn [2] = (char)0x01; | |
5278 | LIn [3] = (char)0x14; | |
5279 | LIn [4] = (char)0xC9; | |
5280 | LIn [5] = (char)0xD0; | |
5281 | pLIn = LIn; | |
5282 | errorCode = U_ZERO_ERROR; | |
5283 | pUOut = UOut; | |
5284 | | |
5285 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); | |
5286 | if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) | |
5287 | { | |
5288 | log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); | |
5289 | } | |
5290 | | |
5291 | /* unpaired surrogate plus chopped Unichar */ | |
5292 | LIn [0] = (char)0x14; | |
5293 | LIn [1] = (char)0xD8; | |
5294 | LIn [2] = (char)0x01; | |
5295 | LIn [3] = (char)0x14; | |
5296 | LIn [4] = (char)0xC9; | |
5297 | | |
5298 | pLIn = LIn; | |
5299 | errorCode = U_ZERO_ERROR; | |
5300 | pUOut = UOut; | |
5301 | | |
5302 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5303 | if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) | |
5304 | { | |
5305 | log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); | |
5306 | } | |
5307 | | |
5308 | /* unpaired surrogate plus valid non-Unichar */ | |
5309 | LIn [0] = (char)0x14; | |
5310 | LIn [1] = (char)0xD8; | |
5311 | LIn [2] = (char)0x01; | |
5312 | LIn [3] = (char)0x0F; | |
5313 | LIn [4] = (char)0x3B; | |
5314 | | |
5315 | pLIn = LIn; | |
5316 | errorCode = U_ZERO_ERROR; | |
5317 | pUOut = UOut; | |
5318 | | |
5319 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5320 | if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) | |
5321 | { | |
5322 | log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); | |
5323 | } | |
5324 | | |
5325 | /* unpaired surrogate plus chopped non-Unichar */ | |
5326 | LIn [0] = (char)0x14; | |
5327 | LIn [1] = (char)0xD8; | |
5328 | LIn [2] = (char)0x01; | |
5329 | LIn [3] = (char)0x0F; | |
5330 | | |
5331 | pLIn = LIn; | |
5332 | errorCode = U_ZERO_ERROR; | |
5333 | pUOut = UOut; | |
5334 | | |
5335 | ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); | |
5336 | | |
5337 | if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) | |
5338 | { | |
5339 | log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); | |
5340 | } | |
5341 | } | |
5342 | } | |
5343 | ucnv_close(cnv); /* final cleanup */ | |
5344 | } | |
5345 | ||
5346 | ||
5347 | static void TestJitterbug255() /* Steps through a fixed byte sequence with a shift-jis converter and fails if any ucnv_getNextUChar() call reports an error. */ | |
5348 | { | |
5349 | static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; | |
5350 | const char *testBuffer = (const char *)testBytes; | |
5351 | const char *testEnd = (const char *)testBytes + sizeof(testBytes); /* sizeof covers the final 0x00 byte too */ | |
5352 | UErrorCode status = U_ZERO_ERROR; | |
5353 | /*UChar32 result;*/ | |
5354 | UConverter *cnv = 0; | |
5355 | | |
5356 | cnv = ucnv_open("shift-jis", &status); | |
5357 | if (U_FAILURE(status) || cnv == 0) { | |
5358 | log_data_err("Failed to open the converter for SJIS.\n"); | |
5359 | return; | |
5360 | } | |
5361 | while (testBuffer != testEnd) /* ucnv_getNextUChar() advances testBuffer through its pointer argument */ | |
5362 | { | |
5363 | /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); /* the returned code point is deliberately discarded; only status is checked */ | |
5364 | if (U_FAILURE(status)) | |
5365 | { | |
5366 | log_err("Failed to convert the next UChar for SJIS.\n"); | |
5367 | break; | |
5368 | } | |
5369 | } | |
5370 | ucnv_close(cnv); | |
5371 | } | |
5372 | ||
5373 | static void TestEBCDICUS4XML() /* Checks the ebcdic-xml-us converter in both directions on three bytes (0x25, 0x15, 0x0D) and their expected Unicode values. */ | |
5374 | { | |
5375 | UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; /* output buffer for the to-Unicode direction */ | |
5376 | static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; /* expected to-Unicode result; also the input of the from-Unicode step */ | |
5377 | static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; /* expected from-Unicode result */ | |
5378 | static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; /* input bytes of the to-Unicode step */ | |
5379 | char target_x[] = {0x00, 0x00, 0x00, 0x00}; /* output buffer for the from-Unicode direction */ | |
5380 | UChar *unicodes = unicodes_x; | |
5381 | const UChar *toUnicodeMaps = toUnicodeMaps_x; | |
5382 | char *target = target_x; | |
5383 | const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; | |
5384 | UErrorCode status = U_ZERO_ERROR; | |
5385 | UConverter *cnv = 0; | |
5386 | | |
5387 | cnv = ucnv_open("ebcdic-xml-us", &status); | |
5388 | if (U_FAILURE(status) || cnv == 0) { | |
5389 | log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); | |
5390 | return; | |
5391 | } | |
5392 | ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); /* only the first 3 elements are converted; the trailing zero is left out */ | |
5393 | if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { /* compares from the array start because the unicodes pointer was passed by address to the call above */ | |
5394 | log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
5395 | u_errorName(status)); | |
5396 | printUSeqErr(unicodes_x, 3); | |
5397 | printUSeqErr(toUnicodeMaps, 3); | |
5398 | } | |
5399 | status = U_ZERO_ERROR; /* start the reverse direction with a clean status */ | |
5400 | ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); | |
5401 | if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { | |
5402 | log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
5403 | u_errorName(status)); | |
5404 | printSeqErr((const unsigned char*)target_x, 3); | |
5405 | printSeqErr((const unsigned char*)fromUnicodeMaps, 3); | |
5406 | } | |
5407 | ucnv_close(cnv); | |
5408 | } | |
5409 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
5410 | ||
5411 | #if !UCONFIG_NO_COLLATION | |
5412 | ||
5413 | static void TestJitterbug981(){ /* Converts the zh collation rule string to UTF-8 with ucnv_fromUChars(), retrying with a larger capacity after each overflow, and checks that the reported required size stays the same between calls. */ | |
5414 | const UChar* rules; | |
5415 | int32_t rules_length, target_cap, bytes_needed, buff_size; | |
5416 | UErrorCode status = U_ZERO_ERROR; | |
5417 | UConverter *utf8cnv; | |
5418 | UCollator* myCollator; | |
5419 | char *buff; | |
5420 | int numNeeded=0; /* required size reported by the previous call; 0 means no call has been made yet */ | |
5421 | utf8cnv = ucnv_open ("utf8", &status); | |
5422 | if(U_FAILURE(status)){ | |
5423 | log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); | |
5424 | return; | |
5425 | } | |
5426 | myCollator = ucol_open("zh", &status); | |
5427 | if(U_FAILURE(status)){ | |
5428 | log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); | |
5429 | ucnv_close(utf8cnv); | |
5430 | return; | |
5431 | } | |
5432 | | |
5433 | rules = ucol_getRules(myCollator, &rules_length); | |
5434 | if(rules_length == 0) { | |
5435 | log_data_err("missing zh tailoring rule string\n"); | |
5436 | ucol_close(myCollator); | |
5437 | ucnv_close(utf8cnv); | |
5438 | return; | |
5439 | } | |
5440 | buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); /* upper bound for the converted size */ | |
5441 | buff = malloc(buff_size); | |
5442 | if(buff == NULL) { log_err("Could not allocate %d bytes for the UTF-8 output buffer\n", buff_size); ucol_close(myCollator); ucnv_close(utf8cnv); return; } /* report allocation failure explicitly instead of letting the loop below fail with a misleading message */ | |
5443 | target_cap = 0; /* the first call is made with capacity 0 */ | |
5444 | do { | |
5445 | ucnv_reset(utf8cnv); | |
5446 | status = U_ZERO_ERROR; | |
5447 | if(target_cap >= buff_size) { | |
5448 | log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); | |
5449 | break; | |
5450 | } | |
5451 | bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, | |
5452 | rules, rules_length, &status); | |
5453 | target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; /* grow to the reported size, or by one byte if that is not larger */ | |
5454 | if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
5455 | log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); | |
5456 | break; | |
5457 | } | |
5458 | numNeeded = bytes_needed; | |
5459 | } while (status == U_BUFFER_OVERFLOW_ERROR); | |
5460 | ucol_close(myCollator); | |
5461 | ucnv_close(utf8cnv); | |
5462 | free(buff); | |
5463 | } | |
5464 | ||
5465 | #endif | |
5466 | ||
5467 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
5468 | static void TestJitterbug1293(){ | |
5469 | static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; | |
5470 | char target[256]; | |
5471 | UErrorCode status = U_ZERO_ERROR; | |
5472 | UConverter* conv=NULL; | |
5473 | int32_t target_cap, bytes_needed, numNeeded = 0; | |
5474 | conv = ucnv_open("shift-jis",&status); | |
5475 | if(U_FAILURE(status)){ | |
5476 | log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); | |
5477 | return; | |
5478 | } | |
5479 | ||
5480 | do{ | |
5481 | target_cap =0; | |
5482 | bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); | |
5483 | target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; | |
5484 | if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
5485 | log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); | |
5486 | } | |
5487 | numNeeded = bytes_needed; | |
5488 | } while (status == U_BUFFER_OVERFLOW_ERROR); | |
5489 | if(U_FAILURE(status)){ | |
5490 | log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); | |
5491 | return; | |
5492 | } | |
5493 | ucnv_close(conv); | |
5494 | } | |
5495 | #endif | |
5496 | ||
5497 | static void TestJB5275_1(){ | |
5498 | ||
5499 | static const char* data = "\x3B\xB3\x0A" /* Easy characters */ | |
5500 | "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ | |
5501 | /* Switch script: */ | |
5502 | "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ | |
5503 | "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ | |
5504 | "\xEF\x40\x3B\xB3\x0A"; | |
5505 | static const UChar expected[] ={ | |
5506 | 0x003b, 0x0a15, 0x000a, /* Easy characters */ | |
5507 | 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ | |
5508 | 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ | |
5509 | 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ | |
5510 | 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ | |
5511 | }; | |
5512 | ||
5513 | UErrorCode status = U_ZERO_ERROR; | |
5514 | UConverter* conv = ucnv_open("iscii-gur", &status); | |
5515 | UChar dest[100] = {'\0'}; | |
5516 | UChar* target = dest; | |
5517 | UChar* targetLimit = dest+100; | |
5518 | const char* source = data; | |
5519 | const char* sourceLimit = data+strlen(data); | |
5520 | const UChar* exp = expected; | |
5521 | ||
5522 | if (U_FAILURE(status)) { | |
5523 | log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); | |
5524 | return; | |
5525 | } | |
5526 | ||
5527 | log_verbose("Testing switching back to default script when new line is encountered.\n"); | |
5528 | ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); | |
5529 | if(U_FAILURE(status)){ | |
5530 | log_err("conversion failed: %s \n", u_errorName(status)); | |
5531 | } | |
5532 | targetLimit = target; | |
5533 | target = dest; | |
5534 | printUSeq(target, (int)(targetLimit-target)); | |
5535 | while(target<targetLimit){ | |
5536 | if(*exp!=*target){ | |
5537 | log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); | |
5538 | } | |
5539 | target++; | |
5540 | exp++; | |
5541 | } | |
5542 | ucnv_close(conv); | |
5543 | } | |
5544 | ||
5545 | static void TestJB5275(){ | |
5546 | static const char* data = | |
5547 | /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ | |
5548 | /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ | |
5549 | /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ | |
5550 | "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ | |
5551 | "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ | |
5552 | "\xEF\x48\x38\xB3\x0A" /* Kannada test */ | |
5553 | "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ | |
5554 | "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ | |
5555 | "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ | |
5556 | /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; | |
5557 | static const UChar expected[] ={ | |
5558 | 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ | |
5559 | 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ | |
5560 | 0x0038, 0x0C95, 0x000A, /* Kannada test */ | |
5561 | 0x0039, 0x0D15, 0x000A, /* Malayalam test */ | |
5562 | 0x003A, 0x0A95, 0x000A, /* Gujarati test */ | |
5563 | 0x003B, 0x0A15, 0x000A, /* Punjabi test */ | |
5564 | }; | |
5565 | ||
5566 | UErrorCode status = U_ZERO_ERROR; | |
5567 | UConverter* conv = ucnv_open("iscii", &status); | |
5568 | UChar dest[100] = {'\0'}; | |
5569 | UChar* target = dest; | |
5570 | UChar* targetLimit = dest+100; | |
5571 | const char* source = data; | |
5572 | const char* sourceLimit = data+strlen(data); | |
5573 | const UChar* exp = expected; | |
5574 | ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); | |
5575 | if(U_FAILURE(status)){ | |
5576 | log_data_err("conversion failed: %s \n", u_errorName(status)); | |
5577 | } | |
5578 | targetLimit = target; | |
5579 | target = dest; | |
5580 | ||
5581 | printUSeq(target, (int)(targetLimit-target)); | |
5582 | ||
5583 | while(target<targetLimit){ | |
5584 | if(*exp!=*target){ | |
5585 | log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); | |
5586 | } | |
5587 | target++; | |
5588 | exp++; | |
5589 | } | |
5590 | ucnv_close(conv); | |
5591 | } | |
5592 | ||
5593 | static void | |
5594 | TestIsFixedWidth() { | |
5595 | UErrorCode status = U_ZERO_ERROR; | |
5596 | UConverter *cnv = NULL; | |
5597 | int32_t i; | |
5598 | ||
5599 | const char *fixedWidth[] = { | |
5600 | "US-ASCII", | |
5601 | "UTF32", | |
5602 | "ibm-5478_P100-1995" | |
5603 | }; | |
5604 | ||
5605 | const char *notFixedWidth[] = { | |
5606 | "GB18030", | |
5607 | "UTF8", | |
5608 | "windows-949-2000", | |
5609 | "UTF16" | |
5610 | }; | |
5611 | ||
5612 | for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) { | |
5613 | cnv = ucnv_open(fixedWidth[i], &status); | |
5614 | if (cnv == NULL || U_FAILURE(status)) { | |
5615 | log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); | |
5616 | continue; | |
5617 | } | |
5618 | ||
5619 | if (!ucnv_isFixedWidth(cnv, &status)) { | |
5620 | log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); | |
5621 | } | |
5622 | ucnv_close(cnv); | |
5623 | } | |
5624 | ||
5625 | for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) { | |
5626 | cnv = ucnv_open(notFixedWidth[i], &status); | |
5627 | if (cnv == NULL || U_FAILURE(status)) { | |
5628 | log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); | |
5629 | continue; | |
5630 | } | |
5631 | ||
5632 | if (ucnv_isFixedWidth(cnv, &status)) { | |
5633 | log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); | |
5634 | } | |
5635 | ucnv_close(cnv); | |
5636 | } | |
5637 | } |