]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2003-2004, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: convtest.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003jul15 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Test file for data-driven conversion tests. | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | ||
21 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
22 | /* | |
23 | * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION | |
24 | * is slightly unnecessary - it removes tests for Unicode charsets | |
25 | * like UTF-8 that should work. | |
26 | * However, there is no easy way for the test to detect whether a test case | |
27 | * is for a Unicode charset, so it would be difficult to only exclude those. | |
28 | * Also, regular testing of ICU is done with all modules on, therefore | |
29 | * not testing conversion for a custom configuration like this should be ok. | |
30 | */ | |
31 | ||
32 | #include "unicode/ucnv.h" | |
33 | #include "unicode/unistr.h" | |
34 | #include "unicode/parsepos.h" | |
35 | #include "unicode/uniset.h" | |
36 | #include "unicode/ustring.h" | |
37 | #include "unicode/ures.h" | |
38 | #include "convtest.h" | |
39 | #include "unicode/tstdtmod.h" | |
40 | #include <string.h> | |
41 | #include <stdlib.h> | |
42 | ||
// Element count of a true array (not a pointer), cast to int32_t.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
44 | ||
// The first character of a test case's "callback" string is compared against
// these values to select which standard converter callback gets installed.
enum {
    // characters used in test data for callbacks
    SUB_CB='?',     // selects the ..._CALLBACK_SUBSTITUTE callback
    SKIP_CB='0',    // selects the ..._CALLBACK_SKIP callback
    STOP_CB='.',    // selects the ..._CALLBACK_STOP callback
    ESC_CB='&'      // selects the ..._CALLBACK_ESCAPE callback
};
52 | ||
// The destructor body is intentionally empty.
ConversionTest::~ConversionTest() {}
54 | ||
55 | void | |
56 | ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
57 | if (exec) logln("TestSuite ConversionTest: "); | |
58 | switch (index) { | |
59 | case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; | |
60 | case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; | |
61 | case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; | |
62 | default: name=""; break; //needed to end loop | |
63 | } | |
64 | } | |
65 | ||
66 | // test data interface ----------------------------------------------------- *** | |
67 | ||
68 | void | |
69 | ConversionTest::TestToUnicode() { | |
70 | ConversionCase cc; | |
71 | char charset[100], cbopt[4]; | |
72 | const char *option; | |
73 | UnicodeString s, unicode; | |
74 | int32_t offsetsLength; | |
75 | UConverterToUCallback callback; | |
76 | ||
77 | TestDataModule *dataModule; | |
78 | TestData *testData; | |
79 | const DataMap *testCase; | |
80 | UErrorCode errorCode; | |
81 | int32_t i; | |
82 | ||
83 | errorCode=U_ZERO_ERROR; | |
84 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); | |
85 | if(U_SUCCESS(errorCode)) { | |
86 | testData=dataModule->createTestData("toUnicode", errorCode); | |
87 | if(U_SUCCESS(errorCode)) { | |
88 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
89 | if(U_FAILURE(errorCode)) { | |
90 | errln("error retrieving conversion/toUnicode test case %d - %s", | |
91 | i, u_errorName(errorCode)); | |
92 | errorCode=U_ZERO_ERROR; | |
93 | continue; | |
94 | } | |
95 | ||
96 | cc.caseNr=i; | |
97 | ||
98 | s=testCase->getString("charset", errorCode); | |
99 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
100 | cc.charset=charset; | |
101 | ||
102 | cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); | |
103 | unicode=testCase->getString("unicode", errorCode); | |
104 | cc.unicode=unicode.getBuffer(); | |
105 | cc.unicodeLength=unicode.length(); | |
106 | ||
107 | offsetsLength=0; | |
108 | cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); | |
109 | if(offsetsLength==0) { | |
110 | cc.offsets=NULL; | |
111 | } else if(offsetsLength!=unicode.length()) { | |
112 | errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length", | |
113 | i, unicode.length(), offsetsLength); | |
114 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
115 | } | |
116 | ||
117 | cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); | |
118 | cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); | |
119 | ||
120 | s=testCase->getString("errorCode", errorCode); | |
121 | if(s==UNICODE_STRING("invalid", 7)) { | |
122 | cc.outErrorCode=U_INVALID_CHAR_FOUND; | |
123 | } else if(s==UNICODE_STRING("illegal", 7)) { | |
124 | cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; | |
125 | } else if(s==UNICODE_STRING("truncated", 9)) { | |
126 | cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; | |
127 | } else if(s==UNICODE_STRING("illesc", 6)) { | |
128 | cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; | |
129 | } else if(s==UNICODE_STRING("unsuppesc", 9)) { | |
130 | cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE; | |
131 | } else { | |
132 | cc.outErrorCode=U_ZERO_ERROR; | |
133 | } | |
134 | ||
135 | s=testCase->getString("callback", errorCode); | |
136 | s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); | |
137 | cc.cbopt=cbopt; | |
138 | switch(cbopt[0]) { | |
139 | case SUB_CB: | |
140 | callback=UCNV_TO_U_CALLBACK_SUBSTITUTE; | |
141 | break; | |
142 | case SKIP_CB: | |
143 | callback=UCNV_TO_U_CALLBACK_SKIP; | |
144 | break; | |
145 | case STOP_CB: | |
146 | callback=UCNV_TO_U_CALLBACK_STOP; | |
147 | break; | |
148 | case ESC_CB: | |
149 | callback=UCNV_TO_U_CALLBACK_ESCAPE; | |
150 | break; | |
151 | default: | |
152 | callback=NULL; | |
153 | break; | |
154 | } | |
155 | option=callback==NULL ? cbopt : cbopt+1; | |
156 | if(*option==0) { | |
157 | option=NULL; | |
158 | } | |
159 | ||
160 | cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode); | |
161 | ||
162 | if(U_FAILURE(errorCode)) { | |
163 | errln("error parsing conversion/toUnicode test case %d - %s", | |
164 | i, u_errorName(errorCode)); | |
165 | errorCode=U_ZERO_ERROR; | |
166 | } else { | |
167 | logln("TestToUnicode[%d] %s", i, charset); | |
168 | ToUnicodeCase(cc, callback, option); | |
169 | } | |
170 | } | |
171 | delete testData; | |
172 | } | |
173 | delete dataModule; | |
174 | } | |
175 | else { | |
176 | errln("Failed: could not load test conversion data"); | |
177 | } | |
178 | } | |
179 | ||
180 | void | |
181 | ConversionTest::TestFromUnicode() { | |
182 | ConversionCase cc; | |
183 | char charset[100], cbopt[4]; | |
184 | const char *option; | |
185 | UnicodeString s, unicode, invalidUChars; | |
186 | int32_t offsetsLength; | |
187 | UConverterFromUCallback callback; | |
188 | ||
189 | TestDataModule *dataModule; | |
190 | TestData *testData; | |
191 | const DataMap *testCase; | |
192 | const UChar *p; | |
193 | UErrorCode errorCode; | |
194 | int32_t i, length; | |
195 | ||
196 | errorCode=U_ZERO_ERROR; | |
197 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); | |
198 | if(U_SUCCESS(errorCode)) { | |
199 | testData=dataModule->createTestData("fromUnicode", errorCode); | |
200 | if(U_SUCCESS(errorCode)) { | |
201 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
202 | if(U_FAILURE(errorCode)) { | |
203 | errln("error retrieving conversion/fromUnicode test case %d - %s", | |
204 | i, u_errorName(errorCode)); | |
205 | errorCode=U_ZERO_ERROR; | |
206 | continue; | |
207 | } | |
208 | ||
209 | cc.caseNr=i; | |
210 | ||
211 | s=testCase->getString("charset", errorCode); | |
212 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
213 | cc.charset=charset; | |
214 | ||
215 | unicode=testCase->getString("unicode", errorCode); | |
216 | cc.unicode=unicode.getBuffer(); | |
217 | cc.unicodeLength=unicode.length(); | |
218 | cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); | |
219 | ||
220 | offsetsLength=0; | |
221 | cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); | |
222 | if(offsetsLength==0) { | |
223 | cc.offsets=NULL; | |
224 | } else if(offsetsLength!=cc.bytesLength) { | |
225 | errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length", | |
226 | i, cc.bytesLength, offsetsLength); | |
227 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
228 | } | |
229 | ||
230 | cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); | |
231 | cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); | |
232 | ||
233 | s=testCase->getString("errorCode", errorCode); | |
234 | if(s==UNICODE_STRING("invalid", 7)) { | |
235 | cc.outErrorCode=U_INVALID_CHAR_FOUND; | |
236 | } else if(s==UNICODE_STRING("illegal", 7)) { | |
237 | cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; | |
238 | } else if(s==UNICODE_STRING("truncated", 9)) { | |
239 | cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; | |
240 | } else { | |
241 | cc.outErrorCode=U_ZERO_ERROR; | |
242 | } | |
243 | ||
244 | s=testCase->getString("callback", errorCode); | |
245 | ||
246 | // read NUL-separated subchar first, if any | |
247 | length=u_strlen(p=s.getTerminatedBuffer()); | |
248 | if(++length<s.length()) { | |
249 | // copy the subchar from Latin-1 characters | |
250 | // start after the NUL | |
251 | p+=length; | |
252 | length=s.length()-length; | |
253 | if(length>=(int32_t)sizeof(cc.subchar)) { | |
254 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
255 | } else { | |
256 | int32_t j; | |
257 | ||
258 | for(j=0; j<length; ++j) { | |
259 | cc.subchar[j]=(char)p[j]; | |
260 | } | |
261 | // NUL-terminate the subchar | |
262 | cc.subchar[j]=0; | |
263 | } | |
264 | ||
265 | // remove the NUL and subchar from s | |
266 | s.truncate(u_strlen(s.getBuffer())); | |
267 | } else { | |
268 | // no subchar | |
269 | cc.subchar[0]=0; | |
270 | } | |
271 | ||
272 | s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); | |
273 | cc.cbopt=cbopt; | |
274 | switch(cbopt[0]) { | |
275 | case SUB_CB: | |
276 | callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE; | |
277 | break; | |
278 | case SKIP_CB: | |
279 | callback=UCNV_FROM_U_CALLBACK_SKIP; | |
280 | break; | |
281 | case STOP_CB: | |
282 | callback=UCNV_FROM_U_CALLBACK_STOP; | |
283 | break; | |
284 | case ESC_CB: | |
285 | callback=UCNV_FROM_U_CALLBACK_ESCAPE; | |
286 | break; | |
287 | default: | |
288 | callback=NULL; | |
289 | break; | |
290 | } | |
291 | option=callback==NULL ? cbopt : cbopt+1; | |
292 | if(*option==0) { | |
293 | option=NULL; | |
294 | } | |
295 | ||
296 | invalidUChars=testCase->getString("invalidUChars", errorCode); | |
297 | cc.invalidUChars=invalidUChars.getBuffer(); | |
298 | cc.invalidLength=invalidUChars.length(); | |
299 | ||
300 | if(U_FAILURE(errorCode)) { | |
301 | errln("error parsing conversion/fromUnicode test case %d - %s", | |
302 | i, u_errorName(errorCode)); | |
303 | errorCode=U_ZERO_ERROR; | |
304 | } else { | |
305 | logln("TestFromUnicode[%d] %s", i, charset); | |
306 | FromUnicodeCase(cc, callback, option); | |
307 | } | |
308 | } | |
309 | delete testData; | |
310 | } | |
311 | delete dataModule; | |
312 | } | |
313 | else { | |
314 | errln("Failed: could not load test conversion data"); | |
315 | } | |
316 | } | |
317 | ||
// "..." as UChars (three U+002E, not NUL-terminated)
static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
319 | ||
320 | void | |
321 | ConversionTest::TestGetUnicodeSet() { | |
322 | char charset[100]; | |
323 | UnicodeString s, map, mapnot; | |
324 | int32_t which; | |
325 | ||
326 | ParsePosition pos; | |
327 | UnicodeSet cnvSet, mapSet, mapnotSet, diffSet; | |
328 | UConverter *cnv; | |
329 | ||
330 | TestDataModule *dataModule; | |
331 | TestData *testData; | |
332 | const DataMap *testCase; | |
333 | UErrorCode errorCode; | |
334 | int32_t i; | |
335 | ||
336 | errorCode=U_ZERO_ERROR; | |
337 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); | |
338 | if(U_SUCCESS(errorCode)) { | |
339 | testData=dataModule->createTestData("getUnicodeSet", errorCode); | |
340 | if(U_SUCCESS(errorCode)) { | |
341 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { | |
342 | if(U_FAILURE(errorCode)) { | |
343 | errln("error retrieving conversion/getUnicodeSet test case %d - %s", | |
344 | i, u_errorName(errorCode)); | |
345 | errorCode=U_ZERO_ERROR; | |
346 | continue; | |
347 | } | |
348 | ||
349 | s=testCase->getString("charset", errorCode); | |
350 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); | |
351 | ||
352 | map=testCase->getString("map", errorCode); | |
353 | mapnot=testCase->getString("mapnot", errorCode); | |
354 | ||
355 | which=testCase->getInt28("which", errorCode); | |
356 | ||
357 | if(U_FAILURE(errorCode)) { | |
358 | errln("error parsing conversion/getUnicodeSet test case %d - %s", | |
359 | i, u_errorName(errorCode)); | |
360 | errorCode=U_ZERO_ERROR; | |
361 | continue; | |
362 | } | |
363 | ||
364 | // test this test case | |
365 | mapSet.clear(); | |
366 | mapnotSet.clear(); | |
367 | ||
368 | pos.setIndex(0); | |
369 | mapSet.applyPattern(map, pos, 0, NULL, errorCode); | |
370 | if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) { | |
371 | errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n" | |
372 | " error index %d index %d U+%04x", | |
373 | i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex())); | |
374 | errorCode=U_ZERO_ERROR; | |
375 | continue; | |
376 | } | |
377 | ||
378 | pos.setIndex(0); | |
379 | mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode); | |
380 | if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) { | |
381 | errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n" | |
382 | " error index %d index %d U+%04x", | |
383 | i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex())); | |
384 | errorCode=U_ZERO_ERROR; | |
385 | continue; | |
386 | } | |
387 | ||
388 | logln("TestGetUnicodeSet[%d] %s", i, charset); | |
389 | ||
390 | cnv=cnv_open(charset, errorCode); | |
391 | if(U_FAILURE(errorCode)) { | |
392 | errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s", | |
393 | charset, i, u_errorName(errorCode)); | |
394 | errorCode=U_ZERO_ERROR; | |
395 | continue; | |
396 | } | |
397 | ||
398 | ucnv_getUnicodeSet(cnv, (USet *)&cnvSet, (UConverterUnicodeSet)which, &errorCode); | |
399 | ucnv_close(cnv); | |
400 | ||
401 | if(U_FAILURE(errorCode)) { | |
402 | errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s", | |
403 | charset, i, u_errorName(errorCode)); | |
404 | errorCode=U_ZERO_ERROR; | |
405 | continue; | |
406 | } | |
407 | ||
408 | // are there items that must be in cnvSet but are not? | |
409 | (diffSet=mapSet).removeAll(cnvSet); | |
410 | if(!diffSet.isEmpty()) { | |
411 | diffSet.toPattern(s, TRUE); | |
412 | if(s.length()>100) { | |
413 | s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); | |
414 | } | |
415 | errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d", | |
416 | charset, i); | |
417 | errln(s); | |
418 | } | |
419 | ||
420 | // are there items that must not be in cnvSet but are? | |
421 | (diffSet=mapnotSet).retainAll(cnvSet); | |
422 | if(!diffSet.isEmpty()) { | |
423 | diffSet.toPattern(s, TRUE); | |
424 | if(s.length()>100) { | |
425 | s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis)); | |
426 | } | |
427 | errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d", | |
428 | charset, i); | |
429 | errln(s); | |
430 | } | |
431 | } | |
432 | delete testData; | |
433 | } | |
434 | delete dataModule; | |
435 | } | |
436 | else { | |
437 | errln("Failed: could not load test conversion data"); | |
438 | } | |
439 | } | |
440 | ||
441 | // open testdata or ICU data converter ------------------------------------- *** | |
442 | ||
443 | UConverter * | |
444 | ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) { | |
445 | if(name!=NULL && *name=='*') { | |
446 | /* loadTestData(): set the data directory */ | |
447 | return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode); | |
448 | } else { | |
449 | return ucnv_open(name, &errorCode); | |
450 | } | |
451 | } | |
452 | ||
453 | // output helpers ---------------------------------------------------------- *** | |
454 | ||
// Map a value 0..15 to its lowercase hexadecimal digit character.
static inline char
hexDigit(uint8_t digit) {
    if(digit>9) {
        return (char)('a'-10+digit);
    }
    return (char)('0'+digit);
}
459 | ||
/*
 * Write the bytes as two lowercase hex digits each, separated by single
 * spaces, followed by a terminating NUL.
 * Returns the position just after the NUL so that the caller can append
 * the next string to the same buffer. The caller must provide enough room.
 */
static char *
printBytes(const uint8_t *bytes, int32_t length, char *out) {
    static const char hexDigits[]="0123456789abcdef";
    int32_t k;

    for(k=0; k<length; ++k) {
        const uint8_t value=bytes[k];
        if(k>0) {
            *out++=' ';
        }
        *out++=hexDigits[value>>4];
        *out++=hexDigits[value&0xf];
    }
    *out++=0;
    return out;
}
481 | ||
482 | static char * | |
483 | printUnicode(const UChar *unicode, int32_t length, char *out) { | |
484 | UChar32 c; | |
485 | int32_t i; | |
486 | ||
487 | for(i=0; i<length;) { | |
488 | if(i>0) { | |
489 | *out++=' '; | |
490 | } | |
491 | U16_NEXT(unicode, i, length, c); | |
492 | // write 4..6 digits | |
493 | if(c>=0x100000) { | |
494 | *out++='1'; | |
495 | } | |
496 | if(c>=0x10000) { | |
497 | *out++=hexDigit((uint8_t)((c>>16)&0xf)); | |
498 | } | |
499 | *out++=hexDigit((uint8_t)((c>>12)&0xf)); | |
500 | *out++=hexDigit((uint8_t)((c>>8)&0xf)); | |
501 | *out++=hexDigit((uint8_t)((c>>4)&0xf)); | |
502 | *out++=hexDigit((uint8_t)(c&0xf)); | |
503 | } | |
504 | *out++=0; | |
505 | return out; | |
506 | } | |
507 | ||
/*
 * Write the offsets with exactly two characters each, separated by single
 * spaces, followed by a terminating NUL:
 *   below -9 -> "-x", -9..-1 -> "-9".."-1", 0..9 -> " 0".." 9",
 *   10..99 -> "10".."99", above 99 -> "xx"
 * A NULL offsets pointer prints as the empty string.
 * Returns the position just after the NUL. The caller must provide enough room.
 */
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count= offsets==NULL ? 0 : length;
    int32_t k;

    for(k=0; k<count; ++k) {
        const int32_t value=offsets[k];
        char first, second;

        // print all offsets with 2 characters each (-x, -9..99, xx)
        if(value>99) {
            first='x';
            second='x';
        } else if(value>=10) {
            first=(char)('0'+value/10);
            second=(char)('0'+value%10);
        } else if(value>=0) {
            first=' ';
            second=(char)('0'+value);
        } else if(value>=-9) {
            first='-';
            second=(char)('0'-value);
        } else /* value<-9 */ {
            first='-';
            second='x';
        }

        if(k!=0) {
            *out++=' ';
        }
        *out++=first;
        *out++=second;
    }
    *out++=0;
    return out;
}
540 | ||
541 | // toUnicode test worker functions ----------------------------------------- *** | |
542 | ||
543 | static int32_t | |
544 | stepToUnicode(ConversionCase &cc, UConverter *cnv, | |
545 | UChar *result, int32_t resultCapacity, | |
546 | int32_t *resultOffsets, /* also resultCapacity */ | |
547 | int32_t step, | |
548 | UErrorCode *pErrorCode) { | |
549 | const char *source, *sourceLimit, *bytesLimit; | |
550 | UChar *target, *targetLimit, *resultLimit; | |
551 | UBool flush; | |
552 | ||
553 | source=(const char *)cc.bytes; | |
554 | target=result; | |
555 | bytesLimit=source+cc.bytesLength; | |
556 | resultLimit=result+resultCapacity; | |
557 | ||
558 | if(step>=0) { | |
559 | // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time | |
560 | // move only one buffer (in vs. out) at a time to be extra mean | |
561 | // step==0 performs bulk conversion and generates offsets | |
562 | ||
563 | // initialize the partial limits for the loop | |
564 | if(step==0) { | |
565 | // use the entire buffers | |
566 | sourceLimit=bytesLimit; | |
567 | targetLimit=resultLimit; | |
568 | flush=cc.finalFlush; | |
569 | } else { | |
570 | // start with empty partial buffers | |
571 | sourceLimit=source; | |
572 | targetLimit=target; | |
573 | flush=FALSE; | |
574 | ||
575 | // output offsets only for bulk conversion | |
576 | resultOffsets=NULL; | |
577 | } | |
578 | ||
579 | for(;;) { | |
580 | // resetting the opposite conversion direction must not affect this one | |
581 | ucnv_resetFromUnicode(cnv); | |
582 | ||
583 | // convert | |
584 | ucnv_toUnicode(cnv, | |
585 | &target, targetLimit, | |
586 | &source, sourceLimit, | |
587 | resultOffsets, | |
588 | flush, pErrorCode); | |
589 | ||
590 | // check pointers and errors | |
591 | if(source>sourceLimit || target>targetLimit) { | |
592 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
593 | break; | |
594 | } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
595 | if(target!=targetLimit) { | |
596 | // buffer overflow must only be set when the target is filled | |
597 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
598 | break; | |
599 | } else if(targetLimit==resultLimit) { | |
600 | // not just a partial overflow | |
601 | break; | |
602 | } | |
603 | ||
604 | // the partial target is filled, set a new limit, reset the error and continue | |
605 | targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; | |
606 | *pErrorCode=U_ZERO_ERROR; | |
607 | } else if(U_FAILURE(*pErrorCode)) { | |
608 | // some other error occurred, done | |
609 | break; | |
610 | } else { | |
611 | if(source!=sourceLimit) { | |
612 | // when no error occurs, then the input must be consumed | |
613 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
614 | break; | |
615 | } | |
616 | ||
617 | if(sourceLimit==bytesLimit) { | |
618 | // we are done | |
619 | break; | |
620 | } | |
621 | ||
622 | // the partial conversion succeeded, set a new limit and continue | |
623 | sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit; | |
624 | flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit); | |
625 | } | |
626 | } | |
627 | } else /* step<0 */ { | |
628 | /* | |
629 | * step==-1: call only ucnv_getNextUChar() | |
630 | * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar() | |
631 | * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input, | |
632 | * else give it at most (-step-2)/2 bytes | |
633 | */ | |
634 | UChar32 c; | |
635 | ||
636 | // end the loop by getting an index out of bounds error | |
637 | for(;;) { | |
638 | // resetting the opposite conversion direction must not affect this one | |
639 | ucnv_resetFromUnicode(cnv); | |
640 | ||
641 | // convert | |
642 | if((step&1)!=0 /* odd: -1, -3, -5, ... */) { | |
643 | sourceLimit=source; // use sourceLimit not as a real limit | |
644 | // but to remember the pre-getNextUChar source pointer | |
645 | c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode); | |
646 | ||
647 | // check pointers and errors | |
648 | if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
649 | if(source!=bytesLimit) { | |
650 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
651 | } else { | |
652 | *pErrorCode=U_ZERO_ERROR; | |
653 | } | |
654 | break; | |
655 | } else if(U_FAILURE(*pErrorCode)) { | |
656 | break; | |
657 | } | |
658 | // source may not move if c is from previous overflow | |
659 | ||
660 | if(target==resultLimit) { | |
661 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
662 | break; | |
663 | } | |
664 | if(c<=0xffff) { | |
665 | *target++=(UChar)c; | |
666 | } else { | |
667 | *target++=U16_LEAD(c); | |
668 | if(target==resultLimit) { | |
669 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
670 | break; | |
671 | } | |
672 | *target++=U16_TRAIL(c); | |
673 | } | |
674 | ||
675 | // alternate between -n-1 and -n but leave -1 alone | |
676 | if(step<-1) { | |
677 | ++step; | |
678 | } | |
679 | } else /* step is even */ { | |
680 | // allow only one UChar output | |
681 | targetLimit=target<resultLimit ? target+1 : resultLimit; | |
682 | ||
683 | // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit) | |
684 | // and never output offsets | |
685 | if(step==-2) { | |
686 | sourceLimit=bytesLimit; | |
687 | } else { | |
688 | sourceLimit=source+(-step-2)/2; | |
689 | if(sourceLimit>bytesLimit) { | |
690 | sourceLimit=bytesLimit; | |
691 | } | |
692 | } | |
693 | ||
694 | ucnv_toUnicode(cnv, | |
695 | &target, targetLimit, | |
696 | &source, sourceLimit, | |
697 | NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode); | |
698 | ||
699 | // check pointers and errors | |
700 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
701 | if(target!=targetLimit) { | |
702 | // buffer overflow must only be set when the target is filled | |
703 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
704 | break; | |
705 | } else if(targetLimit==resultLimit) { | |
706 | // not just a partial overflow | |
707 | break; | |
708 | } | |
709 | ||
710 | // the partial target is filled, set a new limit and continue | |
711 | *pErrorCode=U_ZERO_ERROR; | |
712 | } else if(U_FAILURE(*pErrorCode)) { | |
713 | // some other error occurred, done | |
714 | break; | |
715 | } else { | |
716 | if(source!=sourceLimit) { | |
717 | // when no error occurs, then the input must be consumed | |
718 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
719 | break; | |
720 | } | |
721 | ||
722 | // we are done (flush==TRUE) but we continue, to get the index out of bounds error above | |
723 | } | |
724 | ||
725 | --step; | |
726 | } | |
727 | } | |
728 | } | |
729 | ||
730 | return (int32_t)(target-result); | |
731 | } | |
732 | ||
733 | UBool | |
734 | ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) { | |
735 | UConverter *cnv; | |
736 | UErrorCode errorCode; | |
737 | ||
738 | // open the converter | |
739 | errorCode=U_ZERO_ERROR; | |
740 | cnv=cnv_open(cc.charset, errorCode); | |
741 | if(U_FAILURE(errorCode)) { | |
742 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", | |
743 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); | |
744 | return FALSE; | |
745 | } | |
746 | ||
747 | // set the callback | |
748 | if(callback!=NULL) { | |
749 | ucnv_setToUCallBack(cnv, callback, option, NULL, NULL, &errorCode); | |
750 | if(U_FAILURE(errorCode)) { | |
751 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s", | |
752 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); | |
753 | ucnv_close(cnv); | |
754 | return FALSE; | |
755 | } | |
756 | } | |
757 | ||
758 | int32_t resultOffsets[200]; | |
759 | UChar result[200]; | |
760 | int32_t resultLength; | |
761 | UBool ok; | |
762 | ||
763 | static const struct { | |
764 | int32_t step; | |
765 | const char *name; | |
766 | } steps[]={ | |
767 | { 0, "bulk" }, // must be first for offsets to be checked | |
768 | { 1, "step=1" }, | |
769 | { 3, "step=3" }, | |
770 | { 7, "step=7" }, | |
771 | { -1, "getNext" }, | |
772 | { -2, "toU(bulk)+getNext" }, | |
773 | { -3, "getNext+toU(bulk)" }, | |
774 | { -4, "toU(1)+getNext" }, | |
775 | { -5, "getNext+toU(1)" }, | |
776 | { -12, "toU(5)+getNext" }, | |
777 | { -13, "getNext+toU(5)" }, | |
778 | }; | |
779 | int32_t i, step; | |
780 | ||
781 | ok=TRUE; | |
782 | for(i=0; i<LENGTHOF(steps) && ok; ++i) { | |
783 | step=steps[i].step; | |
784 | if(step<0 && !cc.finalFlush) { | |
785 | // skip ucnv_getNextUChar() if !finalFlush because | |
786 | // ucnv_getNextUChar() always implies flush | |
787 | continue; | |
788 | } | |
789 | if(step!=0) { | |
790 | // bulk test is first, then offsets are not checked any more | |
791 | cc.offsets=NULL; | |
792 | } | |
793 | errorCode=U_ZERO_ERROR; | |
794 | resultLength=stepToUnicode(cc, cnv, | |
795 | result, LENGTHOF(result), | |
796 | step==0 ? resultOffsets : NULL, | |
797 | step, &errorCode); | |
798 | ok=checkToUnicode( | |
799 | cc, cnv, steps[i].name, | |
800 | result, resultLength, | |
801 | cc.offsets!=NULL ? resultOffsets : NULL, | |
802 | errorCode); | |
803 | if(U_FAILURE(errorCode) || !cc.finalFlush) { | |
804 | // reset if an error occurred or we did not flush | |
805 | // otherwise do nothing to make sure that flushing resets | |
806 | ucnv_resetToUnicode(cnv); | |
807 | } | |
808 | } | |
809 | ||
810 | // not a real loop, just a convenience for breaking out of the block | |
811 | while(ok && cc.finalFlush) { | |
812 | // test ucnv_toUChars() | |
813 | memset(result, 0, sizeof(result)); | |
814 | ||
815 | errorCode=U_ZERO_ERROR; | |
816 | resultLength=ucnv_toUChars(cnv, | |
817 | result, LENGTHOF(result), | |
818 | (const char *)cc.bytes, cc.bytesLength, | |
819 | &errorCode); | |
820 | ok=checkToUnicode( | |
821 | cc, cnv, "toUChars", | |
822 | result, resultLength, | |
823 | NULL, | |
824 | errorCode); | |
825 | if(!ok) { | |
826 | break; | |
827 | } | |
828 | ||
829 | // test preflighting | |
830 | // keep the correct result for simple checking | |
831 | errorCode=U_ZERO_ERROR; | |
832 | resultLength=ucnv_toUChars(cnv, | |
833 | NULL, 0, | |
834 | (const char *)cc.bytes, cc.bytesLength, | |
835 | &errorCode); | |
836 | if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
837 | errorCode=U_ZERO_ERROR; | |
838 | } | |
839 | ok=checkToUnicode( | |
840 | cc, cnv, "preflight toUChars", | |
841 | result, resultLength, | |
842 | NULL, | |
843 | errorCode); | |
844 | break; | |
845 | } | |
846 | ||
847 | ucnv_close(cnv); | |
848 | return ok; | |
849 | } | |
850 | ||
851 | UBool | |
852 | ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name, | |
853 | const UChar *result, int32_t resultLength, | |
854 | const int32_t *resultOffsets, | |
855 | UErrorCode resultErrorCode) { | |
856 | char resultInvalidChars[8]; | |
857 | int8_t resultInvalidLength; | |
858 | UErrorCode errorCode; | |
859 | ||
860 | const char *msg; | |
861 | ||
862 | // reset the message; NULL will mean "ok" | |
863 | msg=NULL; | |
864 | ||
865 | errorCode=U_ZERO_ERROR; | |
866 | resultInvalidLength=sizeof(resultInvalidChars); | |
867 | ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode); | |
868 | if(U_FAILURE(errorCode)) { | |
869 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s", | |
870 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode)); | |
871 | return FALSE; | |
872 | } | |
873 | ||
874 | // check everything that might have gone wrong | |
875 | if(cc.unicodeLength!=resultLength) { | |
876 | msg="wrong result length"; | |
877 | } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) { | |
878 | msg="wrong result string"; | |
879 | } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) { | |
880 | msg="wrong offsets"; | |
881 | } else if(cc.outErrorCode!=resultErrorCode) { | |
882 | msg="wrong error code"; | |
883 | } else if(cc.invalidLength!=resultInvalidLength) { | |
884 | msg="wrong length of last invalid input"; | |
885 | } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) { | |
886 | msg="wrong last invalid input"; | |
887 | } | |
888 | ||
889 | if(msg==NULL) { | |
890 | return TRUE; | |
891 | } else { | |
892 | char buffer[2000]; // one buffer for all strings | |
893 | char *s, *bytesString, *unicodeString, *resultString, | |
894 | *offsetsString, *resultOffsetsString, | |
895 | *invalidCharsString, *resultInvalidCharsString; | |
896 | ||
897 | bytesString=s=buffer; | |
898 | s=printBytes(cc.bytes, cc.bytesLength, bytesString); | |
899 | s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s); | |
900 | s=printUnicode(result, resultLength, resultString=s); | |
901 | s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s); | |
902 | s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); | |
903 | s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s); | |
904 | s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s); | |
905 | ||
906 | if((s-buffer)>(int32_t)sizeof(buffer)) { | |
907 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n", | |
908 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); | |
909 | exit(1); | |
910 | } | |
911 | ||
912 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" | |
913 | " bytes <%s>[%d]\n" | |
914 | " expected <%s>[%d]\n" | |
915 | " result <%s>[%d]\n" | |
916 | " offsets <%s>\n" | |
917 | " result offsets <%s>\n" | |
918 | " error code expected %s got %s\n" | |
919 | " invalidChars expected <%s> got <%s>\n", | |
920 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, | |
921 | bytesString, cc.bytesLength, | |
922 | unicodeString, cc.unicodeLength, | |
923 | resultString, resultLength, | |
924 | offsetsString, | |
925 | resultOffsetsString, | |
926 | u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), | |
927 | invalidCharsString, resultInvalidCharsString); | |
928 | ||
929 | return FALSE; | |
930 | } | |
931 | } | |
932 | ||
933 | // fromUnicode test worker functions --------------------------------------- *** | |
934 | ||
935 | static int32_t | |
936 | stepFromUnicode(ConversionCase &cc, UConverter *cnv, | |
937 | char *result, int32_t resultCapacity, | |
938 | int32_t *resultOffsets, /* also resultCapacity */ | |
939 | int32_t step, | |
940 | UErrorCode *pErrorCode) { | |
941 | const UChar *source, *sourceLimit, *unicodeLimit; | |
942 | char *target, *targetLimit, *resultLimit; | |
943 | UBool flush; | |
944 | ||
945 | source=cc.unicode; | |
946 | target=result; | |
947 | unicodeLimit=source+cc.unicodeLength; | |
948 | resultLimit=result+resultCapacity; | |
949 | ||
950 | // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time | |
951 | // move only one buffer (in vs. out) at a time to be extra mean | |
952 | // step==0 performs bulk conversion and generates offsets | |
953 | ||
954 | // initialize the partial limits for the loop | |
955 | if(step==0) { | |
956 | // use the entire buffers | |
957 | sourceLimit=unicodeLimit; | |
958 | targetLimit=resultLimit; | |
959 | flush=cc.finalFlush; | |
960 | } else { | |
961 | // start with empty partial buffers | |
962 | sourceLimit=source; | |
963 | targetLimit=target; | |
964 | flush=FALSE; | |
965 | ||
966 | // output offsets only for bulk conversion | |
967 | resultOffsets=NULL; | |
968 | } | |
969 | ||
970 | for(;;) { | |
971 | // resetting the opposite conversion direction must not affect this one | |
972 | ucnv_resetToUnicode(cnv); | |
973 | ||
974 | // convert | |
975 | ucnv_fromUnicode(cnv, | |
976 | &target, targetLimit, | |
977 | &source, sourceLimit, | |
978 | resultOffsets, | |
979 | flush, pErrorCode); | |
980 | ||
981 | // check pointers and errors | |
982 | if(source>sourceLimit || target>targetLimit) { | |
983 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
984 | break; | |
985 | } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
986 | if(target!=targetLimit) { | |
987 | // buffer overflow must only be set when the target is filled | |
988 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
989 | break; | |
990 | } else if(targetLimit==resultLimit) { | |
991 | // not just a partial overflow | |
992 | break; | |
993 | } | |
994 | ||
995 | // the partial target is filled, set a new limit, reset the error and continue | |
996 | targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; | |
997 | *pErrorCode=U_ZERO_ERROR; | |
998 | } else if(U_FAILURE(*pErrorCode)) { | |
999 | // some other error occurred, done | |
1000 | break; | |
1001 | } else { | |
1002 | if(source!=sourceLimit) { | |
1003 | // when no error occurs, then the input must be consumed | |
1004 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | |
1005 | break; | |
1006 | } | |
1007 | ||
1008 | if(sourceLimit==unicodeLimit) { | |
1009 | // we are done | |
1010 | break; | |
1011 | } | |
1012 | ||
1013 | // the partial conversion succeeded, set a new limit and continue | |
1014 | sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit; | |
1015 | flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit); | |
1016 | } | |
1017 | } | |
1018 | ||
1019 | return (int32_t)(target-result); | |
1020 | } | |
1021 | ||
1022 | UBool | |
1023 | ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) { | |
1024 | UConverter *cnv; | |
1025 | UErrorCode errorCode; | |
1026 | ||
1027 | // open the converter | |
1028 | errorCode=U_ZERO_ERROR; | |
1029 | cnv=cnv_open(cc.charset, errorCode); | |
1030 | if(U_FAILURE(errorCode)) { | |
1031 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", | |
1032 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); | |
1033 | return FALSE; | |
1034 | } | |
1035 | ||
1036 | // set the callback | |
1037 | if(callback!=NULL) { | |
1038 | ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode); | |
1039 | if(U_FAILURE(errorCode)) { | |
1040 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s", | |
1041 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); | |
1042 | ucnv_close(cnv); | |
1043 | return FALSE; | |
1044 | } | |
1045 | } | |
1046 | ||
1047 | // set the fallbacks flag | |
1048 | // TODO change with Jitterbug 2401, then add a similar call for toUnicode too | |
1049 | ucnv_setFallback(cnv, cc.fallbacks); | |
1050 | ||
1051 | // set the subchar | |
1052 | int32_t length; | |
1053 | ||
1054 | if((length=(int32_t)strlen(cc.subchar))!=0) { | |
1055 | ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode); | |
1056 | if(U_FAILURE(errorCode)) { | |
1057 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubChars() failed - %s", | |
1058 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); | |
1059 | ucnv_close(cnv); | |
1060 | return FALSE; | |
1061 | } | |
1062 | } | |
1063 | ||
1064 | int32_t resultOffsets[200]; | |
1065 | char result[200]; | |
1066 | int32_t resultLength; | |
1067 | UBool ok; | |
1068 | ||
1069 | static const struct { | |
1070 | int32_t step; | |
1071 | const char *name; | |
1072 | } steps[]={ | |
1073 | { 0, "bulk" }, // must be first for offsets to be checked | |
1074 | { 1, "step=1" }, | |
1075 | { 3, "step=3" }, | |
1076 | { 7, "step=7" } | |
1077 | }; | |
1078 | int32_t i, step; | |
1079 | ||
1080 | ok=TRUE; | |
1081 | for(i=0; i<LENGTHOF(steps) && ok; ++i) { | |
1082 | step=steps[i].step; | |
1083 | if(step!=0) { | |
1084 | // bulk test is first, then offsets are not checked any more | |
1085 | cc.offsets=NULL; | |
1086 | } | |
1087 | errorCode=U_ZERO_ERROR; | |
1088 | resultLength=stepFromUnicode(cc, cnv, | |
1089 | result, LENGTHOF(result), | |
1090 | step==0 ? resultOffsets : NULL, | |
1091 | step, &errorCode); | |
1092 | ok=checkFromUnicode( | |
1093 | cc, cnv, steps[i].name, | |
1094 | (uint8_t *)result, resultLength, | |
1095 | cc.offsets!=NULL ? resultOffsets : NULL, | |
1096 | errorCode); | |
1097 | if(U_FAILURE(errorCode) || !cc.finalFlush) { | |
1098 | // reset if an error occurred or we did not flush | |
1099 | // otherwise do nothing to make sure that flushing resets | |
1100 | ucnv_resetFromUnicode(cnv); | |
1101 | } | |
1102 | } | |
1103 | ||
1104 | // not a real loop, just a convenience for breaking out of the block | |
1105 | while(ok && cc.finalFlush) { | |
1106 | // test ucnv_fromUChars() | |
1107 | memset(result, 0, sizeof(result)); | |
1108 | ||
1109 | errorCode=U_ZERO_ERROR; | |
1110 | resultLength=ucnv_fromUChars(cnv, | |
1111 | result, LENGTHOF(result), | |
1112 | cc.unicode, cc.unicodeLength, | |
1113 | &errorCode); | |
1114 | ok=checkFromUnicode( | |
1115 | cc, cnv, "fromUChars", | |
1116 | (uint8_t *)result, resultLength, | |
1117 | NULL, | |
1118 | errorCode); | |
1119 | if(!ok) { | |
1120 | break; | |
1121 | } | |
1122 | ||
1123 | // test preflighting | |
1124 | // keep the correct result for simple checking | |
1125 | errorCode=U_ZERO_ERROR; | |
1126 | resultLength=ucnv_fromUChars(cnv, | |
1127 | NULL, 0, | |
1128 | cc.unicode, cc.unicodeLength, | |
1129 | &errorCode); | |
1130 | if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
1131 | errorCode=U_ZERO_ERROR; | |
1132 | } | |
1133 | ok=checkFromUnicode( | |
1134 | cc, cnv, "preflight fromUChars", | |
1135 | (uint8_t *)result, resultLength, | |
1136 | NULL, | |
1137 | errorCode); | |
1138 | break; | |
1139 | } | |
1140 | ||
1141 | ucnv_close(cnv); | |
1142 | return ok; | |
1143 | } | |
1144 | ||
1145 | UBool | |
1146 | ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name, | |
1147 | const uint8_t *result, int32_t resultLength, | |
1148 | const int32_t *resultOffsets, | |
1149 | UErrorCode resultErrorCode) { | |
1150 | UChar resultInvalidUChars[8]; | |
1151 | int8_t resultInvalidLength; | |
1152 | UErrorCode errorCode; | |
1153 | ||
1154 | const char *msg; | |
1155 | ||
1156 | // reset the message; NULL will mean "ok" | |
1157 | msg=NULL; | |
1158 | ||
1159 | errorCode=U_ZERO_ERROR; | |
1160 | resultInvalidLength=LENGTHOF(resultInvalidUChars); | |
1161 | ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode); | |
1162 | if(U_FAILURE(errorCode)) { | |
1163 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s", | |
1164 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode)); | |
1165 | return FALSE; | |
1166 | } | |
1167 | ||
1168 | // check everything that might have gone wrong | |
1169 | if(cc.bytesLength!=resultLength) { | |
1170 | msg="wrong result length"; | |
1171 | } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) { | |
1172 | msg="wrong result string"; | |
1173 | } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) { | |
1174 | msg="wrong offsets"; | |
1175 | } else if(cc.outErrorCode!=resultErrorCode) { | |
1176 | msg="wrong error code"; | |
1177 | } else if(cc.invalidLength!=resultInvalidLength) { | |
1178 | msg="wrong length of last invalid input"; | |
1179 | } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) { | |
1180 | msg="wrong last invalid input"; | |
1181 | } | |
1182 | ||
1183 | if(msg==NULL) { | |
1184 | return TRUE; | |
1185 | } else { | |
1186 | char buffer[2000]; // one buffer for all strings | |
1187 | char *s, *unicodeString, *bytesString, *resultString, | |
1188 | *offsetsString, *resultOffsetsString, | |
1189 | *invalidCharsString, *resultInvalidUCharsString; | |
1190 | ||
1191 | unicodeString=s=buffer; | |
1192 | s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString); | |
1193 | s=printBytes(cc.bytes, cc.bytesLength, bytesString=s); | |
1194 | s=printBytes(result, resultLength, resultString=s); | |
1195 | s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s); | |
1196 | s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); | |
1197 | s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s); | |
1198 | s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s); | |
1199 | ||
1200 | if((s-buffer)>(int32_t)sizeof(buffer)) { | |
1201 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n", | |
1202 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); | |
1203 | exit(1); | |
1204 | } | |
1205 | ||
1206 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" | |
1207 | " unicode <%s>[%d]\n" | |
1208 | " expected <%s>[%d]\n" | |
1209 | " result <%s>[%d]\n" | |
1210 | " offsets <%s>\n" | |
1211 | " result offsets <%s>\n" | |
1212 | " error code expected %s got %s\n" | |
1213 | " invalidChars expected <%s> got <%s>\n", | |
1214 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, | |
1215 | unicodeString, cc.unicodeLength, | |
1216 | bytesString, cc.bytesLength, | |
1217 | resultString, resultLength, | |
1218 | offsetsString, | |
1219 | resultOffsetsString, | |
1220 | u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), | |
1221 | invalidCharsString, resultInvalidUCharsString); | |
1222 | ||
1223 | return FALSE; | |
1224 | } | |
1225 | } | |
1226 | ||
1227 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |