]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /******************************************************************** |
4 | * COPYRIGHT: | |
2ca993e8 | 5 | * Copyright (c) 1997-2016, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ********************************************************************/ | |
46f4442e | 8 | /***************************************************************************** |
b75a7d8f | 9 | * |
b331163b | 10 | * File ccapitst.c |
b75a7d8f A |
11 | * |
12 | * Modification History: | |
13 | * Name Description | |
14 | * Madhu Katragadda Ported for C API | |
46f4442e | 15 | ****************************************************************************** |
b75a7d8f A |
16 | */ |
17 | #include <stdio.h> | |
18 | #include <stdlib.h> | |
19 | #include <string.h> | |
20 | #include <ctype.h> | |
21 | #include "unicode/uloc.h" | |
22 | #include "unicode/ucnv.h" | |
23 | #include "unicode/ucnv_err.h" | |
374ca955 | 24 | #include "unicode/putil.h" |
46f4442e | 25 | #include "unicode/uset.h" |
b75a7d8f | 26 | #include "unicode/ustring.h" |
f3c0d7a5 | 27 | #include "unicode/utf8.h" |
374ca955 | 28 | #include "ucnv_bld.h" /* for sizeof(UConverter) */ |
73c04bcf | 29 | #include "cmemory.h" /* for UAlignedMemory */ |
b75a7d8f A |
30 | #include "cintltst.h" |
31 | #include "ccapitst.h" | |
4388f060 | 32 | #include "cstring.h" |
b75a7d8f | 33 | |
b75a7d8f A |
/* Number of entries in each per-codepage table used by TestConvert(). */
#define NUM_CODEPAGE 1
/*
 * Element count of the scratch buffers allocated by TestConvert().
 * Parenthesized so the macro keeps its value inside larger expressions
 * (for example as a divisor or after a cast).
 */
#define MAX_FILE_LEN (1024*20)
/* Size in chars of the buffer holding the test data file path. */
#define UCS_FILE_NAME_SIZE 512

/*
 * Similar to C++ alignof(type): the offset of a member of the given type
 * placed directly after a single char in a struct.
 */
#define ALIGNOF(type) offsetof (struct { char c; type member; }, member)
40 | ||
b75a7d8f | 41 | /*returns an action other than the one provided*/ |
51004dcb | 42 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
43 | static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); |
44 | static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); | |
51004dcb | 45 | #endif |
b75a7d8f | 46 | |
374ca955 A |
47 | static UConverter * |
48 | cnv_open(const char *name, UErrorCode *pErrorCode) { | |
49 | if(name!=NULL && name[0]=='*') { | |
50 | return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); | |
51 | } else { | |
52 | return ucnv_open(name, pErrorCode); | |
53 | } | |
54 | } | |
55 | ||
b75a7d8f A |
56 | |
57 | static void ListNames(void); | |
374ca955 | 58 | static void TestFlushCache(void); |
b75a7d8f A |
59 | static void TestDuplicateAlias(void); |
60 | static void TestCCSID(void); | |
61 | static void TestJ932(void); | |
62 | static void TestJ1968(void); | |
51004dcb | 63 | #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f | 64 | static void TestLMBCSMaxChar(void); |
51004dcb | 65 | #endif |
73c04bcf A |
66 | |
67 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f | 68 | static void TestConvertSafeCloneCallback(void); |
73c04bcf A |
69 | #endif |
70 | ||
b75a7d8f A |
71 | static void TestEBCDICSwapLFNL(void); |
72 | static void TestConvertEx(void); | |
46f4442e | 73 | static void TestConvertExFromUTF8(void); |
729e4ab9 | 74 | static void TestConvertExFromUTF8_C5F0(void); |
b75a7d8f | 75 | static void TestConvertAlgorithmic(void); |
374ca955 | 76 | void TestDefaultConverterError(void); /* defined in cctest.c */ |
729e4ab9 | 77 | void TestDefaultConverterSet(void); /* defined in cctest.c */ |
73c04bcf A |
78 | static void TestToUCountPending(void); |
79 | static void TestFromUCountPending(void); | |
80 | static void TestDefaultName(void); | |
81 | static void TestCompareNames(void); | |
82 | static void TestSubstString(void); | |
46f4442e | 83 | static void InvalidArguments(void); |
729e4ab9 A |
84 | static void TestGetName(void); |
85 | static void TestUTFBOM(void); | |
b75a7d8f A |
86 | |
87 | void addTestConvert(TestNode** root); | |
88 | ||
89 | void addTestConvert(TestNode** root) | |
90 | { | |
91 | addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); | |
92 | addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); | |
93 | addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); | |
94 | addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); | |
95 | addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); | |
73c04bcf | 96 | addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); |
73c04bcf A |
97 | #if !UCONFIG_NO_LEGACY_CONVERSION |
98 | addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); | |
99 | #endif | |
b75a7d8f A |
100 | addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); |
101 | addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); | |
102 | addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); | |
729e4ab9 | 103 | #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f | 104 | addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); |
729e4ab9 | 105 | #endif |
b75a7d8f A |
106 | addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); |
107 | addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); | |
46f4442e | 108 | addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); |
729e4ab9 | 109 | addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); |
b75a7d8f | 110 | addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); |
374ca955 | 111 | addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); |
729e4ab9 A |
112 | addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); |
113 | #if !UCONFIG_NO_FILE_IO | |
73c04bcf A |
114 | addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); |
115 | addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); | |
729e4ab9 | 116 | #endif |
73c04bcf A |
117 | addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); |
118 | addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); | |
119 | addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); | |
46f4442e | 120 | addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); |
729e4ab9 A |
121 | addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); |
122 | addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); | |
b75a7d8f A |
123 | } |
124 | ||
125 | static void ListNames(void) { | |
126 | UErrorCode err = U_ZERO_ERROR; | |
127 | int32_t testLong1 = 0; | |
128 | const char* available_conv; | |
129 | UEnumeration *allNamesEnum = NULL; | |
130 | int32_t allNamesCount = 0; | |
131 | uint16_t count; | |
132 | ||
133 | log_verbose("Testing ucnv_openAllNames()..."); | |
134 | allNamesEnum = ucnv_openAllNames(&err); | |
135 | if(U_FAILURE(err)) { | |
729e4ab9 | 136 | log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); |
b75a7d8f A |
137 | } |
138 | else { | |
139 | const char *string = NULL; | |
140 | int32_t len = 0; | |
141 | int32_t count1 = 0; | |
142 | int32_t count2 = 0; | |
143 | allNamesCount = uenum_count(allNamesEnum, &err); | |
144 | while ((string = uenum_next(allNamesEnum, &len, &err))) { | |
145 | count1++; | |
146 | log_verbose("read \"%s\", length %i\n", string, len); | |
147 | } | |
73c04bcf A |
148 | if (U_FAILURE(err)) { |
149 | log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); | |
150 | err = U_ZERO_ERROR; | |
151 | } | |
b75a7d8f A |
152 | uenum_reset(allNamesEnum, &err); |
153 | while ((string = uenum_next(allNamesEnum, &len, &err))) { | |
154 | count2++; | |
155 | ucnv_close(ucnv_open(string, &err)); | |
156 | log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); | |
157 | err = U_ZERO_ERROR; | |
158 | } | |
159 | if (count1 != count2) { | |
160 | log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); | |
161 | } | |
162 | } | |
163 | uenum_close(allNamesEnum); | |
164 | err = U_ZERO_ERROR; | |
165 | ||
166 | /*Tests ucnv_getAvailableName(), getAvialableCount()*/ | |
167 | ||
168 | log_verbose("Testing ucnv_countAvailable()..."); | |
169 | ||
170 | testLong1=ucnv_countAvailable(); | |
171 | log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); | |
172 | ||
173 | log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ | |
174 | ||
175 | available_conv = ucnv_getAvailableName(testLong1); | |
176 | /*test ucnv_getAvailableName with err condition*/ | |
177 | log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); | |
178 | available_conv = ucnv_getAvailableName(-1); | |
179 | if(available_conv != NULL){ | |
180 | log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); | |
181 | } | |
182 | ||
183 | /* Test ucnv_countAliases() etc. */ | |
184 | count = ucnv_countAliases("utf-8", &err); | |
185 | if(U_FAILURE(err)) { | |
729e4ab9 | 186 | log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); |
b75a7d8f A |
187 | } else if(count <= 0) { |
188 | log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); | |
189 | } else { | |
190 | /* try to get the aliases individually */ | |
191 | const char *alias; | |
192 | alias = ucnv_getAlias("utf-8", 0, &err); | |
193 | if(U_FAILURE(err)) { | |
194 | log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); | |
195 | } else if(strcmp("UTF-8", alias) != 0) { | |
196 | log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); | |
197 | } else { | |
198 | uint16_t aliasNum; | |
199 | for(aliasNum = 0; aliasNum < count; ++aliasNum) { | |
200 | alias = ucnv_getAlias("utf-8", aliasNum, &err); | |
201 | if(U_FAILURE(err)) { | |
202 | log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); | |
203 | } else if(strlen(alias) > 20) { | |
204 | /* sanity check */ | |
205 | log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); | |
206 | } else { | |
207 | log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); | |
208 | } | |
209 | } | |
210 | if(U_SUCCESS(err)) { | |
211 | /* try to fill an array with all aliases */ | |
212 | const char **aliases; | |
213 | aliases=(const char **)malloc(count * sizeof(const char *)); | |
214 | if(aliases != 0) { | |
215 | ucnv_getAliases("utf-8", aliases, &err); | |
216 | if(U_FAILURE(err)) { | |
217 | log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); | |
218 | } else { | |
219 | for(aliasNum = 0; aliasNum < count; ++aliasNum) { | |
220 | /* compare the pointers with the ones returned individually */ | |
221 | alias = ucnv_getAlias("utf-8", aliasNum, &err); | |
222 | if(U_FAILURE(err)) { | |
223 | log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); | |
224 | } else if(aliases[aliasNum] != alias) { | |
225 | log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); | |
226 | } | |
227 | } | |
228 | } | |
229 | free((char **)aliases); | |
230 | } | |
231 | } | |
232 | } | |
233 | } | |
234 | } | |
235 | ||
236 | ||
237 | static void TestConvert() | |
238 | { | |
73c04bcf | 239 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
240 | char myptr[4]; |
241 | char save[4]; | |
242 | int32_t testLong1 = 0; | |
243 | uint16_t rest = 0; | |
244 | int32_t len = 0; | |
245 | int32_t x = 0; | |
246 | FILE* ucs_file_in = NULL; | |
247 | UChar BOM = 0x0000; | |
248 | UChar myUChar = 0x0000; | |
249 | char* mytarget; /* [MAX_FILE_LEN] */ | |
250 | char* mytarget_1; | |
251 | char* mytarget_use; | |
252 | UChar* consumedUni = NULL; | |
253 | char* consumed = NULL; | |
254 | char* output_cp_buffer; /* [MAX_FILE_LEN] */ | |
255 | UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ | |
256 | UChar* ucs_file_buffer_use; | |
257 | UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ | |
258 | UChar* my_ucs_file_buffer_1; | |
259 | int8_t ii = 0; | |
b75a7d8f A |
260 | uint16_t codepage_index = 0; |
261 | int32_t cp = 0; | |
262 | UErrorCode err = U_ZERO_ERROR; | |
263 | char ucs_file_name[UCS_FILE_NAME_SIZE]; | |
264 | UConverterFromUCallback MIA1, MIA1_2; | |
265 | UConverterToUCallback MIA2, MIA2_2; | |
266 | const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; | |
267 | UConverter* someConverters[5]; | |
268 | UConverter* myConverter = 0; | |
269 | UChar* displayname = 0; | |
270 | ||
271 | const char* locale; | |
272 | ||
273 | UChar* uchar1 = 0; | |
274 | UChar* uchar2 = 0; | |
275 | UChar* uchar3 = 0; | |
276 | int32_t targetcapacity2; | |
277 | int32_t targetcapacity; | |
278 | int32_t targetsize; | |
279 | int32_t disnamelen; | |
280 | ||
281 | const UChar* tmp_ucs_buf; | |
282 | const UChar* tmp_consumedUni=NULL; | |
283 | const char* tmp_mytarget_use; | |
284 | const char* tmp_consumed; | |
285 | ||
286 | /****************************************************************** | |
287 | Checking Unicode -> ksc | |
288 | ******************************************************************/ | |
289 | ||
290 | const char* CodePagesToTest[NUM_CODEPAGE] = | |
291 | { | |
292 | "ibm-949_P110-1999" | |
293 | ||
294 | ||
295 | }; | |
296 | const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = | |
297 | { | |
298 | 949 | |
299 | }; | |
300 | ||
301 | ||
302 | const int8_t CodePagesMinChars[NUM_CODEPAGE] = | |
303 | { | |
304 | 1 | |
305 | ||
306 | }; | |
307 | ||
308 | const int8_t CodePagesMaxChars[NUM_CODEPAGE] = | |
309 | { | |
310 | 2 | |
311 | ||
312 | }; | |
313 | ||
314 | const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = | |
315 | { | |
316 | 0xAFFE | |
317 | }; | |
318 | ||
319 | const char* CodePagesTestFiles[NUM_CODEPAGE] = | |
320 | { | |
321 | "uni-text.bin" | |
322 | }; | |
323 | ||
324 | ||
325 | const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = | |
326 | { | |
327 | UCNV_IBM | |
328 | ||
329 | }; | |
330 | ||
331 | const char* CodePagesLocale[NUM_CODEPAGE] = | |
332 | { | |
333 | "ko_KR" | |
334 | }; | |
335 | ||
336 | UConverterFromUCallback oldFromUAction = NULL; | |
337 | UConverterToUCallback oldToUAction = NULL; | |
338 | const void* oldFromUContext = NULL; | |
339 | const void* oldToUContext = NULL; | |
340 | ||
341 | /* Allocate memory */ | |
342 | mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); | |
343 | output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); | |
344 | ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); | |
345 | my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); | |
346 | ||
347 | ucs_file_buffer_use = ucs_file_buffer; | |
348 | mytarget_1=mytarget; | |
349 | mytarget_use = mytarget; | |
350 | my_ucs_file_buffer_1=my_ucs_file_buffer; | |
351 | ||
352 | /* flush the converter cache to get a consistent state before the flushing is tested */ | |
353 | ucnv_flushCache(); | |
354 | ||
355 | /*Testing ucnv_openU()*/ | |
356 | { | |
357 | UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ | |
358 | UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ | |
359 | UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ | |
360 | const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; | |
361 | UChar illegalName[100]; | |
362 | UConverter *converter=NULL; | |
363 | err=U_ZERO_ERROR; | |
364 | converter=ucnv_openU(converterName, &err); | |
365 | if(U_FAILURE(err)){ | |
366 | log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); | |
367 | } | |
368 | ucnv_close(converter); | |
369 | err=U_ZERO_ERROR; | |
370 | converter=ucnv_openU(NULL, &err); | |
371 | if(U_FAILURE(err)){ | |
372 | log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); | |
373 | } | |
374 | ucnv_close(converter); | |
375 | /*testing with error value*/ | |
376 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
377 | converter=ucnv_openU(converterName, &err); | |
378 | if(!(converter == NULL)){ | |
379 | log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); | |
380 | } | |
381 | ucnv_close(converter); | |
382 | err=U_ZERO_ERROR; | |
383 | u_uastrcpy(illegalName, ""); | |
384 | u_uastrcpy(illegalName, illegalNameChars); | |
385 | ucnv_openU(illegalName, &err); | |
386 | if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ | |
387 | log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); | |
388 | } | |
389 | ||
390 | err=U_ZERO_ERROR; | |
391 | ucnv_openU(firstSortedName, &err); | |
392 | if(err!=U_FILE_ACCESS_ERROR){ | |
393 | log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); | |
394 | } | |
395 | ||
396 | err=U_ZERO_ERROR; | |
397 | ucnv_openU(lastSortedName, &err); | |
398 | if(err!=U_FILE_ACCESS_ERROR){ | |
399 | log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); | |
400 | } | |
401 | ||
402 | err=U_ZERO_ERROR; | |
403 | } | |
404 | log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); | |
405 | { | |
406 | UConverter *cnv=NULL; | |
407 | err=U_ZERO_ERROR; | |
408 | cnv=ucnv_open("ibm-949,Madhu", &err); | |
409 | if(U_FAILURE(err)){ | |
410 | log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); | |
411 | } | |
412 | ucnv_close(cnv); | |
413 | ||
414 | } | |
415 | /*Testing ucnv_convert()*/ | |
416 | { | |
417 | int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; | |
418 | const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; | |
419 | const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; | |
420 | char *target=0; | |
2ca993e8 | 421 | sourceLimit=UPRV_LENGTHOF(source); |
b75a7d8f A |
422 | err=U_ZERO_ERROR; |
423 | targetLimit=0; | |
424 | ||
425 | targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); | |
426 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
427 | err=U_ZERO_ERROR; | |
428 | targetLimit=targetCapacity+1; | |
429 | target=(char*)malloc(sizeof(char) * targetLimit); | |
430 | targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); | |
431 | } | |
432 | if(U_FAILURE(err)){ | |
433 | log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); | |
434 | } | |
435 | else { | |
436 | for(i=0; i<targetCapacity; i++){ | |
437 | if(target[i] != expectedTarget[i]){ | |
438 | log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); | |
439 | } | |
440 | } | |
441 | ||
442 | i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); | |
443 | if(U_FAILURE(err) || i!=7){ | |
444 | log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", | |
445 | u_errorName(err), i); | |
446 | } | |
447 | ||
448 | /*Test error conditions*/ | |
449 | err=U_ZERO_ERROR; | |
450 | i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); | |
451 | if(i !=0){ | |
452 | log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); | |
453 | } | |
454 | ||
455 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
2ca993e8 | 456 | sourceLimit=UPRV_LENGTHOF(source); |
b75a7d8f A |
457 | i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); |
458 | if(i !=0 ){ | |
459 | log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); | |
460 | } | |
461 | ||
462 | err=U_ZERO_ERROR; | |
2ca993e8 | 463 | sourceLimit=UPRV_LENGTHOF(source); |
b75a7d8f A |
464 | targetLimit=0; |
465 | i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); | |
466 | if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ | |
467 | log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); | |
468 | } | |
469 | err=U_ZERO_ERROR; | |
470 | free(target); | |
471 | } | |
472 | } | |
473 | ||
474 | /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ | |
475 | log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); | |
476 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
477 | if(ucnv_open(NULL, &err) != NULL){ | |
478 | log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); | |
479 | } | |
480 | if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ | |
481 | log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); | |
482 | } | |
483 | err=U_ZERO_ERROR; | |
484 | ||
485 | /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ | |
486 | log_verbose("\n---Testing ucnv_open default...\n"); | |
487 | someConverters[0] = ucnv_open(NULL,&err); | |
488 | someConverters[1] = ucnv_open(NULL,&err); | |
489 | someConverters[2] = ucnv_open("utf8", &err); | |
490 | someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); | |
491 | ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ | |
492 | if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} | |
493 | ||
494 | /* Testing ucnv_getName()*/ | |
495 | /*default code page */ | |
496 | ucnv_getName(someConverters[0], &err); | |
497 | if(U_FAILURE(err)) { | |
498 | log_data_err("getName[0] failed\n"); | |
499 | } else { | |
500 | log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); | |
501 | } | |
502 | ucnv_getName(someConverters[1], &err); | |
503 | if(U_FAILURE(err)) { | |
504 | log_data_err("getName[1] failed\n"); | |
505 | } else { | |
506 | log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); | |
507 | } | |
508 | ||
b75a7d8f A |
509 | ucnv_close(someConverters[0]); |
510 | ucnv_close(someConverters[1]); | |
511 | ucnv_close(someConverters[2]); | |
512 | ucnv_close(someConverters[3]); | |
513 | ||
514 | ||
515 | for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) | |
516 | { | |
517 | int32_t i = 0; | |
b75a7d8f A |
518 | |
519 | err = U_ZERO_ERROR; | |
374ca955 A |
520 | #ifdef U_TOPSRCDIR |
521 | strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); | |
522 | #else | |
b75a7d8f A |
523 | strcpy(ucs_file_name, loadTestData(&err)); |
524 | ||
525 | if(U_FAILURE(err)){ | |
526 | log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); | |
527 | return; | |
528 | } | |
529 | ||
374ca955 A |
530 | { |
531 | char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); | |
b75a7d8f | 532 | |
374ca955 A |
533 | if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ |
534 | *(index+1)=0; | |
535 | } | |
b75a7d8f A |
536 | } |
537 | ||
538 | strcat(ucs_file_name,".."U_FILE_SEP_STRING); | |
374ca955 | 539 | #endif |
b75a7d8f A |
540 | strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); |
541 | ||
542 | ucs_file_in = fopen(ucs_file_name,"rb"); | |
543 | if (!ucs_file_in) | |
544 | { | |
46f4442e | 545 | log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); |
b75a7d8f A |
546 | return; |
547 | } | |
548 | ||
549 | /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ | |
550 | ||
551 | /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ | |
552 | /* ucnv_flushCache(); */ | |
553 | myConverter =ucnv_open( "ibm-949", &err); | |
554 | if (!myConverter || U_FAILURE(err)) | |
555 | { | |
556 | log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); | |
4388f060 A |
557 | fclose(ucs_file_in); |
558 | break; | |
b75a7d8f A |
559 | } |
560 | ||
561 | /*testing for ucnv_getName() */ | |
562 | log_verbose("Testing ucnv_getName()...\n"); | |
563 | ucnv_getName(myConverter, &err); | |
564 | if(U_FAILURE(err)) | |
565 | log_err("Error in getName\n"); | |
566 | else | |
567 | { | |
568 | log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); | |
569 | } | |
4388f060 | 570 | if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) |
b75a7d8f A |
571 | log_err("getName failed\n"); |
572 | else | |
573 | log_verbose("getName ok\n"); | |
574 | /*Test getName with error condition*/ | |
575 | { | |
576 | const char* name=0; | |
577 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
578 | log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); | |
579 | name=ucnv_getName(myConverter, &err); | |
580 | if(name != NULL){ | |
581 | log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); | |
582 | } | |
583 | err=U_ZERO_ERROR; | |
584 | } | |
585 | ||
586 | ||
587 | /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ | |
588 | ||
589 | log_verbose("Testing ucnv_getMaxCharSize()...\n"); | |
590 | if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) | |
591 | log_verbose("Max byte per character OK\n"); | |
592 | else | |
593 | log_err("Max byte per character failed\n"); | |
594 | ||
595 | log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); | |
596 | if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) | |
597 | log_verbose("Min byte per character OK\n"); | |
598 | else | |
599 | log_err("Min byte per character failed\n"); | |
600 | ||
601 | ||
602 | /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ | |
603 | log_verbose("\n---Testing ucnv_getSubstChars...\n"); | |
604 | ii=4; | |
605 | ucnv_getSubstChars(myConverter, myptr, &ii, &err); | |
374ca955 A |
606 | if (ii <= 0) { |
607 | log_err("ucnv_getSubstChars returned a negative number %d\n", ii); | |
608 | } | |
b75a7d8f A |
609 | |
610 | for(x=0;x<ii;x++) | |
611 | rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); | |
612 | if (rest==CodePagesSubstitutionChars[codepage_index]) | |
613 | log_verbose("Substitution character ok\n"); | |
614 | else | |
615 | log_err("Substitution character failed.\n"); | |
616 | ||
617 | log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); | |
618 | ucnv_setSubstChars(myConverter, myptr, ii, &err); | |
619 | if (U_FAILURE(err)) | |
620 | { | |
621 | log_err("FAILURE! %s\n", myErrorName(err)); | |
622 | } | |
623 | ucnv_getSubstChars(myConverter,save, &ii, &err); | |
624 | if (U_FAILURE(err)) | |
625 | { | |
626 | log_err("FAILURE! %s\n", myErrorName(err)); | |
627 | } | |
628 | ||
629 | if (strncmp(save, myptr, ii)) | |
630 | log_err("Saved substitution character failed\n"); | |
631 | else | |
632 | log_verbose("Saved substitution character ok\n"); | |
633 | ||
634 | /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ | |
635 | log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); | |
636 | ii=1; | |
637 | ucnv_getSubstChars(myConverter, myptr, &ii, &err); | |
638 | if(err != U_INDEX_OUTOFBOUNDS_ERROR){ | |
639 | log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); | |
640 | } | |
641 | err=U_ZERO_ERROR; | |
642 | ii=4; | |
643 | ucnv_getSubstChars(myConverter, myptr, &ii, &err); | |
644 | log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); | |
645 | ucnv_setSubstChars(myConverter, myptr, 0, &err); | |
646 | if(err != U_ILLEGAL_ARGUMENT_ERROR){ | |
647 | log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); | |
648 | } | |
649 | log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); | |
650 | strcpy(myptr, "abc"); | |
651 | ucnv_setSubstChars(myConverter, myptr, ii, &err); | |
652 | err=U_ZERO_ERROR; | |
653 | ucnv_getSubstChars(myConverter, save, &ii, &err); | |
654 | if(strncmp(save, myptr, ii) == 0){ | |
655 | log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); | |
656 | } | |
657 | log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); | |
658 | err=U_ZERO_ERROR; | |
659 | strcpy(myptr, "abc"); | |
660 | ucnv_setSubstChars(myConverter, myptr, ii, &err); | |
661 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
662 | ucnv_getSubstChars(myConverter, save, &ii, &err); | |
663 | if(strncmp(save, myptr, ii) == 0){ | |
664 | log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); | |
665 | } | |
666 | err=U_ZERO_ERROR; | |
667 | /*------*/ | |
668 | ||
374ca955 | 669 | #ifdef U_ENABLE_GENERIC_ISO_2022 |
b75a7d8f A |
670 | /*resetState ucnv_reset()*/ |
671 | log_verbose("\n---Testing ucnv_reset()..\n"); | |
672 | ucnv_reset(myConverter); | |
673 | { | |
674 | UChar32 c; | |
675 | const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; | |
676 | const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
677 | UConverter *cnv=ucnv_open("ISO_2022", &err); | |
678 | if(U_FAILURE(err)) { | |
679 | log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); | |
680 | } | |
681 | c=ucnv_getNextUChar(cnv, &source, limit, &err); | |
682 | if((U_FAILURE(err) || c != (UChar32)0x0031)) { | |
683 | log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); | |
684 | } | |
685 | ucnv_reset(cnv); | |
686 | ucnv_close(cnv); | |
687 | ||
688 | } | |
374ca955 A |
689 | #endif |
690 | ||
b75a7d8f A |
691 | /*getDisplayName*/ |
692 | log_verbose("\n---Testing ucnv_getDisplayName()...\n"); | |
693 | locale=CodePagesLocale[codepage_index]; | |
b75a7d8f | 694 | len=0; |
73c04bcf A |
695 | displayname=NULL; |
696 | disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); | |
697 | if(err==U_BUFFER_OVERFLOW_ERROR) { | |
b75a7d8f | 698 | err=U_ZERO_ERROR; |
73c04bcf | 699 | displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); |
b75a7d8f | 700 | ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); |
73c04bcf A |
701 | if(U_FAILURE(err)) { |
702 | log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); | |
b75a7d8f | 703 | } |
73c04bcf | 704 | else { |
b75a7d8f | 705 | log_verbose(" getDisplayName o.k.\n"); |
73c04bcf A |
706 | } |
707 | free(displayname); | |
708 | displayname=NULL; | |
709 | } | |
710 | else { | |
711 | log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); | |
b75a7d8f A |
712 | } |
713 | /*test ucnv_getDisplayName with error condition*/ | |
b75a7d8f | 714 | err= U_ILLEGAL_ARGUMENT_ERROR; |
73c04bcf | 715 | len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); |
b75a7d8f A |
716 | if( len !=0 ){ |
717 | log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); | |
718 | } | |
73c04bcf A |
719 | /*test ucnv_getDisplayName with a NULL converter*/ |
720 | err=U_ZERO_ERROR; | |
721 | len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); | |
722 | if( len !=0 || U_SUCCESS(err)){ | |
723 | log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); | |
724 | } | |
b75a7d8f A |
725 | err=U_ZERO_ERROR; |
726 | ||
727 | /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ | |
728 | ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); | |
729 | ||
730 | log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); | |
731 | ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); | |
732 | if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) | |
733 | { | |
734 | log_err("FAILURE! %s\n", myErrorName(err)); | |
735 | } | |
736 | ||
737 | ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); | |
738 | if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) | |
739 | log_err("get From UCallBack failed\n"); | |
740 | else | |
741 | log_verbose("get From UCallBack ok\n"); | |
742 | ||
743 | log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); | |
744 | ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); | |
745 | if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) | |
746 | { | |
747 | log_err("FAILURE! %s\n", myErrorName(err)); | |
748 | } | |
749 | ||
750 | ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); | |
751 | if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) | |
752 | log_err("get From UCallBack action failed\n"); | |
753 | else | |
754 | log_verbose("get From UCallBack action ok\n"); | |
755 | ||
756 | /*testing ucnv_setToUCallBack with error conditions*/ | |
757 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
758 | log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); | |
759 | ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); | |
760 | ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); | |
761 | if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ | |
762 | log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); | |
763 | } | |
764 | err=U_ZERO_ERROR; | |
765 | ||
766 | ||
767 | /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ | |
768 | ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); | |
769 | ||
770 | log_verbose("\n---Testing setTo UCallBack...\n"); | |
771 | ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); | |
772 | if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) | |
773 | { | |
774 | log_err("FAILURE! %s\n", myErrorName(err)); | |
775 | } | |
776 | ||
777 | ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); | |
778 | if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) | |
779 | log_err("To UCallBack failed\n"); | |
780 | else | |
781 | log_verbose("To UCallBack ok\n"); | |
782 | ||
783 | log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); | |
784 | ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); | |
785 | if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) | |
786 | { log_err("FAILURE! %s\n", myErrorName(err)); } | |
787 | ||
788 | ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); | |
789 | if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) | |
790 | log_err("To UCallBack failed\n"); | |
791 | else | |
792 | log_verbose("To UCallBack ok\n"); | |
793 | ||
794 | /*testing ucnv_setToUCallBack with error conditions*/ | |
795 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
796 | log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); | |
797 | ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); | |
798 | ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); | |
799 | if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ | |
800 | log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); | |
801 | } | |
802 | err=U_ZERO_ERROR; | |
803 | ||
804 | ||
805 | /*getcodepageid testing ucnv_getCCSID() */ | |
806 | log_verbose("\n----Testing getCCSID....\n"); | |
807 | cp = ucnv_getCCSID(myConverter,&err); | |
808 | if (U_FAILURE(err)) | |
809 | { | |
810 | log_err("FAILURE!..... %s\n", myErrorName(err)); | |
811 | } | |
812 | if (cp != CodePageNumberToTest[codepage_index]) | |
813 | log_err("Codepage number test failed\n"); | |
814 | else | |
815 | log_verbose("Codepage number test OK\n"); | |
816 | ||
817 | /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ | |
818 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
819 | if( ucnv_getCCSID(myConverter,&err) != -1){ | |
820 | log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); | |
821 | } | |
822 | err=U_ZERO_ERROR; | |
823 | ||
824 | /*getCodepagePlatform testing ucnv_getPlatform()*/ | |
825 | log_verbose("\n---Testing getCodepagePlatform ..\n"); | |
826 | if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) | |
827 | log_err("Platform codepage test failed\n"); | |
828 | else | |
829 | log_verbose("Platform codepage test ok\n"); | |
830 | ||
831 | if (U_FAILURE(err)) | |
832 | { | |
833 | log_err("FAILURE! %s\n", myErrorName(err)); | |
834 | } | |
835 | /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ | |
836 | err= U_ILLEGAL_ARGUMENT_ERROR; | |
837 | if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ | |
838 | log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); | |
839 | } | |
840 | err=U_ZERO_ERROR; | |
841 | ||
842 | ||
843 | /*Reads the BOM*/ | |
57a6839d A |
844 | { |
845 | // Note: gcc produces a compile warning if the return value from fread() is ignored. | |
846 | size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); | |
847 | (void)numRead; | |
848 | } | |
b75a7d8f A |
849 | if (BOM!=0xFEFF && BOM!=0xFFFE) |
850 | { | |
851 | log_err("File Missing BOM...Bailing!\n"); | |
4388f060 A |
852 | fclose(ucs_file_in); |
853 | break; | |
b75a7d8f A |
854 | } |
855 | ||
856 | ||
857 | /*Reads in the file*/ | |
3d1f044b | 858 | while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) |
b75a7d8f A |
859 | { |
860 | myUChar = ucs_file_buffer[i-1]; | |
861 | ||
862 | ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ | |
863 | } | |
864 | ||
865 | myUChar = ucs_file_buffer[i-1]; | |
866 | ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ | |
867 | ||
868 | ||
869 | /*testing ucnv_fromUChars() and ucnv_toUChars() */ | |
870 | /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ | |
871 | ||
872 | uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); | |
873 | u_uastrcpy(uchar1,""); | |
874 | u_strncpy(uchar1,ucs_file_buffer,i); | |
875 | uchar1[i] = 0; | |
876 | ||
877 | uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); | |
878 | u_uastrcpy(uchar3,""); | |
879 | u_strncpy(uchar3,ucs_file_buffer,i); | |
880 | uchar3[i] = 0; | |
881 | ||
882 | /*Calls the Conversion Routine */ | |
883 | testLong1 = MAX_FILE_LEN; | |
884 | log_verbose("\n---Testing ucnv_fromUChars()\n"); | |
885 | targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); | |
886 | if (U_FAILURE(err)) | |
887 | { | |
888 | log_err("\nFAILURE...%s\n", myErrorName(err)); | |
889 | } | |
890 | else | |
891 | log_verbose(" ucnv_fromUChars() o.k.\n"); | |
892 | ||
893 | /*test the conversion routine */ | |
894 | log_verbose("\n---Testing ucnv_toUChars()\n"); | |
895 | /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ | |
896 | targetcapacity2=0; | |
897 | targetsize = ucnv_toUChars(myConverter, | |
898 | NULL, | |
899 | targetcapacity2, | |
900 | output_cp_buffer, | |
3d1f044b | 901 | (int32_t)strlen(output_cp_buffer), |
b75a7d8f A |
902 | &err); |
903 | /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ | |
904 | ||
905 | if(err==U_BUFFER_OVERFLOW_ERROR) | |
906 | { | |
907 | err=U_ZERO_ERROR; | |
908 | uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); | |
909 | targetsize = ucnv_toUChars(myConverter, | |
910 | uchar2, | |
911 | targetsize+1, | |
912 | output_cp_buffer, | |
3d1f044b | 913 | (int32_t)strlen(output_cp_buffer), |
b75a7d8f A |
914 | &err); |
915 | ||
916 | if(U_FAILURE(err)) | |
917 | log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); | |
918 | else | |
919 | log_verbose(" ucnv_toUChars() o.k.\n"); | |
920 | ||
921 | if(u_strcmp(uchar1,uchar2)!=0) | |
922 | log_err("equality test failed with conversion routine\n"); | |
923 | } | |
924 | else | |
925 | { | |
926 | log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); | |
927 | } | |
928 | /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ | |
929 | err=U_ILLEGAL_ARGUMENT_ERROR; | |
930 | log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); | |
931 | targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); | |
932 | if (targetcapacity !=0) { | |
933 | log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); | |
934 | } | |
935 | err=U_ZERO_ERROR; | |
936 | log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); | |
937 | targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); | |
938 | if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
939 | log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); | |
940 | } | |
941 | err=U_ZERO_ERROR; | |
942 | log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); | |
943 | targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); | |
944 | if (targetcapacity !=0) { | |
945 | log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); | |
946 | } | |
947 | log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); | |
948 | targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); | |
949 | if (err != U_BUFFER_OVERFLOW_ERROR) { | |
950 | log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); | |
951 | } | |
952 | /*toUChars with error conditions*/ | |
3d1f044b | 953 | targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); |
b75a7d8f A |
954 | if(targetsize != 0){ |
955 | log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); | |
956 | } | |
957 | err=U_ZERO_ERROR; | |
3d1f044b | 958 | targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); |
b75a7d8f A |
959 | if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ |
960 | log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); | |
961 | } | |
962 | err=U_ZERO_ERROR; | |
963 | targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); | |
964 | if (targetsize !=0) { | |
965 | log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); | |
966 | } | |
967 | targetcapacity2=0; | |
3d1f044b | 968 | targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err); |
b75a7d8f A |
969 | if (err != U_STRING_NOT_TERMINATED_WARNING) { |
970 | log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", | |
971 | u_errorName(err)); | |
972 | } | |
973 | err=U_ZERO_ERROR; | |
974 | /*-----*/ | |
975 | ||
976 | ||
977 | /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ | |
978 | /*Clean up re-usable vars*/ | |
b75a7d8f A |
979 | log_verbose("Testing ucnv_fromUnicode().....\n"); |
980 | tmp_ucs_buf=ucs_file_buffer_use; | |
981 | ucnv_fromUnicode(myConverter, &mytarget_1, | |
982 | mytarget + MAX_FILE_LEN, | |
983 | &tmp_ucs_buf, | |
984 | ucs_file_buffer_use+i, | |
985 | NULL, | |
986 | TRUE, | |
987 | &err); | |
988 | consumedUni = (UChar*)tmp_consumedUni; | |
57a6839d | 989 | (void)consumedUni; /* Suppress set but not used warning. */ |
b75a7d8f A |
990 | |
991 | if (U_FAILURE(err)) | |
992 | { | |
993 | log_err("FAILURE! %s\n", myErrorName(err)); | |
994 | } | |
995 | else | |
996 | log_verbose("ucnv_fromUnicode() o.k.\n"); | |
997 | ||
998 | /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ | |
999 | log_verbose("Testing ucnv_toUnicode().....\n"); | |
1000 | tmp_mytarget_use=mytarget_use; | |
1001 | tmp_consumed = consumed; | |
1002 | ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, | |
1003 | my_ucs_file_buffer + MAX_FILE_LEN, | |
1004 | &tmp_mytarget_use, | |
1005 | mytarget_use + (mytarget_1 - mytarget), | |
1006 | NULL, | |
1007 | FALSE, | |
1008 | &err); | |
1009 | consumed = (char*)tmp_consumed; | |
1010 | if (U_FAILURE(err)) | |
1011 | { | |
1012 | log_err("FAILURE! %s\n", myErrorName(err)); | |
1013 | } | |
1014 | else | |
1015 | log_verbose("ucnv_toUnicode() o.k.\n"); | |
1016 | ||
1017 | ||
1018 | log_verbose("\n---Testing RoundTrip ...\n"); | |
1019 | ||
1020 | ||
1021 | u_strncpy(uchar3, my_ucs_file_buffer,i); | |
1022 | uchar3[i] = 0; | |
1023 | ||
1024 | if(u_strcmp(uchar1,uchar3)==0) | |
1025 | log_verbose("Equality test o.k.\n"); | |
1026 | else | |
1027 | log_err("Equality test failed\n"); | |
1028 | ||
1029 | /*sanity compare */ | |
1030 | if(uchar2 == NULL) | |
1031 | { | |
1032 | log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); | |
1033 | } | |
1034 | else | |
1035 | { | |
1036 | if(u_strcmp(uchar2, uchar3)==0) | |
1037 | log_verbose("Equality test o.k.\n"); | |
1038 | else | |
1039 | log_err("Equality test failed\n"); | |
1040 | } | |
1041 | ||
1042 | fclose(ucs_file_in); | |
1043 | ucnv_close(myConverter); | |
b75a7d8f A |
1044 | if (uchar1 != 0) free(uchar1); |
1045 | if (uchar2 != 0) free(uchar2); | |
1046 | if (uchar3 != 0) free(uchar3); | |
1047 | } | |
1048 | ||
1049 | free((void*)mytarget); | |
1050 | free((void*)output_cp_buffer); | |
1051 | free((void*)ucs_file_buffer); | |
1052 | free((void*)my_ucs_file_buffer); | |
73c04bcf | 1053 | #endif |
b75a7d8f A |
1054 | } |
1055 | ||
51004dcb | 1056 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
1057 | static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) |
1058 | { | |
1059 | return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; | |
1060 | } | |
1061 | ||
b75a7d8f A |
1062 | static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) |
1063 | { | |
1064 | return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; | |
1065 | } | |
51004dcb | 1066 | #endif |
b75a7d8f | 1067 | |
374ca955 | 1068 | static void TestFlushCache(void) { |
73c04bcf | 1069 | #if !UCONFIG_NO_LEGACY_CONVERSION |
374ca955 A |
1070 | UErrorCode err = U_ZERO_ERROR; |
1071 | UConverter* someConverters[5]; | |
1072 | int flushCount = 0; | |
1073 | ||
1074 | /* flush the converter cache to get a consistent state before the flushing is tested */ | |
1075 | ucnv_flushCache(); | |
1076 | ||
1077 | /*Testing ucnv_open()*/ | |
1078 | /* Note: These converters have been chosen because they do NOT | |
1079 | encode the Latin characters (U+0041, ...), and therefore are | |
1080 | highly unlikely to be chosen as system default codepages */ | |
1081 | ||
1082 | someConverters[0] = ucnv_open("ibm-1047", &err); | |
1083 | if (U_FAILURE(err)) { | |
1084 | log_data_err("FAILURE! %s\n", myErrorName(err)); | |
1085 | } | |
1086 | ||
1087 | someConverters[1] = ucnv_open("ibm-1047", &err); | |
1088 | if (U_FAILURE(err)) { | |
1089 | log_data_err("FAILURE! %s\n", myErrorName(err)); | |
1090 | } | |
1091 | ||
1092 | someConverters[2] = ucnv_open("ibm-1047", &err); | |
1093 | if (U_FAILURE(err)) { | |
1094 | log_data_err("FAILURE! %s\n", myErrorName(err)); | |
1095 | } | |
1096 | ||
1097 | someConverters[3] = ucnv_open("gb18030", &err); | |
1098 | if (U_FAILURE(err)) { | |
1099 | log_data_err("FAILURE! %s\n", myErrorName(err)); | |
1100 | } | |
1101 | ||
1102 | someConverters[4] = ucnv_open("ibm-954", &err); | |
1103 | if (U_FAILURE(err)) { | |
1104 | log_data_err("FAILURE! %s\n", myErrorName(err)); | |
1105 | } | |
1106 | ||
1107 | ||
1108 | /* Testing ucnv_flushCache() */ | |
1109 | log_verbose("\n---Testing ucnv_flushCache...\n"); | |
1110 | if ((flushCount=ucnv_flushCache())==0) | |
1111 | log_verbose("Flush cache ok\n"); | |
1112 | else | |
1113 | log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); | |
1114 | ||
1115 | /*testing ucnv_close() and ucnv_flushCache() */ | |
1116 | ucnv_close(someConverters[0]); | |
1117 | ucnv_close(someConverters[1]); | |
1118 | ||
1119 | if ((flushCount=ucnv_flushCache())==0) | |
1120 | log_verbose("Flush cache ok\n"); | |
1121 | else | |
1122 | log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); | |
1123 | ||
1124 | ucnv_close(someConverters[2]); | |
1125 | ucnv_close(someConverters[3]); | |
1126 | ||
1127 | if ((flushCount=ucnv_flushCache())==2) | |
1128 | log_verbose("Flush cache ok\n"); /*because first, second and third are same */ | |
1129 | else | |
1130 | log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", | |
1131 | __LINE__, | |
1132 | flushCount); | |
1133 | ||
1134 | ucnv_close(someConverters[4]); | |
1135 | if ( (flushCount=ucnv_flushCache())==1) | |
1136 | log_verbose("Flush cache ok\n"); | |
1137 | else | |
1138 | log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); | |
73c04bcf | 1139 | #endif |
374ca955 A |
1140 | } |
1141 | ||
b75a7d8f A |
1142 | /** |
1143 | * Test the converter alias API, specifically the fuzzy matching of | |
1144 | * alias names and the alias table integrity. Make sure each | |
1145 | * converter has at least one alias (itself), and that its listed | |
1146 | * aliases map back to itself. Check some hard-coded UTF-8 and | |
1147 | * ISO_2022 aliases to make sure they work. | |
1148 | */ | |
1149 | static void TestAlias() { | |
1150 | int32_t i, ncnv; | |
1151 | UErrorCode status = U_ZERO_ERROR; | |
1152 | ||
1153 | /* Predetermined aliases that we expect to map back to ISO_2022 | |
1154 | * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ | |
1155 | const char* ISO_2022_NAMES[] = | |
374ca955 A |
1156 | {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", |
1157 | "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; | |
b331163b | 1158 | int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); |
b75a7d8f A |
1159 | const char *UTF8_NAMES[] = |
1160 | { "UTF-8", "utf-8", "utf8", "ibm-1208", | |
1161 | "utf_8", "ibm1208", "cp1208" }; | |
b331163b | 1162 | int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); |
b75a7d8f A |
1163 | |
1164 | struct { | |
1165 | const char *name; | |
1166 | const char *alias; | |
1167 | } CONVERTERS_NAMES[] = { | |
1168 | { "UTF-32BE", "UTF32_BigEndian" }, | |
1169 | { "UTF-32LE", "UTF32_LittleEndian" }, | |
1170 | { "UTF-32", "ISO-10646-UCS-4" }, | |
1171 | { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, | |
1172 | { "UTF-32", "ucs-4" } | |
1173 | }; | |
2ca993e8 | 1174 | int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES); |
b75a7d8f A |
1175 | |
1176 | /* When there are bugs in gencnval or in ucnv_io, converters can | |
1177 | appear to have no aliases. */ | |
1178 | ncnv = ucnv_countAvailable(); | |
1179 | log_verbose("%d converters\n", ncnv); | |
1180 | for (i=0; i<ncnv; ++i) { | |
1181 | const char *name = ucnv_getAvailableName(i); | |
1182 | const char *alias0; | |
1183 | uint16_t na = ucnv_countAliases(name, &status); | |
1184 | uint16_t j; | |
1185 | UConverter *cnv; | |
1186 | ||
1187 | if (na == 0) { | |
1188 | log_err("FAIL: Converter \"%s\" (i=%d)" | |
1189 | " has no aliases; expect at least one\n", | |
1190 | name, i); | |
1191 | continue; | |
1192 | } | |
1193 | cnv = ucnv_open(name, &status); | |
1194 | if (U_FAILURE(status)) { | |
1195 | log_data_err("FAIL: Converter \"%s\" (i=%d)" | |
1196 | " can't be opened.\n", | |
1197 | name, i); | |
1198 | } | |
1199 | else { | |
1200 | if (strcmp(ucnv_getName(cnv, &status), name) != 0 | |
1201 | && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { | |
1202 | log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " | |
b331163b | 1203 | "They should be the same\n", |
b75a7d8f A |
1204 | name, ucnv_getName(cnv, &status)); |
1205 | } | |
1206 | } | |
1207 | ucnv_close(cnv); | |
1208 | ||
1209 | status = U_ZERO_ERROR; | |
1210 | alias0 = ucnv_getAlias(name, 0, &status); | |
1211 | for (j=1; j<na; ++j) { | |
1212 | const char *alias; | |
1213 | /* Make sure each alias maps back to the the same list of | |
1214 | aliases. Assume that if alias 0 is the same, the whole | |
1215 | list is the same (this should always be true). */ | |
1216 | const char *mapBack; | |
1217 | ||
1218 | status = U_ZERO_ERROR; | |
1219 | alias = ucnv_getAlias(name, j, &status); | |
1220 | if (status == U_AMBIGUOUS_ALIAS_WARNING) { | |
1221 | log_err("FAIL: Converter \"%s\"is ambiguous\n", name); | |
1222 | } | |
1223 | ||
1224 | if (alias == NULL) { | |
1225 | log_err("FAIL: Converter \"%s\" -> " | |
1226 | "alias[%d]=NULL\n", | |
1227 | name, j); | |
1228 | continue; | |
1229 | } | |
1230 | ||
1231 | mapBack = ucnv_getAlias(alias, 0, &status); | |
1232 | ||
1233 | if (mapBack == NULL) { | |
1234 | log_err("FAIL: Converter \"%s\" -> " | |
1235 | "alias[%d]=\"%s\" -> " | |
1236 | "alias[0]=NULL, exp. \"%s\"\n", | |
1237 | name, j, alias, alias0); | |
1238 | continue; | |
1239 | } | |
1240 | ||
1241 | if (0 != strcmp(alias0, mapBack)) { | |
1242 | int32_t idx; | |
1243 | UBool foundAlias = FALSE; | |
1244 | if (status == U_AMBIGUOUS_ALIAS_WARNING) { | |
1245 | /* Make sure that we only get this mismapping when there is | |
1246 | an ambiguous alias, and the other converter has this alias too. */ | |
1247 | for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { | |
1248 | if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { | |
1249 | foundAlias = TRUE; | |
1250 | break; | |
1251 | } | |
1252 | } | |
1253 | } | |
1254 | /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ | |
1255 | ||
1256 | if (!foundAlias) { | |
1257 | log_err("FAIL: Converter \"%s\" -> " | |
1258 | "alias[%d]=\"%s\" -> " | |
1259 | "alias[0]=\"%s\", exp. \"%s\"\n", | |
1260 | name, j, alias, mapBack, alias0); | |
1261 | } | |
1262 | } | |
1263 | } | |
1264 | } | |
1265 | ||
1266 | ||
1267 | /* Check a list of predetermined aliases that we expect to map | |
1268 | * back to ISO_2022 and UTF-8. */ | |
1269 | for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { | |
1270 | const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); | |
1271 | if(!mapBack) { | |
1272 | log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); | |
1273 | continue; | |
1274 | } | |
1275 | if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { | |
374ca955 | 1276 | log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", |
b75a7d8f A |
1277 | ISO_2022_NAMES[i], mapBack); |
1278 | } | |
1279 | } | |
1280 | ||
1281 | ||
1282 | for (i=1; i<UTF8_NAMES_LENGTH; ++i) { | |
1283 | const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); | |
1284 | if(!mapBack) { | |
1285 | log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); | |
1286 | continue; | |
1287 | } | |
1288 | if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { | |
1289 | log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", | |
1290 | UTF8_NAMES[i], mapBack); | |
1291 | } | |
1292 | } | |
1293 | ||
1294 | /* | |
1295 | * Check a list of predetermined aliases that we expect to map | |
1296 | * back to predermined converter names. | |
1297 | */ | |
1298 | ||
1299 | for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { | |
1300 | const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); | |
1301 | if(!mapBack) { | |
73c04bcf | 1302 | log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); |
b75a7d8f A |
1303 | continue; |
1304 | } | |
1305 | if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { | |
1306 | log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", | |
1307 | CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); | |
1308 | } | |
1309 | } | |
1310 | ||
1311 | } | |
1312 | ||
1313 | static void TestDuplicateAlias(void) { | |
1314 | const char *alias; | |
1315 | UErrorCode status = U_ZERO_ERROR; | |
1316 | ||
1317 | status = U_ZERO_ERROR; | |
1318 | alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); | |
1319 | if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { | |
729e4ab9 | 1320 | log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); |
b75a7d8f A |
1321 | } |
1322 | status = U_ZERO_ERROR; | |
1323 | alias = ucnv_getStandardName("ibm-943", "IANA", &status); | |
1324 | if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { | |
1325 | log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); | |
1326 | } | |
1327 | status = U_ZERO_ERROR; | |
1328 | alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); | |
1329 | if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { | |
1330 | log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); | |
1331 | } | |
1332 | } | |
1333 | ||
1334 | ||
1335 | /* Test safe clone callback */ | |
1336 | ||
/*
 * Hands out serial numbers for TSCC contexts: 1 on the first call, then
 * one more on each following call.
 */
static uint32_t TSCC_nextSerial()
{
    static uint32_t upcoming = 1;
    uint32_t issued = upcoming;

    upcoming += 1;
    return issued;
}
1343 | ||
1344 | typedef struct | |
1345 | { | |
1346 | uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ | |
1347 | uint32_t serial; /* minted from nextSerial, above */ | |
1348 | UBool wasClosed; /* close happened on the object */ | |
1349 | } TSCCContext; | |
1350 | ||
1351 | static TSCCContext *TSCC_clone(TSCCContext *ctx) | |
1352 | { | |
1353 | TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); | |
1354 | ||
1355 | newCtx->serial = TSCC_nextSerial(); | |
1356 | newCtx->wasClosed = 0; | |
1357 | newCtx->magic = 0xC0FFEE; | |
1358 | ||
1359 | log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); | |
1360 | ||
1361 | return newCtx; | |
1362 | } | |
1363 | ||
51004dcb | 1364 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
1365 | static void TSCC_fromU(const void *context, |
1366 | UConverterFromUnicodeArgs *fromUArgs, | |
1367 | const UChar* codeUnits, | |
1368 | int32_t length, | |
1369 | UChar32 codePoint, | |
1370 | UConverterCallbackReason reason, | |
1371 | UErrorCode * err) | |
1372 | { | |
340931cb A |
1373 | // suppress compiler warnings about unused variables |
1374 | (void)codeUnits; | |
1375 | (void)length; | |
1376 | (void)codePoint; | |
1377 | ||
b75a7d8f A |
1378 | TSCCContext *ctx = (TSCCContext*)context; |
1379 | UConverterFromUCallback junkFrom; | |
1380 | ||
1381 | log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); | |
1382 | ||
1383 | if(ctx->magic != 0xC0FFEE) { | |
1384 | log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); | |
1385 | return; | |
1386 | } | |
1387 | ||
1388 | if(reason == UCNV_CLONE) { | |
1389 | UErrorCode subErr = U_ZERO_ERROR; | |
1390 | TSCCContext *newCtx; | |
1391 | TSCCContext *junkCtx; | |
73c04bcf | 1392 | TSCCContext **pjunkCtx = &junkCtx; |
b75a7d8f A |
1393 | |
1394 | /* "recreate" it */ | |
1395 | log_verbose("TSCC_fromU: cloning..\n"); | |
1396 | newCtx = TSCC_clone(ctx); | |
1397 | ||
1398 | if(newCtx == NULL) { | |
1399 | log_err("TSCC_fromU: internal clone failed on %p\n", ctx); | |
1400 | } | |
1401 | ||
1402 | /* now, SET it */ | |
73c04bcf | 1403 | ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); |
b75a7d8f A |
1404 | ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); |
1405 | ||
1406 | if(U_FAILURE(subErr)) { | |
1407 | *err = subErr; | |
1408 | } | |
1409 | } | |
1410 | ||
1411 | if(reason == UCNV_CLOSE) { | |
1412 | log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); | |
1413 | ctx->wasClosed = TRUE; | |
1414 | } | |
1415 | } | |
1416 | ||
b75a7d8f A |
1417 | static void TSCC_toU(const void *context, |
1418 | UConverterToUnicodeArgs *toUArgs, | |
1419 | const char* codeUnits, | |
1420 | int32_t length, | |
1421 | UConverterCallbackReason reason, | |
1422 | UErrorCode * err) | |
1423 | { | |
340931cb A |
1424 | // suppress compiler warnings about unused variables |
1425 | (void)codeUnits; | |
1426 | (void)length; | |
1427 | ||
b75a7d8f A |
1428 | TSCCContext *ctx = (TSCCContext*)context; |
1429 | UConverterToUCallback junkFrom; | |
1430 | ||
1431 | log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); | |
1432 | ||
1433 | if(ctx->magic != 0xC0FFEE) { | |
1434 | log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); | |
1435 | return; | |
1436 | } | |
1437 | ||
1438 | if(reason == UCNV_CLONE) { | |
1439 | UErrorCode subErr = U_ZERO_ERROR; | |
1440 | TSCCContext *newCtx; | |
1441 | TSCCContext *junkCtx; | |
73c04bcf | 1442 | TSCCContext **pjunkCtx = &junkCtx; |
b75a7d8f A |
1443 | |
1444 | /* "recreate" it */ | |
1445 | log_verbose("TSCC_toU: cloning..\n"); | |
1446 | newCtx = TSCC_clone(ctx); | |
1447 | ||
1448 | if(newCtx == NULL) { | |
1449 | log_err("TSCC_toU: internal clone failed on %p\n", ctx); | |
1450 | } | |
1451 | ||
1452 | /* now, SET it */ | |
73c04bcf | 1453 | ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); |
b75a7d8f A |
1454 | ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); |
1455 | ||
1456 | if(U_FAILURE(subErr)) { | |
1457 | *err = subErr; | |
1458 | } | |
1459 | } | |
1460 | ||
1461 | if(reason == UCNV_CLOSE) { | |
1462 | log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); | |
1463 | ctx->wasClosed = TRUE; | |
1464 | } | |
1465 | } | |
1466 | ||
1467 | static void TSCC_init(TSCCContext *q) | |
1468 | { | |
1469 | q->magic = 0xC0FFEE; | |
1470 | q->serial = TSCC_nextSerial(); | |
1471 | q->wasClosed = 0; | |
1472 | } | |
1473 | ||
1474 | static void TSCC_print_log(TSCCContext *q, const char *name) | |
1475 | { | |
1476 | if(q==NULL) { | |
1477 | log_verbose("TSCContext: %s is NULL!!\n", name); | |
1478 | } else { | |
1479 | if(q->magic != 0xC0FFEE) { | |
1480 | log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", | |
1481 | q,q->serial, q->magic); | |
1482 | } | |
1483 | log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", | |
1484 | q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); | |
1485 | } | |
1486 | } | |
1487 | ||
1488 | static void TestConvertSafeCloneCallback() | |
1489 | { | |
1490 | UErrorCode err = U_ZERO_ERROR; | |
1491 | TSCCContext from1, to1; | |
1492 | TSCCContext *from2, *from3, *to2, *to3; | |
73c04bcf | 1493 | TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; |
b75a7d8f A |
1494 | char hunk[8192]; |
1495 | int32_t hunkSize = 8192; | |
1496 | UConverterFromUCallback junkFrom; | |
1497 | UConverterToUCallback junkTo; | |
1498 | UConverter *conv1, *conv2 = NULL; | |
1499 | ||
1500 | conv1 = ucnv_open("iso-8859-3", &err); | |
1501 | ||
1502 | if(U_FAILURE(err)) { | |
729e4ab9 | 1503 | log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); |
b75a7d8f A |
1504 | return; |
1505 | } | |
1506 | ||
1507 | log_verbose("Opened conv1=%p\n", conv1); | |
1508 | ||
1509 | TSCC_init(&from1); | |
1510 | TSCC_init(&to1); | |
1511 | ||
1512 | TSCC_print_log(&from1, "from1"); | |
1513 | TSCC_print_log(&to1, "to1"); | |
1514 | ||
1515 | ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); | |
1516 | log_verbose("Set from1 on conv1\n"); | |
1517 | TSCC_print_log(&from1, "from1"); | |
1518 | ||
1519 | ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); | |
1520 | log_verbose("Set to1 on conv1\n"); | |
1521 | TSCC_print_log(&to1, "to1"); | |
1522 | ||
1523 | conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); | |
1524 | if(U_FAILURE(err)) { | |
1525 | log_err("safeClone failed: %s\n", u_errorName(err)); | |
1526 | return; | |
1527 | } | |
1528 | log_verbose("Cloned to conv2=%p.\n", conv2); | |
1529 | ||
1530 | /********** from *********************/ | |
73c04bcf A |
1531 | ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); |
1532 | ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); | |
b75a7d8f A |
1533 | |
1534 | TSCC_print_log(from2, "from2"); | |
1535 | TSCC_print_log(from3, "from3(==from1)"); | |
1536 | ||
1537 | if(from2 == NULL) { | |
1538 | log_err("FAIL! from2 is null \n"); | |
1539 | return; | |
1540 | } | |
1541 | ||
1542 | if(from3 == NULL) { | |
1543 | log_err("FAIL! from3 is null \n"); | |
1544 | return; | |
1545 | } | |
1546 | ||
1547 | if(from3 != (&from1) ) { | |
1548 | log_err("FAIL! conv1's FROM context changed!\n"); | |
1549 | } | |
1550 | ||
1551 | if(from2 == (&from1) ) { | |
1552 | log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); | |
1553 | } | |
1554 | ||
1555 | if(from1.wasClosed) { | |
1556 | log_err("FAIL! from1 is closed \n"); | |
1557 | } | |
1558 | ||
1559 | if(from2->wasClosed) { | |
1560 | log_err("FAIL! from2 was closed\n"); | |
1561 | } | |
1562 | ||
1563 | /********** to *********************/ | |
73c04bcf A |
1564 | ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); |
1565 | ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); | |
b75a7d8f A |
1566 | |
1567 | TSCC_print_log(to2, "to2"); | |
1568 | TSCC_print_log(to3, "to3(==to1)"); | |
1569 | ||
1570 | if(to2 == NULL) { | |
1571 | log_err("FAIL! to2 is null \n"); | |
1572 | return; | |
1573 | } | |
1574 | ||
1575 | if(to3 == NULL) { | |
1576 | log_err("FAIL! to3 is null \n"); | |
1577 | return; | |
1578 | } | |
1579 | ||
1580 | if(to3 != (&to1) ) { | |
1581 | log_err("FAIL! conv1's TO context changed!\n"); | |
1582 | } | |
1583 | ||
1584 | if(to2 == (&to1) ) { | |
1585 | log_err("FAIL! conv1's TO context is the same as conv2's!\n"); | |
1586 | } | |
1587 | ||
1588 | if(to1.wasClosed) { | |
1589 | log_err("FAIL! to1 is closed \n"); | |
1590 | } | |
1591 | ||
1592 | if(to2->wasClosed) { | |
1593 | log_err("FAIL! to2 was closed\n"); | |
1594 | } | |
1595 | ||
1596 | /*************************************/ | |
1597 | ||
1598 | ucnv_close(conv1); | |
1599 | log_verbose("ucnv_closed (conv1)\n"); | |
1600 | TSCC_print_log(&from1, "from1"); | |
1601 | TSCC_print_log(from2, "from2"); | |
1602 | TSCC_print_log(&to1, "to1"); | |
1603 | TSCC_print_log(to2, "to2"); | |
1604 | ||
1605 | if(from1.wasClosed == FALSE) { | |
1606 | log_err("FAIL! from1 is NOT closed \n"); | |
1607 | } | |
1608 | ||
1609 | if(from2->wasClosed) { | |
1610 | log_err("FAIL! from2 was closed\n"); | |
1611 | } | |
1612 | ||
1613 | if(to1.wasClosed == FALSE) { | |
1614 | log_err("FAIL! to1 is NOT closed \n"); | |
1615 | } | |
1616 | ||
1617 | if(to2->wasClosed) { | |
1618 | log_err("FAIL! to2 was closed\n"); | |
1619 | } | |
1620 | ||
1621 | ucnv_close(conv2); | |
1622 | log_verbose("ucnv_closed (conv2)\n"); | |
1623 | ||
1624 | TSCC_print_log(&from1, "from1"); | |
1625 | TSCC_print_log(from2, "from2"); | |
1626 | ||
1627 | if(from1.wasClosed == FALSE) { | |
1628 | log_err("FAIL! from1 is NOT closed \n"); | |
1629 | } | |
1630 | ||
1631 | if(from2->wasClosed == FALSE) { | |
1632 | log_err("FAIL! from2 was NOT closed\n"); | |
1633 | } | |
1634 | ||
1635 | TSCC_print_log(&to1, "to1"); | |
1636 | TSCC_print_log(to2, "to2"); | |
1637 | ||
1638 | if(to1.wasClosed == FALSE) { | |
1639 | log_err("FAIL! to1 is NOT closed \n"); | |
1640 | } | |
1641 | ||
1642 | if(to2->wasClosed == FALSE) { | |
1643 | log_err("FAIL! to2 was NOT closed\n"); | |
1644 | } | |
1645 | ||
1646 | if(to2 != (&to1)) { | |
1647 | free(to2); /* to1 is stack based */ | |
1648 | } | |
1649 | if(from2 != (&from1)) { | |
1650 | free(from2); /* from1 is stack based */ | |
1651 | } | |
1652 | } | |
73c04bcf | 1653 | #endif |
b75a7d8f | 1654 | |
374ca955 A |
1655 | static UBool |
1656 | containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { | |
1657 | while(length>0) { | |
1658 | if(*p!=b) { | |
1659 | return TRUE; | |
1660 | } | |
1661 | ++p; | |
1662 | --length; | |
1663 | } | |
1664 | return FALSE; | |
1665 | } | |
1666 | ||
b75a7d8f A |
1667 | static void TestConvertSafeClone() |
1668 | { | |
374ca955 A |
1669 | /* one 'regular' & all the 'private stateful' converters */ |
1670 | static const char *const names[] = { | |
73c04bcf | 1671 | #if !UCONFIG_NO_LEGACY_CONVERSION |
374ca955 A |
1672 | "ibm-1047", |
1673 | "ISO_2022,locale=zh,version=1", | |
73c04bcf | 1674 | #endif |
374ca955 | 1675 | "SCSU", |
73c04bcf | 1676 | #if !UCONFIG_NO_LEGACY_CONVERSION |
374ca955 A |
1677 | "HZ", |
1678 | "lmbcs", | |
1679 | "ISCII,version=0", | |
1680 | "ISO_2022,locale=kr,version=1", | |
1681 | "ISO_2022,locale=jp,version=2", | |
73c04bcf | 1682 | #endif |
374ca955 A |
1683 | "BOCU-1", |
1684 | "UTF-7", | |
73c04bcf | 1685 | #if !UCONFIG_NO_LEGACY_CONVERSION |
374ca955 A |
1686 | "IMAP-mailbox-name", |
1687 | "ibm-1047-s390" | |
73c04bcf A |
1688 | #else |
1689 | "IMAP=mailbox-name" | |
1690 | #endif | |
374ca955 A |
1691 | }; |
1692 | ||
73c04bcf | 1693 | /* store the actual sizes of each converter */ |
b331163b | 1694 | int32_t actualSizes[UPRV_LENGTHOF(names)]; |
73c04bcf | 1695 | |
374ca955 A |
1696 | static const int32_t bufferSizes[] = { |
1697 | U_CNV_SAFECLONE_BUFFERSIZE, | |
1698 | (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ | |
1699 | (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ | |
1700 | }; | |
b75a7d8f | 1701 | |
73c04bcf A |
1702 | char charBuffer[21]; /* Leave at an odd number for alignment testing */ |
1703 | uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; | |
374ca955 A |
1704 | int32_t bufferSize, maxBufferSize; |
1705 | const char *maxName; | |
1706 | UConverter * cnv, *cnv2; | |
1707 | UErrorCode err; | |
b75a7d8f A |
1708 | |
1709 | char *pCharBuffer; | |
1710 | const char *pConstCharBuffer; | |
2ca993e8 | 1711 | const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer); |
73c04bcf A |
1712 | UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ |
1713 | UChar uniCharBuffer[20]; | |
1714 | char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; | |
b75a7d8f A |
1715 | const char *pCharSource = charSourceBuffer; |
1716 | const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); | |
1717 | UChar *pUCharTarget = uniCharBuffer; | |
2ca993e8 | 1718 | UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer); |
b75a7d8f | 1719 | const UChar * pUniBuffer; |
2ca993e8 | 1720 | const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer); |
51004dcb | 1721 | int32_t idx, j; |
b75a7d8f | 1722 | |
b75a7d8f | 1723 | err = U_ZERO_ERROR; |
374ca955 A |
1724 | cnv = ucnv_open(names[0], &err); |
1725 | if(U_SUCCESS(err)) { | |
1726 | /* Check the various error & informational states: */ | |
b75a7d8f | 1727 | |
374ca955 A |
1728 | /* Null status - just returns NULL */ |
1729 | bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; | |
57a6839d | 1730 | if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) |
374ca955 A |
1731 | { |
1732 | log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); | |
1733 | } | |
1734 | /* error status - should return 0 & keep error the same */ | |
1735 | err = U_MEMORY_ALLOCATION_ERROR; | |
57a6839d | 1736 | if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) |
374ca955 A |
1737 | { |
1738 | log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); | |
1739 | } | |
1740 | err = U_ZERO_ERROR; | |
b75a7d8f | 1741 | |
57a6839d A |
1742 | /* Null buffer size pointer is ok */ |
1743 | if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) | |
374ca955 A |
1744 | { |
1745 | log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); | |
1746 | } | |
57a6839d | 1747 | ucnv_close(cnv2); |
374ca955 | 1748 | err = U_ZERO_ERROR; |
b75a7d8f | 1749 | |
374ca955 A |
1750 | /* buffer size pointer is 0 - fill in pbufferSize with a size */ |
1751 | bufferSize = 0; | |
57a6839d | 1752 | if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) |
374ca955 A |
1753 | { |
1754 | log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); | |
1755 | } | |
1756 | /* Verify our define is large enough */ | |
1757 | if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) | |
1758 | { | |
1759 | log_err("FAIL: Pre-calculated buffer size is too small\n"); | |
1760 | } | |
1761 | /* Verify we can use this run-time calculated size */ | |
57a6839d | 1762 | if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) |
374ca955 A |
1763 | { |
1764 | log_err("FAIL: Converter can't be cloned with run-time size\n"); | |
1765 | } | |
1766 | if (cnv2) { | |
1767 | ucnv_close(cnv2); | |
1768 | } | |
1769 | ||
1770 | /* size one byte too small - should allocate & let us know */ | |
1771 | --bufferSize; | |
57a6839d | 1772 | if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) |
374ca955 A |
1773 | { |
1774 | log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); | |
1775 | } | |
1776 | if (cnv2) { | |
1777 | ucnv_close(cnv2); | |
1778 | } | |
1779 | ||
1780 | err = U_ZERO_ERROR; | |
1781 | bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; | |
1782 | ||
1783 | /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ | |
57a6839d | 1784 | if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) |
374ca955 A |
1785 | { |
1786 | log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); | |
1787 | } | |
1788 | if (cnv2) { | |
1789 | ucnv_close(cnv2); | |
1790 | } | |
1791 | ||
1792 | err = U_ZERO_ERROR; | |
b75a7d8f | 1793 | |
374ca955 | 1794 | /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ |
57a6839d | 1795 | if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) |
374ca955 A |
1796 | { |
1797 | log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); | |
1798 | } | |
1799 | ||
1800 | ucnv_close(cnv); | |
b75a7d8f A |
1801 | } |
1802 | ||
374ca955 A |
1803 | maxBufferSize = 0; |
1804 | maxName = ""; | |
b75a7d8f A |
1805 | |
1806 | /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ | |
1807 | ||
b331163b A |
1808 | for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { |
1809 | for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) | |
374ca955 A |
1810 | { |
1811 | err = U_ZERO_ERROR; | |
51004dcb | 1812 | cnv = ucnv_open(names[idx], &err); |
374ca955 | 1813 | if(U_FAILURE(err)) { |
51004dcb | 1814 | log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); |
374ca955 A |
1815 | continue; |
1816 | } | |
b75a7d8f | 1817 | |
374ca955 A |
1818 | if(j == 0) { |
1819 | /* preflight to get maxBufferSize */ | |
51004dcb A |
1820 | actualSizes[idx] = 0; |
1821 | ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); | |
1822 | if(actualSizes[idx] > maxBufferSize) { | |
1823 | maxBufferSize = actualSizes[idx]; | |
1824 | maxName = names[idx]; | |
374ca955 A |
1825 | } |
1826 | } | |
b75a7d8f | 1827 | |
374ca955 | 1828 | memset(buffer, 0xaa, sizeof(buffer)); |
b75a7d8f | 1829 | |
374ca955 A |
1830 | bufferSize = bufferSizes[j]; |
1831 | cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); | |
b75a7d8f | 1832 | |
374ca955 A |
1833 | /* close the original immediately to make sure that the clone works by itself */ |
1834 | ucnv_close(cnv); | |
b75a7d8f | 1835 | |
340931cb | 1836 | if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)ALIGNOF(UConverter)) && |
73c04bcf A |
1837 | err == U_SAFECLONE_ALLOCATED_WARNING |
1838 | ) { | |
51004dcb | 1839 | log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); |
73c04bcf A |
1840 | } |
1841 | ||
374ca955 A |
1842 | /* check if the clone function overwrote any bytes that it is not supposed to touch */ |
1843 | if(bufferSize <= bufferSizes[j]) { | |
1844 | /* used the stack buffer */ | |
1845 | if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || | |
1846 | containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) | |
1847 | ) { | |
1848 | log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", | |
51004dcb | 1849 | names[idx], bufferSize, bufferSizes[j]); |
374ca955 A |
1850 | } |
1851 | } else { | |
1852 | /* heap-allocated the clone */ | |
1853 | if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { | |
1854 | log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", | |
51004dcb | 1855 | names[idx], bufferSize, bufferSizes[j]); |
374ca955 A |
1856 | } |
1857 | } | |
1858 | ||
1859 | pCharBuffer = charBuffer; | |
1860 | pUniBuffer = uniBuffer; | |
1861 | ||
1862 | ucnv_fromUnicode(cnv2, | |
1863 | &pCharBuffer, | |
1864 | charBufferLimit, | |
1865 | &pUniBuffer, | |
1866 | uniBufferLimit, | |
1867 | NULL, | |
1868 | TRUE, | |
1869 | &err); | |
1870 | if(U_FAILURE(err)){ | |
1871 | log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); | |
1872 | } | |
1873 | ucnv_toUnicode(cnv2, | |
1874 | &pUCharTarget, | |
1875 | pUCharTargetLimit, | |
1876 | &pCharSource, | |
1877 | pCharSourceLimit, | |
1878 | NULL, | |
1879 | TRUE, | |
1880 | &err | |
1881 | ); | |
1882 | ||
1883 | if(U_FAILURE(err)){ | |
1884 | log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); | |
1885 | } | |
1886 | ||
1887 | pConstCharBuffer = charBuffer; | |
1888 | if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) | |
1889 | { | |
1890 | log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); | |
1891 | } | |
1892 | ucnv_close(cnv2); | |
b75a7d8f | 1893 | } |
b75a7d8f | 1894 | } |
374ca955 A |
1895 | |
1896 | log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", | |
1897 | sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); | |
73c04bcf A |
1898 | if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { |
1899 | log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", | |
1900 | maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); | |
1901 | } | |
b75a7d8f A |
1902 | } |
1903 | ||
1904 | static void TestCCSID() { | |
73c04bcf | 1905 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
1906 | UConverter *cnv; |
1907 | UErrorCode errorCode; | |
374ca955 | 1908 | int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; |
b75a7d8f A |
1909 | int32_t i, ccsid; |
1910 | ||
2ca993e8 | 1911 | for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) { |
b75a7d8f A |
1912 | ccsid=ccsids[i]; |
1913 | ||
1914 | errorCode=U_ZERO_ERROR; | |
1915 | cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); | |
1916 | if(U_FAILURE(errorCode)) { | |
374ca955 | 1917 | log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); |
b75a7d8f A |
1918 | continue; |
1919 | } | |
1920 | ||
1921 | if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { | |
1922 | log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); | |
1923 | } | |
1924 | ||
374ca955 A |
1925 | /* skip gb18030(ccsid 1392) */ |
1926 | if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { | |
b75a7d8f A |
1927 | log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); |
1928 | } | |
1929 | ||
1930 | ucnv_close(cnv); | |
1931 | } | |
73c04bcf | 1932 | #endif |
b75a7d8f A |
1933 | } |
1934 | ||
1935 | /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ | |
1936 | ||
1937 | /* CHUNK_SIZE defined in common\ucnv.c: */ | |
1938 | #define CHUNK_SIZE 1024 | |
1939 | ||
1940 | static void bug1(void); | |
1941 | static void bug2(void); | |
1942 | static void bug3(void); | |
1943 | ||
/*
 * Entry point for the jitterbug 932 regression checks on ucnv_convert().
 * Runs the three sub-tests in order; each one reports its own failures.
 */
static void
TestJ932(void)
{
    /* a character straddling the Unicode intermediate buffer boundary */
    bug1();

    /* wrong preflighting size caused by a simple target overflow */
    bug2();

    /* wrong preflighting size when the output expands relative to the input */
    bug3();
}
1951 | ||
1952 | /* | |
1953 | * jitterbug 932: test chunking boundary conditions in | |
1954 | ||
1955 | int32_t ucnv_convert(const char *toConverterName, | |
1956 | const char *fromConverterName, | |
1957 | char *target, | |
1958 | int32_t targetSize, | |
1959 | const char *source, | |
1960 | int32_t sourceSize, | |
1961 | UErrorCode * err) | |
1962 | ||
1963 | * See discussions on the icu mailing list in | |
1964 | * 2001-April with the subject "converter 'flush' question". | |
1965 | * | |
1966 | * Bug report and test code provided by Edward J. Batutis. | |
1967 | */ | |
1968 | static void bug1() | |
1969 | { | |
73c04bcf | 1970 | #if !UCONFIG_NO_LEGACY_CONVERSION |
46f4442e A |
1971 | char char_in[CHUNK_SIZE+32]; |
1972 | char char_out[CHUNK_SIZE*2]; | |
b75a7d8f A |
1973 | |
1974 | /* GB 18030 equivalent of U+10000 is 90308130 */ | |
1975 | static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; | |
1976 | ||
1977 | UErrorCode err = U_ZERO_ERROR; | |
1978 | int32_t i, test_seq_len = sizeof(test_seq); | |
1979 | ||
1980 | /* | |
1981 | * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward | |
1982 | * until the straddle bug appears. I didn't want to hard-code everything so this test could | |
1983 | * be expanded - however this is the only type of straddle bug I can think of at the moment - | |
1984 | * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no | |
1985 | * other Unicode sequences cause a bug since combining sequences are not supported by the | |
1986 | * converters. | |
1987 | */ | |
1988 | ||
1989 | for (i = test_seq_len; i >= 0; i--) { | |
1990 | /* put character sequence into input buffer */ | |
1991 | memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ | |
1992 | memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); | |
1993 | ||
1994 | /* do the conversion */ | |
1995 | ucnv_convert("us-ascii", /* out */ | |
1996 | "gb18030", /* in */ | |
1997 | char_out, | |
1998 | sizeof(char_out), | |
1999 | char_in, | |
2000 | sizeof(char_in), | |
2001 | &err); | |
2002 | ||
2003 | /* bug1: */ | |
2004 | if (err == U_TRUNCATED_CHAR_FOUND) { | |
2005 | /* this happens when surrogate pair straddles the intermediate buffer in | |
2006 | * T_UConverter_fromCodepageToCodepage */ | |
2007 | log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); | |
2008 | } | |
2009 | } | |
73c04bcf | 2010 | #endif |
b75a7d8f A |
2011 | } |
2012 | ||
2013 | /* bug2: pre-flighting loop bug: simple overflow causes bug */ | |
2014 | static void bug2() | |
2015 | { | |
2016 | /* US-ASCII "1234567890" */ | |
2017 | static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; | |
b331163b | 2018 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
2019 | static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; |
2020 | static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, | |
2021 | 0x00, 0x00, 0x00, 0x31, | |
2022 | 0x00, 0x00, 0x00, 0x32, | |
2023 | 0x00, 0x00, 0x00, 0x33, | |
2024 | 0x00, 0x00, 0x00, 0x34, | |
2025 | 0x00, 0x00, 0x00, 0x35, | |
2026 | 0x00, 0x00, 0x00, 0x36, | |
2027 | 0x00, 0x00, 0x00, 0x37, | |
2028 | 0x00, 0x00, 0x00, 0x38, | |
2029 | 0x00, 0x00, (char)0xf0, 0x00}; | |
b331163b A |
2030 | #endif |
2031 | ||
b75a7d8f A |
2032 | static char target[5]; |
2033 | ||
2034 | UErrorCode err = U_ZERO_ERROR; | |
2035 | int32_t size; | |
2036 | ||
2037 | /* do the conversion */ | |
2038 | size = ucnv_convert("iso-8859-1", /* out */ | |
2039 | "us-ascii", /* in */ | |
2040 | target, | |
2041 | sizeof(target), | |
2042 | source, | |
2043 | sizeof(source), | |
2044 | &err); | |
2045 | ||
2046 | if ( size != 10 ) { | |
2047 | /* bug2: size is 5, should be 10 */ | |
2048 | log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); | |
2049 | } | |
2050 | ||
b331163b | 2051 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
2052 | err = U_ZERO_ERROR; |
2053 | /* do the conversion */ | |
2054 | size = ucnv_convert("UTF-32BE", /* out */ | |
2055 | "UTF-8", /* in */ | |
2056 | target, | |
2057 | sizeof(target), | |
2058 | sourceUTF8, | |
2059 | sizeof(sourceUTF8), | |
2060 | &err); | |
2061 | ||
2062 | if ( size != 32 ) { | |
2063 | /* bug2: size is 5, should be 32 */ | |
2064 | log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); | |
2065 | } | |
2066 | ||
2067 | err = U_ZERO_ERROR; | |
2068 | /* do the conversion */ | |
2069 | size = ucnv_convert("UTF-8", /* out */ | |
2070 | "UTF-32BE", /* in */ | |
2071 | target, | |
2072 | sizeof(target), | |
2073 | sourceUTF32, | |
2074 | sizeof(sourceUTF32), | |
2075 | &err); | |
2076 | ||
2077 | if ( size != 12 ) { | |
2078 | /* bug2: size is 5, should be 12 */ | |
2079 | log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); | |
2080 | } | |
b331163b | 2081 | #endif |
b75a7d8f A |
2082 | } |
2083 | ||
2084 | /* | |
2085 | * bug3: when the characters expand going from source to target codepage | |
2086 | * you get bug3 in addition to bug2 | |
2087 | */ | |
2088 | static void bug3() | |
2089 | { | |
b331163b | 2090 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
46f4442e A |
2091 | char char_in[CHUNK_SIZE*4]; |
2092 | char target[5]; | |
b75a7d8f A |
2093 | UErrorCode err = U_ZERO_ERROR; |
2094 | int32_t size; | |
2095 | ||
2096 | /* | |
2097 | * first get the buggy size from bug2 then | |
2098 | * compare it to buggy size with an expansion | |
2099 | */ | |
2100 | memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ | |
2101 | ||
2102 | /* do the conversion */ | |
2103 | size = ucnv_convert("lmbcs", /* out */ | |
2104 | "us-ascii", /* in */ | |
2105 | target, | |
2106 | sizeof(target), | |
2107 | char_in, | |
2108 | sizeof(char_in), | |
2109 | &err); | |
2110 | ||
2111 | if ( size != sizeof(char_in) ) { | |
2112 | /* | |
2113 | * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer | |
2114 | * in the converter?), should be CHUNK_SIZE*4 | |
2115 | * | |
2116 | * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... | |
2117 | */ | |
2118 | log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); | |
2119 | } | |
2120 | ||
2121 | /* | |
2122 | * now do the conversion with expansion | |
2123 | * ascii 0x08 expands to 0x0F 0x28 in lmbcs | |
2124 | */ | |
2125 | memset(char_in, 8, sizeof(char_in)); | |
2126 | err = U_ZERO_ERROR; | |
2127 | ||
2128 | /* do the conversion */ | |
2129 | size = ucnv_convert("lmbcs", /* out */ | |
2130 | "us-ascii", /* in */ | |
2131 | target, | |
2132 | sizeof(target), | |
2133 | char_in, | |
2134 | sizeof(char_in), | |
2135 | &err); | |
2136 | ||
2137 | /* expect 2X expansion */ | |
2138 | if ( size != sizeof(char_in) * 2 ) { | |
2139 | /* | |
2140 | * bug3: | |
2141 | * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: | |
2142 | */ | |
2143 | log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); | |
2144 | } | |
73c04bcf | 2145 | #endif |
b75a7d8f A |
2146 | } |
2147 | ||
2148 | static void | |
2149 | convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, | |
2150 | const char *src, int32_t srcLength, | |
2151 | const char *expectTarget, int32_t expectTargetLength, | |
2152 | int32_t chunkSize, | |
2153 | const char *testName, | |
2154 | UErrorCode expectCode) { | |
2155 | UChar pivotBuffer[CHUNK_SIZE]; | |
2156 | UChar *pivotSource, *pivotTarget; | |
2157 | const UChar *pivotLimit; | |
2158 | ||
2159 | char targetBuffer[CHUNK_SIZE]; | |
2160 | char *target; | |
2161 | const char *srcLimit, *finalSrcLimit, *targetLimit; | |
2162 | ||
2163 | int32_t targetLength; | |
2164 | ||
2165 | UBool flush; | |
2166 | ||
2167 | UErrorCode errorCode; | |
2168 | ||
2169 | /* setup */ | |
2170 | if(chunkSize>CHUNK_SIZE) { | |
2171 | chunkSize=CHUNK_SIZE; | |
2172 | } | |
2173 | ||
2174 | pivotSource=pivotTarget=pivotBuffer; | |
2175 | pivotLimit=pivotBuffer+chunkSize; | |
2176 | ||
2177 | finalSrcLimit=src+srcLength; | |
2178 | target=targetBuffer; | |
2179 | targetLimit=targetBuffer+chunkSize; | |
2180 | ||
2181 | ucnv_resetToUnicode(srcCnv); | |
2182 | ucnv_resetFromUnicode(targetCnv); | |
2183 | ||
2184 | errorCode=U_ZERO_ERROR; | |
2185 | flush=FALSE; | |
2186 | ||
2187 | /* convert, streaming-style (both converters and pivot keep state) */ | |
2188 | for(;;) { | |
2189 | /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ | |
2190 | if(src+chunkSize<=finalSrcLimit) { | |
2191 | srcLimit=src+chunkSize; | |
2192 | } else { | |
2193 | srcLimit=finalSrcLimit; | |
2194 | } | |
2195 | ucnv_convertEx(targetCnv, srcCnv, | |
2196 | &target, targetLimit, | |
2197 | &src, srcLimit, | |
2198 | pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, | |
2199 | FALSE, flush, &errorCode); | |
2200 | targetLength=(int32_t)(target-targetBuffer); | |
46f4442e A |
2201 | if(target>targetLimit) { |
2202 | log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", | |
2203 | testName, chunkSize, target, targetLimit); | |
2204 | break; /* TODO: major problem! */ | |
2205 | } | |
b75a7d8f A |
2206 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
2207 | /* continue converting another chunk */ | |
2208 | errorCode=U_ZERO_ERROR; | |
340931cb | 2209 | if(targetLength+chunkSize<=(int32_t)sizeof(targetBuffer)) { |
b75a7d8f A |
2210 | targetLimit=target+chunkSize; |
2211 | } else { | |
340931cb | 2212 | targetLimit=targetBuffer+(int32_t)sizeof(targetBuffer); |
b75a7d8f A |
2213 | } |
2214 | } else if(U_FAILURE(errorCode)) { | |
2215 | /* failure */ | |
2216 | break; | |
2217 | } else if(flush) { | |
2218 | /* all done */ | |
2219 | break; | |
2220 | } else if(src==finalSrcLimit && pivotSource==pivotTarget) { | |
2221 | /* all consumed, now flush without input (separate from conversion for testing) */ | |
2222 | flush=TRUE; | |
2223 | } | |
2224 | } | |
2225 | ||
2226 | if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { | |
2227 | log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", | |
2228 | testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); | |
2229 | } else if(targetLength!=expectTargetLength) { | |
2230 | log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", | |
2231 | testName, chunkSize, targetLength, expectTargetLength); | |
2232 | } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { | |
2233 | log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", | |
2234 | testName, chunkSize); | |
2235 | } | |
2236 | } | |
2237 | ||
2238 | static void | |
2239 | convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, | |
2240 | const char *src, int32_t srcLength, | |
2241 | const char *expectTarget, int32_t expectTargetLength, | |
2242 | const char *testName, | |
2243 | UErrorCode expectCode) { | |
2244 | convertExStreaming(srcCnv, targetCnv, | |
2245 | src, srcLength, | |
2246 | expectTarget, expectTargetLength, | |
2247 | 1, testName, expectCode); | |
2248 | convertExStreaming(srcCnv, targetCnv, | |
2249 | src, srcLength, | |
2250 | expectTarget, expectTargetLength, | |
2251 | 3, testName, expectCode); | |
2252 | convertExStreaming(srcCnv, targetCnv, | |
2253 | src, srcLength, | |
2254 | expectTarget, expectTargetLength, | |
2255 | 7, testName, expectCode); | |
2256 | } | |
2257 | ||
2258 | static void TestConvertEx() { | |
73c04bcf | 2259 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2260 | static const uint8_t |
2261 | utf8[]={ | |
2262 | /* 4e00 30a1 ff61 0410 */ | |
2263 | 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 | |
2264 | }, | |
2265 | shiftJIS[]={ | |
2266 | 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 | |
2267 | }, | |
2268 | errorTarget[]={ | |
2269 | /* | |
2270 | * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: | |
2271 | * SUB, SUB, 0x40, SUB, SUB, 0x40 | |
2272 | */ | |
374ca955 | 2273 | 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 |
b75a7d8f A |
2274 | }; |
2275 | ||
2276 | char srcBuffer[100], targetBuffer[100]; | |
2277 | ||
2278 | const char *src; | |
2279 | char *target; | |
2280 | ||
2281 | UChar pivotBuffer[100]; | |
2282 | UChar *pivotSource, *pivotTarget; | |
2283 | ||
2284 | UConverter *cnv1, *cnv2; | |
2285 | UErrorCode errorCode; | |
2286 | ||
2287 | errorCode=U_ZERO_ERROR; | |
2288 | cnv1=ucnv_open("UTF-8", &errorCode); | |
2289 | if(U_FAILURE(errorCode)) { | |
2290 | log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); | |
2291 | return; | |
2292 | } | |
2293 | ||
2294 | cnv2=ucnv_open("Shift-JIS", &errorCode); | |
2295 | if(U_FAILURE(errorCode)) { | |
2296 | log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); | |
2297 | ucnv_close(cnv1); | |
2298 | return; | |
2299 | } | |
2300 | ||
2301 | /* test ucnv_convertEx() with streaming conversion style */ | |
2302 | convertExMultiStreaming(cnv1, cnv2, | |
2303 | (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), | |
2304 | "UTF-8 -> Shift-JIS", U_ZERO_ERROR); | |
2305 | ||
2306 | convertExMultiStreaming(cnv2, cnv1, | |
2307 | (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), | |
2308 | "Shift-JIS -> UTF-8", U_ZERO_ERROR); | |
2309 | ||
2310 | /* U_ZERO_ERROR because by default the SUB callbacks are set */ | |
2311 | convertExMultiStreaming(cnv1, cnv2, | |
2312 | (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), | |
2313 | "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); | |
2314 | ||
2315 | /* test some simple conversions */ | |
2316 | ||
2317 | /* NUL-terminated source and target */ | |
2318 | errorCode=U_STRING_NOT_TERMINATED_WARNING; | |
2319 | memcpy(srcBuffer, utf8, sizeof(utf8)); | |
2320 | srcBuffer[sizeof(utf8)]=0; | |
2321 | src=srcBuffer; | |
2322 | target=targetBuffer; | |
2323 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2324 | NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); | |
2325 | if( errorCode!=U_ZERO_ERROR || | |
2326 | target-targetBuffer!=sizeof(shiftJIS) || | |
2327 | *target!=0 || | |
2328 | memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 | |
2329 | ) { | |
2330 | log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", | |
2331 | u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); | |
2332 | } | |
2333 | ||
2334 | /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ | |
2335 | errorCode=U_AMBIGUOUS_ALIAS_WARNING; | |
2336 | memset(targetBuffer, 0xff, sizeof(targetBuffer)); | |
2337 | src=srcBuffer; | |
2338 | target=targetBuffer; | |
2339 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, | |
2340 | NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); | |
2341 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
2342 | target-targetBuffer!=sizeof(shiftJIS) || | |
2343 | *target!=(char)0xff || | |
2344 | memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 | |
2345 | ) { | |
2346 | log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", | |
2347 | u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); | |
2348 | } | |
2349 | ||
2350 | /* bad arguments */ | |
2351 | errorCode=U_MESSAGE_PARSE_ERROR; | |
2352 | src=srcBuffer; | |
2353 | target=targetBuffer; | |
2354 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2355 | NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); | |
2356 | if(errorCode!=U_MESSAGE_PARSE_ERROR) { | |
2357 | log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); | |
2358 | } | |
2359 | ||
2360 | /* pivotLimit==pivotStart */ | |
2361 | errorCode=U_ZERO_ERROR; | |
2362 | pivotSource=pivotTarget=pivotBuffer; | |
2363 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2364 | pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); | |
2365 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2366 | log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); | |
2367 | } | |
2368 | ||
2369 | /* *pivotSource==NULL */ | |
2370 | errorCode=U_ZERO_ERROR; | |
2371 | pivotSource=NULL; | |
2372 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2373 | pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); | |
2374 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2375 | log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); | |
2376 | } | |
2377 | ||
2378 | /* *source==NULL */ | |
2379 | errorCode=U_ZERO_ERROR; | |
2380 | src=NULL; | |
2381 | pivotSource=pivotBuffer; | |
2382 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2383 | pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); | |
2384 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2385 | log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); | |
2386 | } | |
2387 | ||
73c04bcf A |
2388 | /* streaming conversion without a pivot buffer */ |
2389 | errorCode=U_ZERO_ERROR; | |
2390 | src=srcBuffer; | |
2391 | pivotSource=pivotBuffer; | |
2392 | ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, | |
2393 | NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); | |
2394 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2395 | log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); | |
2396 | } | |
2397 | ||
b75a7d8f A |
2398 | ucnv_close(cnv1); |
2399 | ucnv_close(cnv2); | |
73c04bcf | 2400 | #endif |
b75a7d8f A |
2401 | } |
2402 | ||
46f4442e A |
/*
 * Ill-formed UTF-8 input shared by the TestConvertExFromUTF8() helpers.
 * Each entry is one NUL-terminated byte sequence; the entries are grouped by
 * the kind of defect and, within the truncated group, by sequence length.
 */
static const char *const badUTF8[]={
    /* a lone trail byte */
    "\x80",

    /* truncated multi-byte sequences: lead byte only */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* truncated: two bytes */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0",
    "\xee\x80", "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80",
    "\xf4\x90", "\xf8\x80", "\xfc\x80",

    /* truncated: three bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80", "\xf4\x80\x80",
    "\xf4\x90\x80", "\xf8\x80\x80", "\xfc\x80\x80",

    /* truncated: four bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* truncated: five bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2458 | ||
4388f060 A |
2459 | #define ARG_CHAR_ARR_SIZE 8 |
2460 | ||
46f4442e A |
2461 | /* get some character that can be converted and convert it */ |
2462 | static UBool getTestChar(UConverter *cnv, const char *converterName, | |
2463 | char charUTF8[4], int32_t *pCharUTF8Length, | |
4388f060 A |
2464 | char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, |
2465 | char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { | |
46f4442e A |
2466 | UChar utf16[U16_MAX_LENGTH]; |
2467 | int32_t utf16Length; | |
2468 | ||
2469 | const UChar *utf16Source; | |
2470 | char *target; | |
2471 | ||
2472 | USet *set; | |
2473 | UChar32 c; | |
2474 | UErrorCode errorCode; | |
2475 | ||
2476 | errorCode=U_ZERO_ERROR; | |
2477 | set=uset_open(1, 0); | |
2478 | ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); | |
2479 | c=uset_charAt(set, uset_size(set)/2); | |
2480 | uset_close(set); | |
2481 | ||
2482 | utf16Length=0; | |
2483 | U16_APPEND_UNSAFE(utf16, utf16Length, c); | |
2484 | *pCharUTF8Length=0; | |
2485 | U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); | |
2486 | ||
2487 | utf16Source=utf16; | |
2488 | target=char0; | |
2489 | ucnv_fromUnicode(cnv, | |
4388f060 | 2490 | &target, char0+ARG_CHAR_ARR_SIZE, |
46f4442e A |
2491 | &utf16Source, utf16+utf16Length, |
2492 | NULL, FALSE, &errorCode); | |
2493 | *pChar0Length=(int32_t)(target-char0); | |
2494 | ||
2495 | utf16Source=utf16; | |
2496 | target=char1; | |
2497 | ucnv_fromUnicode(cnv, | |
4388f060 | 2498 | &target, char1+ARG_CHAR_ARR_SIZE, |
46f4442e A |
2499 | &utf16Source, utf16+utf16Length, |
2500 | NULL, FALSE, &errorCode); | |
2501 | *pChar1Length=(int32_t)(target-char1); | |
2502 | ||
2503 | if(U_FAILURE(errorCode)) { | |
2504 | log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); | |
2505 | return FALSE; | |
2506 | } | |
2507 | return TRUE; | |
2508 | } | |
2509 | ||
0f5d89e8 A |
2510 | static UBool isOneTruncatedUTF8(const char *s, int32_t length) { |
2511 | if(length==0) { | |
2512 | return FALSE; | |
2513 | } else if(length==1) { | |
2514 | return U8_IS_LEAD(s[0]); | |
2515 | } else { | |
2516 | int32_t count=U8_COUNT_TRAIL_BYTES(s[0]); | |
2517 | if(length<=count) { | |
2518 | // 2 or more bytes, but fewer than the lead byte indicates. | |
2519 | int32_t oneLength=0; | |
2520 | U8_FWD_1(s, oneLength, length); | |
2521 | // Truncated if we reach the end of the string. | |
2522 | // Not true if the lead byte and first trail byte do not start a valid sequence, | |
2523 | // e.g., E0 80 -> oneLength=1. | |
2524 | return oneLength==length; | |
2525 | } | |
2526 | return FALSE; | |
2527 | } | |
2528 | } | |
2529 | ||
46f4442e A |
2530 | static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, |
2531 | char charUTF8[4], int32_t charUTF8Length, | |
2532 | char char0[8], int32_t char0Length, | |
2533 | char char1[8], int32_t char1Length) { | |
340931cb A |
2534 | // suppress compiler warnings about unused variables |
2535 | (void)char0; | |
2536 | (void)char0Length; | |
2537 | (void)char1; | |
2538 | (void)char1Length; | |
2539 | ||
46f4442e A |
2540 | char utf8[16]; |
2541 | int32_t utf8Length; | |
2542 | ||
2543 | char output[16]; | |
2544 | int32_t outputLength; | |
2545 | ||
2546 | char invalidChars[8]; | |
2547 | int8_t invalidLength; | |
2548 | ||
2549 | const char *source; | |
2550 | char *target; | |
2551 | ||
2552 | UChar pivotBuffer[8]; | |
2553 | UChar *pivotSource, *pivotTarget; | |
2554 | ||
2555 | UErrorCode errorCode; | |
2556 | int32_t i; | |
2557 | ||
2558 | /* test truncated sequences */ | |
2559 | errorCode=U_ZERO_ERROR; | |
2560 | ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); | |
2561 | ||
2562 | memcpy(utf8, charUTF8, charUTF8Length); | |
2563 | ||
b331163b | 2564 | for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { |
46f4442e | 2565 | /* truncated sequence? */ |
3d1f044b | 2566 | int32_t length = (int32_t)strlen(badUTF8[i]); |
0f5d89e8 | 2567 | if(!isOneTruncatedUTF8(badUTF8[i], length)) { |
46f4442e A |
2568 | continue; |
2569 | } | |
2570 | ||
2571 | /* assemble a string with the test character and the truncated sequence */ | |
2572 | memcpy(utf8+charUTF8Length, badUTF8[i], length); | |
2573 | utf8Length=charUTF8Length+length; | |
2574 | ||
2575 | /* convert and check the invalidChars */ | |
2576 | source=utf8; | |
2577 | target=output; | |
2578 | pivotSource=pivotTarget=pivotBuffer; | |
2579 | errorCode=U_ZERO_ERROR; | |
2580 | ucnv_convertEx(cnv, utf8Cnv, | |
2581 | &target, output+sizeof(output), | |
2582 | &source, utf8+utf8Length, | |
b331163b | 2583 | pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), |
46f4442e A |
2584 | TRUE, TRUE, /* reset & flush */ |
2585 | &errorCode); | |
2586 | outputLength=(int32_t)(target-output); | |
57a6839d | 2587 | (void)outputLength; /* Suppress set but not used warning. */ |
46f4442e A |
2588 | if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { |
2589 | log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); | |
2590 | continue; | |
2591 | } | |
2592 | ||
2593 | errorCode=U_ZERO_ERROR; | |
2594 | invalidLength=(int8_t)sizeof(invalidChars); | |
2595 | ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); | |
2596 | if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { | |
2597 | log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); | |
2598 | } | |
2599 | } | |
2600 | } | |
2601 | ||
2602 | static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, | |
2603 | char charUTF8[4], int32_t charUTF8Length, | |
2604 | char char0[8], int32_t char0Length, | |
2605 | char char1[8], int32_t char1Length) { | |
2606 | char utf8[600], expect[600]; | |
2607 | int32_t utf8Length, expectLength; | |
2608 | ||
2609 | char testName[32]; | |
2610 | ||
2611 | UErrorCode errorCode; | |
2612 | int32_t i; | |
2613 | ||
2614 | errorCode=U_ZERO_ERROR; | |
2615 | ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); | |
2616 | ||
2617 | /* | |
2618 | * assemble an input string with the test character between each | |
2619 | * bad sequence, | |
2620 | * and an expected string with repeated test character output | |
2621 | */ | |
2622 | memcpy(utf8, charUTF8, charUTF8Length); | |
2623 | utf8Length=charUTF8Length; | |
2624 | ||
2625 | memcpy(expect, char0, char0Length); | |
2626 | expectLength=char0Length; | |
2627 | ||
b331163b | 2628 | for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { |
3d1f044b | 2629 | int32_t length = (int32_t)strlen(badUTF8[i]); |
46f4442e A |
2630 | memcpy(utf8+utf8Length, badUTF8[i], length); |
2631 | utf8Length+=length; | |
2632 | ||
2633 | memcpy(utf8+utf8Length, charUTF8, charUTF8Length); | |
2634 | utf8Length+=charUTF8Length; | |
2635 | ||
2636 | memcpy(expect+expectLength, char1, char1Length); | |
2637 | expectLength+=char1Length; | |
2638 | } | |
2639 | ||
2640 | /* expect that each bad UTF-8 sequence is detected and skipped */ | |
2641 | strcpy(testName, "from bad UTF-8 to "); | |
2642 | strcat(testName, converterName); | |
2643 | ||
2644 | convertExMultiStreaming(utf8Cnv, cnv, | |
2645 | utf8, utf8Length, | |
2646 | expect, expectLength, | |
2647 | testName, | |
2648 | U_ZERO_ERROR); | |
2649 | } | |
2650 | ||
2651 | /* Test illegal UTF-8 input. */ | |
2652 | static void TestConvertExFromUTF8() { | |
2653 | static const char *const converterNames[]={ | |
2654 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
2655 | "windows-1252", | |
2656 | "shift-jis", | |
2657 | #endif | |
2658 | "us-ascii", | |
2659 | "iso-8859-1", | |
2660 | "utf-8" | |
2661 | }; | |
2662 | ||
2663 | UConverter *utf8Cnv, *cnv; | |
2664 | UErrorCode errorCode; | |
2665 | int32_t i; | |
2666 | ||
2667 | /* fromUnicode versions of some character, from initial state and later */ | |
2668 | char charUTF8[4], char0[8], char1[8]; | |
2669 | int32_t charUTF8Length, char0Length, char1Length; | |
2670 | ||
2671 | errorCode=U_ZERO_ERROR; | |
2672 | utf8Cnv=ucnv_open("UTF-8", &errorCode); | |
2673 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 2674 | log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); |
46f4442e A |
2675 | return; |
2676 | } | |
2677 | ||
b331163b | 2678 | for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { |
46f4442e A |
2679 | errorCode=U_ZERO_ERROR; |
2680 | cnv=ucnv_open(converterNames[i], &errorCode); | |
2681 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 2682 | log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); |
46f4442e A |
2683 | continue; |
2684 | } | |
2685 | if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { | |
2686 | continue; | |
2687 | } | |
2688 | testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); | |
2689 | testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); | |
2690 | ucnv_close(cnv); | |
2691 | } | |
2692 | ucnv_close(utf8Cnv); | |
2693 | } | |
2694 | ||
729e4ab9 A |
2695 | static void TestConvertExFromUTF8_C5F0() { |
2696 | static const char *const converterNames[]={ | |
2697 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
2698 | "windows-1251", | |
2699 | "shift-jis", | |
2700 | #endif | |
2701 | "us-ascii", | |
2702 | "iso-8859-1", | |
2703 | "utf-8" | |
2704 | }; | |
2705 | ||
2706 | UConverter *utf8Cnv, *cnv; | |
2707 | UErrorCode errorCode; | |
2708 | int32_t i; | |
2709 | ||
2710 | static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; | |
2711 | /* Expect "��" (2x U+FFFD as decimal NCRs) */ | |
2712 | static const char twoNCRs[16]={ | |
2713 | 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, | |
2714 | 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B | |
2715 | }; | |
2716 | static const char twoFFFD[6]={ | |
2717 | (char)0xef, (char)0xbf, (char)0xbd, | |
2718 | (char)0xef, (char)0xbf, (char)0xbd | |
2719 | }; | |
2720 | const char *expected; | |
2721 | int32_t expectedLength; | |
2722 | char dest[20]; /* longer than longest expectedLength */ | |
2723 | ||
2724 | const char *src; | |
2725 | char *target; | |
2726 | ||
2727 | UChar pivotBuffer[128]; | |
2728 | UChar *pivotSource, *pivotTarget; | |
2729 | ||
2730 | errorCode=U_ZERO_ERROR; | |
2731 | utf8Cnv=ucnv_open("UTF-8", &errorCode); | |
2732 | if(U_FAILURE(errorCode)) { | |
2733 | log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); | |
2734 | return; | |
2735 | } | |
2736 | ||
b331163b | 2737 | for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { |
729e4ab9 A |
2738 | errorCode=U_ZERO_ERROR; |
2739 | cnv=ucnv_open(converterNames[i], &errorCode); | |
2740 | ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, | |
2741 | NULL, NULL, &errorCode); | |
2742 | if(U_FAILURE(errorCode)) { | |
2743 | log_data_err("unable to open %s converter - %s\n", | |
2744 | converterNames[i], u_errorName(errorCode)); | |
2745 | continue; | |
2746 | } | |
2747 | src=bad_utf8; | |
2748 | target=dest; | |
2749 | uprv_memset(dest, 9, sizeof(dest)); | |
b331163b | 2750 | if(i==UPRV_LENGTHOF(converterNames)-1) { |
729e4ab9 A |
2751 | /* conversion to UTF-8 yields two U+FFFD directly */ |
2752 | expected=twoFFFD; | |
2753 | expectedLength=6; | |
2754 | } else { | |
2755 | /* conversion to a non-Unicode charset yields two NCRs */ | |
2756 | expected=twoNCRs; | |
2757 | expectedLength=16; | |
2758 | } | |
2759 | pivotBuffer[0]=0; | |
2760 | pivotBuffer[1]=1; | |
2761 | pivotBuffer[2]=2; | |
2762 | pivotSource=pivotTarget=pivotBuffer; | |
2763 | ucnv_convertEx( | |
2764 | cnv, utf8Cnv, | |
2765 | &target, dest+expectedLength, | |
2766 | &src, bad_utf8+sizeof(bad_utf8), | |
b331163b | 2767 | pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), |
729e4ab9 A |
2768 | TRUE, TRUE, &errorCode); |
2769 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || | |
2770 | target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || | |
2771 | dest[expectedLength]!=9 | |
2772 | ) { | |
2773 | log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); | |
2774 | } | |
2775 | ucnv_close(cnv); | |
2776 | } | |
2777 | ucnv_close(utf8Cnv); | |
2778 | } | |
2779 | ||
b75a7d8f A |
2780 | static void |
2781 | TestConvertAlgorithmic() { | |
73c04bcf | 2782 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2783 | static const uint8_t |
2784 | utf8[]={ | |
2785 | /* 4e00 30a1 ff61 0410 */ | |
2786 | 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 | |
2787 | }, | |
2788 | shiftJIS[]={ | |
2789 | 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 | |
2790 | }, | |
2791 | /*errorTarget[]={*/ | |
2792 | /* | |
2793 | * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: | |
2794 | * SUB, SUB, 0x40, SUB, SUB, 0x40 | |
2795 | */ | |
2796 | /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ | |
2797 | /*},*/ | |
2798 | utf16[]={ | |
2799 | 0xfe, 0xff /* BOM only, no text */ | |
b331163b A |
2800 | }; |
2801 | #if !UCONFIG_ONLY_HTML_CONVERSION | |
2802 | static const uint8_t utf32[]={ | |
b75a7d8f A |
2803 | 0xff, 0xfe, 0, 0 /* BOM only, no text */ |
2804 | }; | |
b331163b | 2805 | #endif |
b75a7d8f A |
2806 | |
2807 | char target[100], utf8NUL[100], shiftJISNUL[100]; | |
2808 | ||
2809 | UConverter *cnv; | |
2810 | UErrorCode errorCode; | |
2811 | ||
2812 | int32_t length; | |
2813 | ||
2814 | errorCode=U_ZERO_ERROR; | |
2815 | cnv=ucnv_open("Shift-JIS", &errorCode); | |
2816 | if(U_FAILURE(errorCode)) { | |
2817 | log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); | |
2818 | ucnv_close(cnv); | |
2819 | return; | |
2820 | } | |
2821 | ||
2822 | memcpy(utf8NUL, utf8, sizeof(utf8)); | |
2823 | utf8NUL[sizeof(utf8)]=0; | |
2824 | memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); | |
2825 | shiftJISNUL[sizeof(shiftJIS)]=0; | |
2826 | ||
2827 | /* | |
2828 | * The to/from algorithmic convenience functions share a common implementation, | |
2829 | * so we need not test all permutations of them. | |
2830 | */ | |
2831 | ||
2832 | /* length in, not terminated out */ | |
2833 | errorCode=U_ZERO_ERROR; | |
2834 | length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); | |
2835 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
2836 | length!=sizeof(shiftJIS) || | |
2837 | memcmp(target, shiftJIS, length)!=0 | |
2838 | ) { | |
2839 | log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", | |
2840 | u_errorName(errorCode), length, sizeof(shiftJIS)); | |
2841 | } | |
2842 | ||
2843 | /* terminated in and out */ | |
2844 | memset(target, 0x55, sizeof(target)); | |
2845 | errorCode=U_STRING_NOT_TERMINATED_WARNING; | |
2846 | length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); | |
2847 | if( errorCode!=U_ZERO_ERROR || | |
2848 | length!=sizeof(utf8) || | |
2849 | memcmp(target, utf8, length)!=0 | |
2850 | ) { | |
2851 | log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", | |
2852 | u_errorName(errorCode), length, sizeof(shiftJIS)); | |
2853 | } | |
2854 | ||
2855 | /* empty string, some target buffer */ | |
2856 | errorCode=U_STRING_NOT_TERMINATED_WARNING; | |
2857 | length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); | |
2858 | if( errorCode!=U_ZERO_ERROR || | |
2859 | length!=0 | |
2860 | ) { | |
2861 | log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", | |
2862 | u_errorName(errorCode), length); | |
2863 | } | |
2864 | ||
2865 | /* pseudo-empty string, no target buffer */ | |
2866 | errorCode=U_ZERO_ERROR; | |
2867 | length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); | |
2868 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
2869 | length!=0 | |
2870 | ) { | |
2871 | log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", | |
2872 | u_errorName(errorCode), length); | |
2873 | } | |
2874 | ||
b331163b | 2875 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
2876 | errorCode=U_ZERO_ERROR; |
2877 | length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); | |
2878 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
2879 | length!=0 | |
2880 | ) { | |
2881 | log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", | |
2882 | u_errorName(errorCode), length); | |
2883 | } | |
b331163b | 2884 | #endif |
b75a7d8f A |
2885 | |
2886 | /* bad arguments */ | |
2887 | errorCode=U_MESSAGE_PARSE_ERROR; | |
2888 | length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); | |
2889 | if(errorCode!=U_MESSAGE_PARSE_ERROR) { | |
2890 | log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); | |
2891 | } | |
2892 | ||
2893 | /* source==NULL */ | |
2894 | errorCode=U_ZERO_ERROR; | |
2895 | length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); | |
2896 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2897 | log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); | |
2898 | } | |
2899 | ||
2900 | /* illegal alg. type */ | |
2901 | errorCode=U_ZERO_ERROR; | |
2902 | length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); | |
2903 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
2904 | log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); | |
2905 | } | |
2906 | ucnv_close(cnv); | |
73c04bcf | 2907 | #endif |
b75a7d8f A |
2908 | } |
2909 | ||
51004dcb | 2910 | #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
2911 | static void TestLMBCSMaxChar(void) { |
2912 | static const struct { | |
2913 | int8_t maxSize; | |
2914 | const char *name; | |
2915 | } converter[] = { | |
374ca955 A |
2916 | /* some non-LMBCS converters - perfect test setup here */ |
2917 | { 1, "US-ASCII"}, | |
2918 | { 1, "ISO-8859-1"}, | |
2919 | ||
729e4ab9 A |
2920 | { 2, "UTF-16"}, |
2921 | { 2, "UTF-16BE"}, | |
374ca955 A |
2922 | { 3, "UTF-8"}, |
2923 | { 3, "CESU-8"}, | |
2924 | { 3, "SCSU"}, | |
2925 | { 4, "UTF-32"}, | |
2926 | { 4, "UTF-7"}, | |
2927 | { 4, "IMAP-mailbox-name"}, | |
2928 | { 4, "BOCU-1"}, | |
2929 | ||
2930 | { 1, "windows-1256"}, | |
2931 | { 2, "Shift-JIS"}, | |
2932 | { 2, "ibm-16684"}, | |
2933 | { 3, "ibm-930"}, | |
2934 | { 3, "ibm-1390"}, | |
2935 | { 4, "*test3"}, | |
2936 | { 16,"*test4"}, | |
2937 | ||
2938 | { 4, "ISCII"}, | |
2939 | { 4, "HZ"}, | |
2940 | ||
2941 | { 3, "ISO-2022"}, | |
2ca993e8 | 2942 | { 8, "ISO-2022-KR"}, |
374ca955 A |
2943 | { 6, "ISO-2022-JP"}, |
2944 | { 8, "ISO-2022-CN"}, | |
2945 | ||
2946 | /* LMBCS */ | |
2947 | { 3, "LMBCS-1"}, | |
2948 | { 3, "LMBCS-2"}, | |
2949 | { 3, "LMBCS-3"}, | |
2950 | { 3, "LMBCS-4"}, | |
2951 | { 3, "LMBCS-5"}, | |
2952 | { 3, "LMBCS-6"}, | |
2953 | { 3, "LMBCS-8"}, | |
2954 | { 3, "LMBCS-11"}, | |
2955 | { 3, "LMBCS-16"}, | |
2956 | { 3, "LMBCS-17"}, | |
2957 | { 3, "LMBCS-18"}, | |
2958 | { 3, "LMBCS-19"} | |
b75a7d8f A |
2959 | }; |
2960 | int32_t idx; | |
2961 | ||
b331163b | 2962 | for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { |
b75a7d8f | 2963 | UErrorCode status = U_ZERO_ERROR; |
374ca955 | 2964 | UConverter *cnv = cnv_open(converter[idx].name, &status); |
b75a7d8f A |
2965 | if (U_FAILURE(status)) { |
2966 | continue; | |
2967 | } | |
2968 | if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { | |
374ca955 | 2969 | log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", |
b75a7d8f A |
2970 | converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); |
2971 | } | |
2972 | ucnv_close(cnv); | |
2973 | } | |
374ca955 A |
2974 | |
2975 | /* mostly test that the macro compiles */ | |
2976 | if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { | |
2977 | log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); | |
2978 | } | |
b75a7d8f | 2979 | } |
51004dcb | 2980 | #endif |
b75a7d8f A |
2981 | |
2982 | static void TestJ1968(void) { | |
2983 | UErrorCode err = U_ZERO_ERROR; | |
2984 | UConverter *cnv; | |
2985 | char myConvName[] = "My really really really really really really really really really really really" | |
2986 | " really really really really really really really really really really really" | |
2987 | " really really really really really really really really long converter name"; | |
2988 | UChar myConvNameU[sizeof(myConvName)]; | |
2989 | ||
2990 | u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); | |
2991 | ||
2992 | err = U_ZERO_ERROR; | |
2993 | myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; | |
2994 | cnv = ucnv_openU(myConvNameU, &err); | |
2995 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
2996 | log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
2997 | } | |
2998 | ||
2999 | err = U_ZERO_ERROR; | |
3000 | myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; | |
3001 | cnv = ucnv_openU(myConvNameU, &err); | |
3002 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
3003 | log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
3004 | } | |
3005 | ||
3006 | err = U_ZERO_ERROR; | |
3007 | myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; | |
3008 | cnv = ucnv_openU(myConvNameU, &err); | |
3009 | if (cnv || err != U_FILE_ACCESS_ERROR) { | |
3010 | log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); | |
3011 | } | |
3012 | ||
3013 | ||
3014 | ||
3015 | ||
3016 | err = U_ZERO_ERROR; | |
3017 | cnv = ucnv_open(myConvName, &err); | |
3018 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
3019 | log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
3020 | } | |
3021 | ||
3022 | err = U_ZERO_ERROR; | |
3023 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; | |
3024 | cnv = ucnv_open(myConvName, &err); | |
3025 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
3026 | log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
3027 | } | |
3028 | ||
3029 | err = U_ZERO_ERROR; | |
3030 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; | |
3031 | cnv = ucnv_open(myConvName, &err); | |
3032 | if (cnv || err != U_FILE_ACCESS_ERROR) { | |
3033 | log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); | |
3034 | } | |
3035 | ||
3036 | err = U_ZERO_ERROR; | |
3037 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; | |
3038 | strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); | |
3039 | cnv = ucnv_open(myConvName, &err); | |
3040 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
3041 | log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
3042 | } | |
3043 | ||
3044 | /* The comma isn't really a part of the converter name. */ | |
3045 | err = U_ZERO_ERROR; | |
3046 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; | |
3047 | cnv = ucnv_open(myConvName, &err); | |
3048 | if (cnv || err != U_FILE_ACCESS_ERROR) { | |
3049 | log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); | |
3050 | } | |
3051 | ||
3052 | err = U_ZERO_ERROR; | |
3053 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; | |
3054 | cnv = ucnv_open(myConvName, &err); | |
3055 | if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { | |
3056 | log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); | |
3057 | } | |
3058 | ||
3059 | err = U_ZERO_ERROR; | |
3060 | myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; | |
3061 | cnv = ucnv_open(myConvName, &err); | |
3062 | if (cnv || err != U_FILE_ACCESS_ERROR) { | |
3063 | log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); | |
3064 | } | |
3065 | ||
3066 | } | |
3067 | ||
73c04bcf | 3068 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
3069 | static void |
3070 | testSwap(const char *name, UBool swap) { | |
3071 | /* | |
3072 | * Test Unicode text. | |
3073 | * Contains characters that are the highest for some of the | |
3074 | * tested conversions, to make sure that the ucnvmbcs.c code that modifies the | |
3075 | * tables copies the entire tables. | |
3076 | */ | |
3077 | static const UChar text[]={ | |
3078 | 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a | |
3079 | }; | |
3080 | ||
3081 | UChar uNormal[32], uSwapped[32]; | |
3082 | char normal[32], swapped[32]; | |
3083 | const UChar *pcu; | |
3084 | UChar *pu; | |
3085 | char *pc; | |
3086 | int32_t i, normalLength, swappedLength; | |
3087 | UChar u; | |
3088 | char c; | |
3089 | ||
3090 | const char *swappedName; | |
3091 | UConverter *cnv, *swapCnv; | |
3092 | UErrorCode errorCode; | |
3093 | ||
3094 | /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ | |
3095 | ||
3096 | /* open both the normal and the LF/NL-swapping converters */ | |
3097 | strcpy(swapped, name); | |
3098 | strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); | |
3099 | ||
3100 | errorCode=U_ZERO_ERROR; | |
3101 | swapCnv=ucnv_open(swapped, &errorCode); | |
3102 | cnv=ucnv_open(name, &errorCode); | |
3103 | if(U_FAILURE(errorCode)) { | |
3104 | log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); | |
3105 | goto cleanup; | |
3106 | } | |
3107 | ||
3108 | /* the name must contain the swap option if and only if we expect the converter to swap */ | |
3109 | swappedName=ucnv_getName(swapCnv, &errorCode); | |
3110 | if(U_FAILURE(errorCode)) { | |
3111 | log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); | |
3112 | goto cleanup; | |
3113 | } | |
3114 | ||
3115 | pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); | |
3116 | if(swap != (pc!=NULL)) { | |
3117 | log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); | |
3118 | goto cleanup; | |
3119 | } | |
3120 | ||
3121 | /* convert to EBCDIC */ | |
3122 | pcu=text; | |
3123 | pc=normal; | |
b331163b | 3124 | ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); |
b75a7d8f A |
3125 | normalLength=(int32_t)(pc-normal); |
3126 | ||
3127 | pcu=text; | |
3128 | pc=swapped; | |
b331163b | 3129 | ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); |
b75a7d8f A |
3130 | swappedLength=(int32_t)(pc-swapped); |
3131 | ||
3132 | if(U_FAILURE(errorCode)) { | |
3133 | log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); | |
3134 | goto cleanup; | |
3135 | } | |
3136 | ||
3137 | /* compare EBCDIC output */ | |
3138 | if(normalLength!=swappedLength) { | |
3139 | log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); | |
3140 | goto cleanup; | |
3141 | } | |
3142 | for(i=0; i<normalLength; ++i) { | |
3143 | /* swap EBCDIC LF/NL for comparison */ | |
3144 | c=normal[i]; | |
3145 | if(swap) { | |
3146 | if(c==0x15) { | |
3147 | c=0x25; | |
3148 | } else if(c==0x25) { | |
3149 | c=0x15; | |
3150 | } | |
3151 | } | |
3152 | ||
3153 | if(c!=swapped[i]) { | |
3154 | log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); | |
3155 | goto cleanup; | |
3156 | } | |
3157 | } | |
3158 | ||
3159 | /* convert back to Unicode (may not roundtrip) */ | |
3160 | pc=normal; | |
3161 | pu=uNormal; | |
b331163b | 3162 | ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); |
b75a7d8f A |
3163 | normalLength=(int32_t)(pu-uNormal); |
3164 | ||
3165 | pc=normal; | |
3166 | pu=uSwapped; | |
b331163b | 3167 | ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); |
b75a7d8f A |
3168 | swappedLength=(int32_t)(pu-uSwapped); |
3169 | ||
3170 | if(U_FAILURE(errorCode)) { | |
3171 | log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); | |
3172 | goto cleanup; | |
3173 | } | |
3174 | ||
3175 | /* compare EBCDIC output */ | |
3176 | if(normalLength!=swappedLength) { | |
3177 | log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); | |
3178 | goto cleanup; | |
3179 | } | |
3180 | for(i=0; i<normalLength; ++i) { | |
3181 | /* swap EBCDIC LF/NL for comparison */ | |
3182 | u=uNormal[i]; | |
3183 | if(swap) { | |
3184 | if(u==0xa) { | |
3185 | u=0x85; | |
3186 | } else if(u==0x85) { | |
3187 | u=0xa; | |
3188 | } | |
3189 | } | |
3190 | ||
3191 | if(u!=uSwapped[i]) { | |
3192 | log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); | |
3193 | goto cleanup; | |
3194 | } | |
3195 | } | |
3196 | ||
3197 | /* clean up */ | |
3198 | cleanup: | |
3199 | ucnv_close(cnv); | |
3200 | ucnv_close(swapCnv); | |
3201 | } | |
3202 | ||
3203 | static void | |
3204 | TestEBCDICSwapLFNL() { | |
3205 | static const struct { | |
3206 | const char *name; | |
3207 | UBool swap; | |
3208 | } tests[]={ | |
3209 | { "ibm-37", TRUE }, | |
3210 | { "ibm-1047", TRUE }, | |
3211 | { "ibm-1140", TRUE }, | |
3212 | { "ibm-930", TRUE }, | |
3213 | { "iso-8859-3", FALSE } | |
3214 | }; | |
3215 | ||
3216 | int i; | |
3217 | ||
b331163b | 3218 | for(i=0; i<UPRV_LENGTHOF(tests); ++i) { |
b75a7d8f A |
3219 | testSwap(tests[i].name, tests[i].swap); |
3220 | } | |
3221 | } | |
73c04bcf A |
3222 | #else |
/*
 * Placeholder used in the #else branch of the surrounding conditional:
 * keeps the test entry point defined but performs no checks.
 */
static void
TestEBCDICSwapLFNL() {
    return; /* test nothing... */
}
3227 | #endif | |
3228 | ||
73c04bcf A |
3229 | static void TestFromUCountPending(){ |
3230 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
3231 | UErrorCode status = U_ZERO_ERROR; | |
3232 | /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ | |
3233 | static const struct { | |
3234 | UChar input[6]; | |
3235 | int32_t len; | |
3236 | int32_t exp; | |
3237 | }fromUnicodeTests[] = { | |
3238 | /*m:n conversion*/ | |
3239 | {{0xdbc4},1,1}, | |
3240 | {{ 0xdbc4, 0xde34, 0xd84d},3,1}, | |
3241 | {{ 0xdbc4, 0xde34, 0xd900},3,3}, | |
3242 | }; | |
3243 | int i; | |
3244 | UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); | |
3245 | if(U_FAILURE(status)){ | |
46f4442e | 3246 | log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); |
73c04bcf A |
3247 | return; |
3248 | } | |
b331163b | 3249 | for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) { |
73c04bcf A |
3250 | char tgt[10]; |
3251 | char* target = tgt; | |
3252 | char* targetLimit = target + 10; | |
3253 | const UChar* source = fromUnicodeTests[i].input; | |
3254 | const UChar* sourceLimit = source + fromUnicodeTests[i].len; | |
3255 | int32_t len = 0; | |
3256 | ucnv_reset(cnv); | |
3257 | ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3258 | len = ucnv_fromUCountPending(cnv, &status); | |
3259 | if(U_FAILURE(status)){ | |
3260 | log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3261 | status = U_ZERO_ERROR; | |
3262 | continue; | |
3263 | } | |
3264 | if(len != fromUnicodeTests[i].exp){ | |
3265 | log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); | |
3266 | } | |
3267 | } | |
3268 | status = U_ZERO_ERROR; | |
3269 | { | |
3270 | /* | |
3271 | * The converter has to read the tail before it knows that | |
3272 | * only head alone matches. | |
3273 | * At the end, the output for head will overflow the target, | |
3274 | * middle will be pending, and tail will not have been consumed. | |
3275 | */ | |
3276 | /* | |
3277 | \U00101234 -> x (<U101234> \x07 |0) | |
3278 | \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) | |
3279 | \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) | |
3280 | \U00060007 -> unassigned | |
3281 | */ | |
3282 | static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ | |
3283 | static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ | |
3284 | static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ | |
3285 | char tgt[10]; | |
3286 | char* target = tgt; | |
3287 | char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ | |
3288 | const UChar* source = head; | |
3289 | const UChar* sourceLimit = source + u_strlen(head); | |
3290 | int32_t len = 0; | |
3291 | ucnv_reset(cnv); | |
3292 | ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3293 | len = ucnv_fromUCountPending(cnv, &status); | |
3294 | if(U_FAILURE(status)){ | |
3295 | log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3296 | status = U_ZERO_ERROR; | |
3297 | } | |
3298 | if(len!=4){ | |
3299 | log_err("ucnv_fromUInputHeld did not return correct length for head\n"); | |
3300 | } | |
3301 | source = middle; | |
3302 | sourceLimit = source + u_strlen(middle); | |
3303 | ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3304 | len = ucnv_fromUCountPending(cnv, &status); | |
3305 | if(U_FAILURE(status)){ | |
3306 | log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3307 | status = U_ZERO_ERROR; | |
3308 | } | |
3309 | if(len!=5){ | |
3310 | log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); | |
3311 | } | |
3312 | source = tail; | |
3313 | sourceLimit = source + u_strlen(tail); | |
3314 | ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3315 | if(status != U_BUFFER_OVERFLOW_ERROR){ | |
3316 | log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3317 | } | |
3318 | status = U_ZERO_ERROR; | |
3319 | len = ucnv_fromUCountPending(cnv, &status); | |
3320 | /* middle[1] is pending, tail has not been consumed */ | |
3321 | if(U_FAILURE(status)){ | |
3322 | log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); | |
3323 | } | |
3324 | if(len!=1){ | |
3325 | log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); | |
3326 | } | |
3327 | } | |
3328 | ucnv_close(cnv); | |
3329 | #endif | |
3330 | } | |
3331 | ||
3332 | static void | |
3333 | TestToUCountPending(){ | |
3334 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
3335 | UErrorCode status = U_ZERO_ERROR; | |
3336 | static const struct { | |
3337 | char input[6]; | |
3338 | int32_t len; | |
3339 | int32_t exp; | |
3340 | }toUnicodeTests[] = { | |
3341 | /*m:n conversion*/ | |
3342 | {{0x05, 0x01, 0x02},3,3}, | |
3343 | {{0x01, 0x02},2,2}, | |
3344 | {{0x07, 0x00, 0x01, 0x02},4,4}, | |
3345 | }; | |
3346 | ||
3347 | int i; | |
3348 | UConverterToUCallback *oldToUAction= NULL; | |
3349 | UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); | |
3350 | if(U_FAILURE(status)){ | |
46f4442e | 3351 | log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); |
73c04bcf A |
3352 | return; |
3353 | } | |
3354 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); | |
b331163b | 3355 | for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) { |
4388f060 | 3356 | UChar tgt[20]; |
73c04bcf A |
3357 | UChar* target = tgt; |
3358 | UChar* targetLimit = target + 20; | |
3359 | const char* source = toUnicodeTests[i].input; | |
3360 | const char* sourceLimit = source + toUnicodeTests[i].len; | |
3361 | int32_t len = 0; | |
3362 | ucnv_reset(cnv); | |
4388f060 | 3363 | ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); |
73c04bcf A |
3364 | len = ucnv_toUCountPending(cnv,&status); |
3365 | if(U_FAILURE(status)){ | |
3366 | log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3367 | status = U_ZERO_ERROR; | |
3368 | continue; | |
3369 | } | |
3370 | if(len != toUnicodeTests[i].exp){ | |
3371 | log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); | |
3372 | } | |
3373 | } | |
3374 | status = U_ZERO_ERROR; | |
3375 | ucnv_close(cnv); | |
3376 | ||
3377 | { | |
3378 | /* | |
3379 | * The converter has to read the tail before it knows that | |
3380 | * only head alone matches. | |
3381 | * At the end, the output for head will overflow the target, | |
3382 | * mid will be pending, and tail will not have been consumed. | |
3383 | */ | |
3384 | char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; | |
3385 | char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; | |
3386 | char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; | |
3387 | /* | |
3388 | 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) | |
3389 | 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) | |
3390 | 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) | |
3391 | 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") | |
3392 | */ | |
3393 | UChar tgt[10]; | |
3394 | UChar* target = tgt; | |
3395 | UChar* targetLimit = target + 1; /* expect overflow from converting */ | |
3396 | const char* source = head; | |
3397 | const char* sourceLimit = source + strlen(head); | |
3398 | int32_t len = 0; | |
3399 | cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); | |
3400 | if(U_FAILURE(status)){ | |
3401 | log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); | |
3402 | return; | |
3403 | } | |
3404 | ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); | |
3405 | ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3406 | len = ucnv_toUCountPending(cnv,&status); | |
3407 | if(U_FAILURE(status)){ | |
3408 | log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3409 | } | |
3410 | if(len != 4){ | |
3411 | log_err("Did not get the expected len for head.\n"); | |
3412 | } | |
3413 | source=mid; | |
3414 | sourceLimit = source+strlen(mid); | |
3415 | ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3416 | len = ucnv_toUCountPending(cnv,&status); | |
3417 | if(U_FAILURE(status)){ | |
3418 | log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3419 | } | |
3420 | if(len != 8){ | |
3421 | log_err("Did not get the expected len for mid.\n"); | |
3422 | } | |
3423 | ||
3424 | source=tail; | |
3425 | sourceLimit = source+strlen(tail); | |
3426 | targetLimit = target; | |
3427 | ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); | |
3428 | if(status != U_BUFFER_OVERFLOW_ERROR){ | |
3429 | log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); | |
3430 | } | |
3431 | status = U_ZERO_ERROR; | |
3432 | len = ucnv_toUCountPending(cnv,&status); | |
3433 | /* mid[4] is pending, tail has not been consumed */ | |
3434 | if(U_FAILURE(status)){ | |
3435 | log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); | |
3436 | } | |
3437 | if(len != 4){ | |
3438 | log_err("Did not get the expected len for tail.\n"); | |
3439 | } | |
3440 | ucnv_close(cnv); | |
3441 | } | |
3442 | #endif | |
3443 | } | |
3444 | ||
729e4ab9 | 3445 | static void TestOneDefaultNameChange(const char *name, const char *expected) { |
73c04bcf A |
3446 | UErrorCode status = U_ZERO_ERROR; |
3447 | UConverter *cnv; | |
3448 | ucnv_setDefaultName(name); | |
729e4ab9 | 3449 | if(strcmp(ucnv_getDefaultName(), expected)==0) |
73c04bcf A |
3450 | log_verbose("setDefaultName of %s works.\n", name); |
3451 | else | |
3452 | log_err("setDefaultName of %s failed\n", name); | |
3453 | cnv=ucnv_open(NULL, &status); | |
3454 | if (U_FAILURE(status) || cnv == NULL) { | |
3455 | log_err("opening the default converter of %s failed\n", name); | |
3456 | return; | |
3457 | } | |
729e4ab9 | 3458 | if(strcmp(ucnv_getName(cnv, &status), expected)==0) |
73c04bcf A |
3459 | log_verbose("ucnv_getName of %s works.\n", name); |
3460 | else | |
3461 | log_err("ucnv_getName of %s failed\n", name); | |
3462 | ucnv_close(cnv); | |
3463 | } | |
3464 | ||
3465 | static void TestDefaultName(void) { | |
3466 | /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ | |
3467 | static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; | |
3468 | strcpy(defaultName, ucnv_getDefaultName()); | |
3469 | ||
3470 | log_verbose("getDefaultName returned %s\n", defaultName); | |
3471 | ||
3472 | /*change the default name by setting it */ | |
729e4ab9 A |
3473 | TestOneDefaultNameChange("UTF-8", "UTF-8"); |
3474 | #if U_CHARSET_IS_UTF8 | |
3475 | TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); | |
3476 | TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); | |
3477 | TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); | |
3478 | #else | |
b331163b | 3479 | # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
729e4ab9 A |
3480 | TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); |
3481 | TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); | |
3482 | # endif | |
3483 | TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); | |
73c04bcf | 3484 | #endif |
73c04bcf A |
3485 | |
3486 | /*set the default name back*/ | |
3487 | ucnv_setDefaultName(defaultName); | |
3488 | } | |
3489 | ||
3490 | /* Test that ucnv_compareNames() matches names according to spec. ----------- */ | |
3491 | ||
/* Returns -1, 0 or 1 according to whether n is negative, zero or positive. */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is the signum */
    return (n>0)-(n<0);
}
3502 | ||
3503 | static void | |
3504 | compareNames(const char **names) { | |
3505 | const char *relation, *name1, *name2; | |
3506 | int rel, result; | |
3507 | ||
3508 | relation=*names++; | |
3509 | if(*relation=='=') { | |
3510 | rel = 0; | |
3511 | } else if(*relation=='<') { | |
3512 | rel = -1; | |
3513 | } else { | |
3514 | rel = 1; | |
3515 | } | |
3516 | ||
3517 | name1=*names++; | |
3518 | if(name1==NULL) { | |
3519 | return; | |
3520 | } | |
3521 | while((name2=*names++)!=NULL) { | |
3522 | result=ucnv_compareNames(name1, name2); | |
3523 | if(sign(result)!=rel) { | |
3524 | log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); | |
3525 | } | |
3526 | name1=name2; | |
3527 | } | |
3528 | } | |
3529 | ||
3530 | static void | |
3531 | TestCompareNames() { | |
3532 | static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; | |
3533 | static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; | |
3534 | static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; | |
3535 | static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; | |
3536 | ||
3537 | compareNames(equalUTF8); | |
3538 | compareNames(equalIBM); | |
3539 | compareNames(lessMac); | |
3540 | compareNames(lessUTF080); | |
3541 | } | |
3542 | ||
3543 | static void | |
3544 | TestSubstString() { | |
3545 | static const UChar surrogate[1]={ 0xd900 }; | |
3546 | char buffer[16]; | |
3547 | ||
3548 | static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; | |
3549 | static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; | |
3550 | UConverter *cnv; | |
3551 | UErrorCode errorCode; | |
3552 | int32_t length; | |
3553 | int8_t len8; | |
3554 | ||
3555 | /* UTF-16/32: test that the BOM is output before the sub character */ | |
3556 | errorCode=U_ZERO_ERROR; | |
3557 | cnv=ucnv_open("UTF-16", &errorCode); | |
3558 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 3559 | log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); |
73c04bcf A |
3560 | return; |
3561 | } | |
3562 | length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); | |
3563 | ucnv_close(cnv); | |
3564 | if(U_FAILURE(errorCode) || | |
3565 | length!=4 || | |
3566 | NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) | |
3567 | ) { | |
3568 | log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); | |
3569 | } | |
3570 | ||
3571 | errorCode=U_ZERO_ERROR; | |
3572 | cnv=ucnv_open("UTF-32", &errorCode); | |
3573 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 3574 | log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); |
73c04bcf A |
3575 | return; |
3576 | } | |
3577 | length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); | |
3578 | ucnv_close(cnv); | |
3579 | if(U_FAILURE(errorCode) || | |
3580 | length!=8 || | |
3581 | NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) | |
3582 | ) { | |
3583 | log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); | |
3584 | } | |
3585 | ||
3586 | /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ | |
3587 | errorCode=U_ZERO_ERROR; | |
3588 | cnv=ucnv_open("ISO-8859-1", &errorCode); | |
3589 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 3590 | log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); |
73c04bcf A |
3591 | return; |
3592 | } | |
b331163b | 3593 | ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); |
73c04bcf A |
3594 | if(U_FAILURE(errorCode)) { |
3595 | log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); | |
3596 | } else { | |
3597 | len8 = sizeof(buffer); | |
3598 | ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); | |
3599 | /* Stateless converter, we expect the string converted to charset bytes. */ | |
3600 | if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { | |
3601 | log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); | |
3602 | } | |
3603 | } | |
3604 | ucnv_close(cnv); | |
3605 | ||
3606 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
3607 | errorCode=U_ZERO_ERROR; | |
3608 | cnv=ucnv_open("HZ", &errorCode); | |
3609 | if(U_FAILURE(errorCode)) { | |
729e4ab9 | 3610 | log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); |
73c04bcf A |
3611 | return; |
3612 | } | |
b331163b | 3613 | ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); |
73c04bcf A |
3614 | if(U_FAILURE(errorCode)) { |
3615 | log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); | |
3616 | } else { | |
3617 | len8 = sizeof(buffer); | |
3618 | ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); | |
3619 | /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ | |
3620 | if(U_FAILURE(errorCode) || len8!=0) { | |
3621 | log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); | |
3622 | } | |
3623 | } | |
3624 | ucnv_close(cnv); | |
3625 | #endif | |
3626 | /* | |
3627 | * Further testing of ucnv_setSubstString() is done via intltest convert. | |
3628 | * We do not test edge cases of illegal arguments and similar because the | |
3629 | * function implementation uses all of its parameters in calls to other | |
3630 | * functions with UErrorCode parameters. | |
3631 | */ | |
3632 | } | |
46f4442e A |
3633 | |
3634 | static void | |
3635 | InvalidArguments() { | |
3636 | UConverter *cnv; | |
3637 | UErrorCode errorCode; | |
3638 | char charBuffer[2] = {1, 1}; | |
3639 | char ucharAsCharBuffer[2] = {2, 2}; | |
3640 | char *charsPtr = charBuffer; | |
3641 | UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; | |
3642 | UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); | |
3643 | ||
3644 | errorCode=U_ZERO_ERROR; | |
3645 | cnv=ucnv_open("UTF-8", &errorCode); | |
3646 | if(U_FAILURE(errorCode)) { | |
3647 | log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); | |
3648 | return; | |
3649 | } | |
3650 | ||
3651 | errorCode=U_ZERO_ERROR; | |
3652 | /* This one should fail because an incomplete UChar is being passed in */ | |
3653 | ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); | |
3654 | if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { | |
3655 | log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); | |
3656 | } | |
3657 | ||
3658 | errorCode=U_ZERO_ERROR; | |
3659 | /* This one should fail because ucharsBadPtr is > than ucharsPtr */ | |
3660 | ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); | |
3661 | if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { | |
3662 | log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); | |
3663 | } | |
3664 | ||
3665 | errorCode=U_ZERO_ERROR; | |
3666 | /* This one should fail because an incomplete UChar is being passed in */ | |
3667 | ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); | |
3668 | if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { | |
3669 | log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); | |
3670 | } | |
3671 | ||
3672 | errorCode=U_ZERO_ERROR; | |
3673 | /* This one should fail because ucharsBadPtr is > than ucharsPtr */ | |
3674 | ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); | |
3675 | if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { | |
3676 | log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); | |
3677 | } | |
3678 | ||
3679 | if (charBuffer[0] != 1 || charBuffer[1] != 1 | |
3680 | || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) | |
3681 | { | |
3682 | log_err("Data was incorrectly written to buffers\n"); | |
3683 | } | |
3684 | ||
3685 | ucnv_close(cnv); | |
3686 | } | |
3687 | ||
729e4ab9 A |
3688 | static void TestGetName() { |
3689 | static const char *const names[] = { | |
3690 | "Unicode", "UTF-16", | |
3691 | "UnicodeBigUnmarked", "UTF-16BE", | |
3692 | "UnicodeBig", "UTF-16BE,version=1", | |
3693 | "UnicodeLittleUnmarked", "UTF-16LE", | |
3694 | "UnicodeLittle", "UTF-16LE,version=1", | |
3695 | "x-UTF-16LE-BOM", "UTF-16LE,version=1" | |
3696 | }; | |
3697 | int32_t i; | |
b331163b | 3698 | for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { |
729e4ab9 A |
3699 | UErrorCode errorCode = U_ZERO_ERROR; |
3700 | UConverter *cnv = ucnv_open(names[i], &errorCode); | |
3701 | if(U_SUCCESS(errorCode)) { | |
3702 | const char *name = ucnv_getName(cnv, &errorCode); | |
3703 | if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { | |
3704 | log_err("ucnv_getName(%s) = %s != %s -- %s\n", | |
3705 | names[i], name, names[i+1], u_errorName(errorCode)); | |
3706 | } | |
3707 | ucnv_close(cnv); | |
3708 | } | |
3709 | } | |
3710 | } | |
3711 | ||
3712 | static void TestUTFBOM() { | |
3713 | static const UChar a16[] = { 0x61 }; | |
3714 | static const char *const names[] = { | |
3715 | "UTF-16", | |
3716 | "UTF-16,version=1", | |
3717 | "UTF-16BE", | |
3718 | "UnicodeBig", | |
3719 | "UTF-16LE", | |
3720 | "UnicodeLittle" | |
3721 | }; | |
3722 | static const uint8_t expected[][5] = { | |
3723 | #if U_IS_BIG_ENDIAN | |
3724 | { 4, 0xfe, 0xff, 0, 0x61 }, | |
3725 | { 4, 0xfe, 0xff, 0, 0x61 }, | |
3726 | #else | |
3727 | { 4, 0xff, 0xfe, 0x61, 0 }, | |
3728 | { 4, 0xff, 0xfe, 0x61, 0 }, | |
3729 | #endif | |
3730 | ||
3731 | { 2, 0, 0x61 }, | |
3732 | { 4, 0xfe, 0xff, 0, 0x61 }, | |
3733 | ||
3734 | { 2, 0x61, 0 }, | |
3735 | { 4, 0xff, 0xfe, 0x61, 0 } | |
3736 | }; | |
46f4442e | 3737 | |
729e4ab9 A |
3738 | char bytes[10]; |
3739 | int32_t i; | |
3740 | ||
b331163b | 3741 | for(i = 0; i < UPRV_LENGTHOF(names); ++i) { |
729e4ab9 A |
3742 | UErrorCode errorCode = U_ZERO_ERROR; |
3743 | UConverter *cnv = ucnv_open(names[i], &errorCode); | |
3744 | int32_t length = 0; | |
3745 | const uint8_t *exp = expected[i]; | |
3746 | if (U_FAILURE(errorCode)) { | |
3747 | log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); | |
3748 | continue; | |
3749 | } | |
3750 | length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); | |
3751 | ||
3752 | if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { | |
3753 | log_err("unexpected %s BOM writing behavior -- %s\n", | |
3754 | names[i], u_errorName(errorCode)); | |
3755 | } | |
3756 | ucnv_close(cnv); | |
3757 | } | |
3758 | } |