]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************** | |
374ca955 | 5 | * COPYRIGHT: |
2ca993e8 | 6 | * Copyright (c) 1996-2016, International Business Machines Corporation and |
b75a7d8f A |
7 | * others. All Rights Reserved. |
8 | ******************************************************************** | |
9 | * | |
b331163b | 10 | * ucnv_bld.cpp: |
b75a7d8f A |
11 | * |
12 | * Defines functions that are used in the creation/initialization/deletion | |
13 | * of converters and related structures. | |
14 | * uses uconv_io.h routines to access disk information | |
15 | * is used by ucnv.h to implement public API create/delete/flushCache routines | |
16 | * Modification History: | |
374ca955 | 17 | * |
b75a7d8f | 18 | * Date Name Description |
374ca955 | 19 | * |
b75a7d8f A |
20 | * 06/20/2000 helena OS/400 port changes; mostly typecast. |
21 | * 06/29/2000 helena Major rewrite of the callback interface. | |
22 | */ | |
23 | ||
374ca955 A |
24 | #include "unicode/utypes.h" |
25 | ||
26 | #if !UCONFIG_NO_CONVERSION | |
b75a7d8f | 27 | |
73c04bcf | 28 | #include "unicode/putil.h" |
b75a7d8f A |
29 | #include "unicode/udata.h" |
30 | #include "unicode/ucnv.h" | |
b75a7d8f | 31 | #include "unicode/uloc.h" |
57a6839d | 32 | #include "mutex.h" |
4388f060 | 33 | #include "putilimp.h" |
57a6839d | 34 | #include "uassert.h" |
374ca955 | 35 | #include "utracimp.h" |
b75a7d8f A |
36 | #include "ucnv_io.h" |
37 | #include "ucnv_bld.h" | |
374ca955 A |
38 | #include "ucnvmbcs.h" |
39 | #include "ucnv_ext.h" | |
b75a7d8f A |
40 | #include "ucnv_cnv.h" |
41 | #include "ucnv_imp.h" | |
42 | #include "uhash.h" | |
43 | #include "umutex.h" | |
44 | #include "cstring.h" | |
45 | #include "cmemory.h" | |
46 | #include "ucln_cmn.h" | |
374ca955 | 47 | #include "ustr_cnv.h" |
b75a7d8f A |
48 | |
49 | ||
/*
 * Debug logging hook for the converter cache.
 * The logging variant is compiled out (#if 0); in the active branch
 * UCNV_DEBUG_LOG(x,y,z) expands to nothing, so its call sites generate no code.
 * Changing the condition to 1 routes calls to an external
 * UCNV_DEBUG_LOG(what, who, p, line) function with __LINE__ appended.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
# define UCNV_DEBUG_LOG(x,y,z)
#endif
57 | ||
58 | static const UConverterSharedData * const | |
59 | converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ | |
60 | NULL, NULL, | |
61 | ||
62 | #if UCONFIG_NO_LEGACY_CONVERSION | |
63 | NULL, | |
64 | #else | |
65 | &_MBCSData, | |
66 | #endif | |
67 | ||
68 | &_Latin1Data, | |
b331163b A |
69 | &_UTF8Data, &_UTF16BEData, &_UTF16LEData, |
70 | #if UCONFIG_ONLY_HTML_CONVERSION | |
71 | NULL, NULL, | |
72 | #else | |
73 | &_UTF32BEData, &_UTF32LEData, | |
74 | #endif | |
b75a7d8f A |
75 | NULL, |
76 | ||
77 | #if UCONFIG_NO_LEGACY_CONVERSION | |
78 | NULL, | |
b331163b A |
79 | #else |
80 | &_ISO2022Data, | |
81 | #endif | |
82 | ||
83 | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION | |
b75a7d8f A |
84 | NULL, NULL, NULL, NULL, NULL, NULL, |
85 | NULL, NULL, NULL, NULL, NULL, NULL, | |
86 | NULL, | |
87 | #else | |
b75a7d8f A |
88 | &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, |
89 | &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, | |
90 | &_HZData, | |
91 | #endif | |
92 | ||
b331163b A |
93 | #if UCONFIG_ONLY_HTML_CONVERSION |
94 | NULL, | |
95 | #else | |
b75a7d8f | 96 | &_SCSUData, |
b331163b | 97 | #endif |
b75a7d8f | 98 | |
b331163b A |
99 | |
100 | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION | |
b75a7d8f A |
101 | NULL, |
102 | #else | |
103 | &_ISCIIData, | |
104 | #endif | |
105 | ||
106 | &_ASCIIData, | |
b331163b A |
107 | #if UCONFIG_ONLY_HTML_CONVERSION |
108 | NULL, NULL, &_UTF16Data, NULL, NULL, NULL, | |
109 | #else | |
4388f060 | 110 | &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
b331163b | 111 | #endif |
4388f060 | 112 | |
b331163b | 113 | #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
4388f060 A |
114 | NULL, |
115 | #else | |
116 | &_CompoundTextData | |
117 | #endif | |
b75a7d8f A |
118 | }; |
119 | ||
120 | /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. | |
121 | Also the name should be in lower case and all spaces, dashes and underscores | |
122 | removed | |
123 | */ | |
124 | static struct { | |
125 | const char *name; | |
126 | const UConverterType type; | |
127 | } const cnvNameType[] = { | |
b331163b | 128 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
129 | { "bocu1", UCNV_BOCU1 }, |
130 | { "cesu8", UCNV_CESU8 }, | |
b331163b A |
131 | #endif |
132 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION | |
b75a7d8f | 133 | { "hz",UCNV_HZ }, |
374ca955 | 134 | #endif |
b331163b | 135 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f | 136 | { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
b331163b A |
137 | #endif |
138 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION | |
b75a7d8f | 139 | { "iscii", UCNV_ISCII }, |
b331163b A |
140 | #endif |
141 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
b75a7d8f A |
142 | { "iso2022", UCNV_ISO_2022 }, |
143 | #endif | |
144 | { "iso88591", UCNV_LATIN_1 }, | |
b331163b | 145 | #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
146 | { "lmbcs1", UCNV_LMBCS_1 }, |
147 | { "lmbcs11",UCNV_LMBCS_11 }, | |
148 | { "lmbcs16",UCNV_LMBCS_16 }, | |
149 | { "lmbcs17",UCNV_LMBCS_17 }, | |
150 | { "lmbcs18",UCNV_LMBCS_18 }, | |
151 | { "lmbcs19",UCNV_LMBCS_19 }, | |
152 | { "lmbcs2", UCNV_LMBCS_2 }, | |
153 | { "lmbcs3", UCNV_LMBCS_3 }, | |
154 | { "lmbcs4", UCNV_LMBCS_4 }, | |
155 | { "lmbcs5", UCNV_LMBCS_5 }, | |
156 | { "lmbcs6", UCNV_LMBCS_6 }, | |
157 | { "lmbcs8", UCNV_LMBCS_8 }, | |
158 | #endif | |
b331163b | 159 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f | 160 | { "scsu", UCNV_SCSU }, |
b331163b | 161 | #endif |
b75a7d8f A |
162 | { "usascii", UCNV_US_ASCII }, |
163 | { "utf16", UCNV_UTF16 }, | |
164 | { "utf16be", UCNV_UTF16_BigEndian }, | |
165 | { "utf16le", UCNV_UTF16_LittleEndian }, | |
166 | #if U_IS_BIG_ENDIAN | |
167 | { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, | |
168 | { "utf16platformendian", UCNV_UTF16_BigEndian }, | |
169 | #else | |
170 | { "utf16oppositeendian", UCNV_UTF16_BigEndian}, | |
171 | { "utf16platformendian", UCNV_UTF16_LittleEndian }, | |
172 | #endif | |
b331163b | 173 | #if !UCONFIG_ONLY_HTML_CONVERSION |
b75a7d8f A |
174 | { "utf32", UCNV_UTF32 }, |
175 | { "utf32be", UCNV_UTF32_BigEndian }, | |
176 | { "utf32le", UCNV_UTF32_LittleEndian }, | |
177 | #if U_IS_BIG_ENDIAN | |
178 | { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, | |
179 | { "utf32platformendian", UCNV_UTF32_BigEndian }, | |
180 | #else | |
181 | { "utf32oppositeendian", UCNV_UTF32_BigEndian }, | |
182 | { "utf32platformendian", UCNV_UTF32_LittleEndian }, | |
183 | #endif | |
b331163b A |
184 | #endif |
185 | #if !UCONFIG_ONLY_HTML_CONVERSION | |
b75a7d8f | 186 | { "utf7", UCNV_UTF7 }, |
b331163b | 187 | #endif |
4388f060 | 188 | { "utf8", UCNV_UTF8 }, |
b331163b | 189 | #if !UCONFIG_ONLY_HTML_CONVERSION |
4388f060 | 190 | { "x11compoundtext", UCNV_COMPOUND_TEXT} |
b331163b | 191 | #endif |
b75a7d8f A |
192 | }; |
193 | ||
194 | ||
/* Global state of the converter cache. */

/*
 * Cache of loaded converter shared data, keyed by the converter's
 * staticData->name. Created lazily in ucnv_shareConverterData() and
 * closed in ucnv_cleanup() once it is empty.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/*
 * Mutex for synchronizing converter cache access.
 * In this file it is also the lock held while reference counts are updated
 * (see ucnv_incrementRefCount() and ucnv_unloadSharedDataIfReady()).
 */
static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER;

/*
 * Cached array of available converter names and its element count.
 * Released and reset by ucnv_flushAvailableConverterCache(); the code that
 * fills it, guarded by gAvailableConvertersInitOnce, is not in this part of the file.
 */
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;
static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;
73c04bcf | 204 | |
/* State of the default converter; not needed when the default charset is fixed to UTF-8. */
#if !U_CHARSET_IS_UTF8

/* This contains the resolved converter name, so no further alias lookup is needed. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for the terminating NUL */
/* NULL until a default name has been determined; reset to NULL by ucnv_cleanup(). */
static const char *gDefaultConverterName = NULL;

/*
If the default converter is an algorithmic converter, this is the cached value.
We don't cache a full UConverter and clone it because ucnv_clone doesn't have
less overhead than an algorithmic open. We don't cache non-algorithmic converters
because ucnv_flushCache must be able to unload the default converter and its table.
*/
static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;

/* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption;

#endif  /* !U_CHARSET_IS_UTF8 */

/* Data item type passed to udata_openChoice() when a converter file is loaded. */
static const char DATA_TYPE[] = "cnv";
225 | ||
57a6839d A |
226 | /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). |
227 | * If it is ever to be called from elsewhere, synchronization | |
228 | * will need to be considered. | |
229 | */ | |
46f4442e A |
230 | static void |
231 | ucnv_flushAvailableConverterCache() { | |
57a6839d | 232 | gAvailableConverterCount = 0; |
46f4442e | 233 | if (gAvailableConverters) { |
46f4442e A |
234 | uprv_free((char **)gAvailableConverters); |
235 | gAvailableConverters = NULL; | |
46f4442e | 236 | } |
57a6839d | 237 | gAvailableConvertersInitOnce.reset(); |
46f4442e A |
238 | } |
239 | ||
240 | /* ucnv_cleanup - delete all storage held by the converter cache, except any */ | |
241 | /* in use by open converters. */ | |
242 | /* Not thread safe. */ | |
243 | /* Not supported API. */ | |
374ca955 | 244 | static UBool U_CALLCONV ucnv_cleanup(void) { |
46f4442e A |
245 | ucnv_flushCache(); |
246 | if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { | |
247 | uhash_close(SHARED_DATA_HASHTABLE); | |
248 | SHARED_DATA_HASHTABLE = NULL; | |
b75a7d8f A |
249 | } |
250 | ||
46f4442e A |
251 | /* Isn't called from flushCache because other threads may have preexisting references to the table. */ |
252 | ucnv_flushAvailableConverterCache(); | |
73c04bcf | 253 | |
729e4ab9 | 254 | #if !U_CHARSET_IS_UTF8 |
73c04bcf A |
255 | gDefaultConverterName = NULL; |
256 | gDefaultConverterNameBuffer[0] = 0; | |
257 | gDefaultConverterContainsOption = FALSE; | |
258 | gDefaultAlgorithmicSharedData = NULL; | |
729e4ab9 | 259 | #endif |
73c04bcf | 260 | |
b75a7d8f A |
261 | return (SHARED_DATA_HASHTABLE == NULL); |
262 | } | |
263 | ||
b75a7d8f | 264 | static UBool U_CALLCONV |
4388f060 A |
265 | isCnvAcceptable(void * /*context*/, |
266 | const char * /*type*/, const char * /*name*/, | |
729e4ab9 | 267 | const UDataInfo *pInfo) { |
b75a7d8f A |
268 | return (UBool)( |
269 | pInfo->size>=20 && | |
270 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && | |
271 | pInfo->charsetFamily==U_CHARSET_FAMILY && | |
272 | pInfo->sizeofUChar==U_SIZEOF_UCHAR && | |
273 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ | |
274 | pInfo->dataFormat[1]==0x6e && | |
275 | pInfo->dataFormat[2]==0x76 && | |
276 | pInfo->dataFormat[3]==0x74 && | |
277 | pInfo->formatVersion[0]==6); /* Everything will be version 6 */ | |
278 | } | |
279 | ||
280 | /** | |
281 | * Un flatten shared data from a UDATA.. | |
282 | */ | |
283 | static UConverterSharedData* | |
374ca955 | 284 | ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) |
b75a7d8f A |
285 | { |
286 | /* UDataInfo info; -- necessary only if some converters have different formatVersion */ | |
287 | const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); | |
288 | const UConverterStaticData *source = (const UConverterStaticData *) raw; | |
289 | UConverterSharedData *data; | |
290 | UConverterType type = (UConverterType)source->conversionType; | |
291 | ||
292 | if(U_FAILURE(*status)) | |
293 | return NULL; | |
294 | ||
295 | if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || | |
296 | converterData[type] == NULL || | |
2ca993e8 | 297 | !converterData[type]->isReferenceCounted || |
b75a7d8f A |
298 | converterData[type]->referenceCounter != 1 || |
299 | source->structSize != sizeof(UConverterStaticData)) | |
300 | { | |
301 | *status = U_INVALID_TABLE_FORMAT; | |
302 | return NULL; | |
303 | } | |
304 | ||
305 | data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); | |
306 | if(data == NULL) { | |
307 | *status = U_MEMORY_ALLOCATION_ERROR; | |
308 | return NULL; | |
309 | } | |
310 | ||
311 | /* copy initial values from the static structure for this type */ | |
312 | uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); | |
313 | ||
b75a7d8f | 314 | data->staticData = source; |
374ca955 | 315 | |
b75a7d8f A |
316 | data->sharedDataCached = FALSE; |
317 | ||
318 | /* fill in fields from the loaded data */ | |
319 | data->dataMemory = (void*)pData; /* for future use */ | |
320 | ||
321 | if(data->impl->load != NULL) { | |
374ca955 | 322 | data->impl->load(data, pArgs, raw + source->structSize, status); |
b75a7d8f | 323 | if(U_FAILURE(*status)) { |
b75a7d8f A |
324 | uprv_free(data); |
325 | return NULL; | |
326 | } | |
327 | } | |
328 | return data; | |
329 | } | |
330 | ||
331 | /*Takes an alias name gets an actual converter file name | |
332 | *goes to disk and opens it. | |
333 | *allocates the memory and returns a new UConverter object | |
334 | */ | |
374ca955 | 335 | static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) |
b75a7d8f A |
336 | { |
337 | UDataMemory *data; | |
338 | UConverterSharedData *sharedData; | |
339 | ||
374ca955 A |
340 | UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); |
341 | ||
73c04bcf | 342 | if (U_FAILURE (*err)) { |
374ca955 | 343 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
344 | return NULL; |
345 | } | |
346 | ||
374ca955 A |
347 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); |
348 | ||
349 | data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); | |
b75a7d8f A |
350 | if(U_FAILURE(*err)) |
351 | { | |
374ca955 | 352 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
353 | return NULL; |
354 | } | |
355 | ||
374ca955 | 356 | sharedData = ucnv_data_unFlattenClone(pArgs, data, err); |
b75a7d8f A |
357 | if(U_FAILURE(*err)) |
358 | { | |
359 | udata_close(data); | |
374ca955 | 360 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
361 | return NULL; |
362 | } | |
363 | ||
374ca955 A |
364 | /* |
365 | * TODO Store pkg in a field in the shared data so that delta-only converters | |
366 | * can load base converters from the same package. | |
367 | * If the pkg name is longer than the field, then either do not load the converter | |
368 | * in the first place, or just set the pkg field to "". | |
369 | */ | |
370 | ||
371 | UTRACE_EXIT_PTR_STATUS(sharedData, *err); | |
b75a7d8f A |
372 | return sharedData; |
373 | } | |
374 | ||
b75a7d8f A |
375 | /*returns a converter type from a string |
376 | */ | |
377 | static const UConverterSharedData * | |
378 | getAlgorithmicTypeFromName(const char *realName) | |
379 | { | |
380 | uint32_t mid, start, limit; | |
374ca955 | 381 | uint32_t lastMid; |
b75a7d8f A |
382 | int result; |
383 | char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; | |
384 | ||
385 | /* Lower case and remove ignoreable characters. */ | |
386 | ucnv_io_stripForCompare(strippedName, realName); | |
387 | ||
388 | /* do a binary search for the alias */ | |
389 | start = 0; | |
2ca993e8 | 390 | limit = UPRV_LENGTHOF(cnvNameType); |
b75a7d8f | 391 | mid = limit; |
374ca955 | 392 | lastMid = UINT32_MAX; |
b75a7d8f A |
393 | |
394 | for (;;) { | |
395 | mid = (uint32_t)((start + limit) / 2); | |
374ca955 A |
396 | if (lastMid == mid) { /* Have we moved? */ |
397 | break; /* We haven't moved, and it wasn't found. */ | |
398 | } | |
399 | lastMid = mid; | |
b75a7d8f A |
400 | result = uprv_strcmp(strippedName, cnvNameType[mid].name); |
401 | ||
402 | if (result < 0) { | |
403 | limit = mid; | |
404 | } else if (result > 0) { | |
405 | start = mid; | |
406 | } else { | |
407 | return converterData[cnvNameType[mid].type]; | |
408 | } | |
409 | } | |
410 | ||
411 | return NULL; | |
412 | } | |
413 | ||
/*
 * Sizing factor for the shared data hash table: the table is opened with
 * room for (number of known converters) * UCNV_CACHE_LOAD_FACTOR entries
 * (see ucnv_shareConverterData()).
 *
 * When on small platforms, or when just a couple of converters are used,
 * this number should be 2. When memory is plentiful, or when
 * ucnv_countAvailable is ever used with a lot of available converters,
 * this should be 4.
 * Larger numbers reduce the number of hash collisions, but use more memory.
 */
#define UCNV_CACHE_LOAD_FACTOR 2
423 | ||
b75a7d8f A |
424 | /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ |
425 | /* Will always be called with the cnvCacheMutex alrady being held */ | |
426 | /* by the calling function. */ | |
427 | /* Stores the shared data in the SHARED_DATA_HASHTABLE | |
428 | * @param data The shared data | |
429 | */ | |
430 | static void | |
431 | ucnv_shareConverterData(UConverterSharedData * data) | |
432 | { | |
433 | UErrorCode err = U_ZERO_ERROR; | |
434 | /*Lazy evaluates the Hashtable itself */ | |
435 | /*void *sanity = NULL;*/ | |
436 | ||
437 | if (SHARED_DATA_HASHTABLE == NULL) | |
438 | { | |
73c04bcf | 439 | SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, |
46f4442e | 440 | ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, |
b75a7d8f | 441 | &err); |
374ca955 A |
442 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
443 | ||
444 | if (U_FAILURE(err)) | |
b75a7d8f A |
445 | return; |
446 | } | |
447 | ||
448 | /* ### check to see if the element is not already there! */ | |
449 | ||
450 | /* | |
451 | sanity = ucnv_getSharedConverterData (data->staticData->name); | |
452 | if(sanity != NULL) | |
453 | { | |
454 | UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); | |
455 | } | |
456 | UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); | |
457 | */ | |
374ca955 | 458 | |
b75a7d8f A |
459 | /* Mark it shared */ |
460 | data->sharedDataCached = TRUE; | |
461 | ||
462 | uhash_put(SHARED_DATA_HASHTABLE, | |
463 | (void*) data->staticData->name, /* Okay to cast away const as long as | |
464 | keyDeleter == NULL */ | |
465 | data, | |
466 | &err); | |
467 | UCNV_DEBUG_LOG("put", data->staticData->name,data); | |
468 | ||
469 | } | |
470 | ||
471 | /* Look up a converter name in the shared data cache. */ | |
472 | /* cnvCacheMutex must be held by the caller to protect the hash table. */ | |
473 | /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) | |
474 | * @param name The name of the shared data | |
475 | * @return the shared data from the SHARED_DATA_HASHTABLE | |
476 | */ | |
477 | static UConverterSharedData * | |
478 | ucnv_getSharedConverterData(const char *name) | |
479 | { | |
480 | /*special case when no Table has yet been created we return NULL */ | |
481 | if (SHARED_DATA_HASHTABLE == NULL) | |
482 | { | |
483 | return NULL; | |
484 | } | |
485 | else | |
486 | { | |
487 | UConverterSharedData *rc; | |
488 | ||
489 | rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); | |
490 | UCNV_DEBUG_LOG("get",name,rc); | |
491 | return rc; | |
492 | } | |
493 | } | |
494 | ||
495 | /*frees the string of memory blocks associates with a sharedConverter | |
496 | *if and only if the referenceCounter == 0 | |
497 | */ | |
498 | /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to | |
499 | * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and | |
500 | * returns TRUE, | |
501 | * otherwise returns FALSE | |
502 | * @param sharedConverterData The shared data | |
503 | * @return if not it frees all the memory stemming from sharedConverterData and | |
504 | * returns TRUE, otherwise returns FALSE | |
505 | */ | |
506 | static UBool | |
507 | ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) | |
508 | { | |
374ca955 A |
509 | UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); |
510 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); | |
511 | ||
512 | if (deadSharedData->referenceCounter > 0) { | |
513 | UTRACE_EXIT_VALUE((int32_t)FALSE); | |
b75a7d8f | 514 | return FALSE; |
374ca955 | 515 | } |
b75a7d8f A |
516 | |
517 | if (deadSharedData->impl->unload != NULL) { | |
518 | deadSharedData->impl->unload(deadSharedData); | |
519 | } | |
374ca955 | 520 | |
b75a7d8f A |
521 | if(deadSharedData->dataMemory != NULL) |
522 | { | |
523 | UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; | |
524 | udata_close(data); | |
525 | } | |
526 | ||
b75a7d8f | 527 | uprv_free(deadSharedData); |
374ca955 A |
528 | |
529 | UTRACE_EXIT_VALUE((int32_t)TRUE); | |
b75a7d8f A |
530 | return TRUE; |
531 | } | |
532 | ||
374ca955 A |
533 | /** |
534 | * Load a non-algorithmic converter. | |
535 | * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). | |
536 | */ | |
537 | UConverterSharedData * | |
538 | ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { | |
539 | UConverterSharedData *mySharedConverterData; | |
540 | ||
541 | if(err == NULL || U_FAILURE(*err)) { | |
542 | return NULL; | |
543 | } | |
544 | ||
545 | if(pArgs->pkg != NULL && *pArgs->pkg != 0) { | |
546 | /* application-provided converters are not currently cached */ | |
547 | return createConverterFromFile(pArgs, err); | |
548 | } | |
549 | ||
550 | mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); | |
551 | if (mySharedConverterData == NULL) | |
552 | { | |
553 | /*Not cached, we need to stream it in from file */ | |
554 | mySharedConverterData = createConverterFromFile(pArgs, err); | |
555 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) | |
556 | { | |
557 | return NULL; | |
558 | } | |
729e4ab9 | 559 | else if (!pArgs->onlyTestIsLoadable) |
374ca955 A |
560 | { |
561 | /* share it with other library clients */ | |
562 | ucnv_shareConverterData(mySharedConverterData); | |
563 | } | |
564 | } | |
565 | else | |
566 | { | |
567 | /* The data for this converter was already in the cache. */ | |
568 | /* Update the reference counter on the shared data: one more client */ | |
569 | mySharedConverterData->referenceCounter++; | |
570 | } | |
571 | ||
572 | return mySharedConverterData; | |
573 | } | |
574 | ||
575 | /** | |
576 | * Unload a non-algorithmic converter. | |
2ca993e8 | 577 | * It must be sharedData->isReferenceCounted |
374ca955 A |
578 | * and this function must be called inside umtx_lock(&cnvCacheMutex). |
579 | */ | |
4388f060 | 580 | U_CAPI void |
374ca955 A |
581 | ucnv_unload(UConverterSharedData *sharedData) { |
582 | if(sharedData != NULL) { | |
b75a7d8f A |
583 | if (sharedData->referenceCounter > 0) { |
584 | sharedData->referenceCounter--; | |
585 | } | |
374ca955 | 586 | |
b75a7d8f A |
587 | if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { |
588 | ucnv_deleteSharedConverterData(sharedData); | |
589 | } | |
590 | } | |
b75a7d8f A |
591 | } |
592 | ||
4388f060 | 593 | U_CFUNC void |
374ca955 | 594 | ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) |
b75a7d8f | 595 | { |
2ca993e8 | 596 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
374ca955 A |
597 | umtx_lock(&cnvCacheMutex); |
598 | ucnv_unload(sharedData); | |
599 | umtx_unlock(&cnvCacheMutex); | |
600 | } | |
601 | } | |
602 | ||
4388f060 | 603 | U_CFUNC void |
374ca955 A |
604 | ucnv_incrementRefCount(UConverterSharedData *sharedData) |
605 | { | |
2ca993e8 | 606 | if(sharedData != NULL && sharedData->isReferenceCounted) { |
374ca955 | 607 | umtx_lock(&cnvCacheMutex); |
b75a7d8f | 608 | sharedData->referenceCounter++; |
374ca955 | 609 | umtx_unlock(&cnvCacheMutex); |
b75a7d8f | 610 | } |
b75a7d8f A |
611 | } |
612 | ||
729e4ab9 A |
613 | /* |
614 | * *pPieces must be initialized. | |
615 | * The name without options will be copied to pPieces->cnvName. | |
616 | * The locale and options will be copied to pPieces only if present in inName, | |
617 | * otherwise the existing values in pPieces remain. | |
618 | * *pArgs will be set to the pPieces values. | |
619 | */ | |
b75a7d8f A |
620 | static void |
621 | parseConverterOptions(const char *inName, | |
729e4ab9 A |
622 | UConverterNamePieces *pPieces, |
623 | UConverterLoadArgs *pArgs, | |
b75a7d8f A |
624 | UErrorCode *err) |
625 | { | |
729e4ab9 | 626 | char *cnvName = pPieces->cnvName; |
b75a7d8f A |
627 | char c; |
628 | int32_t len = 0; | |
629 | ||
729e4ab9 A |
630 | pArgs->name=inName; |
631 | pArgs->locale=pPieces->locale; | |
632 | pArgs->options=pPieces->options; | |
633 | ||
b75a7d8f A |
634 | /* copy the converter name itself to cnvName */ |
635 | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { | |
636 | if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { | |
637 | *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ | |
729e4ab9 | 638 | pPieces->cnvName[0]=0; |
b75a7d8f A |
639 | return; |
640 | } | |
641 | *cnvName++=c; | |
642 | inName++; | |
643 | } | |
644 | *cnvName=0; | |
729e4ab9 | 645 | pArgs->name=pPieces->cnvName; |
b75a7d8f A |
646 | |
647 | /* parse options. No more name copying should occur. */ | |
648 | while((c=*inName)!=0) { | |
649 | if(c==UCNV_OPTION_SEP_CHAR) { | |
650 | ++inName; | |
651 | } | |
652 | ||
653 | /* inName is behind an option separator */ | |
654 | if(uprv_strncmp(inName, "locale=", 7)==0) { | |
655 | /* do not modify locale itself in case we have multiple locale options */ | |
729e4ab9 | 656 | char *dest=pPieces->locale; |
b75a7d8f A |
657 | |
658 | /* copy the locale option value */ | |
659 | inName+=7; | |
660 | len=0; | |
661 | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { | |
662 | ++inName; | |
663 | ||
664 | if(++len>=ULOC_FULLNAME_CAPACITY) { | |
665 | *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ | |
729e4ab9 | 666 | pPieces->locale[0]=0; |
b75a7d8f A |
667 | return; |
668 | } | |
669 | ||
670 | *dest++=c; | |
671 | } | |
672 | *dest=0; | |
673 | } else if(uprv_strncmp(inName, "version=", 8)==0) { | |
729e4ab9 | 674 | /* copy the version option value into bits 3..0 of pPieces->options */ |
b75a7d8f A |
675 | inName+=8; |
676 | c=*inName; | |
677 | if(c==0) { | |
729e4ab9 | 678 | pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); |
b75a7d8f A |
679 | return; |
680 | } else if((uint8_t)(c-'0')<10) { | |
729e4ab9 | 681 | pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); |
b75a7d8f A |
682 | ++inName; |
683 | } | |
684 | } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { | |
685 | inName+=8; | |
729e4ab9 | 686 | pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); |
b75a7d8f A |
687 | /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ |
688 | } else { | |
689 | /* ignore any other options until we define some */ | |
690 | while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { | |
691 | } | |
692 | if(c==0) { | |
693 | return; | |
694 | } | |
695 | } | |
696 | } | |
697 | } | |
698 | ||
699 | /*Logic determines if the converter is Algorithmic AND/OR cached | |
700 | *depending on that: | |
701 | * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) | |
702 | * -Get it from a Hashtable (Data=X, Cached=TRUE) | |
703 | * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) | |
704 | * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) | |
705 | */ | |
4388f060 | 706 | U_CFUNC UConverterSharedData * |
729e4ab9 A |
707 | ucnv_loadSharedData(const char *converterName, |
708 | UConverterNamePieces *pPieces, | |
709 | UConverterLoadArgs *pArgs, | |
710 | UErrorCode * err) { | |
711 | UConverterNamePieces stackPieces; | |
712 | UConverterLoadArgs stackArgs; | |
b75a7d8f A |
713 | UConverterSharedData *mySharedConverterData = NULL; |
714 | UErrorCode internalErrorCode = U_ZERO_ERROR; | |
73c04bcf A |
715 | UBool mayContainOption = TRUE; |
716 | UBool checkForAlgorithmic = TRUE; | |
374ca955 A |
717 | |
718 | if (U_FAILURE (*err)) { | |
b75a7d8f | 719 | return NULL; |
374ca955 A |
720 | } |
721 | ||
729e4ab9 A |
722 | if(pPieces == NULL) { |
723 | if(pArgs != NULL) { | |
724 | /* | |
725 | * Bad: We may set pArgs pointers to stackPieces fields | |
726 | * which will be invalid after this function returns. | |
727 | */ | |
728 | *err = U_INTERNAL_PROGRAM_ERROR; | |
729 | return NULL; | |
730 | } | |
731 | pPieces = &stackPieces; | |
732 | } | |
733 | if(pArgs == NULL) { | |
734 | uprv_memset(&stackArgs, 0, sizeof(stackArgs)); | |
735 | stackArgs.size = (int32_t)sizeof(stackArgs); | |
736 | pArgs = &stackArgs; | |
374ca955 | 737 | } |
b75a7d8f | 738 | |
729e4ab9 A |
739 | pPieces->cnvName[0] = 0; |
740 | pPieces->locale[0] = 0; | |
741 | pPieces->options = 0; | |
742 | ||
743 | pArgs->name = converterName; | |
744 | pArgs->locale = pPieces->locale; | |
745 | pArgs->options = pPieces->options; | |
b75a7d8f A |
746 | |
747 | /* In case "name" is NULL we want to open the default converter. */ | |
748 | if (converterName == NULL) { | |
729e4ab9 A |
749 | #if U_CHARSET_IS_UTF8 |
750 | pArgs->name = "UTF-8"; | |
751 | return (UConverterSharedData *)converterData[UCNV_UTF8]; | |
752 | #else | |
73c04bcf | 753 | /* Call ucnv_getDefaultName first to query the name from the OS. */ |
729e4ab9 A |
754 | pArgs->name = ucnv_getDefaultName(); |
755 | if (pArgs->name == NULL) { | |
b75a7d8f A |
756 | *err = U_MISSING_RESOURCE_ERROR; |
757 | return NULL; | |
758 | } | |
73c04bcf A |
759 | mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; |
760 | checkForAlgorithmic = FALSE; | |
761 | mayContainOption = gDefaultConverterContainsOption; | |
b75a7d8f | 762 | /* the default converter name is already canonical */ |
729e4ab9 | 763 | #endif |
73c04bcf | 764 | } |
729e4ab9 | 765 | else if(UCNV_FAST_IS_UTF8(converterName)) { |
73c04bcf | 766 | /* fastpath for UTF-8 */ |
729e4ab9 | 767 | pArgs->name = "UTF-8"; |
73c04bcf A |
768 | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
769 | } | |
770 | else { | |
b75a7d8f | 771 | /* separate the converter name from the options */ |
729e4ab9 | 772 | parseConverterOptions(converterName, pPieces, pArgs, err); |
b75a7d8f A |
773 | if (U_FAILURE(*err)) { |
774 | /* Very bad name used. */ | |
775 | return NULL; | |
776 | } | |
777 | ||
778 | /* get the canonical converter name */ | |
729e4ab9 A |
779 | pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); |
780 | if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { | |
b75a7d8f A |
781 | /* |
782 | * set the input name in case the converter was added | |
783 | * without updating the alias table, or when there is no alias table | |
784 | */ | |
729e4ab9 | 785 | pArgs->name = pPieces->cnvName; |
51004dcb A |
786 | } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { |
787 | *err = U_AMBIGUOUS_ALIAS_WARNING; | |
b75a7d8f A |
788 | } |
789 | } | |
790 | ||
791 | /* separate the converter name from the options */ | |
729e4ab9 A |
792 | if(mayContainOption && pArgs->name != pPieces->cnvName) { |
793 | parseConverterOptions(pArgs->name, pPieces, pArgs, err); | |
b75a7d8f | 794 | } |
374ca955 | 795 | |
b75a7d8f | 796 | /* get the shared data for an algorithmic converter, if it is one */ |
73c04bcf | 797 | if (checkForAlgorithmic) { |
729e4ab9 | 798 | mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); |
73c04bcf | 799 | } |
b75a7d8f A |
800 | if (mySharedConverterData == NULL) |
801 | { | |
802 | /* it is a data-based converter, get its shared data. */ | |
803 | /* Hold the cnvCacheMutex through the whole process of checking the */ | |
804 | /* converter data cache, and adding new entries to the cache */ | |
805 | /* to prevent other threads from modifying the cache during the */ | |
806 | /* process. */ | |
729e4ab9 A |
807 | pArgs->nestedLoads=1; |
808 | pArgs->pkg=NULL; | |
374ca955 | 809 | |
b75a7d8f | 810 | umtx_lock(&cnvCacheMutex); |
729e4ab9 | 811 | mySharedConverterData = ucnv_load(pArgs, err); |
374ca955 A |
812 | umtx_unlock(&cnvCacheMutex); |
813 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) | |
b75a7d8f | 814 | { |
374ca955 | 815 | return NULL; |
b75a7d8f | 816 | } |
b75a7d8f A |
817 | } |
818 | ||
374ca955 A |
819 | return mySharedConverterData; |
820 | } | |
b75a7d8f | 821 | |
4388f060 | 822 | U_CAPI UConverter * |
374ca955 A |
823 | ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) |
824 | { | |
729e4ab9 | 825 | UConverterNamePieces stackPieces; |
4388f060 | 826 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
374ca955 A |
827 | UConverterSharedData *mySharedConverterData; |
828 | ||
829 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); | |
830 | ||
831 | if(U_SUCCESS(*err)) { | |
832 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); | |
833 | ||
729e4ab9 A |
834 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
835 | ||
836 | myUConverter = ucnv_createConverterFromSharedData( | |
837 | myUConverter, mySharedConverterData, | |
838 | &stackArgs, | |
839 | err); | |
374ca955 A |
840 | |
841 | if(U_SUCCESS(*err)) { | |
729e4ab9 A |
842 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
843 | return myUConverter; | |
b75a7d8f | 844 | } |
b75a7d8f A |
845 | } |
846 | ||
374ca955 A |
847 | /* exit with error */ |
848 | UTRACE_EXIT_STATUS(*err); | |
849 | return NULL; | |
b75a7d8f A |
850 | } |
851 | ||
729e4ab9 A |
852 | U_CFUNC UBool |
853 | ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { | |
854 | UConverter myUConverter; | |
855 | UConverterNamePieces stackPieces; | |
4388f060 | 856 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
729e4ab9 A |
857 | UConverterSharedData *mySharedConverterData; |
858 | ||
859 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); | |
860 | ||
861 | if(U_SUCCESS(*err)) { | |
862 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); | |
863 | ||
864 | stackArgs.onlyTestIsLoadable=TRUE; | |
865 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); | |
866 | ucnv_createConverterFromSharedData( | |
867 | &myUConverter, mySharedConverterData, | |
868 | &stackArgs, | |
869 | err); | |
870 | ucnv_unloadSharedDataIfReady(mySharedConverterData); | |
871 | } | |
872 | ||
873 | UTRACE_EXIT_STATUS(*err); | |
874 | return U_SUCCESS(*err); | |
875 | } | |
876 | ||
b75a7d8f A |
877 | UConverter * |
878 | ucnv_createAlgorithmicConverter(UConverter *myUConverter, | |
879 | UConverterType type, | |
880 | const char *locale, uint32_t options, | |
881 | UErrorCode *err) { | |
374ca955 | 882 | UConverter *cnv; |
b75a7d8f | 883 | const UConverterSharedData *sharedData; |
4388f060 | 884 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
b75a7d8f | 885 | |
374ca955 A |
886 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); |
887 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); | |
888 | ||
b75a7d8f A |
889 | if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { |
890 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
374ca955 | 891 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
b75a7d8f A |
892 | return NULL; |
893 | } | |
894 | ||
895 | sharedData = converterData[type]; | |
2ca993e8 | 896 | if(sharedData == NULL || sharedData->isReferenceCounted) { |
b75a7d8f A |
897 | /* not a valid type, or not an algorithmic converter */ |
898 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
374ca955 | 899 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
b75a7d8f A |
900 | return NULL; |
901 | } | |
902 | ||
729e4ab9 A |
903 | stackArgs.name = ""; |
904 | stackArgs.options = options; | |
905 | stackArgs.locale=locale; | |
906 | cnv = ucnv_createConverterFromSharedData( | |
907 | myUConverter, (UConverterSharedData *)sharedData, | |
908 | &stackArgs, err); | |
374ca955 A |
909 | |
910 | UTRACE_EXIT_PTR_STATUS(cnv, *err); | |
911 | return cnv; | |
b75a7d8f A |
912 | } |
913 | ||
4388f060 | 914 | U_CFUNC UConverter* |
b75a7d8f A |
915 | ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) |
916 | { | |
b75a7d8f | 917 | UConverter *myUConverter; |
374ca955 | 918 | UConverterSharedData *mySharedConverterData; |
729e4ab9 | 919 | UConverterNamePieces stackPieces; |
4388f060 | 920 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
374ca955 A |
921 | |
922 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); | |
b75a7d8f A |
923 | |
924 | if(U_FAILURE(*err)) { | |
374ca955 A |
925 | UTRACE_EXIT_STATUS(*err); |
926 | return NULL; | |
b75a7d8f A |
927 | } |
928 | ||
374ca955 A |
929 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); |
930 | ||
374ca955 | 931 | /* first, get the options out of the converterName string */ |
729e4ab9 A |
932 | stackPieces.cnvName[0] = 0; |
933 | stackPieces.locale[0] = 0; | |
934 | stackPieces.options = 0; | |
935 | parseConverterOptions(converterName, &stackPieces, &stackArgs, err); | |
b75a7d8f A |
936 | if (U_FAILURE(*err)) { |
937 | /* Very bad name used. */ | |
374ca955 | 938 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
939 | return NULL; |
940 | } | |
729e4ab9 A |
941 | stackArgs.nestedLoads=1; |
942 | stackArgs.pkg=packageName; | |
374ca955 | 943 | |
b75a7d8f | 944 | /* open the data, unflatten the shared structure */ |
729e4ab9 | 945 | mySharedConverterData = createConverterFromFile(&stackArgs, err); |
374ca955 | 946 | |
b75a7d8f | 947 | if (U_FAILURE(*err)) { |
374ca955 A |
948 | UTRACE_EXIT_STATUS(*err); |
949 | return NULL; | |
b75a7d8f A |
950 | } |
951 | ||
952 | /* create the actual converter */ | |
729e4ab9 | 953 | myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); |
374ca955 | 954 | |
b75a7d8f A |
955 | if (U_FAILURE(*err)) { |
956 | ucnv_close(myUConverter); | |
374ca955 A |
957 | UTRACE_EXIT_STATUS(*err); |
958 | return NULL; | |
b75a7d8f | 959 | } |
374ca955 A |
960 | |
961 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); | |
b75a7d8f A |
962 | return myUConverter; |
963 | } | |
964 | ||
965 | ||
4388f060 | 966 | U_CFUNC UConverter* |
b75a7d8f A |
967 | ucnv_createConverterFromSharedData(UConverter *myUConverter, |
968 | UConverterSharedData *mySharedConverterData, | |
729e4ab9 | 969 | UConverterLoadArgs *pArgs, |
b75a7d8f A |
970 | UErrorCode *err) |
971 | { | |
972 | UBool isCopyLocal; | |
973 | ||
729e4ab9 A |
974 | if(U_FAILURE(*err)) { |
975 | ucnv_unloadSharedDataIfReady(mySharedConverterData); | |
976 | return myUConverter; | |
977 | } | |
b75a7d8f A |
978 | if(myUConverter == NULL) |
979 | { | |
980 | myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); | |
981 | if(myUConverter == NULL) | |
982 | { | |
983 | *err = U_MEMORY_ALLOCATION_ERROR; | |
729e4ab9 | 984 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
b75a7d8f A |
985 | return NULL; |
986 | } | |
987 | isCopyLocal = FALSE; | |
988 | } else { | |
989 | isCopyLocal = TRUE; | |
990 | } | |
991 | ||
992 | /* initialize the converter */ | |
993 | uprv_memset(myUConverter, 0, sizeof(UConverter)); | |
994 | myUConverter->isCopyLocal = isCopyLocal; | |
73c04bcf | 995 | /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ |
b75a7d8f | 996 | myUConverter->sharedData = mySharedConverterData; |
729e4ab9 A |
997 | myUConverter->options = pArgs->options; |
998 | if(!pArgs->onlyTestIsLoadable) { | |
999 | myUConverter->preFromUFirstCP = U_SENTINEL; | |
1000 | myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; | |
1001 | myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; | |
1002 | myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; | |
1003 | myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; | |
1004 | myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; | |
1005 | myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; | |
1006 | myUConverter->subChars = (uint8_t *)myUConverter->subUChars; | |
1007 | uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); | |
1008 | myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ | |
1009 | } | |
73c04bcf A |
1010 | |
1011 | if(mySharedConverterData->impl->open != NULL) { | |
729e4ab9 A |
1012 | mySharedConverterData->impl->open(myUConverter, pArgs, err); |
1013 | if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { | |
1014 | /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ | |
b75a7d8f A |
1015 | ucnv_close(myUConverter); |
1016 | return NULL; | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | return myUConverter; | |
1021 | } | |
1022 | ||
1023 | /*Frees all shared immutable objects that aren't referred to (reference count = 0) | |
1024 | */ | |
1025 | U_CAPI int32_t U_EXPORT2 | |
1026 | ucnv_flushCache () | |
1027 | { | |
1028 | UConverterSharedData *mySharedData = NULL; | |
374ca955 | 1029 | int32_t pos; |
b75a7d8f A |
1030 | int32_t tableDeletedNum = 0; |
1031 | const UHashElement *e; | |
729e4ab9 | 1032 | /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ |
374ca955 A |
1033 | int32_t i, remaining; |
1034 | ||
1035 | UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); | |
b75a7d8f A |
1036 | |
1037 | /* Close the default converter without creating a new one so that everything will be flushed. */ | |
729e4ab9 | 1038 | u_flushDefaultConverter(); |
b75a7d8f A |
1039 | |
1040 | /*if shared data hasn't even been lazy evaluated yet | |
1041 | * return 0 | |
1042 | */ | |
374ca955 A |
1043 | if (SHARED_DATA_HASHTABLE == NULL) { |
1044 | UTRACE_EXIT_VALUE((int32_t)0); | |
b75a7d8f | 1045 | return 0; |
374ca955 | 1046 | } |
b75a7d8f A |
1047 | |
1048 | /*creates an enumeration to iterate through every element in the | |
1049 | * table | |
1050 | * | |
1051 | * Synchronization: holding cnvCacheMutex will prevent any other thread from | |
1052 | * accessing or modifying the hash table during the iteration. | |
1053 | * The reference count of an entry may be decremented by | |
1054 | * ucnv_close while the iteration is in process, but this is | |
1055 | * benign. It can't be incremented (in ucnv_createConverter()) | |
1056 | * because the sequence of looking up in the cache + incrementing | |
1057 | * is protected by cnvCacheMutex. | |
1058 | */ | |
1059 | umtx_lock(&cnvCacheMutex); | |
374ca955 A |
1060 | /* |
1061 | * double loop: A delta/extension-only converter has a pointer to its base table's | |
1062 | * shared data; the first iteration of the outer loop may see the delta converter | |
1063 | * before the base converter, and unloading the delta converter may get the base | |
1064 | * converter's reference counter down to 0. | |
1065 | */ | |
1066 | i = 0; | |
1067 | do { | |
1068 | remaining = 0; | |
b331163b | 1069 | pos = UHASH_FIRST; |
374ca955 | 1070 | while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) |
b75a7d8f | 1071 | { |
374ca955 A |
1072 | mySharedData = (UConverterSharedData *) e->value.pointer; |
1073 | /*deletes only if reference counter == 0 */ | |
1074 | if (mySharedData->referenceCounter == 0) | |
1075 | { | |
1076 | tableDeletedNum++; | |
1077 | ||
1078 | UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); | |
1079 | ||
1080 | uhash_removeElement(SHARED_DATA_HASHTABLE, e); | |
1081 | mySharedData->sharedDataCached = FALSE; | |
1082 | ucnv_deleteSharedConverterData (mySharedData); | |
1083 | } else { | |
1084 | ++remaining; | |
1085 | } | |
b75a7d8f | 1086 | } |
374ca955 | 1087 | } while(++i == 1 && remaining > 0); |
b75a7d8f A |
1088 | umtx_unlock(&cnvCacheMutex); |
1089 | ||
374ca955 A |
1090 | UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); |
1091 | ||
374ca955 | 1092 | UTRACE_EXIT_VALUE(tableDeletedNum); |
b75a7d8f A |
1093 | return tableDeletedNum; |
1094 | } | |
1095 | ||
73c04bcf A |
1096 | /* available converters list --------------------------------------------------- */ |
1097 | ||
57a6839d A |
1098 | static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { |
1099 | U_ASSERT(gAvailableConverterCount == 0); | |
1100 | U_ASSERT(gAvailableConverters == NULL); | |
73c04bcf | 1101 | |
57a6839d A |
1102 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
1103 | UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); | |
1104 | int32_t allConverterCount = uenum_count(allConvEnum, &errCode); | |
1105 | if (U_FAILURE(errCode)) { | |
1106 | return; | |
1107 | } | |
73c04bcf | 1108 | |
57a6839d A |
1109 | /* We can't have more than "*converterTable" converters to open */ |
1110 | gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); | |
1111 | if (!gAvailableConverters) { | |
1112 | errCode = U_MEMORY_ALLOCATION_ERROR; | |
1113 | return; | |
1114 | } | |
46f4442e | 1115 | |
57a6839d A |
1116 | /* Open the default converter to make sure that it has first dibs in the hash table. */ |
1117 | UErrorCode localStatus = U_ZERO_ERROR; | |
1118 | UConverter tempConverter; | |
1119 | ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); | |
73c04bcf | 1120 | |
57a6839d | 1121 | gAvailableConverterCount = 0; |
73c04bcf | 1122 | |
57a6839d A |
1123 | for (int32_t idx = 0; idx < allConverterCount; idx++) { |
1124 | localStatus = U_ZERO_ERROR; | |
1125 | const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); | |
1126 | if (ucnv_canCreateConverter(converterName, &localStatus)) { | |
1127 | gAvailableConverters[gAvailableConverterCount++] = converterName; | |
73c04bcf | 1128 | } |
73c04bcf | 1129 | } |
57a6839d A |
1130 | |
1131 | uenum_close(allConvEnum); | |
1132 | } | |
1133 | ||
1134 | ||
1135 | static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { | |
1136 | umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); | |
1137 | return U_SUCCESS(*pErrorCode); | |
73c04bcf A |
1138 | } |
1139 | ||
1140 | U_CFUNC uint16_t | |
1141 | ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { | |
1142 | if (haveAvailableConverterList(pErrorCode)) { | |
1143 | return gAvailableConverterCount; | |
1144 | } | |
1145 | return 0; | |
1146 | } | |
1147 | ||
1148 | U_CFUNC const char * | |
1149 | ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { | |
1150 | if (haveAvailableConverterList(pErrorCode)) { | |
1151 | if (n < gAvailableConverterCount) { | |
1152 | return gAvailableConverters[n]; | |
1153 | } | |
1154 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
1155 | } | |
1156 | return NULL; | |
1157 | } | |
1158 | ||
1159 | /* default converter name --------------------------------------------------- */ | |
1160 | ||
729e4ab9 | 1161 | #if !U_CHARSET_IS_UTF8 |
46f4442e A |
1162 | /* |
1163 | Copy the canonical converter name. | |
1164 | ucnv_getDefaultName must be thread safe, which can call this function. | |
1165 | ||
1166 | ucnv_setDefaultName calls this function and it doesn't have to be | |
1167 | thread safe because there is no reliable/safe way to reset the | |
1168 | converter in use in all threads. If you did reset the converter, you | |
1169 | would not be sure that retrieving a default converter for one string | |
1170 | would be the same type of default converter for a successive string. | |
1171 | Since the name is a returned via ucnv_getDefaultName without copying, | |
1172 | you shouldn't be modifying or deleting the string from a separate thread. | |
1173 | */ | |
4388f060 | 1174 | static inline void |
73c04bcf | 1175 | internalSetName(const char *name, UErrorCode *status) { |
729e4ab9 | 1176 | UConverterNamePieces stackPieces; |
4388f060 | 1177 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
73c04bcf A |
1178 | int32_t length=(int32_t)(uprv_strlen(name)); |
1179 | UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); | |
1180 | const UConverterSharedData *algorithmicSharedData; | |
1181 | ||
729e4ab9 | 1182 | stackArgs.name = name; |
73c04bcf | 1183 | if(containsOption) { |
729e4ab9 A |
1184 | stackPieces.cnvName[0] = 0; |
1185 | stackPieces.locale[0] = 0; | |
1186 | stackPieces.options = 0; | |
1187 | parseConverterOptions(name, &stackPieces, &stackArgs, status); | |
1188 | if(U_FAILURE(*status)) { | |
1189 | return; | |
1190 | } | |
73c04bcf | 1191 | } |
729e4ab9 | 1192 | algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); |
73c04bcf A |
1193 | |
1194 | umtx_lock(&cnvCacheMutex); | |
1195 | ||
46f4442e A |
1196 | gDefaultAlgorithmicSharedData = algorithmicSharedData; |
1197 | gDefaultConverterContainsOption = containsOption; | |
73c04bcf A |
1198 | uprv_memcpy(gDefaultConverterNameBuffer, name, length); |
1199 | gDefaultConverterNameBuffer[length]=0; | |
729e4ab9 A |
1200 | |
1201 | /* gDefaultConverterName MUST be the last global var set by this function. */ | |
1202 | /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ | |
57a6839d A |
1203 | // But there is nothing here preventing that from being reordered, either by the compiler |
1204 | // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. | |
1205 | // -- Andy | |
73c04bcf | 1206 | gDefaultConverterName = gDefaultConverterNameBuffer; |
73c04bcf A |
1207 | |
1208 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); | |
1209 | ||
1210 | umtx_unlock(&cnvCacheMutex); | |
1211 | } | |
729e4ab9 | 1212 | #endif |
73c04bcf A |
1213 | |
1214 | /* | |
1215 | * In order to be really thread-safe, the get function would have to take | |
1216 | * a buffer parameter and copy the current string inside a mutex block. | |
1217 | * This implementation only tries to be really thread-safe while | |
1218 | * setting the name. | |
1219 | * It assumes that setting a pointer is atomic. | |
1220 | */ | |
1221 | ||
1222 | U_CAPI const char* U_EXPORT2 | |
1223 | ucnv_getDefaultName() { | |
729e4ab9 A |
1224 | #if U_CHARSET_IS_UTF8 |
1225 | return "UTF-8"; | |
1226 | #else | |
73c04bcf A |
1227 | /* local variable to be thread-safe */ |
1228 | const char *name; | |
1229 | ||
46f4442e | 1230 | /* |
57a6839d | 1231 | Concurrent calls to ucnv_getDefaultName must be thread safe, |
46f4442e A |
1232 | but ucnv_setDefaultName is not thread safe. |
1233 | */ | |
57a6839d A |
1234 | { |
1235 | icu::Mutex lock(&cnvCacheMutex); | |
1236 | name = gDefaultConverterName; | |
1237 | } | |
73c04bcf A |
1238 | if(name==NULL) { |
1239 | UErrorCode errorCode = U_ZERO_ERROR; | |
1240 | UConverter *cnv = NULL; | |
1241 | ||
1242 | name = uprv_getDefaultCodepage(); | |
1243 | ||
1244 | /* if the name is there, test it out and get the canonical name with options */ | |
1245 | if(name != NULL) { | |
1246 | cnv = ucnv_open(name, &errorCode); | |
1247 | if(U_SUCCESS(errorCode) && cnv != NULL) { | |
1248 | name = ucnv_getName(cnv, &errorCode); | |
1249 | } | |
1250 | } | |
1251 | ||
1252 | if(name == NULL || name[0] == 0 | |
1253 | || U_FAILURE(errorCode) || cnv == NULL | |
1254 | || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) | |
1255 | { | |
1256 | /* Panic time, let's use a fallback. */ | |
1257 | #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) | |
1258 | name = "US-ASCII"; | |
1259 | /* there is no 'algorithmic' converter for EBCDIC */ | |
4388f060 | 1260 | #elif U_PLATFORM == U_PF_OS390 |
73c04bcf A |
1261 | name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; |
1262 | #else | |
1263 | name = "ibm-37_P100-1995"; | |
1264 | #endif | |
1265 | } | |
1266 | ||
1267 | internalSetName(name, &errorCode); | |
1268 | ||
1269 | /* The close may make the current name go away. */ | |
1270 | ucnv_close(cnv); | |
1271 | } | |
1272 | ||
1273 | return name; | |
729e4ab9 | 1274 | #endif |
73c04bcf A |
1275 | } |
1276 | ||
51004dcb A |
1277 | #if U_CHARSET_IS_UTF8 |
1278 | U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} | |
1279 | #else | |
46f4442e A |
1280 | /* |
1281 | This function is not thread safe, and it can't be thread safe. | |
1282 | See internalSetName or the API reference for details. | |
1283 | */ | |
73c04bcf A |
1284 | U_CAPI void U_EXPORT2 |
1285 | ucnv_setDefaultName(const char *converterName) { | |
1286 | if(converterName==NULL) { | |
1287 | /* reset to the default codepage */ | |
73c04bcf | 1288 | gDefaultConverterName=NULL; |
73c04bcf A |
1289 | } else { |
1290 | UErrorCode errorCode = U_ZERO_ERROR; | |
1291 | UConverter *cnv = NULL; | |
1292 | const char *name = NULL; | |
1293 | ||
1294 | /* if the name is there, test it out and get the canonical name with options */ | |
1295 | cnv = ucnv_open(converterName, &errorCode); | |
1296 | if(U_SUCCESS(errorCode) && cnv != NULL) { | |
1297 | name = ucnv_getName(cnv, &errorCode); | |
1298 | } | |
1299 | ||
1300 | if(U_SUCCESS(errorCode) && name!=NULL) { | |
1301 | internalSetName(name, &errorCode); | |
1302 | } | |
1303 | /* else this converter is bad to use. Don't change it to a bad value. */ | |
1304 | ||
1305 | /* The close may make the current name go away. */ | |
1306 | ucnv_close(cnv); | |
729e4ab9 A |
1307 | |
1308 | /* reset the converter cache */ | |
1309 | u_flushDefaultConverter(); | |
73c04bcf A |
1310 | } |
1311 | } | |
51004dcb | 1312 | #endif |
73c04bcf | 1313 | |
374ca955 A |
1314 | /* data swapping ------------------------------------------------------------ */ |
1315 | ||
1316 | /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ | |
1317 | ||
1318 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1319 | ||
1320 | U_CAPI int32_t U_EXPORT2 | |
1321 | ucnv_swap(const UDataSwapper *ds, | |
1322 | const void *inData, int32_t length, void *outData, | |
1323 | UErrorCode *pErrorCode) { | |
1324 | const UDataInfo *pInfo; | |
1325 | int32_t headerSize; | |
1326 | ||
1327 | const uint8_t *inBytes; | |
1328 | uint8_t *outBytes; | |
1329 | ||
1330 | uint32_t offset, count, staticDataSize; | |
1331 | int32_t size; | |
1332 | ||
1333 | const UConverterStaticData *inStaticData; | |
1334 | UConverterStaticData *outStaticData; | |
1335 | ||
1336 | const _MBCSHeader *inMBCSHeader; | |
1337 | _MBCSHeader *outMBCSHeader; | |
1338 | _MBCSHeader mbcsHeader; | |
46f4442e A |
1339 | uint32_t mbcsHeaderLength; |
1340 | UBool noFromU=FALSE; | |
1341 | ||
374ca955 A |
1342 | uint8_t outputType; |
1343 | ||
46f4442e A |
1344 | int32_t maxFastUChar, mbcsIndexLength; |
1345 | ||
374ca955 A |
1346 | const int32_t *inExtIndexes; |
1347 | int32_t extOffset; | |
1348 | ||
1349 | /* udata_swapDataHeader checks the arguments */ | |
1350 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); | |
1351 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
1352 | return 0; | |
1353 | } | |
1354 | ||
1355 | /* check data format and format version */ | |
1356 | pInfo=(const UDataInfo *)((const char *)inData+4); | |
1357 | if(!( | |
1358 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ | |
1359 | pInfo->dataFormat[1]==0x6e && | |
1360 | pInfo->dataFormat[2]==0x76 && | |
1361 | pInfo->dataFormat[3]==0x74 && | |
1362 | pInfo->formatVersion[0]==6 && | |
1363 | pInfo->formatVersion[1]>=2 | |
1364 | )) { | |
1365 | udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", | |
1366 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
1367 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
1368 | pInfo->formatVersion[0], pInfo->formatVersion[1]); | |
1369 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1370 | return 0; | |
1371 | } | |
1372 | ||
1373 | inBytes=(const uint8_t *)inData+headerSize; | |
1374 | outBytes=(uint8_t *)outData+headerSize; | |
1375 | ||
1376 | /* read the initial UConverterStaticData structure after the UDataInfo header */ | |
1377 | inStaticData=(const UConverterStaticData *)inBytes; | |
1378 | outStaticData=(UConverterStaticData *)outBytes; | |
1379 | ||
1380 | if(length<0) { | |
1381 | staticDataSize=ds->readUInt32(inStaticData->structSize); | |
1382 | } else { | |
1383 | length-=headerSize; | |
4388f060 | 1384 | if( length<(int32_t)sizeof(UConverterStaticData) || |
374ca955 A |
1385 | (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) |
1386 | ) { | |
1387 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", | |
1388 | length); | |
1389 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1390 | return 0; | |
1391 | } | |
1392 | } | |
1393 | ||
1394 | if(length>=0) { | |
1395 | /* swap the static data */ | |
1396 | if(inStaticData!=outStaticData) { | |
1397 | uprv_memcpy(outStaticData, inStaticData, staticDataSize); | |
1398 | } | |
1399 | ||
1400 | ds->swapArray32(ds, &inStaticData->structSize, 4, | |
1401 | &outStaticData->structSize, pErrorCode); | |
1402 | ds->swapArray32(ds, &inStaticData->codepage, 4, | |
1403 | &outStaticData->codepage, pErrorCode); | |
1404 | ||
73c04bcf | 1405 | ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), |
374ca955 A |
1406 | outStaticData->name, pErrorCode); |
1407 | if(U_FAILURE(*pErrorCode)) { | |
73c04bcf | 1408 | udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); |
374ca955 A |
1409 | return 0; |
1410 | } | |
1411 | } | |
1412 | ||
1413 | inBytes+=staticDataSize; | |
1414 | outBytes+=staticDataSize; | |
1415 | if(length>=0) { | |
1416 | length-=(int32_t)staticDataSize; | |
1417 | } | |
1418 | ||
1419 | /* check for supported conversionType values */ | |
1420 | if(inStaticData->conversionType==UCNV_MBCS) { | |
1421 | /* swap MBCS data */ | |
1422 | inMBCSHeader=(const _MBCSHeader *)inBytes; | |
1423 | outMBCSHeader=(_MBCSHeader *)outBytes; | |
1424 | ||
4388f060 | 1425 | if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { |
73c04bcf A |
1426 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1427 | length); | |
1428 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1429 | return 0; | |
1430 | } | |
46f4442e A |
1431 | if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { |
1432 | mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; | |
1433 | } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && | |
1434 | ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& | |
1435 | MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 | |
1436 | ) { | |
1437 | mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; | |
1438 | noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); | |
1439 | } else { | |
374ca955 A |
1440 | udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", |
1441 | inMBCSHeader->version[0], inMBCSHeader->version[1]); | |
1442 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1443 | return 0; | |
1444 | } | |
1445 | ||
1446 | uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); | |
1447 | mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); | |
1448 | mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); | |
1449 | mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); | |
1450 | mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); | |
1451 | mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); | |
1452 | mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); | |
1453 | mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); | |
46f4442e | 1454 | /* mbcsHeader.options have been read above */ |
374ca955 | 1455 | |
73c04bcf | 1456 | extOffset=(int32_t)(mbcsHeader.flags>>8); |
374ca955 | 1457 | outputType=(uint8_t)mbcsHeader.flags; |
46f4442e A |
1458 | if(noFromU && outputType==MBCS_OUTPUT_1) { |
1459 | udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); | |
1460 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1461 | return 0; | |
1462 | } | |
374ca955 A |
1463 | |
1464 | /* make sure that the output type is known */ | |
1465 | switch(outputType) { | |
1466 | case MBCS_OUTPUT_1: | |
1467 | case MBCS_OUTPUT_2: | |
1468 | case MBCS_OUTPUT_3: | |
1469 | case MBCS_OUTPUT_4: | |
1470 | case MBCS_OUTPUT_3_EUC: | |
1471 | case MBCS_OUTPUT_4_EUC: | |
1472 | case MBCS_OUTPUT_2_SISO: | |
1473 | case MBCS_OUTPUT_EXT_ONLY: | |
1474 | /* OK */ | |
1475 | break; | |
1476 | default: | |
1477 | udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", | |
1478 | outputType); | |
1479 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1480 | return 0; | |
1481 | } | |
1482 | ||
1483 | /* calculate the length of the MBCS data */ | |
46f4442e A |
1484 | |
1485 | /* | |
1486 | * utf8Friendly MBCS files (mbcsHeader.version 4.3) | |
1487 | * contain an additional mbcsIndex table: | |
1488 | * uint16_t[(maxFastUChar+1)>>6]; | |
1489 | * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). | |
1490 | */ | |
1491 | maxFastUChar=0; | |
1492 | mbcsIndexLength=0; | |
1493 | if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && | |
1494 | mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 | |
1495 | ) { | |
1496 | maxFastUChar=(maxFastUChar<<8)|0xff; | |
1497 | mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ | |
1498 | } | |
1499 | ||
374ca955 | 1500 | if(extOffset==0) { |
46f4442e A |
1501 | size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); |
1502 | if(!noFromU) { | |
1503 | size+=(int32_t)mbcsHeader.fromUBytesLength; | |
1504 | } | |
374ca955 A |
1505 | |
1506 | /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ | |
1507 | inExtIndexes=NULL; | |
1508 | } else { | |
1509 | /* there is extension data after the base data, see ucnv_ext.h */ | |
1510 | if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { | |
1511 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", | |
1512 | length); | |
1513 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1514 | return 0; | |
1515 | } | |
1516 | ||
1517 | inExtIndexes=(const int32_t *)(inBytes+extOffset); | |
1518 | size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); | |
1519 | } | |
1520 | ||
1521 | if(length>=0) { | |
1522 | if(length<size) { | |
1523 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", | |
1524 | length); | |
1525 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1526 | return 0; | |
1527 | } | |
1528 | ||
1529 | /* copy the data for inaccessible bytes */ | |
1530 | if(inBytes!=outBytes) { | |
1531 | uprv_memcpy(outBytes, inBytes, size); | |
1532 | } | |
1533 | ||
46f4442e A |
1534 | /* swap the MBCSHeader, except for the version field */ |
1535 | count=mbcsHeaderLength*4; | |
1536 | ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, | |
374ca955 A |
1537 | &outMBCSHeader->countStates, pErrorCode); |
1538 | ||
1539 | if(outputType==MBCS_OUTPUT_EXT_ONLY) { | |
1540 | /* | |
1541 | * extension-only file, | |
1542 | * contains a base name instead of normal base table data | |
1543 | */ | |
1544 | ||
1545 | /* swap the base name, between the header and the extension data */ | |
46f4442e A |
1546 | const char *inBaseName=(const char *)inBytes+count; |
1547 | char *outBaseName=(char *)outBytes+count; | |
1548 | ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), | |
1549 | outBaseName, pErrorCode); | |
374ca955 A |
1550 | } else { |
1551 | /* normal file with base table data */ | |
1552 | ||
1553 | /* swap the state table, 1kB per state */ | |
46f4442e A |
1554 | offset=count; |
1555 | count=mbcsHeader.countStates*1024; | |
1556 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1557 | outBytes+offset, pErrorCode); | |
374ca955 A |
1558 | |
1559 | /* swap the toUFallbacks[] */ | |
46f4442e A |
1560 | offset+=count; |
1561 | count=mbcsHeader.countToUFallbacks*8; | |
1562 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
374ca955 A |
1563 | outBytes+offset, pErrorCode); |
1564 | ||
1565 | /* swap the unicodeCodeUnits[] */ | |
1566 | offset=mbcsHeader.offsetToUCodeUnits; | |
1567 | count=mbcsHeader.offsetFromUTable-offset; | |
1568 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1569 | outBytes+offset, pErrorCode); | |
1570 | ||
1571 | /* offset to the stage 1 table, independent of the outputType */ | |
1572 | offset=mbcsHeader.offsetFromUTable; | |
1573 | ||
1574 | if(outputType==MBCS_OUTPUT_1) { | |
1575 | /* SBCS: swap the fromU tables, all 16 bits wide */ | |
1576 | count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; | |
1577 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1578 | outBytes+offset, pErrorCode); | |
1579 | } else { | |
1580 | /* otherwise: swap the stage tables separately */ | |
1581 | ||
1582 | /* stage 1 table: uint16_t[0x440 or 0x40] */ | |
1583 | if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { | |
1584 | count=0x440*2; /* for all of Unicode */ | |
1585 | } else { | |
1586 | count=0x40*2; /* only BMP */ | |
1587 | } | |
1588 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1589 | outBytes+offset, pErrorCode); | |
1590 | ||
1591 | /* stage 2 table: uint32_t[] */ | |
1592 | offset+=count; | |
1593 | count=mbcsHeader.offsetFromUBytes-offset; | |
1594 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1595 | outBytes+offset, pErrorCode); | |
1596 | ||
1597 | /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ | |
1598 | offset=mbcsHeader.offsetFromUBytes; | |
46f4442e | 1599 | count= noFromU ? 0 : mbcsHeader.fromUBytesLength; |
374ca955 A |
1600 | switch(outputType) { |
1601 | case MBCS_OUTPUT_2: | |
1602 | case MBCS_OUTPUT_3_EUC: | |
1603 | case MBCS_OUTPUT_2_SISO: | |
1604 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1605 | outBytes+offset, pErrorCode); | |
1606 | break; | |
1607 | case MBCS_OUTPUT_4: | |
1608 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1609 | outBytes+offset, pErrorCode); | |
1610 | break; | |
1611 | default: | |
1612 | /* just uint8_t[], nothing to swap */ | |
1613 | break; | |
1614 | } | |
46f4442e A |
1615 | |
1616 | if(mbcsIndexLength!=0) { | |
1617 | offset+=count; | |
1618 | count=mbcsIndexLength; | |
1619 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1620 | outBytes+offset, pErrorCode); | |
1621 | } | |
374ca955 A |
1622 | } |
1623 | } | |
1624 | ||
1625 | if(extOffset!=0) { | |
1626 | /* swap the extension data */ | |
1627 | inBytes+=extOffset; | |
1628 | outBytes+=extOffset; | |
1629 | ||
1630 | /* swap toUTable[] */ | |
1631 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); | |
1632 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); | |
1633 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1634 | ||
1635 | /* swap toUUChars[] */ | |
1636 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); | |
1637 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); | |
1638 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1639 | ||
1640 | /* swap fromUTableUChars[] */ | |
1641 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); | |
1642 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); | |
1643 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1644 | ||
1645 | /* swap fromUTableValues[] */ | |
1646 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); | |
1647 | /* same length as for fromUTableUChars[] */ | |
1648 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1649 | ||
1650 | /* no need to swap fromUBytes[] */ | |
1651 | ||
1652 | /* swap fromUStage12[] */ | |
1653 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); | |
1654 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); | |
1655 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1656 | ||
1657 | /* swap fromUStage3[] */ | |
1658 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); | |
1659 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); | |
1660 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1661 | ||
1662 | /* swap fromUStage3b[] */ | |
1663 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); | |
1664 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); | |
1665 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1666 | ||
1667 | /* swap indexes[] */ | |
1668 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); | |
1669 | ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); | |
1670 | } | |
1671 | } | |
1672 | } else { | |
1673 | udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", | |
1674 | inStaticData->conversionType); | |
1675 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1676 | return 0; | |
1677 | } | |
1678 | ||
1679 | return headerSize+(int32_t)staticDataSize+size; | |
1680 | } | |
1681 | ||
1682 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
1683 | ||
1684 | #endif |