]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************** | |
374ca955 | 3 | * COPYRIGHT: |
4388f060 | 4 | * Copyright (c) 1996-2011, International Business Machines Corporation and |
b75a7d8f A |
5 | * others. All Rights Reserved. |
6 | ******************************************************************** | |
7 | * | |
4388f060 | 8 | * uconv_bld.cpp: |
b75a7d8f A |
9 | * |
10 | * Defines functions that are used in the creation/initialization/deletion | |
11 | * of converters and related structures. | |
12 | * uses ucnv_io.h routines to access disk information | |
13 | * is used by ucnv.h to implement public API create/delete/flushCache routines | |
14 | * Modification History: | |
374ca955 | 15 | * |
b75a7d8f | 16 | * Date Name Description |
374ca955 | 17 | * |
b75a7d8f A |
18 | * 06/20/2000 helena OS/400 port changes; mostly typecast. |
19 | * 06/29/2000 helena Major rewrite of the callback interface. | |
20 | */ | |
21 | ||
374ca955 A |
22 | #include "unicode/utypes.h" |
23 | ||
24 | #if !UCONFIG_NO_CONVERSION | |
b75a7d8f | 25 | |
73c04bcf | 26 | #include "unicode/putil.h" |
b75a7d8f A |
27 | #include "unicode/udata.h" |
28 | #include "unicode/ucnv.h" | |
b75a7d8f | 29 | #include "unicode/uloc.h" |
4388f060 | 30 | #include "putilimp.h" |
374ca955 | 31 | #include "utracimp.h" |
b75a7d8f A |
32 | #include "ucnv_io.h" |
33 | #include "ucnv_bld.h" | |
374ca955 A |
34 | #include "ucnvmbcs.h" |
35 | #include "ucnv_ext.h" | |
b75a7d8f A |
36 | #include "ucnv_cnv.h" |
37 | #include "ucnv_imp.h" | |
38 | #include "uhash.h" | |
39 | #include "umutex.h" | |
40 | #include "cstring.h" | |
41 | #include "cmemory.h" | |
42 | #include "ucln_cmn.h" | |
374ca955 | 43 | #include "ustr_cnv.h" |
b75a7d8f A |
44 | |
45 | ||
46 | ||
/*
 * Debug logging hook for the converter cache.
 * The logging variant is compiled out (#if 0); in the active branch
 * UCNV_DEBUG_LOG(x,y,z) expands to nothing.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
#define UCNV_DEBUG_LOG(x,y,z)
#endif
54 | ||
55 | static const UConverterSharedData * const | |
56 | converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ | |
57 | NULL, NULL, | |
58 | ||
59 | #if UCONFIG_NO_LEGACY_CONVERSION | |
60 | NULL, | |
61 | #else | |
62 | &_MBCSData, | |
63 | #endif | |
64 | ||
65 | &_Latin1Data, | |
66 | &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData, | |
67 | NULL, | |
68 | ||
69 | #if UCONFIG_NO_LEGACY_CONVERSION | |
70 | NULL, | |
71 | NULL, NULL, NULL, NULL, NULL, NULL, | |
72 | NULL, NULL, NULL, NULL, NULL, NULL, | |
73 | NULL, | |
74 | #else | |
75 | &_ISO2022Data, | |
76 | &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, | |
77 | &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, | |
78 | &_HZData, | |
79 | #endif | |
80 | ||
81 | &_SCSUData, | |
82 | ||
83 | #if UCONFIG_NO_LEGACY_CONVERSION | |
84 | NULL, | |
85 | #else | |
86 | &_ISCIIData, | |
87 | #endif | |
88 | ||
89 | &_ASCIIData, | |
4388f060 A |
90 | &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
91 | ||
92 | #if UCONFIG_NO_LEGACY_CONVERSION | |
93 | NULL, | |
94 | #else | |
95 | &_CompoundTextData | |
96 | #endif | |
b75a7d8f A |
97 | }; |
98 | ||
99 | /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. | |
100 | Also the name should be in lower case and all spaces, dashes and underscores | |
101 | removed | |
102 | */ | |
103 | static struct { | |
104 | const char *name; | |
105 | const UConverterType type; | |
106 | } const cnvNameType[] = { | |
107 | { "bocu1", UCNV_BOCU1 }, | |
108 | { "cesu8", UCNV_CESU8 }, | |
109 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
110 | { "hz",UCNV_HZ }, | |
374ca955 | 111 | #endif |
b75a7d8f | 112 | { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
374ca955 | 113 | #if !UCONFIG_NO_LEGACY_CONVERSION |
b75a7d8f A |
114 | { "iscii", UCNV_ISCII }, |
115 | { "iso2022", UCNV_ISO_2022 }, | |
116 | #endif | |
117 | { "iso88591", UCNV_LATIN_1 }, | |
118 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
119 | { "lmbcs1", UCNV_LMBCS_1 }, | |
120 | { "lmbcs11",UCNV_LMBCS_11 }, | |
121 | { "lmbcs16",UCNV_LMBCS_16 }, | |
122 | { "lmbcs17",UCNV_LMBCS_17 }, | |
123 | { "lmbcs18",UCNV_LMBCS_18 }, | |
124 | { "lmbcs19",UCNV_LMBCS_19 }, | |
125 | { "lmbcs2", UCNV_LMBCS_2 }, | |
126 | { "lmbcs3", UCNV_LMBCS_3 }, | |
127 | { "lmbcs4", UCNV_LMBCS_4 }, | |
128 | { "lmbcs5", UCNV_LMBCS_5 }, | |
129 | { "lmbcs6", UCNV_LMBCS_6 }, | |
130 | { "lmbcs8", UCNV_LMBCS_8 }, | |
131 | #endif | |
132 | { "scsu", UCNV_SCSU }, | |
133 | { "usascii", UCNV_US_ASCII }, | |
134 | { "utf16", UCNV_UTF16 }, | |
135 | { "utf16be", UCNV_UTF16_BigEndian }, | |
136 | { "utf16le", UCNV_UTF16_LittleEndian }, | |
137 | #if U_IS_BIG_ENDIAN | |
138 | { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, | |
139 | { "utf16platformendian", UCNV_UTF16_BigEndian }, | |
140 | #else | |
141 | { "utf16oppositeendian", UCNV_UTF16_BigEndian}, | |
142 | { "utf16platformendian", UCNV_UTF16_LittleEndian }, | |
143 | #endif | |
144 | { "utf32", UCNV_UTF32 }, | |
145 | { "utf32be", UCNV_UTF32_BigEndian }, | |
146 | { "utf32le", UCNV_UTF32_LittleEndian }, | |
147 | #if U_IS_BIG_ENDIAN | |
148 | { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, | |
149 | { "utf32platformendian", UCNV_UTF32_BigEndian }, | |
150 | #else | |
151 | { "utf32oppositeendian", UCNV_UTF32_BigEndian }, | |
152 | { "utf32platformendian", UCNV_UTF32_LittleEndian }, | |
153 | #endif | |
154 | { "utf7", UCNV_UTF7 }, | |
4388f060 A |
155 | { "utf8", UCNV_UTF8 }, |
156 | { "x11compoundtext", UCNV_COMPOUND_TEXT} | |
b75a7d8f A |
157 | }; |
158 | ||
159 | ||
160 | /*initializes some global variables */ | |
161 | static UHashtable *SHARED_DATA_HASHTABLE = NULL; | |
162 | static UMTX cnvCacheMutex = NULL; /* Mutex for synchronizing cnv cache access. */ | |
163 | /* Note: the global mutex is used for */ | |
164 | /* reference count updates. */ | |
165 | ||
73c04bcf A |
166 | static const char **gAvailableConverters = NULL; |
167 | static uint16_t gAvailableConverterCount = 0; | |
168 | ||
729e4ab9 A |
169 | #if !U_CHARSET_IS_UTF8 |
170 | ||
46f4442e | 171 | /* This contains the resolved converter name. So no further alias lookup is needed again. */ |
73c04bcf A |
172 | static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ |
173 | static const char *gDefaultConverterName = NULL; | |
46f4442e A |
174 | |
175 | /* | |
176 | If the default converter is an algorithmic converter, this is the cached value. | |
177 | We don't cache a full UConverter and clone it because ucnv_clone doesn't have | |
178 | less overhead than an algorithmic open. We don't cache non-algorithmic converters | |
179 | because ucnv_flushCache must be able to unload the default converter and its table. | |
180 | */ | |
73c04bcf | 181 | static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; |
46f4442e A |
182 | |
183 | /* Does gDefaultConverterName have a converter option and require extra parsing? */ | |
73c04bcf A |
184 | static UBool gDefaultConverterContainsOption; |
185 | ||
729e4ab9 | 186 | #endif /* !U_CHARSET_IS_UTF8 */ |
b75a7d8f A |
187 | |
188 | static const char DATA_TYPE[] = "cnv"; | |
189 | ||
46f4442e A |
190 | static void |
191 | ucnv_flushAvailableConverterCache() { | |
192 | if (gAvailableConverters) { | |
193 | umtx_lock(&cnvCacheMutex); | |
194 | gAvailableConverterCount = 0; | |
195 | uprv_free((char **)gAvailableConverters); | |
196 | gAvailableConverters = NULL; | |
197 | umtx_unlock(&cnvCacheMutex); | |
198 | } | |
199 | } | |
200 | ||
201 | /* ucnv_cleanup - delete all storage held by the converter cache, except any */ | |
202 | /* in use by open converters. */ | |
203 | /* Not thread safe. */ | |
204 | /* Not supported API. */ | |
374ca955 | 205 | static UBool U_CALLCONV ucnv_cleanup(void) { |
46f4442e A |
206 | ucnv_flushCache(); |
207 | if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { | |
208 | uhash_close(SHARED_DATA_HASHTABLE); | |
209 | SHARED_DATA_HASHTABLE = NULL; | |
b75a7d8f A |
210 | } |
211 | ||
46f4442e A |
212 | /* Isn't called from flushCache because other threads may have preexisting references to the table. */ |
213 | ucnv_flushAvailableConverterCache(); | |
73c04bcf | 214 | |
729e4ab9 | 215 | #if !U_CHARSET_IS_UTF8 |
73c04bcf A |
216 | gDefaultConverterName = NULL; |
217 | gDefaultConverterNameBuffer[0] = 0; | |
218 | gDefaultConverterContainsOption = FALSE; | |
219 | gDefaultAlgorithmicSharedData = NULL; | |
729e4ab9 | 220 | #endif |
73c04bcf | 221 | |
46f4442e A |
222 | umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */ |
223 | /* if the hash table still exists. The mutex */ | |
224 | /* will lazily re-init itself if needed. */ | |
b75a7d8f A |
225 | return (SHARED_DATA_HASHTABLE == NULL); |
226 | } | |
227 | ||
b75a7d8f | 228 | static UBool U_CALLCONV |
4388f060 A |
229 | isCnvAcceptable(void * /*context*/, |
230 | const char * /*type*/, const char * /*name*/, | |
729e4ab9 | 231 | const UDataInfo *pInfo) { |
b75a7d8f A |
232 | return (UBool)( |
233 | pInfo->size>=20 && | |
234 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && | |
235 | pInfo->charsetFamily==U_CHARSET_FAMILY && | |
236 | pInfo->sizeofUChar==U_SIZEOF_UCHAR && | |
237 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ | |
238 | pInfo->dataFormat[1]==0x6e && | |
239 | pInfo->dataFormat[2]==0x76 && | |
240 | pInfo->dataFormat[3]==0x74 && | |
241 | pInfo->formatVersion[0]==6); /* Everything will be version 6 */ | |
242 | } | |
243 | ||
244 | /** | |
245 | * Un flatten shared data from a UDATA.. | |
246 | */ | |
247 | static UConverterSharedData* | |
374ca955 | 248 | ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) |
b75a7d8f A |
249 | { |
250 | /* UDataInfo info; -- necessary only if some converters have different formatVersion */ | |
251 | const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); | |
252 | const UConverterStaticData *source = (const UConverterStaticData *) raw; | |
253 | UConverterSharedData *data; | |
254 | UConverterType type = (UConverterType)source->conversionType; | |
255 | ||
256 | if(U_FAILURE(*status)) | |
257 | return NULL; | |
258 | ||
259 | if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || | |
260 | converterData[type] == NULL || | |
261 | converterData[type]->referenceCounter != 1 || | |
262 | source->structSize != sizeof(UConverterStaticData)) | |
263 | { | |
264 | *status = U_INVALID_TABLE_FORMAT; | |
265 | return NULL; | |
266 | } | |
267 | ||
268 | data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); | |
269 | if(data == NULL) { | |
270 | *status = U_MEMORY_ALLOCATION_ERROR; | |
271 | return NULL; | |
272 | } | |
273 | ||
274 | /* copy initial values from the static structure for this type */ | |
275 | uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); | |
276 | ||
374ca955 | 277 | #if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */ |
b75a7d8f A |
278 | /* |
279 | * It would be much more efficient if the table were a direct member, not a pointer. | |
280 | * However, that would add to the size of all UConverterSharedData objects | |
281 | * even if they do not use this table (especially algorithmic ones). | |
282 | * If this changes, then the static templates from converterData[type] | |
283 | * need more entries. | |
374ca955 A |
284 | * |
285 | * In principle, it would be cleaner if the load() function below | |
286 | * allocated the table. | |
b75a7d8f A |
287 | */ |
288 | data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable)); | |
289 | if(data->table == NULL) { | |
290 | uprv_free(data); | |
291 | *status = U_MEMORY_ALLOCATION_ERROR; | |
292 | return NULL; | |
293 | } | |
294 | uprv_memset(data->table, 0, sizeof(UConverterTable)); | |
374ca955 A |
295 | #endif |
296 | ||
b75a7d8f | 297 | data->staticData = source; |
374ca955 | 298 | |
b75a7d8f A |
299 | data->sharedDataCached = FALSE; |
300 | ||
301 | /* fill in fields from the loaded data */ | |
302 | data->dataMemory = (void*)pData; /* for future use */ | |
303 | ||
304 | if(data->impl->load != NULL) { | |
374ca955 | 305 | data->impl->load(data, pArgs, raw + source->structSize, status); |
b75a7d8f A |
306 | if(U_FAILURE(*status)) { |
307 | uprv_free(data->table); | |
308 | uprv_free(data); | |
309 | return NULL; | |
310 | } | |
311 | } | |
312 | return data; | |
313 | } | |
314 | ||
315 | /*Takes an alias name gets an actual converter file name | |
316 | *goes to disk and opens it. | |
317 | *allocates the memory and returns a new UConverter object | |
318 | */ | |
374ca955 | 319 | static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) |
b75a7d8f A |
320 | { |
321 | UDataMemory *data; | |
322 | UConverterSharedData *sharedData; | |
323 | ||
374ca955 A |
324 | UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); |
325 | ||
73c04bcf | 326 | if (U_FAILURE (*err)) { |
374ca955 | 327 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
328 | return NULL; |
329 | } | |
330 | ||
374ca955 A |
331 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); |
332 | ||
333 | data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); | |
b75a7d8f A |
334 | if(U_FAILURE(*err)) |
335 | { | |
374ca955 | 336 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
337 | return NULL; |
338 | } | |
339 | ||
374ca955 | 340 | sharedData = ucnv_data_unFlattenClone(pArgs, data, err); |
b75a7d8f A |
341 | if(U_FAILURE(*err)) |
342 | { | |
343 | udata_close(data); | |
374ca955 | 344 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
345 | return NULL; |
346 | } | |
347 | ||
374ca955 A |
348 | /* |
349 | * TODO Store pkg in a field in the shared data so that delta-only converters | |
350 | * can load base converters from the same package. | |
351 | * If the pkg name is longer than the field, then either do not load the converter | |
352 | * in the first place, or just set the pkg field to "". | |
353 | */ | |
354 | ||
355 | UTRACE_EXIT_PTR_STATUS(sharedData, *err); | |
b75a7d8f A |
356 | return sharedData; |
357 | } | |
358 | ||
b75a7d8f A |
359 | /*returns a converter type from a string |
360 | */ | |
361 | static const UConverterSharedData * | |
362 | getAlgorithmicTypeFromName(const char *realName) | |
363 | { | |
364 | uint32_t mid, start, limit; | |
374ca955 | 365 | uint32_t lastMid; |
b75a7d8f A |
366 | int result; |
367 | char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; | |
368 | ||
369 | /* Lower case and remove ignoreable characters. */ | |
370 | ucnv_io_stripForCompare(strippedName, realName); | |
371 | ||
372 | /* do a binary search for the alias */ | |
373 | start = 0; | |
374 | limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]); | |
375 | mid = limit; | |
374ca955 | 376 | lastMid = UINT32_MAX; |
b75a7d8f A |
377 | |
378 | for (;;) { | |
379 | mid = (uint32_t)((start + limit) / 2); | |
374ca955 A |
380 | if (lastMid == mid) { /* Have we moved? */ |
381 | break; /* We haven't moved, and it wasn't found. */ | |
382 | } | |
383 | lastMid = mid; | |
b75a7d8f A |
384 | result = uprv_strcmp(strippedName, cnvNameType[mid].name); |
385 | ||
386 | if (result < 0) { | |
387 | limit = mid; | |
388 | } else if (result > 0) { | |
389 | start = mid; | |
390 | } else { | |
391 | return converterData[cnvNameType[mid].type]; | |
392 | } | |
393 | } | |
394 | ||
395 | return NULL; | |
396 | } | |
397 | ||
46f4442e A |
398 | /* |
399 | * Based on the number of known converters, this determines how many times larger | |
400 | * the shared data hash table should be. When on small platforms, or just a couple | |
401 | * of converters are used, this number should be 2. When memory is plentiful, or | |
402 | * when ucnv_countAvailable is ever used with a lot of available converters, | |
403 | * this should be 4. | |
404 | * Larger numbers reduce the number of hash collisions, but use more memory. | |
405 | */ | |
406 | #define UCNV_CACHE_LOAD_FACTOR 2 | |
407 | ||
b75a7d8f A |
408 | /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ |
409 | /* Will always be called with the cnvCacheMutex alrady being held */ | |
410 | /* by the calling function. */ | |
411 | /* Stores the shared data in the SHARED_DATA_HASHTABLE | |
412 | * @param data The shared data | |
413 | */ | |
414 | static void | |
415 | ucnv_shareConverterData(UConverterSharedData * data) | |
416 | { | |
417 | UErrorCode err = U_ZERO_ERROR; | |
418 | /*Lazy evaluates the Hashtable itself */ | |
419 | /*void *sanity = NULL;*/ | |
420 | ||
421 | if (SHARED_DATA_HASHTABLE == NULL) | |
422 | { | |
73c04bcf | 423 | SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, |
46f4442e | 424 | ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, |
b75a7d8f | 425 | &err); |
374ca955 A |
426 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
427 | ||
428 | if (U_FAILURE(err)) | |
b75a7d8f A |
429 | return; |
430 | } | |
431 | ||
432 | /* ### check to see if the element is not already there! */ | |
433 | ||
434 | /* | |
435 | sanity = ucnv_getSharedConverterData (data->staticData->name); | |
436 | if(sanity != NULL) | |
437 | { | |
438 | UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); | |
439 | } | |
440 | UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); | |
441 | */ | |
374ca955 | 442 | |
b75a7d8f A |
443 | /* Mark it shared */ |
444 | data->sharedDataCached = TRUE; | |
445 | ||
446 | uhash_put(SHARED_DATA_HASHTABLE, | |
447 | (void*) data->staticData->name, /* Okay to cast away const as long as | |
448 | keyDeleter == NULL */ | |
449 | data, | |
450 | &err); | |
451 | UCNV_DEBUG_LOG("put", data->staticData->name,data); | |
452 | ||
453 | } | |
454 | ||
455 | /* Look up a converter name in the shared data cache. */ | |
456 | /* cnvCacheMutex must be held by the caller to protect the hash table. */ | |
457 | /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) | |
458 | * @param name The name of the shared data | |
459 | * @return the shared data from the SHARED_DATA_HASHTABLE | |
460 | */ | |
461 | static UConverterSharedData * | |
462 | ucnv_getSharedConverterData(const char *name) | |
463 | { | |
464 | /*special case when no Table has yet been created we return NULL */ | |
465 | if (SHARED_DATA_HASHTABLE == NULL) | |
466 | { | |
467 | return NULL; | |
468 | } | |
469 | else | |
470 | { | |
471 | UConverterSharedData *rc; | |
472 | ||
473 | rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); | |
474 | UCNV_DEBUG_LOG("get",name,rc); | |
475 | return rc; | |
476 | } | |
477 | } | |
478 | ||
479 | /*frees the string of memory blocks associates with a sharedConverter | |
480 | *if and only if the referenceCounter == 0 | |
481 | */ | |
482 | /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to | |
483 | * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and | |
484 | * returns TRUE, | |
485 | * otherwise returns FALSE | |
486 | * @param sharedConverterData The shared data | |
487 | * @return if not it frees all the memory stemming from sharedConverterData and | |
488 | * returns TRUE, otherwise returns FALSE | |
489 | */ | |
490 | static UBool | |
491 | ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) | |
492 | { | |
374ca955 A |
493 | UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); |
494 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); | |
495 | ||
496 | if (deadSharedData->referenceCounter > 0) { | |
497 | UTRACE_EXIT_VALUE((int32_t)FALSE); | |
b75a7d8f | 498 | return FALSE; |
374ca955 | 499 | } |
b75a7d8f A |
500 | |
501 | if (deadSharedData->impl->unload != NULL) { | |
502 | deadSharedData->impl->unload(deadSharedData); | |
503 | } | |
374ca955 | 504 | |
b75a7d8f A |
505 | if(deadSharedData->dataMemory != NULL) |
506 | { | |
507 | UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; | |
508 | udata_close(data); | |
509 | } | |
510 | ||
511 | if(deadSharedData->table != NULL) | |
512 | { | |
513 | uprv_free(deadSharedData->table); | |
514 | } | |
515 | ||
516 | #if 0 | |
517 | /* if the static data is actually owned by the shared data */ | |
518 | /* enable if we ever have this situation. */ | |
519 | if(deadSharedData->staticDataOwned == TRUE) /* see ucnv_bld.h */ | |
520 | { | |
521 | uprv_free((void*)deadSharedData->staticData); | |
522 | } | |
523 | #endif | |
524 | ||
525 | #if 0 | |
526 | /* Zap it ! */ | |
527 | uprv_memset(deadSharedData->0, sizeof(*deadSharedData)); | |
528 | #endif | |
529 | ||
530 | uprv_free(deadSharedData); | |
374ca955 A |
531 | |
532 | UTRACE_EXIT_VALUE((int32_t)TRUE); | |
b75a7d8f A |
533 | return TRUE; |
534 | } | |
535 | ||
374ca955 A |
536 | /** |
537 | * Load a non-algorithmic converter. | |
538 | * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). | |
539 | */ | |
540 | UConverterSharedData * | |
541 | ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { | |
542 | UConverterSharedData *mySharedConverterData; | |
543 | ||
544 | if(err == NULL || U_FAILURE(*err)) { | |
545 | return NULL; | |
546 | } | |
547 | ||
548 | if(pArgs->pkg != NULL && *pArgs->pkg != 0) { | |
549 | /* application-provided converters are not currently cached */ | |
550 | return createConverterFromFile(pArgs, err); | |
551 | } | |
552 | ||
553 | mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); | |
554 | if (mySharedConverterData == NULL) | |
555 | { | |
556 | /*Not cached, we need to stream it in from file */ | |
557 | mySharedConverterData = createConverterFromFile(pArgs, err); | |
558 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) | |
559 | { | |
560 | return NULL; | |
561 | } | |
729e4ab9 | 562 | else if (!pArgs->onlyTestIsLoadable) |
374ca955 A |
563 | { |
564 | /* share it with other library clients */ | |
565 | ucnv_shareConverterData(mySharedConverterData); | |
566 | } | |
567 | } | |
568 | else | |
569 | { | |
570 | /* The data for this converter was already in the cache. */ | |
571 | /* Update the reference counter on the shared data: one more client */ | |
572 | mySharedConverterData->referenceCounter++; | |
573 | } | |
574 | ||
575 | return mySharedConverterData; | |
576 | } | |
577 | ||
578 | /** | |
579 | * Unload a non-algorithmic converter. | |
580 | * It must be sharedData->referenceCounter != ~0 | |
581 | * and this function must be called inside umtx_lock(&cnvCacheMutex). | |
582 | */ | |
4388f060 | 583 | U_CAPI void |
374ca955 A |
584 | ucnv_unload(UConverterSharedData *sharedData) { |
585 | if(sharedData != NULL) { | |
b75a7d8f A |
586 | if (sharedData->referenceCounter > 0) { |
587 | sharedData->referenceCounter--; | |
588 | } | |
374ca955 | 589 | |
b75a7d8f A |
590 | if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { |
591 | ucnv_deleteSharedConverterData(sharedData); | |
592 | } | |
593 | } | |
b75a7d8f A |
594 | } |
595 | ||
4388f060 | 596 | U_CFUNC void |
374ca955 | 597 | ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) |
b75a7d8f | 598 | { |
b75a7d8f | 599 | /* |
374ca955 A |
600 | Checking whether it's an algorithic converter is okay |
601 | in multithreaded applications because the value never changes. | |
602 | Don't check referenceCounter for any other value. | |
b75a7d8f | 603 | */ |
4388f060 | 604 | if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { |
374ca955 A |
605 | umtx_lock(&cnvCacheMutex); |
606 | ucnv_unload(sharedData); | |
607 | umtx_unlock(&cnvCacheMutex); | |
608 | } | |
609 | } | |
610 | ||
4388f060 | 611 | U_CFUNC void |
374ca955 A |
612 | ucnv_incrementRefCount(UConverterSharedData *sharedData) |
613 | { | |
729e4ab9 A |
614 | /* |
615 | Checking whether it's an algorithic converter is okay | |
616 | in multithreaded applications because the value never changes. | |
617 | Don't check referenceCounter for any other value. | |
618 | */ | |
4388f060 | 619 | if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { |
374ca955 | 620 | umtx_lock(&cnvCacheMutex); |
b75a7d8f | 621 | sharedData->referenceCounter++; |
374ca955 | 622 | umtx_unlock(&cnvCacheMutex); |
b75a7d8f | 623 | } |
b75a7d8f A |
624 | } |
625 | ||
729e4ab9 A |
626 | /* |
627 | * *pPieces must be initialized. | |
628 | * The name without options will be copied to pPieces->cnvName. | |
629 | * The locale and options will be copied to pPieces only if present in inName, | |
630 | * otherwise the existing values in pPieces remain. | |
631 | * *pArgs will be set to the pPieces values. | |
632 | */ | |
b75a7d8f A |
633 | static void |
634 | parseConverterOptions(const char *inName, | |
729e4ab9 A |
635 | UConverterNamePieces *pPieces, |
636 | UConverterLoadArgs *pArgs, | |
b75a7d8f A |
637 | UErrorCode *err) |
638 | { | |
729e4ab9 | 639 | char *cnvName = pPieces->cnvName; |
b75a7d8f A |
640 | char c; |
641 | int32_t len = 0; | |
642 | ||
729e4ab9 A |
643 | pArgs->name=inName; |
644 | pArgs->locale=pPieces->locale; | |
645 | pArgs->options=pPieces->options; | |
646 | ||
b75a7d8f A |
647 | /* copy the converter name itself to cnvName */ |
648 | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { | |
649 | if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { | |
650 | *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ | |
729e4ab9 | 651 | pPieces->cnvName[0]=0; |
b75a7d8f A |
652 | return; |
653 | } | |
654 | *cnvName++=c; | |
655 | inName++; | |
656 | } | |
657 | *cnvName=0; | |
729e4ab9 | 658 | pArgs->name=pPieces->cnvName; |
b75a7d8f A |
659 | |
660 | /* parse options. No more name copying should occur. */ | |
661 | while((c=*inName)!=0) { | |
662 | if(c==UCNV_OPTION_SEP_CHAR) { | |
663 | ++inName; | |
664 | } | |
665 | ||
666 | /* inName is behind an option separator */ | |
667 | if(uprv_strncmp(inName, "locale=", 7)==0) { | |
668 | /* do not modify locale itself in case we have multiple locale options */ | |
729e4ab9 | 669 | char *dest=pPieces->locale; |
b75a7d8f A |
670 | |
671 | /* copy the locale option value */ | |
672 | inName+=7; | |
673 | len=0; | |
674 | while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { | |
675 | ++inName; | |
676 | ||
677 | if(++len>=ULOC_FULLNAME_CAPACITY) { | |
678 | *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ | |
729e4ab9 | 679 | pPieces->locale[0]=0; |
b75a7d8f A |
680 | return; |
681 | } | |
682 | ||
683 | *dest++=c; | |
684 | } | |
685 | *dest=0; | |
686 | } else if(uprv_strncmp(inName, "version=", 8)==0) { | |
729e4ab9 | 687 | /* copy the version option value into bits 3..0 of pPieces->options */ |
b75a7d8f A |
688 | inName+=8; |
689 | c=*inName; | |
690 | if(c==0) { | |
729e4ab9 | 691 | pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); |
b75a7d8f A |
692 | return; |
693 | } else if((uint8_t)(c-'0')<10) { | |
729e4ab9 | 694 | pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); |
b75a7d8f A |
695 | ++inName; |
696 | } | |
697 | } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { | |
698 | inName+=8; | |
729e4ab9 | 699 | pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); |
b75a7d8f A |
700 | /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ |
701 | } else { | |
702 | /* ignore any other options until we define some */ | |
703 | while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { | |
704 | } | |
705 | if(c==0) { | |
706 | return; | |
707 | } | |
708 | } | |
709 | } | |
710 | } | |
711 | ||
712 | /*Logic determines if the converter is Algorithmic AND/OR cached | |
713 | *depending on that: | |
714 | * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) | |
715 | * -Get it from a Hashtable (Data=X, Cached=TRUE) | |
716 | * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) | |
717 | * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) | |
718 | */ | |
4388f060 | 719 | U_CFUNC UConverterSharedData * |
729e4ab9 A |
720 | ucnv_loadSharedData(const char *converterName, |
721 | UConverterNamePieces *pPieces, | |
722 | UConverterLoadArgs *pArgs, | |
723 | UErrorCode * err) { | |
724 | UConverterNamePieces stackPieces; | |
725 | UConverterLoadArgs stackArgs; | |
b75a7d8f A |
726 | UConverterSharedData *mySharedConverterData = NULL; |
727 | UErrorCode internalErrorCode = U_ZERO_ERROR; | |
73c04bcf A |
728 | UBool mayContainOption = TRUE; |
729 | UBool checkForAlgorithmic = TRUE; | |
374ca955 A |
730 | |
731 | if (U_FAILURE (*err)) { | |
b75a7d8f | 732 | return NULL; |
374ca955 A |
733 | } |
734 | ||
729e4ab9 A |
735 | if(pPieces == NULL) { |
736 | if(pArgs != NULL) { | |
737 | /* | |
738 | * Bad: We may set pArgs pointers to stackPieces fields | |
739 | * which will be invalid after this function returns. | |
740 | */ | |
741 | *err = U_INTERNAL_PROGRAM_ERROR; | |
742 | return NULL; | |
743 | } | |
744 | pPieces = &stackPieces; | |
745 | } | |
746 | if(pArgs == NULL) { | |
747 | uprv_memset(&stackArgs, 0, sizeof(stackArgs)); | |
748 | stackArgs.size = (int32_t)sizeof(stackArgs); | |
749 | pArgs = &stackArgs; | |
374ca955 | 750 | } |
b75a7d8f | 751 | |
729e4ab9 A |
752 | pPieces->cnvName[0] = 0; |
753 | pPieces->locale[0] = 0; | |
754 | pPieces->options = 0; | |
755 | ||
756 | pArgs->name = converterName; | |
757 | pArgs->locale = pPieces->locale; | |
758 | pArgs->options = pPieces->options; | |
b75a7d8f A |
759 | |
760 | /* In case "name" is NULL we want to open the default converter. */ | |
761 | if (converterName == NULL) { | |
729e4ab9 A |
762 | #if U_CHARSET_IS_UTF8 |
763 | pArgs->name = "UTF-8"; | |
764 | return (UConverterSharedData *)converterData[UCNV_UTF8]; | |
765 | #else | |
73c04bcf | 766 | /* Call ucnv_getDefaultName first to query the name from the OS. */ |
729e4ab9 A |
767 | pArgs->name = ucnv_getDefaultName(); |
768 | if (pArgs->name == NULL) { | |
b75a7d8f A |
769 | *err = U_MISSING_RESOURCE_ERROR; |
770 | return NULL; | |
771 | } | |
73c04bcf A |
772 | mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; |
773 | checkForAlgorithmic = FALSE; | |
774 | mayContainOption = gDefaultConverterContainsOption; | |
b75a7d8f | 775 | /* the default converter name is already canonical */ |
729e4ab9 | 776 | #endif |
73c04bcf | 777 | } |
729e4ab9 | 778 | else if(UCNV_FAST_IS_UTF8(converterName)) { |
73c04bcf | 779 | /* fastpath for UTF-8 */ |
729e4ab9 | 780 | pArgs->name = "UTF-8"; |
73c04bcf A |
781 | return (UConverterSharedData *)converterData[UCNV_UTF8]; |
782 | } | |
783 | else { | |
b75a7d8f | 784 | /* separate the converter name from the options */ |
729e4ab9 | 785 | parseConverterOptions(converterName, pPieces, pArgs, err); |
b75a7d8f A |
786 | if (U_FAILURE(*err)) { |
787 | /* Very bad name used. */ | |
788 | return NULL; | |
789 | } | |
790 | ||
791 | /* get the canonical converter name */ | |
729e4ab9 A |
792 | pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); |
793 | if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { | |
b75a7d8f A |
794 | /* |
795 | * set the input name in case the converter was added | |
796 | * without updating the alias table, or when there is no alias table | |
797 | */ | |
729e4ab9 | 798 | pArgs->name = pPieces->cnvName; |
b75a7d8f A |
799 | } |
800 | } | |
801 | ||
802 | /* separate the converter name from the options */ | |
729e4ab9 A |
803 | if(mayContainOption && pArgs->name != pPieces->cnvName) { |
804 | parseConverterOptions(pArgs->name, pPieces, pArgs, err); | |
b75a7d8f | 805 | } |
374ca955 | 806 | |
b75a7d8f | 807 | /* get the shared data for an algorithmic converter, if it is one */ |
73c04bcf | 808 | if (checkForAlgorithmic) { |
729e4ab9 | 809 | mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); |
73c04bcf | 810 | } |
b75a7d8f A |
811 | if (mySharedConverterData == NULL) |
812 | { | |
813 | /* it is a data-based converter, get its shared data. */ | |
814 | /* Hold the cnvCacheMutex through the whole process of checking the */ | |
815 | /* converter data cache, and adding new entries to the cache */ | |
816 | /* to prevent other threads from modifying the cache during the */ | |
817 | /* process. */ | |
729e4ab9 A |
818 | pArgs->nestedLoads=1; |
819 | pArgs->pkg=NULL; | |
374ca955 | 820 | |
b75a7d8f | 821 | umtx_lock(&cnvCacheMutex); |
729e4ab9 | 822 | mySharedConverterData = ucnv_load(pArgs, err); |
374ca955 A |
823 | umtx_unlock(&cnvCacheMutex); |
824 | if (U_FAILURE (*err) || (mySharedConverterData == NULL)) | |
b75a7d8f | 825 | { |
374ca955 | 826 | return NULL; |
b75a7d8f | 827 | } |
b75a7d8f A |
828 | } |
829 | ||
374ca955 A |
830 | return mySharedConverterData; |
831 | } | |
b75a7d8f | 832 | |
4388f060 | 833 | U_CAPI UConverter * |
374ca955 A |
834 | ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) |
835 | { | |
729e4ab9 | 836 | UConverterNamePieces stackPieces; |
4388f060 | 837 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
374ca955 A |
838 | UConverterSharedData *mySharedConverterData; |
839 | ||
840 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); | |
841 | ||
842 | if(U_SUCCESS(*err)) { | |
843 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); | |
844 | ||
729e4ab9 A |
845 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); |
846 | ||
847 | myUConverter = ucnv_createConverterFromSharedData( | |
848 | myUConverter, mySharedConverterData, | |
849 | &stackArgs, | |
850 | err); | |
374ca955 A |
851 | |
852 | if(U_SUCCESS(*err)) { | |
729e4ab9 A |
853 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); |
854 | return myUConverter; | |
b75a7d8f | 855 | } |
b75a7d8f A |
856 | } |
857 | ||
374ca955 A |
858 | /* exit with error */ |
859 | UTRACE_EXIT_STATUS(*err); | |
860 | return NULL; | |
b75a7d8f A |
861 | } |
862 | ||
729e4ab9 A |
863 | U_CFUNC UBool |
864 | ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { | |
865 | UConverter myUConverter; | |
866 | UConverterNamePieces stackPieces; | |
4388f060 | 867 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
729e4ab9 A |
868 | UConverterSharedData *mySharedConverterData; |
869 | ||
870 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); | |
871 | ||
872 | if(U_SUCCESS(*err)) { | |
873 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); | |
874 | ||
875 | stackArgs.onlyTestIsLoadable=TRUE; | |
876 | mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); | |
877 | ucnv_createConverterFromSharedData( | |
878 | &myUConverter, mySharedConverterData, | |
879 | &stackArgs, | |
880 | err); | |
881 | ucnv_unloadSharedDataIfReady(mySharedConverterData); | |
882 | } | |
883 | ||
884 | UTRACE_EXIT_STATUS(*err); | |
885 | return U_SUCCESS(*err); | |
886 | } | |
887 | ||
b75a7d8f A |
888 | UConverter * |
889 | ucnv_createAlgorithmicConverter(UConverter *myUConverter, | |
890 | UConverterType type, | |
891 | const char *locale, uint32_t options, | |
892 | UErrorCode *err) { | |
374ca955 | 893 | UConverter *cnv; |
b75a7d8f | 894 | const UConverterSharedData *sharedData; |
4388f060 | 895 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
b75a7d8f | 896 | |
374ca955 A |
897 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); |
898 | UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); | |
899 | ||
b75a7d8f A |
900 | if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { |
901 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
374ca955 | 902 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
b75a7d8f A |
903 | return NULL; |
904 | } | |
905 | ||
906 | sharedData = converterData[type]; | |
729e4ab9 A |
907 | /* |
908 | Checking whether it's an algorithic converter is okay | |
909 | in multithreaded applications because the value never changes. | |
910 | Don't check referenceCounter for any other value. | |
911 | */ | |
4388f060 | 912 | if(sharedData == NULL || sharedData->referenceCounter != (uint32_t)~0) { |
b75a7d8f A |
913 | /* not a valid type, or not an algorithmic converter */ |
914 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
374ca955 | 915 | UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); |
b75a7d8f A |
916 | return NULL; |
917 | } | |
918 | ||
729e4ab9 A |
919 | stackArgs.name = ""; |
920 | stackArgs.options = options; | |
921 | stackArgs.locale=locale; | |
922 | cnv = ucnv_createConverterFromSharedData( | |
923 | myUConverter, (UConverterSharedData *)sharedData, | |
924 | &stackArgs, err); | |
374ca955 A |
925 | |
926 | UTRACE_EXIT_PTR_STATUS(cnv, *err); | |
927 | return cnv; | |
b75a7d8f A |
928 | } |
929 | ||
4388f060 | 930 | U_CFUNC UConverter* |
b75a7d8f A |
931 | ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) |
932 | { | |
b75a7d8f | 933 | UConverter *myUConverter; |
374ca955 | 934 | UConverterSharedData *mySharedConverterData; |
729e4ab9 | 935 | UConverterNamePieces stackPieces; |
4388f060 | 936 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
374ca955 A |
937 | |
938 | UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); | |
b75a7d8f A |
939 | |
940 | if(U_FAILURE(*err)) { | |
374ca955 A |
941 | UTRACE_EXIT_STATUS(*err); |
942 | return NULL; | |
b75a7d8f A |
943 | } |
944 | ||
374ca955 A |
945 | UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); |
946 | ||
374ca955 | 947 | /* first, get the options out of the converterName string */ |
729e4ab9 A |
948 | stackPieces.cnvName[0] = 0; |
949 | stackPieces.locale[0] = 0; | |
950 | stackPieces.options = 0; | |
951 | parseConverterOptions(converterName, &stackPieces, &stackArgs, err); | |
b75a7d8f A |
952 | if (U_FAILURE(*err)) { |
953 | /* Very bad name used. */ | |
374ca955 | 954 | UTRACE_EXIT_STATUS(*err); |
b75a7d8f A |
955 | return NULL; |
956 | } | |
729e4ab9 A |
957 | stackArgs.nestedLoads=1; |
958 | stackArgs.pkg=packageName; | |
374ca955 | 959 | |
b75a7d8f | 960 | /* open the data, unflatten the shared structure */ |
729e4ab9 | 961 | mySharedConverterData = createConverterFromFile(&stackArgs, err); |
374ca955 | 962 | |
b75a7d8f | 963 | if (U_FAILURE(*err)) { |
374ca955 A |
964 | UTRACE_EXIT_STATUS(*err); |
965 | return NULL; | |
b75a7d8f A |
966 | } |
967 | ||
968 | /* create the actual converter */ | |
729e4ab9 | 969 | myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); |
374ca955 | 970 | |
b75a7d8f A |
971 | if (U_FAILURE(*err)) { |
972 | ucnv_close(myUConverter); | |
374ca955 A |
973 | UTRACE_EXIT_STATUS(*err); |
974 | return NULL; | |
b75a7d8f | 975 | } |
374ca955 A |
976 | |
977 | UTRACE_EXIT_PTR_STATUS(myUConverter, *err); | |
b75a7d8f A |
978 | return myUConverter; |
979 | } | |
980 | ||
981 | ||
4388f060 | 982 | U_CFUNC UConverter* |
b75a7d8f A |
983 | ucnv_createConverterFromSharedData(UConverter *myUConverter, |
984 | UConverterSharedData *mySharedConverterData, | |
729e4ab9 | 985 | UConverterLoadArgs *pArgs, |
b75a7d8f A |
986 | UErrorCode *err) |
987 | { | |
988 | UBool isCopyLocal; | |
989 | ||
729e4ab9 A |
990 | if(U_FAILURE(*err)) { |
991 | ucnv_unloadSharedDataIfReady(mySharedConverterData); | |
992 | return myUConverter; | |
993 | } | |
b75a7d8f A |
994 | if(myUConverter == NULL) |
995 | { | |
996 | myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); | |
997 | if(myUConverter == NULL) | |
998 | { | |
999 | *err = U_MEMORY_ALLOCATION_ERROR; | |
729e4ab9 | 1000 | ucnv_unloadSharedDataIfReady(mySharedConverterData); |
b75a7d8f A |
1001 | return NULL; |
1002 | } | |
1003 | isCopyLocal = FALSE; | |
1004 | } else { | |
1005 | isCopyLocal = TRUE; | |
1006 | } | |
1007 | ||
1008 | /* initialize the converter */ | |
1009 | uprv_memset(myUConverter, 0, sizeof(UConverter)); | |
1010 | myUConverter->isCopyLocal = isCopyLocal; | |
73c04bcf | 1011 | /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ |
b75a7d8f | 1012 | myUConverter->sharedData = mySharedConverterData; |
729e4ab9 A |
1013 | myUConverter->options = pArgs->options; |
1014 | if(!pArgs->onlyTestIsLoadable) { | |
1015 | myUConverter->preFromUFirstCP = U_SENTINEL; | |
1016 | myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; | |
1017 | myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; | |
1018 | myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; | |
1019 | myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; | |
1020 | myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; | |
1021 | myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; | |
1022 | myUConverter->subChars = (uint8_t *)myUConverter->subUChars; | |
1023 | uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); | |
1024 | myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ | |
1025 | } | |
73c04bcf A |
1026 | |
1027 | if(mySharedConverterData->impl->open != NULL) { | |
729e4ab9 A |
1028 | mySharedConverterData->impl->open(myUConverter, pArgs, err); |
1029 | if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { | |
1030 | /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ | |
b75a7d8f A |
1031 | ucnv_close(myUConverter); |
1032 | return NULL; | |
1033 | } | |
1034 | } | |
1035 | ||
1036 | return myUConverter; | |
1037 | } | |
1038 | ||
1039 | /*Frees all shared immutable objects that aren't referred to (reference count = 0) | |
1040 | */ | |
1041 | U_CAPI int32_t U_EXPORT2 | |
1042 | ucnv_flushCache () | |
1043 | { | |
1044 | UConverterSharedData *mySharedData = NULL; | |
374ca955 | 1045 | int32_t pos; |
b75a7d8f A |
1046 | int32_t tableDeletedNum = 0; |
1047 | const UHashElement *e; | |
729e4ab9 | 1048 | /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ |
374ca955 A |
1049 | int32_t i, remaining; |
1050 | ||
1051 | UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); | |
b75a7d8f A |
1052 | |
1053 | /* Close the default converter without creating a new one so that everything will be flushed. */ | |
729e4ab9 | 1054 | u_flushDefaultConverter(); |
b75a7d8f A |
1055 | |
1056 | /*if shared data hasn't even been lazy evaluated yet | |
1057 | * return 0 | |
1058 | */ | |
374ca955 A |
1059 | if (SHARED_DATA_HASHTABLE == NULL) { |
1060 | UTRACE_EXIT_VALUE((int32_t)0); | |
b75a7d8f | 1061 | return 0; |
374ca955 | 1062 | } |
b75a7d8f A |
1063 | |
1064 | /*creates an enumeration to iterate through every element in the | |
1065 | * table | |
1066 | * | |
1067 | * Synchronization: holding cnvCacheMutex will prevent any other thread from | |
1068 | * accessing or modifying the hash table during the iteration. | |
1069 | * The reference count of an entry may be decremented by | |
1070 | * ucnv_close while the iteration is in process, but this is | |
1071 | * benign. It can't be incremented (in ucnv_createConverter()) | |
1072 | * because the sequence of looking up in the cache + incrementing | |
1073 | * is protected by cnvCacheMutex. | |
1074 | */ | |
1075 | umtx_lock(&cnvCacheMutex); | |
374ca955 A |
1076 | /* |
1077 | * double loop: A delta/extension-only converter has a pointer to its base table's | |
1078 | * shared data; the first iteration of the outer loop may see the delta converter | |
1079 | * before the base converter, and unloading the delta converter may get the base | |
1080 | * converter's reference counter down to 0. | |
1081 | */ | |
1082 | i = 0; | |
1083 | do { | |
1084 | remaining = 0; | |
1085 | pos = -1; | |
1086 | while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) | |
b75a7d8f | 1087 | { |
374ca955 A |
1088 | mySharedData = (UConverterSharedData *) e->value.pointer; |
1089 | /*deletes only if reference counter == 0 */ | |
1090 | if (mySharedData->referenceCounter == 0) | |
1091 | { | |
1092 | tableDeletedNum++; | |
1093 | ||
1094 | UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); | |
1095 | ||
1096 | uhash_removeElement(SHARED_DATA_HASHTABLE, e); | |
1097 | mySharedData->sharedDataCached = FALSE; | |
1098 | ucnv_deleteSharedConverterData (mySharedData); | |
1099 | } else { | |
1100 | ++remaining; | |
1101 | } | |
b75a7d8f | 1102 | } |
374ca955 | 1103 | } while(++i == 1 && remaining > 0); |
b75a7d8f A |
1104 | umtx_unlock(&cnvCacheMutex); |
1105 | ||
374ca955 A |
1106 | UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); |
1107 | ||
374ca955 | 1108 | UTRACE_EXIT_VALUE(tableDeletedNum); |
b75a7d8f A |
1109 | return tableDeletedNum; |
1110 | } | |
1111 | ||
73c04bcf A |
1112 | /* available converters list --------------------------------------------------- */ |
1113 | ||
1114 | static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { | |
1115 | int needInit; | |
1116 | UMTX_CHECK(&cnvCacheMutex, (gAvailableConverters == NULL), needInit); | |
1117 | if (needInit) { | |
1118 | UConverter tempConverter; | |
1119 | UEnumeration *allConvEnum = NULL; | |
1120 | uint16_t idx; | |
1121 | uint16_t localConverterCount; | |
1122 | uint16_t allConverterCount; | |
1123 | UErrorCode localStatus; | |
1124 | const char *converterName; | |
1125 | const char **localConverterList; | |
1126 | ||
1127 | allConvEnum = ucnv_openAllNames(pErrorCode); | |
1128 | allConverterCount = uenum_count(allConvEnum, pErrorCode); | |
1129 | if (U_FAILURE(*pErrorCode)) { | |
1130 | return FALSE; | |
1131 | } | |
1132 | ||
1133 | /* We can't have more than "*converterTable" converters to open */ | |
1134 | localConverterList = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); | |
1135 | if (!localConverterList) { | |
1136 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
1137 | return FALSE; | |
1138 | } | |
1139 | ||
46f4442e A |
1140 | /* Open the default converter to make sure that it has first dibs in the hash table. */ |
1141 | localStatus = U_ZERO_ERROR; | |
1142 | ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); | |
1143 | ||
73c04bcf A |
1144 | localConverterCount = 0; |
1145 | ||
1146 | for (idx = 0; idx < allConverterCount; idx++) { | |
1147 | localStatus = U_ZERO_ERROR; | |
1148 | converterName = uenum_next(allConvEnum, NULL, &localStatus); | |
729e4ab9 | 1149 | if (ucnv_canCreateConverter(converterName, &localStatus)) { |
73c04bcf A |
1150 | localConverterList[localConverterCount++] = converterName; |
1151 | } | |
1152 | } | |
1153 | uenum_close(allConvEnum); | |
1154 | ||
1155 | umtx_lock(&cnvCacheMutex); | |
1156 | if (gAvailableConverters == NULL) { | |
73c04bcf | 1157 | gAvailableConverterCount = localConverterCount; |
729e4ab9 | 1158 | gAvailableConverters = localConverterList; |
73c04bcf A |
1159 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); |
1160 | } | |
1161 | else { | |
1162 | uprv_free((char **)localConverterList); | |
1163 | } | |
1164 | umtx_unlock(&cnvCacheMutex); | |
1165 | } | |
1166 | return TRUE; | |
1167 | } | |
1168 | ||
1169 | U_CFUNC uint16_t | |
1170 | ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { | |
1171 | if (haveAvailableConverterList(pErrorCode)) { | |
1172 | return gAvailableConverterCount; | |
1173 | } | |
1174 | return 0; | |
1175 | } | |
1176 | ||
1177 | U_CFUNC const char * | |
1178 | ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { | |
1179 | if (haveAvailableConverterList(pErrorCode)) { | |
1180 | if (n < gAvailableConverterCount) { | |
1181 | return gAvailableConverters[n]; | |
1182 | } | |
1183 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
1184 | } | |
1185 | return NULL; | |
1186 | } | |
1187 | ||
1188 | /* default converter name --------------------------------------------------- */ | |
1189 | ||
729e4ab9 | 1190 | #if !U_CHARSET_IS_UTF8 |
46f4442e A |
1191 | /* |
1192 | Copy the canonical converter name. | |
1193 | ucnv_getDefaultName must be thread safe, which can call this function. | |
1194 | ||
1195 | ucnv_setDefaultName calls this function and it doesn't have to be | |
1196 | thread safe because there is no reliable/safe way to reset the | |
1197 | converter in use in all threads. If you did reset the converter, you | |
1198 | would not be sure that retrieving a default converter for one string | |
1199 | would be the same type of default converter for a successive string. | |
1200 | Since the name is a returned via ucnv_getDefaultName without copying, | |
1201 | you shouldn't be modifying or deleting the string from a separate thread. | |
1202 | */ | |
4388f060 | 1203 | static inline void |
73c04bcf | 1204 | internalSetName(const char *name, UErrorCode *status) { |
729e4ab9 | 1205 | UConverterNamePieces stackPieces; |
4388f060 | 1206 | UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; |
73c04bcf A |
1207 | int32_t length=(int32_t)(uprv_strlen(name)); |
1208 | UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); | |
1209 | const UConverterSharedData *algorithmicSharedData; | |
1210 | ||
729e4ab9 | 1211 | stackArgs.name = name; |
73c04bcf | 1212 | if(containsOption) { |
729e4ab9 A |
1213 | stackPieces.cnvName[0] = 0; |
1214 | stackPieces.locale[0] = 0; | |
1215 | stackPieces.options = 0; | |
1216 | parseConverterOptions(name, &stackPieces, &stackArgs, status); | |
1217 | if(U_FAILURE(*status)) { | |
1218 | return; | |
1219 | } | |
73c04bcf | 1220 | } |
729e4ab9 | 1221 | algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); |
73c04bcf A |
1222 | |
1223 | umtx_lock(&cnvCacheMutex); | |
1224 | ||
46f4442e A |
1225 | gDefaultAlgorithmicSharedData = algorithmicSharedData; |
1226 | gDefaultConverterContainsOption = containsOption; | |
73c04bcf A |
1227 | uprv_memcpy(gDefaultConverterNameBuffer, name, length); |
1228 | gDefaultConverterNameBuffer[length]=0; | |
729e4ab9 A |
1229 | |
1230 | /* gDefaultConverterName MUST be the last global var set by this function. */ | |
1231 | /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ | |
73c04bcf | 1232 | gDefaultConverterName = gDefaultConverterNameBuffer; |
73c04bcf A |
1233 | |
1234 | ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); | |
1235 | ||
1236 | umtx_unlock(&cnvCacheMutex); | |
1237 | } | |
729e4ab9 | 1238 | #endif |
73c04bcf A |
1239 | |
1240 | /* | |
1241 | * In order to be really thread-safe, the get function would have to take | |
1242 | * a buffer parameter and copy the current string inside a mutex block. | |
1243 | * This implementation only tries to be really thread-safe while | |
1244 | * setting the name. | |
1245 | * It assumes that setting a pointer is atomic. | |
1246 | */ | |
1247 | ||
1248 | U_CAPI const char* U_EXPORT2 | |
1249 | ucnv_getDefaultName() { | |
729e4ab9 A |
1250 | #if U_CHARSET_IS_UTF8 |
1251 | return "UTF-8"; | |
1252 | #else | |
73c04bcf A |
1253 | /* local variable to be thread-safe */ |
1254 | const char *name; | |
1255 | ||
46f4442e A |
1256 | /* |
1257 | Multiple calls to ucnv_getDefaultName must be thread safe, | |
1258 | but ucnv_setDefaultName is not thread safe. | |
1259 | */ | |
73c04bcf A |
1260 | UMTX_CHECK(&cnvCacheMutex, gDefaultConverterName, name); |
1261 | if(name==NULL) { | |
1262 | UErrorCode errorCode = U_ZERO_ERROR; | |
1263 | UConverter *cnv = NULL; | |
1264 | ||
1265 | name = uprv_getDefaultCodepage(); | |
1266 | ||
1267 | /* if the name is there, test it out and get the canonical name with options */ | |
1268 | if(name != NULL) { | |
1269 | cnv = ucnv_open(name, &errorCode); | |
1270 | if(U_SUCCESS(errorCode) && cnv != NULL) { | |
1271 | name = ucnv_getName(cnv, &errorCode); | |
1272 | } | |
1273 | } | |
1274 | ||
1275 | if(name == NULL || name[0] == 0 | |
1276 | || U_FAILURE(errorCode) || cnv == NULL | |
1277 | || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) | |
1278 | { | |
1279 | /* Panic time, let's use a fallback. */ | |
1280 | #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) | |
1281 | name = "US-ASCII"; | |
1282 | /* there is no 'algorithmic' converter for EBCDIC */ | |
4388f060 | 1283 | #elif U_PLATFORM == U_PF_OS390 |
73c04bcf A |
1284 | name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; |
1285 | #else | |
1286 | name = "ibm-37_P100-1995"; | |
1287 | #endif | |
1288 | } | |
1289 | ||
1290 | internalSetName(name, &errorCode); | |
1291 | ||
1292 | /* The close may make the current name go away. */ | |
1293 | ucnv_close(cnv); | |
1294 | } | |
1295 | ||
1296 | return name; | |
729e4ab9 | 1297 | #endif |
73c04bcf A |
1298 | } |
1299 | ||
46f4442e A |
1300 | /* |
1301 | This function is not thread safe, and it can't be thread safe. | |
1302 | See internalSetName or the API reference for details. | |
1303 | */ | |
73c04bcf A |
1304 | U_CAPI void U_EXPORT2 |
1305 | ucnv_setDefaultName(const char *converterName) { | |
729e4ab9 | 1306 | #if !U_CHARSET_IS_UTF8 |
73c04bcf A |
1307 | if(converterName==NULL) { |
1308 | /* reset to the default codepage */ | |
73c04bcf | 1309 | gDefaultConverterName=NULL; |
73c04bcf A |
1310 | } else { |
1311 | UErrorCode errorCode = U_ZERO_ERROR; | |
1312 | UConverter *cnv = NULL; | |
1313 | const char *name = NULL; | |
1314 | ||
1315 | /* if the name is there, test it out and get the canonical name with options */ | |
1316 | cnv = ucnv_open(converterName, &errorCode); | |
1317 | if(U_SUCCESS(errorCode) && cnv != NULL) { | |
1318 | name = ucnv_getName(cnv, &errorCode); | |
1319 | } | |
1320 | ||
1321 | if(U_SUCCESS(errorCode) && name!=NULL) { | |
1322 | internalSetName(name, &errorCode); | |
1323 | } | |
1324 | /* else this converter is bad to use. Don't change it to a bad value. */ | |
1325 | ||
1326 | /* The close may make the current name go away. */ | |
1327 | ucnv_close(cnv); | |
729e4ab9 A |
1328 | |
1329 | /* reset the converter cache */ | |
1330 | u_flushDefaultConverter(); | |
73c04bcf | 1331 | } |
729e4ab9 | 1332 | #endif |
73c04bcf A |
1333 | } |
1334 | ||
374ca955 A |
1335 | /* data swapping ------------------------------------------------------------ */ |
1336 | ||
1337 | /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ | |
1338 | ||
1339 | #if !UCONFIG_NO_LEGACY_CONVERSION | |
1340 | ||
1341 | U_CAPI int32_t U_EXPORT2 | |
1342 | ucnv_swap(const UDataSwapper *ds, | |
1343 | const void *inData, int32_t length, void *outData, | |
1344 | UErrorCode *pErrorCode) { | |
1345 | const UDataInfo *pInfo; | |
1346 | int32_t headerSize; | |
1347 | ||
1348 | const uint8_t *inBytes; | |
1349 | uint8_t *outBytes; | |
1350 | ||
1351 | uint32_t offset, count, staticDataSize; | |
1352 | int32_t size; | |
1353 | ||
1354 | const UConverterStaticData *inStaticData; | |
1355 | UConverterStaticData *outStaticData; | |
1356 | ||
1357 | const _MBCSHeader *inMBCSHeader; | |
1358 | _MBCSHeader *outMBCSHeader; | |
1359 | _MBCSHeader mbcsHeader; | |
46f4442e A |
1360 | uint32_t mbcsHeaderLength; |
1361 | UBool noFromU=FALSE; | |
1362 | ||
374ca955 A |
1363 | uint8_t outputType; |
1364 | ||
46f4442e A |
1365 | int32_t maxFastUChar, mbcsIndexLength; |
1366 | ||
374ca955 A |
1367 | const int32_t *inExtIndexes; |
1368 | int32_t extOffset; | |
1369 | ||
1370 | /* udata_swapDataHeader checks the arguments */ | |
1371 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); | |
1372 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
1373 | return 0; | |
1374 | } | |
1375 | ||
1376 | /* check data format and format version */ | |
1377 | pInfo=(const UDataInfo *)((const char *)inData+4); | |
1378 | if(!( | |
1379 | pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ | |
1380 | pInfo->dataFormat[1]==0x6e && | |
1381 | pInfo->dataFormat[2]==0x76 && | |
1382 | pInfo->dataFormat[3]==0x74 && | |
1383 | pInfo->formatVersion[0]==6 && | |
1384 | pInfo->formatVersion[1]>=2 | |
1385 | )) { | |
1386 | udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", | |
1387 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
1388 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
1389 | pInfo->formatVersion[0], pInfo->formatVersion[1]); | |
1390 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1391 | return 0; | |
1392 | } | |
1393 | ||
1394 | inBytes=(const uint8_t *)inData+headerSize; | |
1395 | outBytes=(uint8_t *)outData+headerSize; | |
1396 | ||
1397 | /* read the initial UConverterStaticData structure after the UDataInfo header */ | |
1398 | inStaticData=(const UConverterStaticData *)inBytes; | |
1399 | outStaticData=(UConverterStaticData *)outBytes; | |
1400 | ||
1401 | if(length<0) { | |
1402 | staticDataSize=ds->readUInt32(inStaticData->structSize); | |
1403 | } else { | |
1404 | length-=headerSize; | |
4388f060 | 1405 | if( length<(int32_t)sizeof(UConverterStaticData) || |
374ca955 A |
1406 | (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) |
1407 | ) { | |
1408 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", | |
1409 | length); | |
1410 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1411 | return 0; | |
1412 | } | |
1413 | } | |
1414 | ||
1415 | if(length>=0) { | |
1416 | /* swap the static data */ | |
1417 | if(inStaticData!=outStaticData) { | |
1418 | uprv_memcpy(outStaticData, inStaticData, staticDataSize); | |
1419 | } | |
1420 | ||
1421 | ds->swapArray32(ds, &inStaticData->structSize, 4, | |
1422 | &outStaticData->structSize, pErrorCode); | |
1423 | ds->swapArray32(ds, &inStaticData->codepage, 4, | |
1424 | &outStaticData->codepage, pErrorCode); | |
1425 | ||
73c04bcf | 1426 | ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), |
374ca955 A |
1427 | outStaticData->name, pErrorCode); |
1428 | if(U_FAILURE(*pErrorCode)) { | |
73c04bcf | 1429 | udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); |
374ca955 A |
1430 | return 0; |
1431 | } | |
1432 | } | |
1433 | ||
1434 | inBytes+=staticDataSize; | |
1435 | outBytes+=staticDataSize; | |
1436 | if(length>=0) { | |
1437 | length-=(int32_t)staticDataSize; | |
1438 | } | |
1439 | ||
1440 | /* check for supported conversionType values */ | |
1441 | if(inStaticData->conversionType==UCNV_MBCS) { | |
1442 | /* swap MBCS data */ | |
1443 | inMBCSHeader=(const _MBCSHeader *)inBytes; | |
1444 | outMBCSHeader=(_MBCSHeader *)outBytes; | |
1445 | ||
4388f060 | 1446 | if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { |
73c04bcf A |
1447 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", |
1448 | length); | |
1449 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1450 | return 0; | |
1451 | } | |
46f4442e A |
1452 | if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { |
1453 | mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; | |
1454 | } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && | |
1455 | ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& | |
1456 | MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 | |
1457 | ) { | |
1458 | mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; | |
1459 | noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); | |
1460 | } else { | |
374ca955 A |
1461 | udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", |
1462 | inMBCSHeader->version[0], inMBCSHeader->version[1]); | |
1463 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1464 | return 0; | |
1465 | } | |
1466 | ||
1467 | uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); | |
1468 | mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); | |
1469 | mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); | |
1470 | mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); | |
1471 | mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); | |
1472 | mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); | |
1473 | mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); | |
1474 | mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); | |
46f4442e | 1475 | /* mbcsHeader.options have been read above */ |
374ca955 | 1476 | |
73c04bcf | 1477 | extOffset=(int32_t)(mbcsHeader.flags>>8); |
374ca955 | 1478 | outputType=(uint8_t)mbcsHeader.flags; |
46f4442e A |
1479 | if(noFromU && outputType==MBCS_OUTPUT_1) { |
1480 | udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); | |
1481 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1482 | return 0; | |
1483 | } | |
374ca955 A |
1484 | |
1485 | /* make sure that the output type is known */ | |
1486 | switch(outputType) { | |
1487 | case MBCS_OUTPUT_1: | |
1488 | case MBCS_OUTPUT_2: | |
1489 | case MBCS_OUTPUT_3: | |
1490 | case MBCS_OUTPUT_4: | |
1491 | case MBCS_OUTPUT_3_EUC: | |
1492 | case MBCS_OUTPUT_4_EUC: | |
1493 | case MBCS_OUTPUT_2_SISO: | |
1494 | case MBCS_OUTPUT_EXT_ONLY: | |
1495 | /* OK */ | |
1496 | break; | |
1497 | default: | |
1498 | udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", | |
1499 | outputType); | |
1500 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1501 | return 0; | |
1502 | } | |
1503 | ||
1504 | /* calculate the length of the MBCS data */ | |
46f4442e A |
1505 | |
1506 | /* | |
1507 | * utf8Friendly MBCS files (mbcsHeader.version 4.3) | |
1508 | * contain an additional mbcsIndex table: | |
1509 | * uint16_t[(maxFastUChar+1)>>6]; | |
1510 | * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). | |
1511 | */ | |
1512 | maxFastUChar=0; | |
1513 | mbcsIndexLength=0; | |
1514 | if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && | |
1515 | mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 | |
1516 | ) { | |
1517 | maxFastUChar=(maxFastUChar<<8)|0xff; | |
1518 | mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ | |
1519 | } | |
1520 | ||
374ca955 | 1521 | if(extOffset==0) { |
46f4442e A |
1522 | size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); |
1523 | if(!noFromU) { | |
1524 | size+=(int32_t)mbcsHeader.fromUBytesLength; | |
1525 | } | |
374ca955 A |
1526 | |
1527 | /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ | |
1528 | inExtIndexes=NULL; | |
1529 | } else { | |
1530 | /* there is extension data after the base data, see ucnv_ext.h */ | |
1531 | if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { | |
1532 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", | |
1533 | length); | |
1534 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1535 | return 0; | |
1536 | } | |
1537 | ||
1538 | inExtIndexes=(const int32_t *)(inBytes+extOffset); | |
1539 | size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); | |
1540 | } | |
1541 | ||
1542 | if(length>=0) { | |
1543 | if(length<size) { | |
1544 | udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", | |
1545 | length); | |
1546 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1547 | return 0; | |
1548 | } | |
1549 | ||
1550 | /* copy the data for inaccessible bytes */ | |
1551 | if(inBytes!=outBytes) { | |
1552 | uprv_memcpy(outBytes, inBytes, size); | |
1553 | } | |
1554 | ||
46f4442e A |
1555 | /* swap the MBCSHeader, except for the version field */ |
1556 | count=mbcsHeaderLength*4; | |
1557 | ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, | |
374ca955 A |
1558 | &outMBCSHeader->countStates, pErrorCode); |
1559 | ||
1560 | if(outputType==MBCS_OUTPUT_EXT_ONLY) { | |
1561 | /* | |
1562 | * extension-only file, | |
1563 | * contains a base name instead of normal base table data | |
1564 | */ | |
1565 | ||
1566 | /* swap the base name, between the header and the extension data */ | |
46f4442e A |
1567 | const char *inBaseName=(const char *)inBytes+count; |
1568 | char *outBaseName=(char *)outBytes+count; | |
1569 | ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), | |
1570 | outBaseName, pErrorCode); | |
374ca955 A |
1571 | } else { |
1572 | /* normal file with base table data */ | |
1573 | ||
1574 | /* swap the state table, 1kB per state */ | |
46f4442e A |
1575 | offset=count; |
1576 | count=mbcsHeader.countStates*1024; | |
1577 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1578 | outBytes+offset, pErrorCode); | |
374ca955 A |
1579 | |
1580 | /* swap the toUFallbacks[] */ | |
46f4442e A |
1581 | offset+=count; |
1582 | count=mbcsHeader.countToUFallbacks*8; | |
1583 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
374ca955 A |
1584 | outBytes+offset, pErrorCode); |
1585 | ||
1586 | /* swap the unicodeCodeUnits[] */ | |
1587 | offset=mbcsHeader.offsetToUCodeUnits; | |
1588 | count=mbcsHeader.offsetFromUTable-offset; | |
1589 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1590 | outBytes+offset, pErrorCode); | |
1591 | ||
1592 | /* offset to the stage 1 table, independent of the outputType */ | |
1593 | offset=mbcsHeader.offsetFromUTable; | |
1594 | ||
1595 | if(outputType==MBCS_OUTPUT_1) { | |
1596 | /* SBCS: swap the fromU tables, all 16 bits wide */ | |
1597 | count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; | |
1598 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1599 | outBytes+offset, pErrorCode); | |
1600 | } else { | |
1601 | /* otherwise: swap the stage tables separately */ | |
1602 | ||
1603 | /* stage 1 table: uint16_t[0x440 or 0x40] */ | |
1604 | if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { | |
1605 | count=0x440*2; /* for all of Unicode */ | |
1606 | } else { | |
1607 | count=0x40*2; /* only BMP */ | |
1608 | } | |
1609 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1610 | outBytes+offset, pErrorCode); | |
1611 | ||
1612 | /* stage 2 table: uint32_t[] */ | |
1613 | offset+=count; | |
1614 | count=mbcsHeader.offsetFromUBytes-offset; | |
1615 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1616 | outBytes+offset, pErrorCode); | |
1617 | ||
1618 | /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ | |
1619 | offset=mbcsHeader.offsetFromUBytes; | |
46f4442e | 1620 | count= noFromU ? 0 : mbcsHeader.fromUBytesLength; |
374ca955 A |
1621 | switch(outputType) { |
1622 | case MBCS_OUTPUT_2: | |
1623 | case MBCS_OUTPUT_3_EUC: | |
1624 | case MBCS_OUTPUT_2_SISO: | |
1625 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1626 | outBytes+offset, pErrorCode); | |
1627 | break; | |
1628 | case MBCS_OUTPUT_4: | |
1629 | ds->swapArray32(ds, inBytes+offset, (int32_t)count, | |
1630 | outBytes+offset, pErrorCode); | |
1631 | break; | |
1632 | default: | |
1633 | /* just uint8_t[], nothing to swap */ | |
1634 | break; | |
1635 | } | |
46f4442e A |
1636 | |
1637 | if(mbcsIndexLength!=0) { | |
1638 | offset+=count; | |
1639 | count=mbcsIndexLength; | |
1640 | ds->swapArray16(ds, inBytes+offset, (int32_t)count, | |
1641 | outBytes+offset, pErrorCode); | |
1642 | } | |
374ca955 A |
1643 | } |
1644 | } | |
1645 | ||
1646 | if(extOffset!=0) { | |
1647 | /* swap the extension data */ | |
1648 | inBytes+=extOffset; | |
1649 | outBytes+=extOffset; | |
1650 | ||
1651 | /* swap toUTable[] */ | |
1652 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); | |
1653 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); | |
1654 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1655 | ||
1656 | /* swap toUUChars[] */ | |
1657 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); | |
1658 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); | |
1659 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1660 | ||
1661 | /* swap fromUTableUChars[] */ | |
1662 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); | |
1663 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); | |
1664 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1665 | ||
1666 | /* swap fromUTableValues[] */ | |
1667 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); | |
1668 | /* same length as for fromUTableUChars[] */ | |
1669 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1670 | ||
1671 | /* no need to swap fromUBytes[] */ | |
1672 | ||
1673 | /* swap fromUStage12[] */ | |
1674 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); | |
1675 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); | |
1676 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1677 | ||
1678 | /* swap fromUStage3[] */ | |
1679 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); | |
1680 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); | |
1681 | ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); | |
1682 | ||
1683 | /* swap fromUStage3b[] */ | |
1684 | offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); | |
1685 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); | |
1686 | ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); | |
1687 | ||
1688 | /* swap indexes[] */ | |
1689 | length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); | |
1690 | ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); | |
1691 | } | |
1692 | } | |
1693 | } else { | |
1694 | udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", | |
1695 | inStaticData->conversionType); | |
1696 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1697 | return 0; | |
1698 | } | |
1699 | ||
1700 | return headerSize+(int32_t)staticDataSize+size; | |
1701 | } | |
1702 | ||
1703 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
1704 | ||
1705 | #endif |