1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ********************************************************************
6 * Copyright (c) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 ********************************************************************
12 * Defines functions that are used in the creation/initialization/deletion
13 * of converters and related structures.
14 * uses uconv_io.h routines to access disk information
15 * is used by ucnv.h to implement public API create/delete/flushCache routines
16 * Modification History:
18 * Date Name Description
20 * 06/20/2000 helena OS/400 port changes; mostly typecast.
21 * 06/29/2000 helena Major rewrite of the callback interface.
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_CONVERSION
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "unicode/ucnv.h"
31 #include "unicode/uloc.h"
52 extern void UCNV_DEBUG_LOG(char *what
, char *who
, void *p
, int l
);
53 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
55 # define UCNV_DEBUG_LOG(x,y,z)
// Table of built-in UConverterSharedData descriptors with
// UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES slots. Other code in this file indexes it
// with a UConverterType value and treats a NULL slot as an unavailable type.
// The rows are selected by the UCONFIG_NO_LEGACY_CONVERSION and
// UCONFIG_ONLY_HTML_CONVERSION build switches.
// NOTE(review): the matching #else and #endif lines and some rows are not visible in
// this excerpt, so the exact slot order cannot be confirmed from here.
58 static const UConverterSharedData
* const
59 converterData
[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
]={
62 #if UCONFIG_NO_LEGACY_CONVERSION
69 &_UTF8Data
, &_UTF16BEData
, &_UTF16LEData
,
70 #if UCONFIG_ONLY_HTML_CONVERSION
73 &_UTF32BEData
, &_UTF32LEData
,
77 #if UCONFIG_NO_LEGACY_CONVERSION
83 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
84 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
85 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
88 &_LMBCSData1
,&_LMBCSData2
, &_LMBCSData3
, &_LMBCSData4
, &_LMBCSData5
, &_LMBCSData6
,
89 &_LMBCSData8
,&_LMBCSData11
,&_LMBCSData16
,&_LMBCSData17
,&_LMBCSData18
,&_LMBCSData19
,
93 #if UCONFIG_ONLY_HTML_CONVERSION
100 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
107 #if UCONFIG_ONLY_HTML_CONVERSION
108 NULL
, NULL
, &_UTF16Data
, NULL
, NULL
, NULL
,
110 &_UTF7Data
, &_Bocu1Data
, &_UTF16Data
, &_UTF32Data
, &_CESU8Data
, &_IMAPData
,
113 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
120 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
121 Also the name should be in lower case and all spaces, dashes and underscores
// Name-to-type lookup table. getAlgorithmicTypeFromName() binary-searches it with
// uprv_strcmp on the name field and uses the type field as an index into
// converterData, so the entries must stay sorted by name.
// NOTE(review): the struct opening is not visible in this excerpt. The repeated
// utf16/utf32 oppositeendian and platformendian pairs look like the two branches of
// an endianness conditional whose directive lines are missing; confirm against the
// full file.
126 const UConverterType type
;
127 } const cnvNameType
[] = {
128 #if !UCONFIG_ONLY_HTML_CONVERSION
129 { "bocu1", UCNV_BOCU1
},
130 { "cesu8", UCNV_CESU8
},
132 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
135 #if !UCONFIG_ONLY_HTML_CONVERSION
136 { "imapmailboxname", UCNV_IMAP_MAILBOX
},
138 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
139 { "iscii", UCNV_ISCII
},
141 #if !UCONFIG_NO_LEGACY_CONVERSION
142 { "iso2022", UCNV_ISO_2022
},
144 { "iso88591", UCNV_LATIN_1
},
145 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
146 { "lmbcs1", UCNV_LMBCS_1
},
147 { "lmbcs11",UCNV_LMBCS_11
},
148 { "lmbcs16",UCNV_LMBCS_16
},
149 { "lmbcs17",UCNV_LMBCS_17
},
150 { "lmbcs18",UCNV_LMBCS_18
},
151 { "lmbcs19",UCNV_LMBCS_19
},
152 { "lmbcs2", UCNV_LMBCS_2
},
153 { "lmbcs3", UCNV_LMBCS_3
},
154 { "lmbcs4", UCNV_LMBCS_4
},
155 { "lmbcs5", UCNV_LMBCS_5
},
156 { "lmbcs6", UCNV_LMBCS_6
},
157 { "lmbcs8", UCNV_LMBCS_8
},
159 #if !UCONFIG_ONLY_HTML_CONVERSION
160 { "scsu", UCNV_SCSU
},
162 { "usascii", UCNV_US_ASCII
},
163 { "utf16", UCNV_UTF16
},
164 { "utf16be", UCNV_UTF16_BigEndian
},
165 { "utf16le", UCNV_UTF16_LittleEndian
},
167 { "utf16oppositeendian", UCNV_UTF16_LittleEndian
},
168 { "utf16platformendian", UCNV_UTF16_BigEndian
},
170 { "utf16oppositeendian", UCNV_UTF16_BigEndian
},
171 { "utf16platformendian", UCNV_UTF16_LittleEndian
},
173 #if !UCONFIG_ONLY_HTML_CONVERSION
174 { "utf32", UCNV_UTF32
},
175 { "utf32be", UCNV_UTF32_BigEndian
},
176 { "utf32le", UCNV_UTF32_LittleEndian
},
178 { "utf32oppositeendian", UCNV_UTF32_LittleEndian
},
179 { "utf32platformendian", UCNV_UTF32_BigEndian
},
181 { "utf32oppositeendian", UCNV_UTF32_BigEndian
},
182 { "utf32platformendian", UCNV_UTF32_LittleEndian
},
185 #if !UCONFIG_ONLY_HTML_CONVERSION
186 { "utf7", UCNV_UTF7
},
188 { "utf8", UCNV_UTF8
},
189 #if !UCONFIG_ONLY_HTML_CONVERSION
190 { "x11compoundtext", UCNV_COMPOUND_TEXT
}
195 /*initializes some global variables */
// Cache of loaded converter shared data, keyed by the converter name string.
// Starts out NULL; ucnv_shareConverterData() creates it on first use and
// ucnv_cleanup() closes it once it is empty.
196 static UHashtable
*SHARED_DATA_HASHTABLE
= NULL
;
// Accessor for the mutex that callers lock around converter cache access.
// The UMutex is created with STATIC_NEW and held in a function-local static pointer.
// NOTE(review): the return statement and closing brace are not visible in this excerpt.
197 static icu::UMutex
*cnvCacheMutex() { /* Mutex for synchronizing cnv cache access. */
198 static icu::UMutex
*m
= STATIC_NEW(icu::UMutex
);
201 /* Note: the global mutex is used for */
202 /* reference count updates. */
// State for the list of converters that can actually be opened:
// the array of name pointers, the number of valid entries, and the init-once guard
// under which initAvailableConvertersList() fills them in.
// ucnv_flushAvailableConverterCache() frees the array and resets all three.
204 static const char **gAvailableConverters
= NULL
;
205 static uint16_t gAvailableConverterCount
= 0;
206 static icu::UInitOnce gAvailableConvertersInitOnce
= U_INITONCE_INITIALIZER
;
208 #if !U_CHARSET_IS_UTF8
// State describing the default converter. These four variables are only compiled
// when U_CHARSET_IS_UTF8 is false, and ucnv_cleanup() resets all of them.
// NOTE(review): the opener and closer of the explanatory comment ahead of
// gDefaultAlgorithmicSharedData are not visible in this excerpt.
210 /* This contains the resolved converter name. So no further alias lookup is needed again. */
211 static char gDefaultConverterNameBuffer
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1]; /* +1 for NULL */
212 static const char *gDefaultConverterName
= NULL
;
215 If the default converter is an algorithmic converter, this is the cached value.
216 We don't cache a full UConverter and clone it because ucnv_clone doesn't have
217 less overhead than an algorithmic open. We don't cache non-algorithmic converters
218 because ucnv_flushCache must be able to unload the default converter and its table.
220 static const UConverterSharedData
*gDefaultAlgorithmicSharedData
= NULL
;
222 /* Does gDefaultConverterName have a converter option and require extra parsing? */
223 static UBool gDefaultConverterContainsOption
;
225 #endif /* !U_CHARSET_IS_UTF8 */
// Data item type string handed to udata_openChoice() when a converter file is loaded.
227 static const char DATA_TYPE
[] = "cnv";
229 /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup().
230 * If it is ever to be called from elsewhere, synchronization
231 * will need to be considered.
// Discards the cached list of available converter names: zeroes the count, frees and
// clears the name array if one was allocated, and resets the init-once guard
// (presumably so a later request rebuilds the list).
// NOTE(review): the return-type line and the closing braces are not visible in this excerpt.
234 ucnv_flushAvailableConverterCache() {
235 gAvailableConverterCount
= 0;
236 if (gAvailableConverters
) {
237 uprv_free((char **)gAvailableConverters
);
238 gAvailableConverters
= NULL
;
240 gAvailableConvertersInitOnce
.reset();
243 /* ucnv_cleanup - delete all storage held by the converter cache, except any */
244 /* in use by open converters. */
245 /* Not thread safe. */
246 /* Not supported API. */
// Cleanup callback registered by ucnv_enableCleanup().
// Closes the shared-data hash table, but only when it exists and holds no entries;
// then flushes the available-converter list and, in builds where U_CHARSET_IS_UTF8 is
// false, clears the default-converter globals.
// Returns TRUE only if the hash table pointer is NULL afterwards.
247 static UBool U_CALLCONV
ucnv_cleanup(void) {
249 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
250 uhash_close(SHARED_DATA_HASHTABLE
);
251 SHARED_DATA_HASHTABLE
= NULL
;
254 /* Isn't called from flushCache because other threads may have preexisting references to the table. */
255 ucnv_flushAvailableConverterCache();
257 #if !U_CHARSET_IS_UTF8
258 gDefaultConverterName
= NULL
;
259 gDefaultConverterNameBuffer
[0] = 0;
260 gDefaultConverterContainsOption
= FALSE
;
261 gDefaultAlgorithmicSharedData
= NULL
;
264 return (SHARED_DATA_HASHTABLE
== NULL
);
// Registers ucnv_cleanup() with the common-library cleanup registry under the
// UCLN_COMMON_UCNV slot. Called from this file when the shared-data hash table is
// created and when the available-converter list is initialized.
267 U_CAPI
void U_EXPORT2
268 ucnv_enableCleanup() {
269 ucln_common_registerCleanup(UCLN_COMMON_UCNV
, ucnv_cleanup
);
// Acceptance callback passed to udata_openChoice() by createConverterFromFile().
// The visible conditions accept a data item only when its endianness, charset family
// and UChar size match this build, its four dataFormat bytes are 0x63 0x6e 0x76 0x74
// (cnvt), and formatVersion[0] is 6. The context, type and name arguments are unused.
// NOTE(review): the start of the return expression is not visible in this excerpt, so
// there may be further conditions ahead of the ones shown.
272 static UBool U_CALLCONV
273 isCnvAcceptable(void * /*context*/,
274 const char * /*type*/, const char * /*name*/,
275 const UDataInfo
*pInfo
) {
278 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
279 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
280 pInfo
->sizeofUChar
==U_SIZEOF_UCHAR
&&
281 pInfo
->dataFormat
[0]==0x63 && /* dataFormat="cnvt" */
282 pInfo
->dataFormat
[1]==0x6e &&
283 pInfo
->dataFormat
[2]==0x76 &&
284 pInfo
->dataFormat
[3]==0x74 &&
285 pInfo
->formatVersion
[0]==6); /* Everything will be version 6 */
289 * Un flatten shared data from a UDATA..
// Builds a heap-allocated UConverterSharedData for a converter data item opened as pData.
//   pArgs  - load arguments, forwarded to the per-type load hook
//   pData  - opened data memory; its payload starts with a UConverterStaticData
//   status - in/out error code
// Steps visible here:
//   1. Read the conversion type from the static data at the start of the payload.
//   2. Reject the item with U_INVALID_TABLE_FORMAT when the type is out of range, has no
//      entry in converterData, that entry is not reference counted or its
//      referenceCounter is not 1, or structSize differs from sizeof(UConverterStaticData).
//   3. Allocate the struct (U_MEMORY_ALLOCATION_ERROR is set on what is presumably the
//      allocation-failure path) and copy the per-type template from converterData into it.
//   4. Point staticData at the loaded data, mark the struct as not cached, store pData in
//      dataMemory, and call the type load hook with the bytes following the static data.
// NOTE(review): the early returns and the failure handling after the load hook are not
// visible in this excerpt.
291 static UConverterSharedData
*
292 ucnv_data_unFlattenClone(UConverterLoadArgs
*pArgs
, UDataMemory
*pData
, UErrorCode
*status
)
294 /* UDataInfo info; -- necessary only if some converters have different formatVersion */
295 const uint8_t *raw
= (const uint8_t *)udata_getMemory(pData
);
296 const UConverterStaticData
*source
= (const UConverterStaticData
*) raw
;
297 UConverterSharedData
*data
;
298 UConverterType type
= (UConverterType
)source
->conversionType
;
300 if(U_FAILURE(*status
))
303 if( (uint16_t)type
>= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
||
304 converterData
[type
] == NULL
||
305 !converterData
[type
]->isReferenceCounted
||
306 converterData
[type
]->referenceCounter
!= 1 ||
307 source
->structSize
!= sizeof(UConverterStaticData
))
309 *status
= U_INVALID_TABLE_FORMAT
;
313 data
= (UConverterSharedData
*)uprv_malloc(sizeof(UConverterSharedData
));
315 *status
= U_MEMORY_ALLOCATION_ERROR
;
319 /* copy initial values from the static structure for this type */
320 uprv_memcpy(data
, converterData
[type
], sizeof(UConverterSharedData
));
322 data
->staticData
= source
;
324 data
->sharedDataCached
= FALSE
;
326 /* fill in fields from the loaded data */
327 data
->dataMemory
= (void*)pData
; /* for future use */
329 if(data
->impl
->load
!= NULL
) {
330 data
->impl
->load(data
, pArgs
, raw
+ source
->structSize
, status
);
331 if(U_FAILURE(*status
)) {
339 /*Takes an alias name gets an actual converter file name
340 *goes to disk and opens it.
341 *allocates the memory and returns a new UConverter object
// Loads a converter from its data file.
//   pArgs - supplies the package (pArgs->pkg) and converter name (pArgs->name)
//   err   - in/out error code; an incoming failure is traced before any load is attempted
// Opens the item through udata_openChoice() with type DATA_TYPE and the isCnvAcceptable
// filter, then turns it into shared data with ucnv_data_unFlattenClone().
// NOTE(review): the declaration of the local named data, the failure checks after each
// call and the return statements are not visible in this excerpt.
343 static UConverterSharedData
*createConverterFromFile(UConverterLoadArgs
*pArgs
, UErrorCode
* err
)
346 UConverterSharedData
*sharedData
;
348 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD
);
350 if (U_FAILURE (*err
)) {
351 UTRACE_EXIT_STATUS(*err
);
355 UTRACE_DATA2(UTRACE_OPEN_CLOSE
, "load converter %s from package %s", pArgs
->name
, pArgs
->pkg
);
357 data
= udata_openChoice(pArgs
->pkg
, DATA_TYPE
, pArgs
->name
, isCnvAcceptable
, NULL
, err
);
360 UTRACE_EXIT_STATUS(*err
);
364 sharedData
= ucnv_data_unFlattenClone(pArgs
, data
, err
);
368 UTRACE_EXIT_STATUS(*err
);
373 * TODO Store pkg in a field in the shared data so that delta-only converters
374 * can load base converters from the same package.
375 * If the pkg name is longer than the field, then either do not load the converter
376 * in the first place, or just set the pkg field to "".
379 UTRACE_EXIT_PTR_STATUS(sharedData
, *err
);
383 /*returns a converter type from a string
// Maps a converter name to the built-in shared data of an algorithmic converter.
//   realName - converter name; normalized with ucnv_io_stripForCompare() into a local
//              buffer of UCNV_MAX_CONVERTER_NAME_LENGTH chars before the lookup
// The normalized name is binary-searched in cnvNameType using uprv_strcmp; on a match
// the function returns converterData[type] for the matching entry.
// The search stops when the midpoint no longer moves between iterations.
// NOTE(review): the loop header, the branch that narrows the range for result < 0 and
// the not-found return are not visible in this excerpt.
385 static const UConverterSharedData
*
386 getAlgorithmicTypeFromName(const char *realName
)
388 uint32_t mid
, start
, limit
;
391 char strippedName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
393 /* Lower case and remove ignorable characters. */
394 ucnv_io_stripForCompare(strippedName
, realName
);
396 /* do a binary search for the alias */
398 limit
= UPRV_LENGTHOF(cnvNameType
);
400 lastMid
= UINT32_MAX
;
403 mid
= (uint32_t)((start
+ limit
) / 2);
404 if (lastMid
== mid
) { /* Have we moved? */
405 break; /* We haven't moved, and it wasn't found. */
408 result
= uprv_strcmp(strippedName
, cnvNameType
[mid
].name
);
412 } else if (result
> 0) {
415 return converterData
[cnvNameType
[mid
].type
];
423 * Based on the number of known converters, this determines how many times larger
424 * the shared data hash table should be. When on small platforms, or just a couple
425 * of converters are used, this number should be 2. When memory is plentiful, or
426 * when ucnv_countAvailable is ever used with a lot of available converters,
428 * Larger numbers reduce the number of hash collisions, but use more memory.
430 #define UCNV_CACHE_LOAD_FACTOR 2
432 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
433 /* Will always be called with the cnvCacheMutex already being held */
434 /* by the calling function. */
435 /* Stores the shared data in the SHARED_DATA_HASHTABLE
436 * @param data The shared data
// Inserts data into SHARED_DATA_HASHTABLE, keyed by data->staticData->name.
// On first use the table is created with uhash_openSize(), using char-string hashing
// and comparison and an initial size of ucnv_io_countKnownConverters() times
// UCNV_CACHE_LOAD_FACTOR; ucnv_enableCleanup() is also called in that branch.
// data->sharedDataCached is set to TRUE before the uhash_put() call.
// The key pointer is the name inside the static data; the existing comment notes that
// casting away const is acceptable only while the table has no key deleter.
// NOTE(review): the return-type line, the error checks after table creation and the
// remaining uhash_put() arguments are not visible in this excerpt.
439 ucnv_shareConverterData(UConverterSharedData
* data
)
441 UErrorCode err
= U_ZERO_ERROR
;
442 /*Lazy evaluates the Hashtable itself */
443 /*void *sanity = NULL;*/
445 if (SHARED_DATA_HASHTABLE
== NULL
)
447 SHARED_DATA_HASHTABLE
= uhash_openSize(uhash_hashChars
, uhash_compareChars
, NULL
,
448 ucnv_io_countKnownConverters(&err
)*UCNV_CACHE_LOAD_FACTOR
,
450 ucnv_enableCleanup();
456 /* ### check to see if the element is not already there! */
459 sanity = ucnv_getSharedConverterData (data->staticData->name);
462 UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
464 UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
468 data
->sharedDataCached
= TRUE
;
470 uhash_put(SHARED_DATA_HASHTABLE
,
471 (void*) data
->staticData
->name
, /* Okay to cast away const as long as
472 keyDeleter == NULL */
475 UCNV_DEBUG_LOG("put", data
->staticData
->name
,data
);
479 /* Look up a converter name in the shared data cache. */
480 /* cnvCacheMutex must be held by the caller to protect the hash table. */
481 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there)
482 * @param name The name of the shared data
483 * @return the shared data from the SHARED_DATA_HASHTABLE
// Looks up the shared data registered under name in SHARED_DATA_HASHTABLE via uhash_get().
// While no hash table exists the lookup is skipped and a special-case branch is taken.
// NOTE(review): the return statements of both branches are not visible in this excerpt.
485 static UConverterSharedData
*
486 ucnv_getSharedConverterData(const char *name
)
488 /*special case when no Table has yet been created we return NULL */
489 if (SHARED_DATA_HASHTABLE
== NULL
)
495 UConverterSharedData
*rc
;
497 rc
= (UConverterSharedData
*)uhash_get(SHARED_DATA_HASHTABLE
, name
);
498 UCNV_DEBUG_LOG("get",name
,rc
);
503 /*frees the string of memory blocks associated with a sharedConverter
504 *if and only if the referenceCounter == 0
506 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to
507 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and
509 * otherwise returns FALSE
510 * @param sharedConverterData The shared data
511 * @return if not it frees all the memory stemming from sharedConverterData and
512 * returns TRUE, otherwise returns FALSE
// Releases the storage behind deadSharedData.
// If referenceCounter is still above zero, nothing is freed and FALSE is traced.
// Otherwise the type unload hook runs when one is set, the attached UDataMemory is
// picked up when dataMemory is not NULL, the struct itself is freed with uprv_free(),
// and TRUE is traced.
// NOTE(review): the return-type line, the return statements and the statement that
// releases the UDataMemory are not visible in this excerpt.
515 ucnv_deleteSharedConverterData(UConverterSharedData
* deadSharedData
)
517 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD
);
518 UTRACE_DATA2(UTRACE_OPEN_CLOSE
, "unload converter %s shared data %p", deadSharedData
->staticData
->name
, deadSharedData
);
520 if (deadSharedData
->referenceCounter
> 0) {
521 UTRACE_EXIT_VALUE((int32_t)FALSE
);
525 if (deadSharedData
->impl
->unload
!= NULL
) {
526 deadSharedData
->impl
->unload(deadSharedData
);
529 if(deadSharedData
->dataMemory
!= NULL
)
531 UDataMemory
*data
= (UDataMemory
*)deadSharedData
->dataMemory
;
535 uprv_free(deadSharedData
);
537 UTRACE_EXIT_VALUE((int32_t)TRUE
);
542 * Load a non-algorithmic converter.
543 * If pkg==NULL, then this function must be called inside umtx_lock(cnvCacheMutex()).
// Obtains shared data for a data-based (non-algorithmic) converter.
//   pArgs - load arguments: name, optional package, onlyTestIsLoadable flag
//   err   - in/out error code; a NULL pointer or an incoming failure takes the early-out branch
// A non-empty pArgs->pkg bypasses the cache: the converter is read straight from file.
// Otherwise the cache is consulted by name:
//   - miss: the converter is loaded from file and, unless pArgs->onlyTestIsLoadable is
//           set, registered in the cache with ucnv_shareConverterData()
//   - hit:  referenceCounter is incremented for the new client
// NOTE(review): the early-return statements and the body of the load-failure branch are
// not visible in this excerpt.
545 UConverterSharedData
*
546 ucnv_load(UConverterLoadArgs
*pArgs
, UErrorCode
*err
) {
547 UConverterSharedData
*mySharedConverterData
;
549 if(err
== NULL
|| U_FAILURE(*err
)) {
553 if(pArgs
->pkg
!= NULL
&& *pArgs
->pkg
!= 0) {
554 /* application-provided converters are not currently cached */
555 return createConverterFromFile(pArgs
, err
);
558 mySharedConverterData
= ucnv_getSharedConverterData(pArgs
->name
);
559 if (mySharedConverterData
== NULL
)
561 /*Not cached, we need to stream it in from file */
562 mySharedConverterData
= createConverterFromFile(pArgs
, err
);
563 if (U_FAILURE (*err
) || (mySharedConverterData
== NULL
))
567 else if (!pArgs
->onlyTestIsLoadable
)
569 /* share it with other library clients */
570 ucnv_shareConverterData(mySharedConverterData
);
575 /* The data for this converter was already in the cache. */
576 /* Update the reference counter on the shared data: one more client */
577 mySharedConverterData
->referenceCounter
++;
580 return mySharedConverterData
;
584 * Unload a non-algorithmic converter.
585 * It must be sharedData->isReferenceCounted
586 * and this function must be called inside umtx_lock(cnvCacheMutex()).
// Drops one reference to sharedData; a NULL pointer is ignored.
// The counter is only decremented while it is above zero. Once it is zero or less and
// the entry is not held by the cache (sharedDataCached == FALSE), the data is deleted
// through ucnv_deleteSharedConverterData(). Cached entries are left in place.
// NOTE(review): the return-type line and closing braces are not visible in this excerpt.
589 ucnv_unload(UConverterSharedData
*sharedData
) {
590 if(sharedData
!= NULL
) {
591 if (sharedData
->referenceCounter
> 0) {
592 sharedData
->referenceCounter
--;
595 if((sharedData
->referenceCounter
<= 0)&&(sharedData
->sharedDataCached
== FALSE
)) {
596 ucnv_deleteSharedConverterData(sharedData
);
// Calls ucnv_unload() on sharedData while holding cnvCacheMutex.
// Does nothing for a NULL pointer or for shared data that is not reference counted.
602 ucnv_unloadSharedDataIfReady(UConverterSharedData
*sharedData
)
604 if(sharedData
!= NULL
&& sharedData
->isReferenceCounted
) {
605 umtx_lock(cnvCacheMutex());
606 ucnv_unload(sharedData
);
607 umtx_unlock(cnvCacheMutex());
// Adds one reference to sharedData while holding cnvCacheMutex.
// Does nothing for a NULL pointer or for shared data that is not reference counted.
612 ucnv_incrementRefCount(UConverterSharedData
*sharedData
)
614 if(sharedData
!= NULL
&& sharedData
->isReferenceCounted
) {
615 umtx_lock(cnvCacheMutex());
616 sharedData
->referenceCounter
++;
617 umtx_unlock(cnvCacheMutex());
622 * *pPieces must be initialized.
623 * The name without options will be copied to pPieces->cnvName.
624 * The locale and options will be copied to pPieces only if present in inName,
625 * otherwise the existing values in pPieces remain.
626 * *pArgs will be set to the pPieces values.
// Splits inName into a converter name plus options separated by UCNV_OPTION_SEP_CHAR.
//   inName  - name string, optionally followed by options
//   pPieces - receives the bare name (cnvName), the locale and the option bits
//   pArgs   - name, locale and options are pointed at or copied from pPieces
//   err     - set to U_ILLEGAL_ARGUMENT_ERROR for an over-long name or locale value
// Name phase: characters up to the first separator are counted against
// UCNV_MAX_CONVERTER_NAME_LENGTH; on overflow cnvName is emptied and the error is set.
// Option phase, per option:
//   locale=   copies the value, bounded by ULOC_FULLNAME_CAPACITY (locale emptied on overflow)
//   version=  clears the UCNV_OPTION_VERSION bits, or replaces them with a single decimal digit
//   swaplfnl  sets UCNV_OPTION_SWAP_LFNL
//   other     skipped up to the next separator
// NOTE(review): several statements are not visible in this excerpt, including the
// return-type line, the declarations of c and len, the copy statements and the returns.
629 parseConverterOptions(const char *inName
,
630 UConverterNamePieces
*pPieces
,
631 UConverterLoadArgs
*pArgs
,
634 char *cnvName
= pPieces
->cnvName
;
639 pArgs
->locale
=pPieces
->locale
;
640 pArgs
->options
=pPieces
->options
;
642 /* copy the converter name itself to cnvName */
643 while((c
=*inName
)!=0 && c
!=UCNV_OPTION_SEP_CHAR
) {
644 if (++len
>=UCNV_MAX_CONVERTER_NAME_LENGTH
) {
645 *err
= U_ILLEGAL_ARGUMENT_ERROR
; /* bad name */
646 pPieces
->cnvName
[0]=0;
653 pArgs
->name
=pPieces
->cnvName
;
655 /* parse options. No more name copying should occur. */
656 while((c
=*inName
)!=0) {
657 if(c
==UCNV_OPTION_SEP_CHAR
) {
661 /* inName is behind an option separator */
662 if(uprv_strncmp(inName
, "locale=", 7)==0) {
663 /* do not modify locale itself in case we have multiple locale options */
664 char *dest
=pPieces
->locale
;
666 /* copy the locale option value */
669 while((c
=*inName
)!=0 && c
!=UCNV_OPTION_SEP_CHAR
) {
672 if(++len
>=ULOC_FULLNAME_CAPACITY
) {
673 *err
=U_ILLEGAL_ARGUMENT_ERROR
; /* bad name */
674 pPieces
->locale
[0]=0;
681 } else if(uprv_strncmp(inName
, "version=", 8)==0) {
682 /* copy the version option value into bits 3..0 of pPieces->options */
686 pArgs
->options
=(pPieces
->options
&=~UCNV_OPTION_VERSION
);
688 } else if((uint8_t)(c
-'0')<10) {
689 pArgs
->options
=pPieces
->options
=(pPieces
->options
&~UCNV_OPTION_VERSION
)|(uint32_t)(c
-'0');
692 } else if(uprv_strncmp(inName
, "swaplfnl", 8)==0) {
694 pArgs
->options
=(pPieces
->options
|=UCNV_OPTION_SWAP_LFNL
);
695 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
697 /* ignore any other options until we define some */
698 while(((c
= *inName
++) != 0) && (c
!= UCNV_OPTION_SEP_CHAR
)) {
707 /*Logic determines if the converter is Algorithmic AND/OR cached
709 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
710 * -Get it from a Hashtable (Data=X, Cached=TRUE)
711 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
712 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
// Resolves converterName to converter shared data, algorithmic or data-based.
//   converterName - requested name, possibly with options; NULL selects the default converter
//   pPieces/pArgs - caller storage for the parsed name pieces and load arguments; when
//                   pPieces is NULL, local stack storage is used instead
//   err           - in/out error code; may also receive U_AMBIGUOUS_ALIAS_WARNING
// Visible flow:
//   1. The pieces are cleared and pArgs initially points at the raw name.
//   2. NULL name: UTF-8 builds return converterData[UCNV_UTF8] directly. Other builds
//      take the name from ucnv_getDefaultName() (U_MISSING_RESOURCE_ERROR if that is NULL)
//      and reuse gDefaultAlgorithmicSharedData without a second algorithmic lookup.
//   3. A name matching UCNV_FAST_IS_UTF8 returns converterData[UCNV_UTF8] directly.
//   4. Any other name has its options parsed off and is then canonicalized through
//      ucnv_io_getConverterName(); if that fails, the parsed input name is used as is.
//   5. Options are parsed again when the canonical name may contain them.
//   6. Unless skipped in step 2, getAlgorithmicTypeFromName() is tried; if no shared data
//      has been found by then, ucnv_load() runs under cnvCacheMutex with nestedLoads set to 1.
// NOTE(review): the err parameter line, the condition guarding the
// U_INTERNAL_PROGRAM_ERROR assignment, and the early-return statements are not visible in
// this excerpt.
714 U_CFUNC UConverterSharedData
*
715 ucnv_loadSharedData(const char *converterName
,
716 UConverterNamePieces
*pPieces
,
717 UConverterLoadArgs
*pArgs
,
719 UConverterNamePieces stackPieces
;
720 UConverterLoadArgs stackArgs
;
721 UConverterSharedData
*mySharedConverterData
= NULL
;
722 UErrorCode internalErrorCode
= U_ZERO_ERROR
;
723 UBool mayContainOption
= TRUE
;
724 UBool checkForAlgorithmic
= TRUE
;
726 if (U_FAILURE (*err
)) {
730 if(pPieces
== NULL
) {
733 * Bad: We may set pArgs pointers to stackPieces fields
734 * which will be invalid after this function returns.
736 *err
= U_INTERNAL_PROGRAM_ERROR
;
739 pPieces
= &stackPieces
;
742 uprv_memset(&stackArgs
, 0, sizeof(stackArgs
));
743 stackArgs
.size
= (int32_t)sizeof(stackArgs
);
747 pPieces
->cnvName
[0] = 0;
748 pPieces
->locale
[0] = 0;
749 pPieces
->options
= 0;
751 pArgs
->name
= converterName
;
752 pArgs
->locale
= pPieces
->locale
;
753 pArgs
->options
= pPieces
->options
;
755 /* In case "name" is NULL we want to open the default converter. */
756 if (converterName
== NULL
) {
757 #if U_CHARSET_IS_UTF8
758 pArgs
->name
= "UTF-8";
759 return (UConverterSharedData
*)converterData
[UCNV_UTF8
];
761 /* Call ucnv_getDefaultName first to query the name from the OS. */
762 pArgs
->name
= ucnv_getDefaultName();
763 if (pArgs
->name
== NULL
) {
764 *err
= U_MISSING_RESOURCE_ERROR
;
767 mySharedConverterData
= (UConverterSharedData
*)gDefaultAlgorithmicSharedData
;
768 checkForAlgorithmic
= FALSE
;
769 mayContainOption
= gDefaultConverterContainsOption
;
770 /* the default converter name is already canonical */
773 else if(UCNV_FAST_IS_UTF8(converterName
)) {
774 /* fastpath for UTF-8 */
775 pArgs
->name
= "UTF-8";
776 return (UConverterSharedData
*)converterData
[UCNV_UTF8
];
779 /* separate the converter name from the options */
780 parseConverterOptions(converterName
, pPieces
, pArgs
, err
);
781 if (U_FAILURE(*err
)) {
782 /* Very bad name used. */
786 /* get the canonical converter name */
787 pArgs
->name
= ucnv_io_getConverterName(pArgs
->name
, &mayContainOption
, &internalErrorCode
);
788 if (U_FAILURE(internalErrorCode
) || pArgs
->name
== NULL
) {
790 * set the input name in case the converter was added
791 * without updating the alias table, or when there is no alias table
793 pArgs
->name
= pPieces
->cnvName
;
794 } else if (internalErrorCode
== U_AMBIGUOUS_ALIAS_WARNING
) {
795 *err
= U_AMBIGUOUS_ALIAS_WARNING
;
799 /* separate the converter name from the options */
800 if(mayContainOption
&& pArgs
->name
!= pPieces
->cnvName
) {
801 parseConverterOptions(pArgs
->name
, pPieces
, pArgs
, err
);
804 /* get the shared data for an algorithmic converter, if it is one */
805 if (checkForAlgorithmic
) {
806 mySharedConverterData
= (UConverterSharedData
*)getAlgorithmicTypeFromName(pArgs
->name
);
808 if (mySharedConverterData
== NULL
)
810 /* it is a data-based converter, get its shared data. */
811 /* Hold the cnvCacheMutex through the whole process of checking the */
812 /* converter data cache, and adding new entries to the cache */
813 /* to prevent other threads from modifying the cache during the */
815 pArgs
->nestedLoads
=1;
818 umtx_lock(cnvCacheMutex());
819 mySharedConverterData
= ucnv_load(pArgs
, err
);
820 umtx_unlock(cnvCacheMutex());
821 if (U_FAILURE (*err
) || (mySharedConverterData
== NULL
))
827 return mySharedConverterData
;
// Opens a converter by name.
//   myUConverter  - caller-provided converter storage, passed on to
//                   ucnv_createConverterFromSharedData()
//   converterName - name to open, passed unchanged to ucnv_loadSharedData()
//   err           - in/out error code; nothing is loaded when it already signals failure
// Loads the shared data with stack-allocated name pieces and load arguments, then builds
// the converter from that shared data. Success traces the converter pointer with the
// status; the error path traces only the status.
// NOTE(review): the return-type line, the remaining arguments of the inner call and the
// return statements are not visible in this excerpt.
831 ucnv_createConverter(UConverter
*myUConverter
, const char *converterName
, UErrorCode
* err
)
833 UConverterNamePieces stackPieces
;
834 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
835 UConverterSharedData
*mySharedConverterData
;
837 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN
);
839 if(U_SUCCESS(*err
)) {
840 UTRACE_DATA1(UTRACE_OPEN_CLOSE
, "open converter %s", converterName
);
842 mySharedConverterData
= ucnv_loadSharedData(converterName
, &stackPieces
, &stackArgs
, err
);
844 myUConverter
= ucnv_createConverterFromSharedData(
845 myUConverter
, mySharedConverterData
,
849 if(U_SUCCESS(*err
)) {
850 UTRACE_EXIT_PTR_STATUS(myUConverter
, *err
);
855 /* exit with error */
856 UTRACE_EXIT_STATUS(*err
);
// Tests whether converterName can be opened, without keeping a converter around.
// Sets stackArgs.onlyTestIsLoadable before loading the shared data, builds a converter in
// a stack-allocated UConverter, then releases the shared data reference through
// ucnv_unloadSharedDataIfReady().
// Returns U_SUCCESS(*err), i.e. whether the whole sequence completed without failure.
// NOTE(review): the return-type line and the remaining arguments of the inner create call
// are not visible in this excerpt.
861 ucnv_canCreateConverter(const char *converterName
, UErrorCode
*err
) {
862 UConverter myUConverter
;
863 UConverterNamePieces stackPieces
;
864 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
865 UConverterSharedData
*mySharedConverterData
;
867 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN
);
869 if(U_SUCCESS(*err
)) {
870 UTRACE_DATA1(UTRACE_OPEN_CLOSE
, "test if can open converter %s", converterName
);
872 stackArgs
.onlyTestIsLoadable
=TRUE
;
873 mySharedConverterData
= ucnv_loadSharedData(converterName
, &stackPieces
, &stackArgs
, err
);
874 ucnv_createConverterFromSharedData(
875 &myUConverter
, mySharedConverterData
,
878 ucnv_unloadSharedDataIfReady(mySharedConverterData
);
881 UTRACE_EXIT_STATUS(*err
);
882 return U_SUCCESS(*err
);
// Opens an algorithmic converter directly by type, with no name lookup.
//   myUConverter - caller-provided converter storage, passed on to
//                  ucnv_createConverterFromSharedData()
//   locale       - stored in the load arguments
//   options      - stored in the load arguments
// The visible checks set U_ILLEGAL_ARGUMENT_ERROR when type is outside
// [0, UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES), when converterData has no entry for it,
// or when that entry is reference counted (i.e. not an algorithmic converter).
// NOTE(review): the return-type line, the type and err parameter lines, the declaration of
// cnv and the return statements are not visible in this excerpt.
886 ucnv_createAlgorithmicConverter(UConverter
*myUConverter
,
888 const char *locale
, uint32_t options
,
891 const UConverterSharedData
*sharedData
;
892 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
894 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC
);
895 UTRACE_DATA1(UTRACE_OPEN_CLOSE
, "open algorithmic converter type %d", (int32_t)type
);
897 if(type
<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
<=type
) {
898 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
899 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR
);
903 sharedData
= converterData
[type
];
904 if(sharedData
== NULL
|| sharedData
->isReferenceCounted
) {
905 /* not a valid type, or not an algorithmic converter */
906 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
907 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR
);
912 stackArgs
.options
= options
;
913 stackArgs
.locale
=locale
;
914 cnv
= ucnv_createConverterFromSharedData(
915 myUConverter
, (UConverterSharedData
*)sharedData
,
918 UTRACE_EXIT_PTR_STATUS(cnv
, *err
);
// Opens a converter from an application-supplied data package.
//   packageName   - package to load from; stored in stackArgs.pkg
//   converterName - converter name, optionally with options
//   err           - in/out error code; an incoming failure is traced before any work
// Steps visible here: clear the stack name pieces, parse name and options with
// parseConverterOptions(), set nestedLoads to 1 and the package, load the shared data
// straight from file with createConverterFromFile() (no cache lookup), then build a new
// converter by passing NULL as the converter storage. If that last step fails, the
// converter is closed with ucnv_close().
// NOTE(review): the return-type line and the return statements are not visible in this excerpt.
923 ucnv_createConverterFromPackage(const char *packageName
, const char *converterName
, UErrorCode
* err
)
925 UConverter
*myUConverter
;
926 UConverterSharedData
*mySharedConverterData
;
927 UConverterNamePieces stackPieces
;
928 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
930 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE
);
932 if(U_FAILURE(*err
)) {
933 UTRACE_EXIT_STATUS(*err
);
937 UTRACE_DATA2(UTRACE_OPEN_CLOSE
, "open converter %s from package %s", converterName
, packageName
);
939 /* first, get the options out of the converterName string */
940 stackPieces
.cnvName
[0] = 0;
941 stackPieces
.locale
[0] = 0;
942 stackPieces
.options
= 0;
943 parseConverterOptions(converterName
, &stackPieces
, &stackArgs
, err
);
944 if (U_FAILURE(*err
)) {
945 /* Very bad name used. */
946 UTRACE_EXIT_STATUS(*err
);
949 stackArgs
.nestedLoads
=1;
950 stackArgs
.pkg
=packageName
;
952 /* open the data, unflatten the shared structure */
953 mySharedConverterData
= createConverterFromFile(&stackArgs
, err
);
955 if (U_FAILURE(*err
)) {
956 UTRACE_EXIT_STATUS(*err
);
960 /* create the actual converter */
961 myUConverter
= ucnv_createConverterFromSharedData(NULL
, mySharedConverterData
, &stackArgs
, err
);
963 if (U_FAILURE(*err
)) {
964 ucnv_close(myUConverter
);
965 UTRACE_EXIT_STATUS(*err
);
969 UTRACE_EXIT_PTR_STATUS(myUConverter
, *err
);
// Initializes a UConverter around already-loaded shared data.
//   myUConverter          - storage to initialize; when NULL, a UConverter is allocated
//                           with uprv_malloc()
//   mySharedConverterData - shared data the converter will reference
//   pArgs                 - load arguments; options are copied into the converter, and
//                           onlyTestIsLoadable selects a reduced initialization
// Failure handling visible here: an incoming failure in *err, or a failed allocation
// (U_MEMORY_ALLOCATION_ERROR), releases the shared data via ucnv_unloadSharedDataIfReady().
// Initialization: the struct is zeroed, then isCopyLocal, sharedData and options are set.
// Unless onlyTestIsLoadable is set, the default callbacks, toUnicodeStatus,
// maxBytesPerUChar and the substitution character fields are filled in from the shared
// and static data. Finally the type open hook runs if present; when it fails outside
// onlyTestIsLoadable mode, the converter is closed with ucnv_close().
// NOTE(review): the return-type line, the err parameter line, the definition of
// isCopyLocal and the early-return statements are not visible in this excerpt.
975 ucnv_createConverterFromSharedData(UConverter
*myUConverter
,
976 UConverterSharedData
*mySharedConverterData
,
977 UConverterLoadArgs
*pArgs
,
982 if(U_FAILURE(*err
)) {
983 ucnv_unloadSharedDataIfReady(mySharedConverterData
);
986 if(myUConverter
== NULL
)
988 myUConverter
= (UConverter
*) uprv_malloc (sizeof (UConverter
));
989 if(myUConverter
== NULL
)
991 *err
= U_MEMORY_ALLOCATION_ERROR
;
992 ucnv_unloadSharedDataIfReady(mySharedConverterData
);
1000 /* initialize the converter */
1001 uprv_memset(myUConverter
, 0, sizeof(UConverter
));
1002 myUConverter
->isCopyLocal
= isCopyLocal
;
1003 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */
1004 myUConverter
->sharedData
= mySharedConverterData
;
1005 myUConverter
->options
= pArgs
->options
;
1006 if(!pArgs
->onlyTestIsLoadable
) {
1007 myUConverter
->preFromUFirstCP
= U_SENTINEL
;
1008 myUConverter
->fromCharErrorBehaviour
= UCNV_TO_U_DEFAULT_CALLBACK
;
1009 myUConverter
->fromUCharErrorBehaviour
= UCNV_FROM_U_DEFAULT_CALLBACK
;
1010 myUConverter
->toUnicodeStatus
= mySharedConverterData
->toUnicodeStatus
;
1011 myUConverter
->maxBytesPerUChar
= mySharedConverterData
->staticData
->maxBytesPerChar
;
1012 myUConverter
->subChar1
= mySharedConverterData
->staticData
->subChar1
;
1013 myUConverter
->subCharLen
= mySharedConverterData
->staticData
->subCharLen
;
1014 myUConverter
->subChars
= (uint8_t *)myUConverter
->subUChars
;
1015 uprv_memcpy(myUConverter
->subChars
, mySharedConverterData
->staticData
->subChar
, myUConverter
->subCharLen
);
1016 myUConverter
->toUCallbackReason
= UCNV_ILLEGAL
; /* default reason to invoke (*fromCharErrorBehaviour) */
1019 if(mySharedConverterData
->impl
->open
!= NULL
) {
1020 mySharedConverterData
->impl
->open(myUConverter
, pArgs
, err
);
1021 if(U_FAILURE(*err
) && !pArgs
->onlyTestIsLoadable
) {
1022 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */
1023 ucnv_close(myUConverter
);
1028 return myUConverter
;
1031 /*Frees all shared immutable objects that aren't referred to (reference count = 0)
// Cache flush routine (named ucnv_flushCache in its own trace message).
// Removes every entry of SHARED_DATA_HASHTABLE whose referenceCounter is 0 and deletes
// its shared data; returns tableDeletedNum.
// The default converter is flushed first via u_flushDefaultConverter(). If no hash table
// exists yet, a value of 0 is traced and the function takes its early-exit branch.
// The scan runs with cnvCacheMutex held. Each removed entry has sharedDataCached cleared
// before ucnv_deleteSharedConverterData() is called. The do-while condition
// (++i == 1 && remaining > 0) allows a second pass; the existing comment explains that
// deleting a delta converter can bring its base converter down to zero references.
// NOTE(review): the function-name line, the declaration of pos, the loop opener and the
// counter updates are not visible in this excerpt.
1033 U_CAPI
int32_t U_EXPORT2
1036 UConverterSharedData
*mySharedData
= NULL
;
1038 int32_t tableDeletedNum
= 0;
1039 const UHashElement
*e
;
1040 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/
1041 int32_t i
, remaining
;
1043 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE
);
1045 /* Close the default converter without creating a new one so that everything will be flushed. */
1046 u_flushDefaultConverter();
1048 /*if shared data hasn't even been lazy evaluated yet
1051 if (SHARED_DATA_HASHTABLE
== NULL
) {
1052 UTRACE_EXIT_VALUE((int32_t)0);
1056 /*creates an enumeration to iterate through every element in the
1059 * Synchronization: holding cnvCacheMutex will prevent any other thread from
1060 * accessing or modifying the hash table during the iteration.
1061 * The reference count of an entry may be decremented by
1062 * ucnv_close while the iteration is in process, but this is
1063 * benign. It can't be incremented (in ucnv_createConverter())
1064 * because the sequence of looking up in the cache + incrementing
1065 * is protected by cnvCacheMutex.
1067 umtx_lock(cnvCacheMutex());
1069 * double loop: A delta/extension-only converter has a pointer to its base table's
1070 * shared data; the first iteration of the outer loop may see the delta converter
1071 * before the base converter, and unloading the delta converter may get the base
1072 * converter's reference counter down to 0.
1078 while ((e
= uhash_nextElement (SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
1080 mySharedData
= (UConverterSharedData
*) e
->value
.pointer
;
1081 /*deletes only if reference counter == 0 */
1082 if (mySharedData
->referenceCounter
== 0)
1086 UCNV_DEBUG_LOG("del",mySharedData
->staticData
->name
,mySharedData
);
1088 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
1089 mySharedData
->sharedDataCached
= FALSE
;
1090 ucnv_deleteSharedConverterData (mySharedData
);
1095 } while(++i
== 1 && remaining
> 0);
1096 umtx_unlock(cnvCacheMutex());
1098 UTRACE_DATA1(UTRACE_INFO
, "ucnv_flushCache() exits with %d converters remaining", remaining
);
1100 UTRACE_EXIT_VALUE(tableDeletedNum
);
1101 return tableDeletedNum
;
1104 /* available converters list --------------------------------------------------- */
// Init-once worker (run through umtx_initOnce in haveAvailableConverterList) that builds
// gAvailableConverters.
//   errCode - receives enumeration failures or U_MEMORY_ALLOCATION_ERROR
// Steps visible here: register cleanup; enumerate all converter names with
// ucnv_openAllNames(); allocate one pointer slot per enumerated name; open and close the
// default converter once; then walk the enumeration and keep only the names for which
// ucnv_canCreateConverter() succeeds. Per-name errors go into a local status and are not
// propagated to errCode. The enumeration is closed at the end.
// NOTE(review): the stored pointers are the strings returned by uenum_next(); their
// lifetime after uenum_close() cannot be confirmed from this excerpt. The early-return
// statements are not visible either.
1106 static void U_CALLCONV
initAvailableConvertersList(UErrorCode
&errCode
) {
1107 U_ASSERT(gAvailableConverterCount
== 0);
1108 U_ASSERT(gAvailableConverters
== NULL
);
1110 ucnv_enableCleanup();
1111 UEnumeration
*allConvEnum
= ucnv_openAllNames(&errCode
);
1112 int32_t allConverterCount
= uenum_count(allConvEnum
, &errCode
);
1113 if (U_FAILURE(errCode
)) {
1117 /* We can't have more than "*converterTable" converters to open */
1118 gAvailableConverters
= (const char **) uprv_malloc(allConverterCount
* sizeof(char*));
1119 if (!gAvailableConverters
) {
1120 errCode
= U_MEMORY_ALLOCATION_ERROR
;
1124 /* Open the default converter to make sure that it has first dibs in the hash table. */
1125 UErrorCode localStatus
= U_ZERO_ERROR
;
1126 UConverter tempConverter
;
1127 ucnv_close(ucnv_createConverter(&tempConverter
, NULL
, &localStatus
));
1129 gAvailableConverterCount
= 0;
1131 for (int32_t idx
= 0; idx
< allConverterCount
; idx
++) {
1132 localStatus
= U_ZERO_ERROR
;
1133 const char *converterName
= uenum_next(allConvEnum
, NULL
, &localStatus
);
1134 if (ucnv_canCreateConverter(converterName
, &localStatus
)) {
1135 gAvailableConverters
[gAvailableConverterCount
++] = converterName
;
1139 uenum_close(allConvEnum
);
// Ensures the available-converter list has been built by running
// initAvailableConvertersList() through umtx_initOnce with *pErrorCode.
// Returns U_SUCCESS(*pErrorCode).
1143 static UBool
haveAvailableConverterList(UErrorCode
*pErrorCode
) {
1144 umtx_initOnce(gAvailableConvertersInitOnce
, &initAvailableConvertersList
, *pErrorCode
);
1145 return U_SUCCESS(*pErrorCode
);
// Returns gAvailableConverterCount once the available-converter list has been built
// successfully.
// NOTE(review): the return-type line and the value returned on failure are not visible
// in this excerpt.
1149 ucnv_bld_countAvailableConverters(UErrorCode
*pErrorCode
) {
1150 if (haveAvailableConverterList(pErrorCode
)) {
1151 return gAvailableConverterCount
;
// Returns the n-th name in the available-converter list once the list has been built
// successfully. An index at or beyond gAvailableConverterCount sets
// U_INDEX_OUTOFBOUNDS_ERROR instead.
// NOTE(review): the value returned on the failure paths is not visible in this excerpt.
1156 U_CFUNC
const char *
1157 ucnv_bld_getAvailableConverter(uint16_t n
, UErrorCode
*pErrorCode
) {
1158 if (haveAvailableConverterList(pErrorCode
)) {
1159 if (n
< gAvailableConverterCount
) {
1160 return gAvailableConverters
[n
];
1162 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
1167 /* default converter name --------------------------------------------------- */
1169 #if !U_CHARSET_IS_UTF8
1171 Copy the canonical converter name.
1172 ucnv_getDefaultName must be thread safe, which can call this function.
1174 ucnv_setDefaultName calls this function and it doesn't have to be
1175 thread safe because there is no reliable/safe way to reset the
1176 converter in use in all threads. If you did reset the converter, you
1177 would not be sure that retrieving a default converter for one string
1178 would be the same type of default converter for a successive string.
1179 Since the name is returned via ucnv_getDefaultName without copying,
1180 you shouldn't be modifying or deleting the string from a separate thread.
1183 internalSetName(const char *name
, UErrorCode
*status
) {
/*
 * Records `name` as the default converter name.
 * The name is copied into gDefaultConverterNameBuffer and then published
 * through gDefaultConverterName, together with the algorithmic shared data
 * looked up for it and the flag telling whether the name contains an
 * option separator. All of these globals are written while cnvCacheMutex()
 * is held.
 *
 * name:   NUL-terminated converter name; it is passed to uprv_strlen(), so it must not be NULL.
 * status: in/out error code; receives errors from parseConverterOptions().
 */
1184 UConverterNamePieces stackPieces
;
1185 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
1186 int32_t length
=(int32_t)(uprv_strlen(name
));
1187 UBool containsOption
= (UBool
)(uprv_strchr(name
, UCNV_OPTION_SEP_CHAR
) != NULL
);
1188 const UConverterSharedData
*algorithmicSharedData
;
1190 stackArgs
.name
= name
;
/* Only names that carry options need to be parsed.
 * NOTE(review): presumably parseConverterOptions() repoints stackArgs.name
 * at the bare converter name in stackPieces - confirm. */
1191 if(containsOption
) {
1192 stackPieces
.cnvName
[0] = 0;
1193 stackPieces
.locale
[0] = 0;
1194 stackPieces
.options
= 0;
1195 parseConverterOptions(name
, &stackPieces
, &stackArgs
, status
);
1196 if(U_FAILURE(*status
)) {
1200 algorithmicSharedData
= getAlgorithmicTypeFromName(stackArgs
.name
);
1202 umtx_lock(cnvCacheMutex());
1204 gDefaultAlgorithmicSharedData
= algorithmicSharedData
;
1205 gDefaultConverterContainsOption
= containsOption
;
/* NOTE(review): length is not checked against the buffer size in this
 * function. ucnv_getDefaultName() verifies uprv_strlen(name) against
 * sizeof(gDefaultConverterNameBuffer) before calling; other callers must
 * guarantee that the name fits, including the terminating NUL. */
1206 uprv_memcpy(gDefaultConverterNameBuffer
, name
, length
);
1207 gDefaultConverterNameBuffer
[length
]=0;
1209 /* gDefaultConverterName MUST be the last global var set by this function. */
1210 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */
1211 // But there is nothing here preventing that from being reordered, either by the compiler
1212 // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough.
1214 gDefaultConverterName
= gDefaultConverterNameBuffer
;
1216 ucnv_enableCleanup();
1218 umtx_unlock(cnvCacheMutex());
1223 * In order to be really thread-safe, the get function would have to take
1224 * a buffer parameter and copy the current string inside a mutex block.
1225 * This implementation only tries to be really thread-safe while
1227 * It assumes that setting a pointer is atomic.
/*
 * Returns the name of the default converter.
 *
 * The cached gDefaultConverterName is read while holding cnvCacheMutex().
 * The initialization path visible below obtains a candidate from
 * uprv_getDefaultCodepage(), opens a converter for it and takes the name
 * reported by ucnv_getName(); a hard-coded fallback name is used when that
 * does not yield a usable name. The result is stored with internalSetName().
 *
 * The U_CHARSET_IS_UTF8 configuration takes a separate preprocessor branch.
 */
1230 U_CAPI
const char* U_EXPORT2
1231 ucnv_getDefaultName() {
1232 #if U_CHARSET_IS_UTF8
1235 /* local variable to be thread-safe */
1239 Concurrent calls to ucnv_getDefaultName must be thread safe,
1240 but ucnv_setDefaultName is not thread safe.
1243 icu::Mutex
lock(cnvCacheMutex());
1244 name
= gDefaultConverterName
;
1247 UErrorCode errorCode
= U_ZERO_ERROR
;
1248 UConverter
*cnv
= NULL
;
1250 name
= uprv_getDefaultCodepage();
1252 /* if the name is there, test it out and get the canonical name with options */
1254 cnv
= ucnv_open(name
, &errorCode
);
1255 if(U_SUCCESS(errorCode
) && cnv
!= NULL
) {
1256 name
= ucnv_getName(cnv
, &errorCode
);
/* The name is unusable when it is missing or empty, when opening or naming
 * the converter failed, or when it would not fit into
 * gDefaultConverterNameBuffer together with its terminating NUL. */
1260 if(name
== NULL
|| name
[0] == 0
1261 || U_FAILURE(errorCode
) || cnv
== NULL
1262 || uprv_strlen(name
)>=sizeof(gDefaultConverterNameBuffer
))
1264 /* Panic time, let's use a fallback. */
1265 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
1267 /* there is no 'algorithmic' converter for EBCDIC */
1268 #elif U_PLATFORM == U_PF_OS390
1269 name
= "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING
;
1271 name
= "ibm-37_P100-1995";
/* internalSetName() copies the string, so the name stays usable after the
 * temporary converter is gone. */
1275 internalSetName(name
, &errorCode
);
1277 /* The close may make the current name go away. */
1285 #if U_CHARSET_IS_UTF8
/* Variant compiled when U_CHARSET_IS_UTF8 is set: the body is empty, so the
 * requested name is ignored. */
1286 U_CAPI
void U_EXPORT2
ucnv_setDefaultName(const char *) {}
1289 This function is not thread safe, and it can't be thread safe.
1290 See internalSetName or the API reference for details.
/*
 * Changes the default converter name.
 *
 * converterName: new default name. NULL clears gDefaultConverterName.
 *                For a non-NULL name a
 *                converter is opened to validate it and to obtain the
 *                canonical name from ucnv_getName(); only when both steps
 *                succeed is the name stored with internalSetName().
 *
 * The cached default converter is flushed with u_flushDefaultConverter().
 * No lock is taken in this function when gDefaultConverterName is cleared.
 */
1292 U_CAPI
void U_EXPORT2
1293 ucnv_setDefaultName(const char *converterName
) {
1294 if(converterName
==NULL
) {
1295 /* reset to the default codepage */
1296 gDefaultConverterName
=NULL
;
1298 UErrorCode errorCode
= U_ZERO_ERROR
;
1299 UConverter
*cnv
= NULL
;
1300 const char *name
= NULL
;
1302 /* if the name is there, test it out and get the canonical name with options */
1303 cnv
= ucnv_open(converterName
, &errorCode
);
1304 if(U_SUCCESS(errorCode
) && cnv
!= NULL
) {
1305 name
= ucnv_getName(cnv
, &errorCode
);
1308 if(U_SUCCESS(errorCode
) && name
!=NULL
) {
1309 internalSetName(name
, &errorCode
);
1311 /* else this converter is bad to use. Don't change it to a bad value. */
1313 /* The close may make the current name go away. */
1316 /* reset the converter cache */
1317 u_flushDefaultConverter();
1322 /* data swapping ------------------------------------------------------------ */
1324 /* most of this might more properly belong in ucnvmbcs.c, but that file is so large */
1326 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Swaps an ICU .cnv conversion table (data format "cnvt", format version
 * 6.2 or later) with the given UDataSwapper.
 *
 * Steps visible below: swap the udata header, validate the data format,
 * swap the UConverterStaticData (structSize, codepage, name) and then, for
 * conversionType UCNV_MBCS only, swap the _MBCSHeader, the base tables and
 * the extension tables. Any other conversionType is rejected with
 * U_UNSUPPORTED_ERROR.
 *
 * ds:         swapper; its readUInt32, swapArray16, swapArray32 and
 *             swapInvChars callbacks are used.
 * inData:     start of the input data, beginning with the udata header.
 * length:     number of input bytes.
 *             NOTE(review): the "0<=length" / "length>=0" guards suggest
 *             that a negative length requests preflighting - confirm.
 * outData:    output buffer; the block copies are skipped when the input
 *             and output pointers are equal.
 * pErrorCode: in/out error code. U_UNSUPPORTED_ERROR is set for unknown
 *             formats, versions or output types; U_INDEX_OUTOFBOUNDS_ERROR
 *             is set when the input is too short.
 *
 * Returns headerSize + staticDataSize + size.
 */
1328 U_CAPI
int32_t U_EXPORT2
1329 ucnv_swap(const UDataSwapper
*ds
,
1330 const void *inData
, int32_t length
, void *outData
,
1331 UErrorCode
*pErrorCode
) {
1332 const UDataInfo
*pInfo
;
1335 const uint8_t *inBytes
;
1338 uint32_t offset
, count
, staticDataSize
;
1341 const UConverterStaticData
*inStaticData
;
1342 UConverterStaticData
*outStaticData
;
1344 const _MBCSHeader
*inMBCSHeader
;
1345 _MBCSHeader
*outMBCSHeader
;
1346 _MBCSHeader mbcsHeader
;
1347 uint32_t mbcsHeaderLength
;
1348 UBool noFromU
=FALSE
;
1352 int32_t maxFastUChar
, mbcsIndexLength
;
1354 const int32_t *inExtIndexes
;
1357 /* udata_swapDataHeader checks the arguments */
1358 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
1359 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1363 /* check data format and format version */
1364 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
1366 pInfo
->dataFormat
[0]==0x63 && /* dataFormat="cnvt" */
1367 pInfo
->dataFormat
[1]==0x6e &&
1368 pInfo
->dataFormat
[2]==0x76 &&
1369 pInfo
->dataFormat
[3]==0x74 &&
1370 pInfo
->formatVersion
[0]==6 &&
1371 pInfo
->formatVersion
[1]>=2
1373 udata_printError(ds
, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
1374 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
1375 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
1376 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
1377 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1381 inBytes
=(const uint8_t *)inData
+headerSize
;
1382 outBytes
=(uint8_t *)outData
+headerSize
;
1384 /* read the initial UConverterStaticData structure after the UDataInfo header */
1385 inStaticData
=(const UConverterStaticData
*)inBytes
;
1386 outStaticData
=(UConverterStaticData
*)outBytes
;
1389 staticDataSize
=ds
->readUInt32(inStaticData
->structSize
);
1392 if( length
<(int32_t)sizeof(UConverterStaticData
) ||
1393 (uint32_t)length
<(staticDataSize
=ds
->readUInt32(inStaticData
->structSize
))
1395 udata_printError(ds
, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
1397 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1403 /* swap the static data */
1404 if(inStaticData
!=outStaticData
) {
1405 uprv_memcpy(outStaticData
, inStaticData
, staticDataSize
);
1408 ds
->swapArray32(ds
, &inStaticData
->structSize
, 4,
1409 &outStaticData
->structSize
, pErrorCode
);
1410 ds
->swapArray32(ds
, &inStaticData
->codepage
, 4,
1411 &outStaticData
->codepage
, pErrorCode
);
1413 ds
->swapInvChars(ds
, inStaticData
->name
, (int32_t)uprv_strlen(inStaticData
->name
),
1414 outStaticData
->name
, pErrorCode
);
1415 if(U_FAILURE(*pErrorCode
)) {
1416 udata_printError(ds
, "ucnv_swap(): error swapping converter name\n");
1421 inBytes
+=staticDataSize
;
1422 outBytes
+=staticDataSize
;
1424 length
-=(int32_t)staticDataSize
;
1427 /* check for supported conversionType values */
1428 if(inStaticData
->conversionType
==UCNV_MBCS
) {
1429 /* swap MBCS data */
1430 inMBCSHeader
=(const _MBCSHeader
*)inBytes
;
1431 outMBCSHeader
=(_MBCSHeader
*)outBytes
;
1433 if(0<=length
&& length
<(int32_t)sizeof(_MBCSHeader
)) {
1434 udata_printError(ds
, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1436 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1439 if(inMBCSHeader
->version
[0]==4 && inMBCSHeader
->version
[1]>=1) {
1440 mbcsHeaderLength
=MBCS_HEADER_V4_LENGTH
;
1441 } else if(inMBCSHeader
->version
[0]==5 && inMBCSHeader
->version
[1]>=3 &&
1442 ((mbcsHeader
.options
=ds
->readUInt32(inMBCSHeader
->options
))&
1443 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK
)==0
1445 mbcsHeaderLength
=mbcsHeader
.options
&MBCS_OPT_LENGTH_MASK
;
1446 noFromU
=(UBool
)((mbcsHeader
.options
&MBCS_OPT_NO_FROM_U
)!=0);
1448 udata_printError(ds
, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
1449 inMBCSHeader
->version
[0], inMBCSHeader
->version
[1]);
1450 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1454 uprv_memcpy(mbcsHeader
.version
, inMBCSHeader
->version
, 4);
1455 mbcsHeader
.countStates
= ds
->readUInt32(inMBCSHeader
->countStates
);
1456 mbcsHeader
.countToUFallbacks
= ds
->readUInt32(inMBCSHeader
->countToUFallbacks
);
1457 mbcsHeader
.offsetToUCodeUnits
= ds
->readUInt32(inMBCSHeader
->offsetToUCodeUnits
);
1458 mbcsHeader
.offsetFromUTable
= ds
->readUInt32(inMBCSHeader
->offsetFromUTable
);
1459 mbcsHeader
.offsetFromUBytes
= ds
->readUInt32(inMBCSHeader
->offsetFromUBytes
);
1460 mbcsHeader
.flags
= ds
->readUInt32(inMBCSHeader
->flags
);
1461 mbcsHeader
.fromUBytesLength
= ds
->readUInt32(inMBCSHeader
->fromUBytesLength
);
1462 /* mbcsHeader.options have been read above */
/* flags packs two values: the low 8 bits are the output type, the remaining
 * upper bits are the byte offset of the extension data (used below as
 * inBytes+extOffset). */
1464 extOffset
=(int32_t)(mbcsHeader
.flags
>>8);
1465 outputType
=(uint8_t)mbcsHeader
.flags
;
1466 if(noFromU
&& outputType
==MBCS_OUTPUT_1
) {
1467 udata_printError(ds
, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
1468 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1472 /* make sure that the output type is known */
1473 switch(outputType
) {
1478 case MBCS_OUTPUT_3_EUC
:
1479 case MBCS_OUTPUT_4_EUC
:
1480 case MBCS_OUTPUT_2_SISO
:
1481 case MBCS_OUTPUT_EXT_ONLY
:
1485 udata_printError(ds
, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
1487 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1491 /* calculate the length of the MBCS data */
1494 * utf8Friendly MBCS files (mbcsHeader.version 4.3)
1495 * contain an additional mbcsIndex table:
1496 * uint16_t[(maxFastUChar+1)>>6];
1497 * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
1501 if( outputType
!=MBCS_OUTPUT_EXT_ONLY
&& outputType
!=MBCS_OUTPUT_1
&&
1502 mbcsHeader
.version
[1]>=3 && (maxFastUChar
=mbcsHeader
.version
[2])!=0
1504 maxFastUChar
=(maxFastUChar
<<8)|0xff;
1505 mbcsIndexLength
=((maxFastUChar
+1)>>6)*2; /* number of bytes */
1509 size
=(int32_t)(mbcsHeader
.offsetFromUBytes
+mbcsIndexLength
);
1511 size
+=(int32_t)mbcsHeader
.fromUBytesLength
;
1514 /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
1517 /* there is extension data after the base data, see ucnv_ext.h */
1518 if(length
>=0 && length
<(extOffset
+UCNV_EXT_INDEXES_MIN_LENGTH
*4)) {
1519 udata_printError(ds
, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
1521 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1525 inExtIndexes
=(const int32_t *)(inBytes
+extOffset
);
1526 size
=extOffset
+udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_SIZE
]);
1531 udata_printError(ds
, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1533 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1537 /* copy the data for inaccessible bytes */
1538 if(inBytes
!=outBytes
) {
1539 uprv_memcpy(outBytes
, inBytes
, size
);
1542 /* swap the MBCSHeader, except for the version field */
1543 count
=mbcsHeaderLength
*4;
1544 ds
->swapArray32(ds
, &inMBCSHeader
->countStates
, count
-4,
1545 &outMBCSHeader
->countStates
, pErrorCode
);
1547 if(outputType
==MBCS_OUTPUT_EXT_ONLY
) {
1549 * extension-only file,
1550 * contains a base name instead of normal base table data
1553 /* swap the base name, between the header and the extension data */
1554 const char *inBaseName
=(const char *)inBytes
+count
;
1555 char *outBaseName
=(char *)outBytes
+count
;
1556 ds
->swapInvChars(ds
, inBaseName
, (int32_t)uprv_strlen(inBaseName
),
1557 outBaseName
, pErrorCode
);
1559 /* normal file with base table data */
1561 /* swap the state table, 1kB per state */
1563 count
=mbcsHeader
.countStates
*1024;
1564 ds
->swapArray32(ds
, inBytes
+offset
, (int32_t)count
,
1565 outBytes
+offset
, pErrorCode
);
1567 /* swap the toUFallbacks[] */
1569 count
=mbcsHeader
.countToUFallbacks
*8;
1570 ds
->swapArray32(ds
, inBytes
+offset
, (int32_t)count
,
1571 outBytes
+offset
, pErrorCode
);
1573 /* swap the unicodeCodeUnits[] */
1574 offset
=mbcsHeader
.offsetToUCodeUnits
;
1575 count
=mbcsHeader
.offsetFromUTable
-offset
;
1576 ds
->swapArray16(ds
, inBytes
+offset
, (int32_t)count
,
1577 outBytes
+offset
, pErrorCode
);
1579 /* offset to the stage 1 table, independent of the outputType */
1580 offset
=mbcsHeader
.offsetFromUTable
;
1582 if(outputType
==MBCS_OUTPUT_1
) {
1583 /* SBCS: swap the fromU tables, all 16 bits wide */
1584 count
=(mbcsHeader
.offsetFromUBytes
-offset
)+mbcsHeader
.fromUBytesLength
;
1585 ds
->swapArray16(ds
, inBytes
+offset
, (int32_t)count
,
1586 outBytes
+offset
, pErrorCode
);
1588 /* otherwise: swap the stage tables separately */
1590 /* stage 1 table: uint16_t[0x440 or 0x40] */
1591 if(inStaticData
->unicodeMask
&UCNV_HAS_SUPPLEMENTARY
) {
1592 count
=0x440*2; /* for all of Unicode */
1594 count
=0x40*2; /* only BMP */
1596 ds
->swapArray16(ds
, inBytes
+offset
, (int32_t)count
,
1597 outBytes
+offset
, pErrorCode
);
1599 /* stage 2 table: uint32_t[] */
1601 count
=mbcsHeader
.offsetFromUBytes
-offset
;
1602 ds
->swapArray32(ds
, inBytes
+offset
, (int32_t)count
,
1603 outBytes
+offset
, pErrorCode
);
1605 /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
1606 offset
=mbcsHeader
.offsetFromUBytes
;
1607 count
= noFromU
? 0 : mbcsHeader
.fromUBytesLength
;
1608 switch(outputType
) {
1610 case MBCS_OUTPUT_3_EUC
:
1611 case MBCS_OUTPUT_2_SISO
:
1612 ds
->swapArray16(ds
, inBytes
+offset
, (int32_t)count
,
1613 outBytes
+offset
, pErrorCode
);
1616 ds
->swapArray32(ds
, inBytes
+offset
, (int32_t)count
,
1617 outBytes
+offset
, pErrorCode
);
1620 /* just uint8_t[], nothing to swap */
1624 if(mbcsIndexLength
!=0) {
1626 count
=mbcsIndexLength
;
1627 ds
->swapArray16(ds
, inBytes
+offset
, (int32_t)count
,
1628 outBytes
+offset
, pErrorCode
);
1634 /* swap the extension data */
1636 outBytes
+=extOffset
;
/* From here on `length` no longer holds the input byte count: it is reused
 * for the element counts read from the extension indexes, and multiplied by
 * the element width to get the byte count for each swap call. */
1638 /* swap toUTable[] */
1639 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_TO_U_INDEX
]);
1640 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_TO_U_LENGTH
]);
1641 ds
->swapArray32(ds
, inBytes
+offset
, length
*4, outBytes
+offset
, pErrorCode
);
1643 /* swap toUUChars[] */
1644 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_TO_U_UCHARS_INDEX
]);
1645 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_TO_U_UCHARS_LENGTH
]);
1646 ds
->swapArray16(ds
, inBytes
+offset
, length
*2, outBytes
+offset
, pErrorCode
);
1648 /* swap fromUTableUChars[] */
1649 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_UCHARS_INDEX
]);
1650 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_LENGTH
]);
1651 ds
->swapArray16(ds
, inBytes
+offset
, length
*2, outBytes
+offset
, pErrorCode
);
1653 /* swap fromUTableValues[] */
1654 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_VALUES_INDEX
]);
1655 /* same length as for fromUTableUChars[] */
1656 ds
->swapArray32(ds
, inBytes
+offset
, length
*4, outBytes
+offset
, pErrorCode
);
1658 /* no need to swap fromUBytes[] */
1660 /* swap fromUStage12[] */
1661 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_12_INDEX
]);
1662 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_12_LENGTH
]);
1663 ds
->swapArray16(ds
, inBytes
+offset
, length
*2, outBytes
+offset
, pErrorCode
);
1665 /* swap fromUStage3[] */
1666 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_3_INDEX
]);
1667 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_3_LENGTH
]);
1668 ds
->swapArray16(ds
, inBytes
+offset
, length
*2, outBytes
+offset
, pErrorCode
);
1670 /* swap fromUStage3b[] */
1671 offset
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_3B_INDEX
]);
1672 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_FROM_U_STAGE_3B_LENGTH
]);
1673 ds
->swapArray32(ds
, inBytes
+offset
, length
*4, outBytes
+offset
, pErrorCode
);
1675 /* swap indexes[] */
1676 length
=udata_readInt32(ds
, inExtIndexes
[UCNV_EXT_INDEXES_LENGTH
]);
1677 ds
->swapArray32(ds
, inBytes
, length
*4, outBytes
, pErrorCode
);
1681 udata_printError(ds
, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
1682 inStaticData
->conversionType
);
1683 *pErrorCode
=U_UNSUPPORTED_ERROR
;
/* total: udata header + static data + MBCS data (`size` as computed above) */
1687 return headerSize
+(int32_t)staticDataSize
+size
;
1690 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */