2 ******************************************************************************
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * initializes global variables and defines functions pertaining to file
12 * access, and name resolution aspect of the library.
16 * created on: 1999nov22
17 * created by: Markus W. Scherer
19 * Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 * with aliases for converter names.
22 * Date Name Description
23 * 11/22/1999 markus Created
24 * 06/28/2002 grhoten Major overhaul of the converter alias design.
25 * Now an alias can map to different converters
26 * depending on the specified standard.
27 *******************************************************************************
30 #include "unicode/utypes.h"
32 #if !UCONFIG_NO_CONVERSION
34 #include "unicode/putil.h"
35 #include "unicode/ucnv.h" /* This file implements ucnv_xXXX() APIs */
36 #include "unicode/udata.h"
47 /* Format of cnvalias.icu -----------------------------------------------------
49 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
50 * This binary form contains several tables. All indexes are to uint16_t
51 * units, and not to the bytes (uint8_t units). Addressing everything on
52 * 16-bit boundaries allows us to store more information with small index
53 * numbers, which are also 16-bit in size. The majority of the table (except
54 * the string table) are 16-bit numbers.
56 * First there is the size of the Table of Contents (TOC). The TOC
57 * entries contain the size of each section. In order to find the offset
58 * of a section you just need to sum up the sizes of the previous sections.
59 * The TOC length and entries are an array of uint32_t values.
60 * The first section after the TOC starts immediately after the TOC.
62 * 1) This section contains a list of converters. This list contains indexes
63 * into the string table for the converter name. The index of this list is
64 * also used by other sections, which are mentioned later on.
65 * This list is not sorted.
67 * 2) This section contains a list of tags. This list contains indexes
68 * into the string table for the tag name. The index of this list is
69 * also used by other sections, which are mentioned later on.
70 * This list is in priority order of standards.
72 * 3) This section contains a list of sorted unique aliases. This
73 * list contains indexes into the string table for the alias name. The
74 * index of this list is also used by other sections, like the 4th section.
75 * The index for the 3rd and 4th section is used to get the
76 * alias -> converter name mapping. Section 3 and 4 form a two column table.
78 * 4) This section contains a list of mapped converter names. Consider this
79 * as a table that maps the 3rd section to the 1st section. This list contains
80 * indexes into the 1st section. The index of this list is the same index in
81 * the 3rd section. There is also some extra information in the high bits of
82 * each converter index in this table. Currently it's only used to say that
83 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
84 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
85 * the predigested form of the 5th section so that an alias lookup can be fast.
87 * 5) This section contains a 2D array with indexes to the 6th section. This
88 * section is the full form of all alias mappings. The column index is the
89 * index into the converter list (column header). The row index is the index
90 * to tag list (row header). This 2D array is the top part of a 3D array. The
91 * third dimension is in the 6th section.
93 * 6) This is a blob of variable-length arrays. Each array starts with a size,
94 * and is followed by indexes to alias names in the string table. This is
95 * the third dimension to the section 5. No other section should be referencing
98 * 7) Reserved at this time (There is no information). This _usually_ has a
99 * size of 0. Future versions may add more information here.
101 * 8) This is the string table. All strings are indexed on an even address.
102 * There are two reasons for this. First many chip architectures locate strings
103 * faster on even address boundaries. Second, since all indexes are 16-bit
104 * numbers, this string table can be 128KB in size instead of 64KB when we
105 * only have strings starting on an even address.
108 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
109 * has a unique alias among all converters. That same alias can
110 * be mentioned in other standards on different converters,
111 * but only one alias per tag can be unique.
114 * Converter Names (Usually in TR22 form)
115 * -------------------------------------------.
121 * ------------------------------------------/ |
129 * -------------------------------------------
133 * Here is what it really looks like. It's like swiss cheese.
134 * There are holes. Some converters aren't recognized by
135 * a standard, or they are really old converters that the
136 * standard doesn't recognize anymore.
138 * Converter Names (Usually in TR22 form)
139 * -------------------------------------------.
140 * T /##########################################/|
142 * g / # ## ## ### # ### ### ### #/
143 * s / # ##### #### ## ## #/#
144 * / ### # # ## # # # ### # # #/##
145 * ------------------------------------------/# #
146 * A |### # # ## # # # ### # # #|# #
147 * l |# # # # # ## # #|# #
157 * Used by the UEnumeration API
159 typedef struct UAliasContext
{
164 static const char DATA_NAME
[] = "cnvalias";
165 static const char DATA_TYPE
[] = "icu";
167 static UDataMemory
*gAliasData
=NULL
;
171 converterListIndex
=1,
174 untaggedConvArrayIndex
=4,
175 taggedAliasArrayIndex
=5,
176 taggedAliasListsIndex
=6,
179 minTocLength
=8, /* min. tocLength in the file, does not count the tocLengthIndex! */
180 offsetsCount
/* length of the swapper's temporary offsets[] */
183 static const uint16_t *gConverterList
= NULL
;
184 static const uint16_t *gTagList
= NULL
;
185 static const uint16_t *gAliasList
= NULL
;
186 static const uint16_t *gUntaggedConvArray
= NULL
;
187 static const uint16_t *gTaggedAliasArray
= NULL
;
188 static const uint16_t *gTaggedAliasLists
= NULL
;
189 static const uint16_t *gStringTable
= NULL
;
191 static uint32_t gConverterListSize
;
192 static uint32_t gTagListSize
;
193 static uint32_t gAliasListSize
;
194 static uint32_t gUntaggedConvArraySize
;
195 static uint32_t gTaggedAliasArraySize
;
196 static uint32_t gTaggedAliasListsSize
;
197 static uint32_t gStringTableSize
;
199 static const char **gAvailableConverters
= NULL
;
200 static uint16_t gAvailableConverterCount
= 0;
202 static char gDefaultConverterNameBuffer
[UCNV_MAX_CONVERTER_NAME_LENGTH
+ 1]; /* +1 for NULL */
203 static const char *gDefaultConverterName
= NULL
;
205 #define GET_STRING(idx) (const char *)(gStringTable + (idx))
207 static UBool U_CALLCONV
208 isAcceptable(void *context
,
209 const char *type
, const char *name
,
210 const UDataInfo
*pInfo
) {
213 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
214 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
215 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CvAl" */
216 pInfo
->dataFormat
[1]==0x76 &&
217 pInfo
->dataFormat
[2]==0x41 &&
218 pInfo
->dataFormat
[3]==0x6c &&
219 pInfo
->formatVersion
[0]==3);
222 static UBool U_CALLCONV
ucnv_io_cleanup(void)
225 udata_close(gAliasData
);
229 ucnv_io_flushAvailableConverterCache();
231 gConverterListSize
= 0;
234 gUntaggedConvArraySize
= 0;
235 gTaggedAliasArraySize
= 0;
236 gTaggedAliasListsSize
= 0;
237 gStringTableSize
= 0;
239 gConverterList
= NULL
;
242 gUntaggedConvArray
= NULL
;
243 gTaggedAliasArray
= NULL
;
244 gTaggedAliasLists
= NULL
;
247 gDefaultConverterName
= NULL
;
248 gDefaultConverterNameBuffer
[0] = 0;
250 return TRUE
; /* Everything was cleaned up */
254 haveAliasData(UErrorCode
*pErrorCode
) {
257 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
262 haveData
= (int)(gAliasData
==NULL
);
265 /* load converter alias data from file if necessary */
267 UDataMemory
*data
= NULL
;
268 const uint16_t *table
= NULL
;
271 uint32_t reservedSize1
;
273 data
= udata_openChoice(NULL
, DATA_TYPE
, DATA_NAME
, isAcceptable
, NULL
, pErrorCode
);
274 if(U_FAILURE(*pErrorCode
)) {
278 table
= (const uint16_t *)udata_getMemory(data
);
280 tableStart
= ((const uint32_t *)(table
))[0];
281 if (tableStart
< minTocLength
) {
282 *pErrorCode
= U_INVALID_FORMAT_ERROR
;
288 if(gAliasData
==NULL
) {
292 gConverterListSize
= ((const uint32_t *)(table
))[1];
293 gTagListSize
= ((const uint32_t *)(table
))[2];
294 gAliasListSize
= ((const uint32_t *)(table
))[3];
295 gUntaggedConvArraySize
= ((const uint32_t *)(table
))[4];
296 gTaggedAliasArraySize
= ((const uint32_t *)(table
))[5];
297 gTaggedAliasListsSize
= ((const uint32_t *)(table
))[6];
298 reservedSize1
= ((const uint32_t *)(table
))[7]; /* reserved */
299 gStringTableSize
= ((const uint32_t *)(table
))[8];
301 currOffset
= tableStart
* (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
302 gConverterList
= table
+ currOffset
;
304 currOffset
+= gConverterListSize
;
305 gTagList
= table
+ currOffset
;
307 currOffset
+= gTagListSize
;
308 gAliasList
= table
+ currOffset
;
310 currOffset
+= gAliasListSize
;
311 gUntaggedConvArray
= table
+ currOffset
;
313 currOffset
+= gUntaggedConvArraySize
;
314 gTaggedAliasArray
= table
+ currOffset
;
316 /* aliasLists is a 1's based array, but it has a padding character */
317 currOffset
+= gTaggedAliasArraySize
;
318 gTaggedAliasLists
= table
+ currOffset
;
320 currOffset
+= gTaggedAliasListsSize
;
323 currOffset
+= reservedSize1
;
324 gStringTable
= table
+ currOffset
;
326 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO
, ucnv_io_cleanup
);
330 /* if a different thread set it first, then close the extra data */
332 udata_close(data
); /* NULL if it was set correctly */
339 static U_INLINE UBool
340 isAlias(const char *alias
, UErrorCode
*pErrorCode
) {
342 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
344 } else if(*alias
==0) {
351 static uint32_t getTagNumber(const char *tagname
) {
354 for (tagNum
= 0; tagNum
< gTagListSize
; tagNum
++) {
355 if (!uprv_stricmp(GET_STRING(gTagList
[tagNum
]), tagname
)) {
364 /* @see ucnv_compareNames */
365 U_CFUNC
char * U_EXPORT2
366 ucnv_io_stripASCIIForCompare(char *dst
, const char *name
) {
371 /* Ignore delimiters '-', '_', and ' ' */
372 while ((c1
= *name
) == 0x2d || c1
== 0x5f || c1
== 0x20) {
376 /* lowercase for case-insensitive comparison */
377 *(dstItr
++) = uprv_asciitolower(c1
);
383 U_CFUNC
char * U_EXPORT2
384 ucnv_io_stripEBCDICForCompare(char *dst
, const char *name
) {
389 /* Ignore delimiters '-', '_', and ' ' */
390 while ((c1
= *name
) == 0x60 || c1
== 0x6d || c1
== 0x40) {
394 /* lowercase for case-insensitive comparison */
395 *(dstItr
++) = uprv_ebcdictolower(c1
);
402 * Do a fuzzy compare of two converter/alias names. The comparison
403 * is case-insensitive. It also ignores the characters '-', '_', and
404 * ' ' (dash, underscore, and space). Thus the strings "UTF-8",
405 * "utf_8", and "Utf 8" are exactly equivalent.
407 * This is a symmetrical (commutative) operation; order of arguments
408 * is insignificant. This is an important property for sorting the
409 * list (when the list is preprocessed into binary form) and for
410 * performing binary searches on it at run time.
412 * @param name1 a converter name or alias, zero-terminated
413 * @param name2 a converter name or alias, zero-terminated
414 * @return 0 if the names match, or a negative value if name1
415 * lexically precedes name2, or a positive value if name1
416 * lexically follows name2.
418 * @see ucnv_io_stripForCompare
421 ucnv_compareNames(const char *name1
, const char *name2
) {
426 /* Ignore delimiters '-', '_', and ' ' */
427 while ((c1
= *name1
) == '-' || c1
== '_' || c1
== ' ') {
430 while ((c2
= *name2
) == '-' || c2
== '_' || c2
== ' ') {
434 /* If we reach the ends of both strings then they match */
439 /* Case-insensitive comparison */
440 rc
= (int)(unsigned char)uprv_tolower(c1
) -
441 (int)(unsigned char)uprv_tolower(c2
);
451 * search for an alias
452 * return the converter number index for gConverterList
454 static U_INLINE
uint32_t
455 findConverter(const char *alias
, UErrorCode
*pErrorCode
) {
456 uint32_t mid
, start
, limit
;
460 /* do a binary search for the alias */
462 limit
= gUntaggedConvArraySize
;
464 lastMid
= UINT32_MAX
;
467 mid
= (uint32_t)((start
+ limit
) / 2);
468 if (lastMid
== mid
) { /* Have we moved? */
469 break; /* We haven't moved, and it wasn't found. */
472 result
= ucnv_compareNames(alias
, GET_STRING(gAliasList
[mid
]));
476 } else if (result
> 0) {
479 /* Since the gencnval tool folds duplicates into one entry,
480 * this alias in gAliasList is unique, but different standards
481 * may map an alias to different converters.
483 if (gUntaggedConvArray
[mid
] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT
) {
484 *pErrorCode
= U_AMBIGUOUS_ALIAS_WARNING
;
486 return gUntaggedConvArray
[mid
] & UCNV_CONVERTER_INDEX_MASK
;
494 * Is this alias in this list?
495 * alias and listOffset should be non-NULL.
497 static U_INLINE UBool
498 isAliasInList(const char *alias
, uint32_t listOffset
) {
501 uint32_t listCount
= gTaggedAliasLists
[listOffset
];
502 /* +1 to skip listCount */
503 const uint16_t *currList
= gTaggedAliasLists
+ listOffset
+ 1;
504 for (currAlias
= 0; currAlias
< listCount
; currAlias
++) {
505 if (currList
[currAlias
]
506 && ucnv_compareNames(alias
, GET_STRING(currList
[currAlias
]))==0)
516 * Search for a standard name of an alias (what is the default name
517 * that this standard uses?)
518 * return the listOffset for gTaggedAliasLists. If it's 0,
519 * then it couldn't be found, but the parameters are valid.
522 findTaggedAliasListsOffset(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
526 UErrorCode myErr
= U_ZERO_ERROR
;
527 uint32_t tagNum
= getTagNumber(standard
);
529 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
530 convNum
= findConverter(alias
, &myErr
);
531 if (myErr
!= U_ZERO_ERROR
) {
535 if (tagNum
< (gTagListSize
- UCNV_NUM_HIDDEN_TAGS
) && convNum
< gConverterListSize
) {
536 listOffset
= gTaggedAliasArray
[tagNum
*gConverterListSize
+ convNum
];
537 if (listOffset
&& gTaggedAliasLists
[listOffset
+ 1]) {
540 if (myErr
== U_AMBIGUOUS_ALIAS_WARNING
) {
541 /* Uh Oh! They used an ambiguous alias.
542 We have to search the whole swiss cheese starting
543 at the highest standard affinity.
544 This may take a while.
546 for (idx
= 0; idx
< gTaggedAliasArraySize
; idx
++) {
547 listOffset
= gTaggedAliasArray
[idx
];
548 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
549 uint32_t currTagNum
= idx
/gConverterListSize
;
550 uint32_t currConvNum
= (idx
- currTagNum
*gConverterListSize
);
551 uint32_t tempListOffset
= gTaggedAliasArray
[tagNum
*gConverterListSize
+ currConvNum
];
552 if (tempListOffset
&& gTaggedAliasLists
[tempListOffset
+ 1]) {
553 return tempListOffset
;
555 /* else keep on looking */
556 /* We could speed this up by starting on the next row
557 because an alias is unique per row, right now.
558 This would change if alias versioning appears. */
561 /* The standard doesn't know about the alias */
563 /* else no default name */
566 /* else converter or tag not found */
571 /* Return the canonical name */
573 findTaggedConverterNum(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
577 UErrorCode myErr
= U_ZERO_ERROR
;
578 uint32_t tagNum
= getTagNumber(standard
);
580 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
581 convNum
= findConverter(alias
, &myErr
);
582 if (myErr
!= U_ZERO_ERROR
) {
586 if (tagNum
< (gTagListSize
- UCNV_NUM_HIDDEN_TAGS
) && convNum
< gConverterListSize
) {
587 listOffset
= gTaggedAliasArray
[tagNum
*gConverterListSize
+ convNum
];
588 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
591 if (myErr
== U_AMBIGUOUS_ALIAS_WARNING
) {
592 /* Uh Oh! They used an ambiguous alias.
593 We have to search one slice of the swiss cheese.
594 We search only in the requested tag, not the whole thing.
595 This may take a while.
597 uint32_t convStart
= (tagNum
)*gConverterListSize
;
598 uint32_t convLimit
= (tagNum
+1)*gConverterListSize
;
599 for (idx
= convStart
; idx
< convLimit
; idx
++) {
600 listOffset
= gTaggedAliasArray
[idx
];
601 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
602 return idx
-convStart
;
605 /* The standard doesn't know about the alias */
607 /* else no canonical name */
609 /* else converter or tag not found */
617 ucnv_io_getConverterName(const char *alias
, UErrorCode
*pErrorCode
) {
618 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
619 uint32_t convNum
= findConverter(alias
, pErrorCode
);
620 if (convNum
< gConverterListSize
) {
621 return GET_STRING(gConverterList
[convNum
]);
623 /* else converter not found */
628 static int32_t U_CALLCONV
629 ucnv_io_countStandardAliases(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
631 UAliasContext
*myContext
= (UAliasContext
*)(enumerator
->context
);
632 uint32_t listOffset
= myContext
->listOffset
;
635 value
= gTaggedAliasLists
[listOffset
];
640 static const char* U_CALLCONV
641 ucnv_io_nextStandardAliases(UEnumeration
*enumerator
,
642 int32_t* resultLength
,
643 UErrorCode
*pErrorCode
)
645 UAliasContext
*myContext
= (UAliasContext
*)(enumerator
->context
);
646 uint32_t listOffset
= myContext
->listOffset
;
649 uint32_t listCount
= gTaggedAliasLists
[listOffset
];
650 const uint16_t *currList
= gTaggedAliasLists
+ listOffset
+ 1;
652 if (myContext
->listIdx
< listCount
) {
653 const char *myStr
= GET_STRING(currList
[myContext
->listIdx
++]);
655 *resultLength
= (int32_t)uprv_strlen(myStr
);
660 /* Either we accessed a zero length list, or we enumerated too far. */
661 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
665 static void U_CALLCONV
666 ucnv_io_resetStandardAliases(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
667 ((UAliasContext
*)(enumerator
->context
))->listIdx
= 0;
670 static void U_CALLCONV
671 ucnv_io_closeUEnumeration(UEnumeration
*enumerator
) {
672 uprv_free(enumerator
->context
);
673 uprv_free(enumerator
);
676 /* Enumerate the aliases for the specified converter and standard tag */
677 static const UEnumeration gEnumAliases
= {
680 ucnv_io_closeUEnumeration
,
681 ucnv_io_countStandardAliases
,
683 ucnv_io_nextStandardAliases
,
684 ucnv_io_resetStandardAliases
687 U_CAPI UEnumeration
* U_EXPORT2
688 ucnv_openStandardNames(const char *convName
,
689 const char *standard
,
690 UErrorCode
*pErrorCode
)
692 UEnumeration
*myEnum
= NULL
;
693 if (haveAliasData(pErrorCode
) && isAlias(convName
, pErrorCode
)) {
694 uint32_t listOffset
= findTaggedAliasListsOffset(convName
, standard
, pErrorCode
);
696 /* When listOffset == 0, we want to acknowledge that the
697 converter name and standard are okay, but there
698 is nothing to enumerate. */
699 if (listOffset
< gTaggedAliasListsSize
) {
700 UAliasContext
*myContext
;
702 myEnum
= uprv_malloc(sizeof(UEnumeration
));
703 if (myEnum
== NULL
) {
704 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
707 uprv_memcpy(myEnum
, &gEnumAliases
, sizeof(UEnumeration
));
708 myContext
= uprv_malloc(sizeof(UAliasContext
));
709 if (myContext
== NULL
) {
710 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
714 myContext
->listOffset
= listOffset
;
715 myContext
->listIdx
= 0;
716 myEnum
->context
= myContext
;
718 /* else converter or tag not found */
724 ucnv_io_countAliases(const char *alias
, UErrorCode
*pErrorCode
) {
725 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
726 uint32_t convNum
= findConverter(alias
, pErrorCode
);
727 if (convNum
< gConverterListSize
) {
728 /* tagListNum - 1 is the ALL tag */
729 int32_t listOffset
= gTaggedAliasArray
[(gTagListSize
- 1)*gConverterListSize
+ convNum
];
732 return gTaggedAliasLists
[listOffset
];
734 /* else this shouldn't happen. internal program error */
736 /* else converter not found */
742 ucnv_io_getAliases(const char *alias
, uint16_t start
, const char **aliases
, UErrorCode
*pErrorCode
) {
743 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
745 uint32_t convNum
= findConverter(alias
, pErrorCode
);
746 if (convNum
< gConverterListSize
) {
747 /* tagListNum - 1 is the ALL tag */
748 int32_t listOffset
= gTaggedAliasArray
[(gTagListSize
- 1)*gConverterListSize
+ convNum
];
751 uint32_t listCount
= gTaggedAliasLists
[listOffset
];
752 /* +1 to skip listCount */
753 const uint16_t *currList
= gTaggedAliasLists
+ listOffset
+ 1;
755 for (currAlias
= start
; currAlias
< listCount
; currAlias
++) {
756 aliases
[currAlias
] = GET_STRING(currList
[currAlias
]);
759 /* else this shouldn't happen. internal program error */
761 /* else converter not found */
767 ucnv_io_getAlias(const char *alias
, uint16_t n
, UErrorCode
*pErrorCode
) {
768 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
769 uint32_t convNum
= findConverter(alias
, pErrorCode
);
770 if (convNum
< gConverterListSize
) {
771 /* tagListNum - 1 is the ALL tag */
772 int32_t listOffset
= gTaggedAliasArray
[(gTagListSize
- 1)*gConverterListSize
+ convNum
];
775 uint32_t listCount
= gTaggedAliasLists
[listOffset
];
776 /* +1 to skip listCount */
777 const uint16_t *currList
= gTaggedAliasLists
+ listOffset
+ 1;
780 return GET_STRING(currList
[n
]);
782 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
784 /* else this shouldn't happen. internal program error */
786 /* else converter not found */
792 ucnv_io_countStandards(UErrorCode
*pErrorCode
) {
793 if (haveAliasData(pErrorCode
)) {
794 /* Don't include the empty list */
795 return (uint16_t)(gTagListSize
- UCNV_NUM_HIDDEN_TAGS
);
801 U_CAPI
const char * U_EXPORT2
802 ucnv_getStandard(uint16_t n
, UErrorCode
*pErrorCode
) {
803 if (haveAliasData(pErrorCode
)) {
804 if (n
< gTagListSize
- UCNV_NUM_HIDDEN_TAGS
) {
805 return GET_STRING(gTagList
[n
]);
807 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
813 U_CAPI
const char * U_EXPORT2
814 ucnv_getStandardName(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
815 if (haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
816 uint32_t listOffset
= findTaggedAliasListsOffset(alias
, standard
, pErrorCode
);
818 if (0 < listOffset
&& listOffset
< gTaggedAliasListsSize
) {
819 const uint16_t *currList
= gTaggedAliasLists
+ listOffset
+ 1;
821 /* Get the preferred name from this list */
823 return GET_STRING(currList
[0]);
825 /* else someone screwed up the alias table. */
826 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
833 U_CAPI
const char * U_EXPORT2
834 ucnv_getCanonicalName(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
835 if (haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
836 uint32_t convNum
= findTaggedConverterNum(alias
, standard
, pErrorCode
);
838 if (convNum
< gConverterListSize
) {
839 return GET_STRING(gConverterList
[convNum
]);
847 ucnv_io_flushAvailableConverterCache() {
848 if (gAvailableConverters
) {
850 gAvailableConverterCount
= 0;
851 uprv_free((char **)gAvailableConverters
);
852 gAvailableConverters
= NULL
;
857 static UBool
haveAvailableConverterList(UErrorCode
*pErrorCode
) {
858 if (gAvailableConverters
== NULL
) {
860 uint16_t localConverterCount
;
862 const char *converterName
;
863 const char **localConverterList
;
865 if (!haveAliasData(pErrorCode
)) {
869 /* We can't have more than gConverterListSize converters to open */
870 localConverterList
= (const char **) uprv_malloc(gConverterListSize
* sizeof(char*));
871 if (!localConverterList
) {
872 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
876 localConverterCount
= 0;
878 for (idx
= 0; idx
< gConverterListSize
; idx
++) {
879 status
= U_ZERO_ERROR
;
880 converterName
= GET_STRING(gConverterList
[idx
]);
881 ucnv_close(ucnv_open(converterName
, &status
));
882 if (U_SUCCESS(status
)) {
883 localConverterList
[localConverterCount
++] = converterName
;
888 if (gAvailableConverters
== NULL
) {
889 gAvailableConverters
= localConverterList
;
890 gAvailableConverterCount
= localConverterCount
;
891 /* haveAliasData() should have already registered the cleanup function */
894 uprv_free((char **)localConverterList
);
902 ucnv_io_countAvailableConverters(UErrorCode
*pErrorCode
) {
903 if (haveAvailableConverterList(pErrorCode
)) {
904 return gAvailableConverterCount
;
910 ucnv_io_getAvailableConverter(uint16_t n
, UErrorCode
*pErrorCode
) {
911 if (haveAvailableConverterList(pErrorCode
)) {
912 if (n
< gAvailableConverterCount
) {
913 return gAvailableConverters
[n
];
915 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
920 static int32_t U_CALLCONV
921 ucnv_io_countAllConverters(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
922 return gConverterListSize
;
925 static const char* U_CALLCONV
926 ucnv_io_nextAllConverters(UEnumeration
*enumerator
,
927 int32_t* resultLength
,
928 UErrorCode
*pErrorCode
)
930 uint16_t *myContext
= (uint16_t *)(enumerator
->context
);
932 if (*myContext
< gConverterListSize
) {
933 const char *myStr
= GET_STRING(gConverterList
[(*myContext
)++]);
935 *resultLength
= (int32_t)uprv_strlen(myStr
);
939 /* Either we accessed a zero length list, or we enumerated too far. */
940 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
944 static void U_CALLCONV
945 ucnv_io_resetAllConverters(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
946 *((uint16_t *)(enumerator
->context
)) = 0;
949 static const UEnumeration gEnumAllConverters
= {
952 ucnv_io_closeUEnumeration
,
953 ucnv_io_countAllConverters
,
955 ucnv_io_nextAllConverters
,
956 ucnv_io_resetAllConverters
959 U_CAPI UEnumeration
* U_EXPORT2
960 ucnv_openAllNames(UErrorCode
*pErrorCode
) {
961 UEnumeration
*myEnum
= NULL
;
962 if (haveAliasData(pErrorCode
)) {
965 myEnum
= uprv_malloc(sizeof(UEnumeration
));
966 if (myEnum
== NULL
) {
967 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
970 uprv_memcpy(myEnum
, &gEnumAllConverters
, sizeof(UEnumeration
));
971 myContext
= uprv_malloc(sizeof(uint16_t));
972 if (myContext
== NULL
) {
973 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
978 myEnum
->context
= myContext
;
984 ucnv_io_countAvailableAliases(UErrorCode
*pErrorCode
) {
985 if (haveAliasData(pErrorCode
)) {
986 return (uint16_t)gAliasListSize
;
991 /* default converter name --------------------------------------------------- */
994 * In order to be really thread-safe, the get function would have to take
995 * a buffer parameter and copy the current string inside a mutex block.
996 * This implementation only tries to be really thread-safe while
998 * It assumes that setting a pointer is atomic.
/*
 * Returns the name of the default converter.
 *
 * The cached pointer gDefaultConverterName is read into a local first.
 * The visible slow path asks uprv_getDefaultCodepage() for a name, opens a
 * converter for it with ucnv_open() and replaces the name with the result of
 * ucnv_getName(). If the name is NULL or empty, an error occurred, no
 * converter was opened, or the name does not fit into
 * gDefaultConverterNameBuffer, a hard-coded platform fallback name is used.
 * The chosen name is copied into gDefaultConverterNameBuffer, published via
 * gDefaultConverterName, and ucnv_io_cleanup is registered for
 * UCLN_COMMON_UCNV_IO.
 *
 * NOTE(review): the conditions that select the slow path, the locking (if
 * any), the ucnv_close() call and the final return are on lines elided from
 * this listing - confirm against the full file.
 */
1001 U_CFUNC
const char *
1002 ucnv_io_getDefaultConverterName() {
1003 /* local variable to be thread-safe */
1007 name
=gDefaultConverterName
;
1011 UErrorCode errorCode
= U_ZERO_ERROR
;
1012 UConverter
*cnv
= NULL
;
1015 name
= uprv_getDefaultCodepage();
1017 /* if the name is there, test it out and get the canonical name with options */
1019 cnv
= ucnv_open(name
, &errorCode
);
1020 if(U_SUCCESS(errorCode
) && cnv
!= NULL
) {
1021 name
= ucnv_getName(cnv
, &errorCode
);
/* Reject anything unusable, including a name too long for the static buffer (room for the NUL is required, hence >=). */
1025 if(name
== NULL
|| name
[0] == 0
1026 || U_FAILURE(errorCode
) || cnv
== NULL
1027 || length
>=sizeof(gDefaultConverterNameBuffer
))
1029 /* Panic time, let's use a fallback. */
/* NOTE(review): the ASCII-family branch body and the #else/#endif lines are elided in this listing. */
1030 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
1032 /* there is no 'algorithmic' converter for EBCDIC */
1033 #elif defined(OS390)
1034 name
= "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING
;
1036 name
= "ibm-37_P100-1995";
1040 length
=(int32_t)(uprv_strlen(name
));
1042 /* Copy the name before we close the converter. */
1044 uprv_memcpy(gDefaultConverterNameBuffer
, name
, length
);
/* memcpy copied exactly length bytes, so terminate explicitly. */
1045 gDefaultConverterNameBuffer
[length
]=0;
/* Publish the buffer only after it is fully written. */
1046 gDefaultConverterName
= gDefaultConverterNameBuffer
;
1047 name
= gDefaultConverterName
;
1048 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO
, ucnv_io_cleanup
);
1051 /* The close may make the current name go away. */
/*
 * Sets the default converter name.
 *
 * converterName == NULL: clears gDefaultConverterName so the default is
 * re-derived from the default codepage (per the comment below).
 * Otherwise the name is resolved with ucnv_io_getConverterName():
 *   - on success with a non-NULL result, gDefaultConverterName is pointed at
 *     the resolved name;
 *   - on failure, converterName itself is copied into
 *     gDefaultConverterNameBuffer, but only if it fits including the
 *     terminating NUL; a name that is too long is ignored.
 *
 * NOTE(review): the return-type line and any locking are elided in this
 * listing.
 */
1059 ucnv_io_setDefaultConverterName(const char *converterName
) {
1060 if(converterName
==NULL
) {
1061 /* reset to the default codepage */
1063 gDefaultConverterName
=NULL
;
1066 UErrorCode errorCode
=U_ZERO_ERROR
;
1067 const char *name
=ucnv_io_getConverterName(converterName
, &errorCode
);
1071 if(U_SUCCESS(errorCode
) && name
!=NULL
) {
/* The resolved name is stored by pointer, not copied. */
1072 gDefaultConverterName
=name
;
1074 /* do not set the name if the alias lookup failed and it is too long */
1075 int32_t length
=(int32_t)(uprv_strlen(converterName
));
/* Strictly less than the buffer size so the terminating NUL also fits. */
1076 if(length
<sizeof(gDefaultConverterNameBuffer
)) {
1077 /* it was not found as an alias, so copy it - accept an empty name */
1078 uprv_memcpy(gDefaultConverterNameBuffer
, converterName
, length
);
1079 gDefaultConverterNameBuffer
[length
]=0;
1080 gDefaultConverterName
=gDefaultConverterNameBuffer
;
1087 /* alias table swapping ----------------------------------------------------- */
/*
 * Signature of the name-normalizing function used when sorting aliases:
 * takes a destination buffer and a source name and returns a char *.
 * ucnv_swapAliases() selects ucnv_io_stripASCIIForCompare or
 * ucnv_io_stripEBCDICForCompare for this role, and io_compareRows() passes
 * the returned pointer straight to uprv_strcmp().
 */
1089 typedef char * U_CALLCONV
StripForCompareFn(char *dst
, const char *name
);
1092 * row of a temporary array
1094 * gets platform-endian charset string indexes and sorting indexes;
1095 * after sorting this array by strings, the actual arrays are permutated
1096 * according to the sorting indexes
/*
 * One sortable entry of the temporary alias table built by ucnv_swapAliases():
 * strIndex is the alias's string-table index read via ds->readUInt16(), and
 * sortIndex is the entry's position before sorting.
 */
1098 typedef struct TempRow
{
1099 uint16_t strIndex
, sortIndex
;
/*
 * Sorting context handed to io_compareRows() through uprv_sortArray().
 * Only the stripForCompare member is visible here; the code in this file
 * also uses members named chars, rows and resort.
 * NOTE(review): their declarations are on lines elided from this listing.
 */
1102 typedef struct TempAliasTable
{
/* Charset-family-specific name normalizer applied to both sides of each comparison. */
1106 StripForCompareFn
*stripForCompare
;
/*
 * Number of entries in the on-stack rows[] and resort[] arrays of
 * ucnv_swapAliases(); alias counts above this use a heap allocation instead.
 */
1110 STACK_ROW_CAPACITY
=500
/*
 * Comparison callback for uprv_sortArray() over TempRow entries.
 *
 * context is the TempAliasTable; left and right are TempRow pointers.
 * Each row's name is located at chars + 2*strIndex (string indexes count
 * 16-bit units, so they are doubled to get a byte offset), normalized with
 * tempTable->stripForCompare into a local buffer, and the two normalized
 * names are compared with uprv_strcmp().
 *
 * The local buffers hold UCNV_MAX_CONVERTER_NAME_LENGTH chars each.
 * NOTE(review): nothing on the visible lines bounds the stripped name to
 * that size - confirm the strip functions or the data format guarantee it.
 */
1114 io_compareRows(const void *context
, const void *left
, const void *right
) {
1115 char strippedLeft
[UCNV_MAX_CONVERTER_NAME_LENGTH
],
1116 strippedRight
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
1118 TempAliasTable
*tempTable
=(TempAliasTable
*)context
;
1119 const char *chars
=tempTable
->chars
;
1121 return (int32_t)uprv_strcmp(tempTable
->stripForCompare(strippedLeft
, chars
+2*((const TempRow
*)left
)->strIndex
),
1122 tempTable
->stripForCompare(strippedRight
, chars
+2*((const TempRow
*)right
)->strIndex
));
/*
 * Swaps a converter alias table ("CvAl", format version 3) between the
 * input and output platform properties described by ds.
 *
 * Parameters:
 *   ds         - swapper providing readUInt16/readUInt32, swapArray16/32,
 *                swapInvChars and the inCharset/outCharset families
 *   inData     - input data, starting with the data header
 *   length     - input length in bytes; the checks on length>=0 indicate a
 *                negative value is accepted (presumably a size-only
 *                "preflight" call - TODO confirm)
 *   outData    - output buffer; the in-place branch below exists for the
 *                case where it aliases inData
 *   pErrorCode - in/out error code
 *
 * Steps visible in this listing:
 *   1. udata_swapDataHeader() swaps and validates the header.
 *   2. dataFormat / formatVersion are checked (U_UNSUPPORTED_ERROR otherwise).
 *   3. The table of contents (uint32_t values) is read and per-section
 *      offsets, counted in 16-bit units, are accumulated.
 *   4. The TOC is swapped as 32-bit values and the string table as invariant
 *      characters.
 *   5. If input and output charset families match, everything between the
 *      converter list and the string table is swapped as 16-bit values.
 *      Otherwise the alias list and the untagged-converter array are
 *      re-sorted by output-charset string order while being swapped, and the
 *      remaining 16-bit sections are swapped unchanged in order.
 *
 * Errors set on visible lines: U_UNSUPPORTED_ERROR, U_INDEX_OUTOFBOUNDS_ERROR,
 * U_INVALID_FORMAT_ERROR, U_MEMORY_ALLOCATION_ERROR, plus whatever the ds
 * callbacks and uprv_sortArray() report.
 *
 * The final visible return is headerSize + 2*topOffset, i.e. the total size
 * in bytes.
 *
 * NOTE(review): many lines (early returns, several declarations, some call
 * heads and trailing arguments, and block-closing braces) are elided from
 * this listing; comments below describe only what is visible.
 */
1125 U_CAPI
int32_t U_EXPORT2
1126 ucnv_swapAliases(const UDataSwapper
*ds
,
1127 const void *inData
, int32_t length
, void *outData
,
1128 UErrorCode
*pErrorCode
) {
1129 const UDataInfo
*pInfo
;
1132 const uint16_t *inTable
;
1133 uint32_t toc
[offsetsCount
];
1134 uint32_t offsets
[offsetsCount
]; /* 16-bit-addressed offsets from inTable/outTable */
1135 uint32_t i
, count
, tocLength
, topOffset
;
/* Stack storage for the sort; used when the alias count is at most STACK_ROW_CAPACITY. */
1137 TempRow rows
[STACK_ROW_CAPACITY
];
1138 uint16_t resort
[STACK_ROW_CAPACITY
];
1139 TempAliasTable tempTable
;
1141 /* udata_swapDataHeader checks the arguments */
1142 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
1143 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1147 /* check data format and format version */
/* The UDataInfo is read at byte offset 4 from the start of inData. */
1148 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
1150 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CvAl" */
1151 pInfo
->dataFormat
[1]==0x76 &&
1152 pInfo
->dataFormat
[2]==0x41 &&
1153 pInfo
->dataFormat
[3]==0x6c &&
1154 pInfo
->formatVersion
[0]==3
1156 udata_printError(ds
, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1157 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
1158 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
1159 pInfo
->formatVersion
[0]);
1160 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1164 /* an alias table must contain at least the table of contents array */
/* 4 bytes per uint32_t entry: the TOC length word plus minTocLength entries. */
1165 if(length
>=0 && (length
-headerSize
)<4*(1+minTocLength
)) {
1166 udata_printError(ds
, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1168 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
/* inTable addresses the payload after the header in 16-bit units; the TOC at its start is read as uint32_t values. */
1172 inTable
=(const uint16_t *)((const char *)inData
+headerSize
);
1173 toc
[tocLengthIndex
]=tocLength
=ds
->readUInt32(((const uint32_t *)inTable
)[tocLengthIndex
]);
1174 if(tocLength
<minTocLength
) {
1175 udata_printError(ds
, "ucnv_swapAliases(): table of contents too short (%u sections)\n", tocLength
);
1176 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
1180 /* read the known part of the table of contents */
1181 for(i
=converterListIndex
; i
<=minTocLength
; ++i
) {
1182 toc
[i
]=ds
->readUInt32(((const uint32_t *)inTable
)[i
]);
1185 /* compute offsets */
1186 offsets
[tocLengthIndex
]=0;
1187 offsets
[converterListIndex
]=2*(1+tocLength
); /* count two 16-bit units per toc entry */
/* Each section starts where the previous one ends: running sum of section sizes. */
1188 for(i
=tagListIndex
; i
<=stringTableIndex
; ++i
) {
1189 offsets
[i
]=offsets
[i
-1]+toc
[i
-1];
1192 /* compute the overall size of the after-header data, in numbers of 16-bit units */
/* i is one past stringTableIndex here, so this adds the string table's size to its offset. */
1193 topOffset
=offsets
[i
-1]+toc
[i
-1];
/* NOTE(review): the condition opening this block and some declarations (outTable, q, q2, oldIndex) are elided in this listing. */
1197 const uint16_t *p
, *p2
;
/* topOffset counts 16-bit units, so the byte requirement is twice that. */
1201 if((length
-headerSize
)<(2*(int32_t)topOffset
)) {
1202 udata_printError(ds
, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1204 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1208 outTable
=(uint16_t *)((char *)outData
+headerSize
);
1210 /* swap the entire table of contents */
1211 ds
->swapArray32(ds
, inTable
, 4*(1+tocLength
), outTable
, pErrorCode
);
/* Strings are converted before any sorting: the sort below compares names read from outTable's string table. */
1214 ds
->swapInvChars(ds
, inTable
+offsets
[stringTableIndex
], 2*(int32_t)toc
[stringTableIndex
],
1215 outTable
+offsets
[stringTableIndex
], pErrorCode
);
1216 if(U_FAILURE(*pErrorCode
)) {
1217 udata_printError(ds
, "ucnv_swapAliases().swapInvChars(charset names) failed - %s\n",
1218 u_errorName(*pErrorCode
));
1222 if(ds
->inCharset
==ds
->outCharset
) {
1223 /* no need to sort, just swap all 16-bit values together */
/* Arguments of a swap call (its head line is elided): from the converter list up to the string table, length in bytes. */
1225 inTable
+offsets
[converterListIndex
],
1226 2*(int32_t)(offsets
[stringTableIndex
]-offsets
[converterListIndex
]),
1227 outTable
+offsets
[converterListIndex
],
1230 /* allocate the temporary table for sorting */
1231 count
=toc
[aliasListIndex
];
1233 tempTable
.chars
=(const char *)(outTable
+offsets
[stringTableIndex
]); /* sort by outCharset */
1235 if(count
<=STACK_ROW_CAPACITY
) {
1236 tempTable
.rows
=rows
;
1237 tempTable
.resort
=resort
;
/* One heap block holds count TempRow entries followed by count 2-byte resort slots. */
1239 tempTable
.rows
=(TempRow
*)uprv_malloc(count
*sizeof(TempRow
)+count
*2);
1240 if(tempTable
.rows
==NULL
) {
1241 udata_printError(ds
, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1243 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
/* resort begins immediately after the last TempRow in the same allocation. */
1246 tempTable
.resort
=(uint16_t *)(tempTable
.rows
+count
);
/* Pick the name normalizer that matches the charset family of the output strings. */
1249 if(ds
->outCharset
==U_ASCII_FAMILY
) {
1250 tempTable
.stripForCompare
=ucnv_io_stripASCIIForCompare
;
1251 } else /* U_EBCDIC_FAMILY */ {
1252 tempTable
.stripForCompare
=ucnv_io_stripEBCDICForCompare
;
1256 * Sort unique aliases+mapped names.
1258 * We need to sort the list again by outCharset strings because they
1259 * sort differently for different charset families.
1260 * First we set up a temporary table with the string indexes and
1261 * sorting indexes and sort that.
1262 * Then we permutate and copy/swap the actual values.
/* p/q: alias list in input/output; p2/q2: untagged-converter array in input/output. Both arrays are permuted in parallel. */
1264 p
=inTable
+offsets
[aliasListIndex
];
1265 q
=outTable
+offsets
[aliasListIndex
];
1267 p2
=inTable
+offsets
[untaggedConvArrayIndex
];
1268 q2
=outTable
+offsets
[untaggedConvArrayIndex
];
/* Record each alias's string index (in platform byte order) together with its original position. */
1270 for(i
=0; i
<count
; ++i
) {
1271 tempTable
.rows
[i
].strIndex
=ds
->readUInt16(p
[i
]);
1272 tempTable
.rows
[i
].sortIndex
=(uint16_t)i
;
1275 uprv_sortArray(tempTable
.rows
, (int32_t)count
, sizeof(TempRow
),
1276 io_compareRows
, &tempTable
,
1279 if(U_SUCCESS(*pErrorCode
)) {
1280 /* copy/swap/permutate items */
/* First variant: each 16-bit entry (2 bytes) moves from its old position to its sorted position, directly into the output arrays. */
1282 for(i
=0; i
<count
; ++i
) {
1283 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1284 ds
->swapArray16(ds
, p
+oldIndex
, 2, q
+i
, pErrorCode
);
1285 ds
->swapArray16(ds
, p2
+oldIndex
, 2, q2
+i
, pErrorCode
);
1289 * If we swap in-place, then the permutation must use another
1290 * temporary array (tempTable.resort)
1291 * before the results are copied to the outBundle.
1293 uint16_t *r
=tempTable
.resort
;
/* Second variant: permute into the scratch array first, then copy the whole array over the output. */
1295 for(i
=0; i
<count
; ++i
) {
1296 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1297 ds
->swapArray16(ds
, p
+oldIndex
, 2, r
+i
, pErrorCode
);
1299 uprv_memcpy(q
, r
, 2*count
);
/* The same scratch array is reused for the second parallel array. */
1301 for(i
=0; i
<count
; ++i
) {
1302 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1303 ds
->swapArray16(ds
, p2
+oldIndex
, 2, r
+i
, pErrorCode
);
1305 uprv_memcpy(q2
, r
, 2*count
);
/* Free only when the heap block was used instead of the stack arrays. */
1309 if(tempTable
.rows
!=rows
) {
1310 uprv_free(tempTable
.rows
);
1313 if(U_FAILURE(*pErrorCode
)) {
1314 udata_printError(ds
, "ucnv_swapAliases().uprv_sortArray(%u items) failed - %s\n",
1315 count
, u_errorName(*pErrorCode
));
1319 /* swap remaining 16-bit values */
/* First remaining range: from the converter list up to the alias list. */
1321 inTable
+offsets
[converterListIndex
],
1322 2*(int32_t)(offsets
[aliasListIndex
]-offsets
[converterListIndex
]),
1323 outTable
+offsets
[converterListIndex
],
/* Second remaining range: from the tagged-alias array up to the string table. */
1326 inTable
+offsets
[taggedAliasArrayIndex
],
1327 2*(int32_t)(offsets
[stringTableIndex
]-offsets
[taggedAliasArrayIndex
]),
1328 outTable
+offsets
[taggedAliasArrayIndex
],
/* Total size in bytes: header plus the table, whose size is counted in 16-bit units. */
1333 return headerSize
+2*(int32_t)topOffset
;
1339 * Hey, Emacs, please set the following:
1342 * indent-tabs-mode: nil