2 ******************************************************************************
4 * Copyright (C) 1999-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * Initializes global variables and defines functions pertaining to the converter
12 * name resolution aspect of the conversion code.
16 * created on: 1999nov22
17 * created by: Markus W. Scherer
19 * Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 * with aliases for converter names.
22 * Date Name Description
23 * 11/22/1999 markus Created
24 * 06/28/2002 grhoten Major overhaul of the converter alias design.
25 * Now an alias can map to different converters
26 * depending on the specified standard.
27 *******************************************************************************
30 #include "unicode/utypes.h"
32 #if !UCONFIG_NO_CONVERSION
34 #include "unicode/ucnv.h"
35 #include "unicode/udata.h"
46 /* Format of cnvalias.icu -----------------------------------------------------
48 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49 * This binary form contains several tables. All indexes are to uint16_t
50 * units, and not to the bytes (uint8_t units). Addressing everything on
51 * 16-bit boundaries allows us to store more information with small index
52 * numbers, which are also 16-bit in size. The majority of the tables (except
53 * the string table) consist of 16-bit numbers.
55 * First there is the size of the Table of Contents (TOC). The TOC
56 * entries contain the size of each section. To find the offset of a section,
57 * sum up the sizes of all the sections that precede it.
58 * The TOC length and entries are an array of uint32_t values.
59 * The first section after the TOC starts immediately after the TOC.
61 * 1) This section contains a list of converters. This list contains indexes
62 * into the string table for the converter name. The index of this list is
63 * also used by other sections, which are mentioned later on.
64 * This list is not sorted.
66 * 2) This section contains a list of tags. This list contains indexes
67 * into the string table for the tag name. The index of this list is
68 * also used by other sections, which are mentioned later on.
69 * This list is in priority order of standards.
71 * 3) This section contains a list of sorted unique aliases. This
72 * list contains indexes into the string table for the alias name. The
73 * index of this list is also used by other sections, like the 4th section.
74 * The index for the 3rd and 4th section is used to get the
75 * alias -> converter name mapping. Section 3 and 4 form a two column table.
76 * Some of the most significant bits of each index may contain other
77 * information (see findConverter for details).
79 * 4) This section contains a list of mapped converter names. Consider this
80 * as a table that maps the 3rd section to the 1st section. This list contains
81 * indexes into the 1st section. The index of this list is the same index in
82 * the 3rd section. There is also some extra information in the high bits of
83 * each converter index in this table. Currently it's only used to say that
84 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86 * the predigested form of the 5th section so that an alias lookup can be fast.
88 * 5) This section contains a 2D array with indexes to the 6th section. This
89 * section is the full form of all alias mappings. The column index is the
90 * index into the converter list (column header). The row index is the index
91 * into the tag list (row header). This 2D array is the top part of a 3D array. The
92 * third dimension is in the 6th section.
94 * 6) This is a blob of variable-length arrays. Each array starts with a size,
95 * and is followed by indexes to alias names in the string table. This is
96 * the third dimension of section 5. No other section should be referencing this section.
99 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101 * what type of string normalization is used, among other potential things in the future.
104 * 8) This is the string table. All strings are indexed on an even address.
105 * There are two reasons for this. First many chip architectures locate strings
106 * faster on even address boundaries. Second, since all indexes are 16-bit
107 * numbers, this string table can be 128KB in size instead of 64KB when we
108 * only have strings starting on an even address.
110 * 9) When present this is a set of prenormalized strings from section 8. This
111 * table contains normalized strings with the dashes and spaces stripped out,
112 * and all strings lowercased. In the future, the options in section 7 may state
113 * other types of normalization.
115 * Here is the concept of sections 5 and 6. It's a 3D cube. Within a tag, each
116 * alias is unique among all converters. That same alias can
117 * be mentioned in other standards on different converters,
118 * but within a single tag an alias appears only once.
121 * Converter Names (Usually in TR22 form)
122 * -------------------------------------------.
128 * ------------------------------------------/ |
136 * -------------------------------------------
140 * Here is what it really looks like. It's like swiss cheese.
141 * There are holes. Some converters aren't recognized by
142 * a standard, or they are really old converters that the
143 * standard doesn't recognize anymore.
145 * Converter Names (Usually in TR22 form)
146 * -------------------------------------------.
147 * T /##########################################/|
149 * g / # ## ## ### # ### ### ### #/
150 * s / # ##### #### ## ## #/#
151 * / ### # # ## # # # ### # # #/##
152 * ------------------------------------------/# #
153 * A |### # # ## # # # ### # # #|# #
154 * l |# # # # # ## # #|# #
164 * Used by the UEnumeration API
166 typedef struct UAliasContext
{
171 static const char DATA_NAME
[] = "cnvalias";
172 static const char DATA_TYPE
[] = "icu";
174 static UDataMemory
*gAliasData
=NULL
;
178 converterListIndex
=1,
181 untaggedConvArrayIndex
=4,
182 taggedAliasArrayIndex
=5,
183 taggedAliasListsIndex
=6,
186 normalizedStringTableIndex
=9,
187 offsetsCount
, /* length of the swapper's temporary offsets[] */
188 minTocLength
=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
191 static const UConverterAliasOptions defaultTableOptions
= {
192 UCNV_IO_UNNORMALIZED
,
193 0 /* containsCnvOptionInfo */
195 static UConverterAlias gMainTable
;
197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
200 static UBool U_CALLCONV
201 isAcceptable(void *context
,
202 const char *type
, const char *name
,
203 const UDataInfo
*pInfo
) {
206 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
207 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
208 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CvAl" */
209 pInfo
->dataFormat
[1]==0x76 &&
210 pInfo
->dataFormat
[2]==0x41 &&
211 pInfo
->dataFormat
[3]==0x6c &&
212 pInfo
->formatVersion
[0]==3);
215 static UBool U_CALLCONV
ucnv_io_cleanup(void)
218 udata_close(gAliasData
);
222 uprv_memset(&gMainTable
, 0, sizeof(gMainTable
));
224 return TRUE
; /* Everything was cleaned up */
228 haveAliasData(UErrorCode
*pErrorCode
) {
231 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
235 UMTX_CHECK(NULL
, (gAliasData
==NULL
), needInit
);
237 /* load converter alias data from file if necessary */
239 UDataMemory
*data
= NULL
;
240 const uint16_t *table
= NULL
;
244 data
= udata_openChoice(NULL
, DATA_TYPE
, DATA_NAME
, isAcceptable
, NULL
, pErrorCode
);
245 if(U_FAILURE(*pErrorCode
)) {
249 table
= (const uint16_t *)udata_getMemory(data
);
251 tableStart
= ((const uint32_t *)(table
))[0];
252 if (tableStart
< minTocLength
) {
253 *pErrorCode
= U_INVALID_FORMAT_ERROR
;
259 if(gAliasData
==NULL
) {
263 gMainTable
.converterListSize
= ((const uint32_t *)(table
))[1];
264 gMainTable
.tagListSize
= ((const uint32_t *)(table
))[2];
265 gMainTable
.aliasListSize
= ((const uint32_t *)(table
))[3];
266 gMainTable
.untaggedConvArraySize
= ((const uint32_t *)(table
))[4];
267 gMainTable
.taggedAliasArraySize
= ((const uint32_t *)(table
))[5];
268 gMainTable
.taggedAliasListsSize
= ((const uint32_t *)(table
))[6];
269 gMainTable
.optionTableSize
= ((const uint32_t *)(table
))[7];
270 gMainTable
.stringTableSize
= ((const uint32_t *)(table
))[8];
272 if (((const uint32_t *)(table
))[0] > 8) {
273 gMainTable
.normalizedStringTableSize
= ((const uint32_t *)(table
))[9];
276 currOffset
= tableStart
* (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
277 gMainTable
.converterList
= table
+ currOffset
;
279 currOffset
+= gMainTable
.converterListSize
;
280 gMainTable
.tagList
= table
+ currOffset
;
282 currOffset
+= gMainTable
.tagListSize
;
283 gMainTable
.aliasList
= table
+ currOffset
;
285 currOffset
+= gMainTable
.aliasListSize
;
286 gMainTable
.untaggedConvArray
= table
+ currOffset
;
288 currOffset
+= gMainTable
.untaggedConvArraySize
;
289 gMainTable
.taggedAliasArray
= table
+ currOffset
;
291 /* aliasLists is a 1-based array, but it has a padding character */
292 currOffset
+= gMainTable
.taggedAliasArraySize
;
293 gMainTable
.taggedAliasLists
= table
+ currOffset
;
295 currOffset
+= gMainTable
.taggedAliasListsSize
;
296 if (gMainTable
.optionTableSize
> 0
297 && ((const UConverterAliasOptions
*)(table
+ currOffset
))->stringNormalizationType
< UCNV_IO_NORM_TYPE_COUNT
)
300 gMainTable
.optionTable
= (const UConverterAliasOptions
*)(table
+ currOffset
);
303 /* Smaller table, or I can't handle this normalization mode!
304 Use the original slower table lookup. */
305 gMainTable
.optionTable
= &defaultTableOptions
;
308 currOffset
+= gMainTable
.optionTableSize
;
309 gMainTable
.stringTable
= table
+ currOffset
;
311 currOffset
+= gMainTable
.stringTableSize
;
312 gMainTable
.normalizedStringTable
= ((gMainTable
.optionTable
->stringNormalizationType
== UCNV_IO_UNNORMALIZED
)
313 ? gMainTable
.stringTable
: (table
+ currOffset
));
315 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO
, ucnv_io_cleanup
);
319 /* if a different thread set it first, then close the extra data */
321 udata_close(data
); /* NULL if it was set correctly */
328 static U_INLINE UBool
329 isAlias(const char *alias
, UErrorCode
*pErrorCode
) {
331 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
334 return (UBool
)(*alias
!=0);
337 static uint32_t getTagNumber(const char *tagname
) {
338 if (gMainTable
.tagList
) {
340 for (tagNum
= 0; tagNum
< gMainTable
.tagListSize
; tagNum
++) {
341 if (!uprv_stricmp(GET_STRING(gMainTable
.tagList
[tagNum
]), tagname
)) {
350 /* character types relevant for ucnv_compareNames() */
355 MINLETTER
/* any values from here on are lowercase letter mappings */
358 /* character types for ASCII 00..7F */
359 static const uint8_t asciiTypes
[128] = {
360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363 ZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, 0, 0, 0, 0, 0, 0,
364 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
365 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
366 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
367 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
370 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)IGNORE)
372 /* character types for EBCDIC 80..FF */
373 static const uint8_t ebcdicTypes
[128] = {
374 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
375 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
376 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
379 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
380 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
381 ZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, NONZERO
, 0, 0, 0, 0, 0, 0
384 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
386 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
387 # define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
388 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
389 # define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
391 # error U_CHARSET_FAMILY is not valid
394 /* @see ucnv_compareNames */
395 U_CFUNC
char * U_EXPORT2
396 ucnv_io_stripASCIIForCompare(char *dst
, const char *name
) {
398 uint8_t type
, nextType
;
400 UBool afterDigit
= FALSE
;
402 while ((c1
= *name
++) != 0) {
403 type
= GET_ASCII_TYPE(c1
);
407 continue; /* ignore all but letters and digits */
410 nextType
= GET_ASCII_TYPE(*name
);
411 if (nextType
== ZERO
|| nextType
== NONZERO
) {
412 continue; /* ignore leading zero before another digit */
420 c1
= (char)type
; /* lowercased letter */
430 U_CFUNC
char * U_EXPORT2
431 ucnv_io_stripEBCDICForCompare(char *dst
, const char *name
) {
433 uint8_t type
, nextType
;
435 UBool afterDigit
= FALSE
;
437 while ((c1
= *name
++) != 0) {
438 type
= GET_EBCDIC_TYPE(c1
);
442 continue; /* ignore all but letters and digits */
445 nextType
= GET_EBCDIC_TYPE(*name
);
446 if (nextType
== ZERO
|| nextType
== NONZERO
) {
447 continue; /* ignore leading zero before another digit */
455 c1
= (char)type
; /* lowercased letter */
466 * Do a fuzzy compare of two converter/alias names.
467 * The comparison is case-insensitive, ignores a leading zero when it is
468 * followed by another digit, and ignores all but letters and digits.
469 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
470 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
471 * at http://www.unicode.org/reports/tr22/
473 * This is a symmetrical (commutative) operation; order of arguments
474 * is insignificant. This is an important property for sorting the
475 * list (when the list is preprocessed into binary form) and for
476 * performing binary searches on it at run time.
478 * @param name1 a converter name or alias, zero-terminated
479 * @param name2 a converter name or alias, zero-terminated
480 * @return 0 if the names match, or a negative value if name1
481 * lexically precedes name2, or a positive value if name1
482 * lexically follows name2.
484 * @see ucnv_io_stripForCompare
487 ucnv_compareNames(const char *name1
, const char *name2
) {
489 uint8_t type
, nextType
;
491 UBool afterDigit1
= FALSE
, afterDigit2
= FALSE
;
494 while ((c1
= *name1
++) != 0) {
495 type
= GET_CHAR_TYPE(c1
);
499 continue; /* ignore all but letters and digits */
502 nextType
= GET_CHAR_TYPE(*name1
);
503 if (nextType
== ZERO
|| nextType
== NONZERO
) {
504 continue; /* ignore leading zero before another digit */
512 c1
= (char)type
; /* lowercased letter */
516 break; /* deliver c1 */
518 while ((c2
= *name2
++) != 0) {
519 type
= GET_CHAR_TYPE(c2
);
523 continue; /* ignore all but letters and digits */
526 nextType
= GET_CHAR_TYPE(*name2
);
527 if (nextType
== ZERO
|| nextType
== NONZERO
) {
528 continue; /* ignore leading zero before another digit */
536 c2
= (char)type
; /* lowercased letter */
540 break; /* deliver c2 */
543 /* If we reach the ends of both strings then they match */
548 /* Case-insensitive comparison */
549 rc
= (int)(unsigned char)c1
- (int)(unsigned char)c2
;
557 * Search for an alias.
558 * Return the converter number, an index into gMainTable.converterList.
560 static U_INLINE
uint32_t
561 findConverter(const char *alias
, UBool
*containsOption
, UErrorCode
*pErrorCode
) {
562 uint32_t mid
, start
, limit
;
565 int isUnnormalized
= (gMainTable
.optionTable
->stringNormalizationType
== UCNV_IO_UNNORMALIZED
);
566 char strippedName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
568 if (!isUnnormalized
) {
569 if (uprv_strlen(alias
) >= UCNV_MAX_CONVERTER_NAME_LENGTH
) {
570 *pErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
574 /* Lowercase and remove ignorable characters. */
575 ucnv_io_stripForCompare(strippedName
, alias
);
576 alias
= strippedName
;
579 /* do a binary search for the alias */
581 limit
= gMainTable
.untaggedConvArraySize
;
583 lastMid
= UINT32_MAX
;
586 mid
= (uint32_t)((start
+ limit
) / 2);
587 if (lastMid
== mid
) { /* Have we moved? */
588 break; /* We haven't moved, and it wasn't found. */
591 if (isUnnormalized
) {
592 result
= ucnv_compareNames(alias
, GET_STRING(gMainTable
.aliasList
[mid
]));
595 result
= uprv_strcmp(alias
, GET_NORMALIZED_STRING(gMainTable
.aliasList
[mid
]));
600 } else if (result
> 0) {
603 /* Since the gencnval tool folds duplicates into one entry,
604 * this alias in gMainTable.aliasList is unique, but different standards
605 * may map an alias to different converters.
607 if (gMainTable
.untaggedConvArray
[mid
] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT
) {
608 *pErrorCode
= U_AMBIGUOUS_ALIAS_WARNING
;
610 /* State whether the canonical converter name contains an option.
611 This information is contained in this list in order to maintain backward & forward compatibility. */
612 if (containsOption
) {
613 UBool containsCnvOptionInfo
= (UBool
)gMainTable
.optionTable
->containsCnvOptionInfo
;
614 *containsOption
= (UBool
)((containsCnvOptionInfo
615 && ((gMainTable
.untaggedConvArray
[mid
] & UCNV_CONTAINS_OPTION_BIT
) != 0))
616 || !containsCnvOptionInfo
);
618 return gMainTable
.untaggedConvArray
[mid
] & UCNV_CONVERTER_INDEX_MASK
;
626 * Is this alias in this list?
627 * alias should be non-NULL and listOffset should be non-zero.
629 static U_INLINE UBool
630 isAliasInList(const char *alias
, uint32_t listOffset
) {
633 uint32_t listCount
= gMainTable
.taggedAliasLists
[listOffset
];
634 /* +1 to skip listCount */
635 const uint16_t *currList
= gMainTable
.taggedAliasLists
+ listOffset
+ 1;
636 for (currAlias
= 0; currAlias
< listCount
; currAlias
++) {
637 if (currList
[currAlias
]
638 && ucnv_compareNames(alias
, GET_STRING(currList
[currAlias
]))==0)
648 * Search for the standard name of an alias (what is the default name
649 * that this standard uses?)
650 * Return the listOffset for gMainTable.taggedAliasLists. If it's 0,
651 * then it couldn't be found, but the parameters are valid.
654 findTaggedAliasListsOffset(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
658 UErrorCode myErr
= U_ZERO_ERROR
;
659 uint32_t tagNum
= getTagNumber(standard
);
661 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
662 convNum
= findConverter(alias
, NULL
, &myErr
);
663 if (myErr
!= U_ZERO_ERROR
) {
667 if (tagNum
< (gMainTable
.tagListSize
- UCNV_NUM_HIDDEN_TAGS
) && convNum
< gMainTable
.converterListSize
) {
668 listOffset
= gMainTable
.taggedAliasArray
[tagNum
*gMainTable
.converterListSize
+ convNum
];
669 if (listOffset
&& gMainTable
.taggedAliasLists
[listOffset
+ 1]) {
672 if (myErr
== U_AMBIGUOUS_ALIAS_WARNING
) {
673 /* Uh Oh! They used an ambiguous alias.
674 We have to search the whole swiss cheese starting
675 at the highest standard affinity.
676 This may take a while.
678 for (idx
= 0; idx
< gMainTable
.taggedAliasArraySize
; idx
++) {
679 listOffset
= gMainTable
.taggedAliasArray
[idx
];
680 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
681 uint32_t currTagNum
= idx
/gMainTable
.converterListSize
;
682 uint32_t currConvNum
= (idx
- currTagNum
*gMainTable
.converterListSize
);
683 uint32_t tempListOffset
= gMainTable
.taggedAliasArray
[tagNum
*gMainTable
.converterListSize
+ currConvNum
];
684 if (tempListOffset
&& gMainTable
.taggedAliasLists
[tempListOffset
+ 1]) {
685 return tempListOffset
;
687 /* else keep on looking */
688 /* We could speed this up by starting on the next row
689 because an alias is unique per row, right now.
690 This would change if alias versioning appears. */
693 /* The standard doesn't know about the alias */
695 /* else no default name */
698 /* else converter or tag not found */
703 /* Return the canonical name */
705 findTaggedConverterNum(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
709 UErrorCode myErr
= U_ZERO_ERROR
;
710 uint32_t tagNum
= getTagNumber(standard
);
712 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
713 convNum
= findConverter(alias
, NULL
, &myErr
);
714 if (myErr
!= U_ZERO_ERROR
) {
718 if (tagNum
< (gMainTable
.tagListSize
- UCNV_NUM_HIDDEN_TAGS
) && convNum
< gMainTable
.converterListSize
) {
719 listOffset
= gMainTable
.taggedAliasArray
[tagNum
*gMainTable
.converterListSize
+ convNum
];
720 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
723 if (myErr
== U_AMBIGUOUS_ALIAS_WARNING
) {
724 /* Uh Oh! They used an ambiguous alias.
725 We have to search one slice of the swiss cheese.
726 We search only in the requested tag, not the whole thing.
727 This may take a while.
729 uint32_t convStart
= (tagNum
)*gMainTable
.converterListSize
;
730 uint32_t convLimit
= (tagNum
+1)*gMainTable
.converterListSize
;
731 for (idx
= convStart
; idx
< convLimit
; idx
++) {
732 listOffset
= gMainTable
.taggedAliasArray
[idx
];
733 if (listOffset
&& isAliasInList(alias
, listOffset
)) {
734 return idx
-convStart
;
737 /* The standard doesn't know about the alias */
739 /* else no canonical name */
741 /* else converter or tag not found */
749 ucnv_io_getConverterName(const char *alias
, UBool
*containsOption
, UErrorCode
*pErrorCode
) {
750 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
751 uint32_t convNum
= findConverter(alias
, containsOption
, pErrorCode
);
752 if (convNum
< gMainTable
.converterListSize
) {
753 return GET_STRING(gMainTable
.converterList
[convNum
]);
755 /* else converter not found */
760 static int32_t U_CALLCONV
761 ucnv_io_countStandardAliases(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
763 UAliasContext
*myContext
= (UAliasContext
*)(enumerator
->context
);
764 uint32_t listOffset
= myContext
->listOffset
;
767 value
= gMainTable
.taggedAliasLists
[listOffset
];
772 static const char* U_CALLCONV
773 ucnv_io_nextStandardAliases(UEnumeration
*enumerator
,
774 int32_t* resultLength
,
775 UErrorCode
*pErrorCode
)
777 UAliasContext
*myContext
= (UAliasContext
*)(enumerator
->context
);
778 uint32_t listOffset
= myContext
->listOffset
;
781 uint32_t listCount
= gMainTable
.taggedAliasLists
[listOffset
];
782 const uint16_t *currList
= gMainTable
.taggedAliasLists
+ listOffset
+ 1;
784 if (myContext
->listIdx
< listCount
) {
785 const char *myStr
= GET_STRING(currList
[myContext
->listIdx
++]);
787 *resultLength
= (int32_t)uprv_strlen(myStr
);
792 /* Either we accessed a zero length list, or we enumerated too far. */
799 static void U_CALLCONV
800 ucnv_io_resetStandardAliases(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
801 ((UAliasContext
*)(enumerator
->context
))->listIdx
= 0;
804 static void U_CALLCONV
805 ucnv_io_closeUEnumeration(UEnumeration
*enumerator
) {
806 uprv_free(enumerator
->context
);
807 uprv_free(enumerator
);
810 /* Enumerate the aliases for the specified converter and standard tag */
811 static const UEnumeration gEnumAliases
= {
814 ucnv_io_closeUEnumeration
,
815 ucnv_io_countStandardAliases
,
817 ucnv_io_nextStandardAliases
,
818 ucnv_io_resetStandardAliases
821 U_CAPI UEnumeration
* U_EXPORT2
822 ucnv_openStandardNames(const char *convName
,
823 const char *standard
,
824 UErrorCode
*pErrorCode
)
826 UEnumeration
*myEnum
= NULL
;
827 if (haveAliasData(pErrorCode
) && isAlias(convName
, pErrorCode
)) {
828 uint32_t listOffset
= findTaggedAliasListsOffset(convName
, standard
, pErrorCode
);
830 /* When listOffset == 0, we want to acknowledge that the
831 converter name and standard are okay, but there
832 is nothing to enumerate. */
833 if (listOffset
< gMainTable
.taggedAliasListsSize
) {
834 UAliasContext
*myContext
;
836 myEnum
= uprv_malloc(sizeof(UEnumeration
));
837 if (myEnum
== NULL
) {
838 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
841 uprv_memcpy(myEnum
, &gEnumAliases
, sizeof(UEnumeration
));
842 myContext
= uprv_malloc(sizeof(UAliasContext
));
843 if (myContext
== NULL
) {
844 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
848 myContext
->listOffset
= listOffset
;
849 myContext
->listIdx
= 0;
850 myEnum
->context
= myContext
;
852 /* else converter or tag not found */
858 ucnv_io_countAliases(const char *alias
, UErrorCode
*pErrorCode
) {
859 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
860 uint32_t convNum
= findConverter(alias
, NULL
, pErrorCode
);
861 if (convNum
< gMainTable
.converterListSize
) {
862 /* tagListSize - 1 is the ALL tag */
863 int32_t listOffset
= gMainTable
.taggedAliasArray
[(gMainTable
.tagListSize
- 1)*gMainTable
.converterListSize
+ convNum
];
866 return gMainTable
.taggedAliasLists
[listOffset
];
868 /* else this shouldn't happen. internal program error */
870 /* else converter not found */
876 ucnv_io_getAliases(const char *alias
, uint16_t start
, const char **aliases
, UErrorCode
*pErrorCode
) {
877 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
879 uint32_t convNum
= findConverter(alias
, NULL
, pErrorCode
);
880 if (convNum
< gMainTable
.converterListSize
) {
881 /* tagListSize - 1 is the ALL tag */
882 int32_t listOffset
= gMainTable
.taggedAliasArray
[(gMainTable
.tagListSize
- 1)*gMainTable
.converterListSize
+ convNum
];
885 uint32_t listCount
= gMainTable
.taggedAliasLists
[listOffset
];
886 /* +1 to skip listCount */
887 const uint16_t *currList
= gMainTable
.taggedAliasLists
+ listOffset
+ 1;
889 for (currAlias
= start
; currAlias
< listCount
; currAlias
++) {
890 aliases
[currAlias
] = GET_STRING(currList
[currAlias
]);
893 /* else this shouldn't happen. internal program error */
895 /* else converter not found */
901 ucnv_io_getAlias(const char *alias
, uint16_t n
, UErrorCode
*pErrorCode
) {
902 if(haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
903 uint32_t convNum
= findConverter(alias
, NULL
, pErrorCode
);
904 if (convNum
< gMainTable
.converterListSize
) {
905 /* tagListSize - 1 is the ALL tag */
906 int32_t listOffset
= gMainTable
.taggedAliasArray
[(gMainTable
.tagListSize
- 1)*gMainTable
.converterListSize
+ convNum
];
909 uint32_t listCount
= gMainTable
.taggedAliasLists
[listOffset
];
910 /* +1 to skip listCount */
911 const uint16_t *currList
= gMainTable
.taggedAliasLists
+ listOffset
+ 1;
914 return GET_STRING(currList
[n
]);
916 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
918 /* else this shouldn't happen. internal program error */
920 /* else converter not found */
926 ucnv_io_countStandards(UErrorCode
*pErrorCode
) {
927 if (haveAliasData(pErrorCode
)) {
928 /* Don't include the empty list */
929 return (uint16_t)(gMainTable
.tagListSize
- UCNV_NUM_HIDDEN_TAGS
);
935 U_CAPI
const char * U_EXPORT2
936 ucnv_getStandard(uint16_t n
, UErrorCode
*pErrorCode
) {
937 if (haveAliasData(pErrorCode
)) {
938 if (n
< gMainTable
.tagListSize
- UCNV_NUM_HIDDEN_TAGS
) {
939 return GET_STRING(gMainTable
.tagList
[n
]);
941 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
947 U_CAPI
const char * U_EXPORT2
948 ucnv_getStandardName(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
949 if (haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
950 uint32_t listOffset
= findTaggedAliasListsOffset(alias
, standard
, pErrorCode
);
952 if (0 < listOffset
&& listOffset
< gMainTable
.taggedAliasListsSize
) {
953 const uint16_t *currList
= gMainTable
.taggedAliasLists
+ listOffset
+ 1;
955 /* Get the preferred name from this list */
957 return GET_STRING(currList
[0]);
959 /* else someone screwed up the alias table. */
960 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
967 U_CAPI
uint16_t U_EXPORT2
968 ucnv_countAliases(const char *alias
, UErrorCode
*pErrorCode
)
970 return ucnv_io_countAliases(alias
, pErrorCode
);
974 U_CAPI
const char* U_EXPORT2
975 ucnv_getAlias(const char *alias
, uint16_t n
, UErrorCode
*pErrorCode
)
977 return ucnv_io_getAlias(alias
, n
, pErrorCode
);
980 U_CAPI
void U_EXPORT2
981 ucnv_getAliases(const char *alias
, const char **aliases
, UErrorCode
*pErrorCode
)
983 ucnv_io_getAliases(alias
, 0, aliases
, pErrorCode
);
986 U_CAPI
uint16_t U_EXPORT2
987 ucnv_countStandards(void)
989 UErrorCode err
= U_ZERO_ERROR
;
990 return ucnv_io_countStandards(&err
);
993 U_CAPI
const char * U_EXPORT2
994 ucnv_getCanonicalName(const char *alias
, const char *standard
, UErrorCode
*pErrorCode
) {
995 if (haveAliasData(pErrorCode
) && isAlias(alias
, pErrorCode
)) {
996 uint32_t convNum
= findTaggedConverterNum(alias
, standard
, pErrorCode
);
998 if (convNum
< gMainTable
.converterListSize
) {
999 return GET_STRING(gMainTable
.converterList
[convNum
]);
1006 static int32_t U_CALLCONV
1007 ucnv_io_countAllConverters(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
1008 return gMainTable
.converterListSize
;
1011 static const char* U_CALLCONV
1012 ucnv_io_nextAllConverters(UEnumeration
*enumerator
,
1013 int32_t* resultLength
,
1014 UErrorCode
*pErrorCode
)
1016 uint16_t *myContext
= (uint16_t *)(enumerator
->context
);
1018 if (*myContext
< gMainTable
.converterListSize
) {
1019 const char *myStr
= GET_STRING(gMainTable
.converterList
[(*myContext
)++]);
1021 *resultLength
= (int32_t)uprv_strlen(myStr
);
1025 /* Either we accessed a zero length list, or we enumerated too far. */
1032 static void U_CALLCONV
1033 ucnv_io_resetAllConverters(UEnumeration
*enumerator
, UErrorCode
*pErrorCode
) {
1034 *((uint16_t *)(enumerator
->context
)) = 0;
1037 static const UEnumeration gEnumAllConverters
= {
1040 ucnv_io_closeUEnumeration
,
1041 ucnv_io_countAllConverters
,
1043 ucnv_io_nextAllConverters
,
1044 ucnv_io_resetAllConverters
1047 U_CAPI UEnumeration
* U_EXPORT2
1048 ucnv_openAllNames(UErrorCode
*pErrorCode
) {
1049 UEnumeration
*myEnum
= NULL
;
1050 if (haveAliasData(pErrorCode
)) {
1051 uint16_t *myContext
;
1053 myEnum
= uprv_malloc(sizeof(UEnumeration
));
1054 if (myEnum
== NULL
) {
1055 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
1058 uprv_memcpy(myEnum
, &gEnumAllConverters
, sizeof(UEnumeration
));
1059 myContext
= uprv_malloc(sizeof(uint16_t));
1060 if (myContext
== NULL
) {
1061 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
1066 myEnum
->context
= myContext
;
1072 ucnv_io_countTotalAliases(UErrorCode
*pErrorCode
) {
1073 if (haveAliasData(pErrorCode
)) {
1074 return (uint16_t)gMainTable
.aliasListSize
;
1079 /* alias table swapping ----------------------------------------------------- */
1081 typedef char * U_CALLCONV
StripForCompareFn(char *dst
, const char *name
);
1084 * row of a temporary array
1086 * gets platform-endian charset string indexes and sorting indexes;
1087 * after sorting this array by strings, the actual arrays are permutated
1088 * according to the sorting indexes
1090 typedef struct TempRow
{
1091 uint16_t strIndex
, sortIndex
;
1094 typedef struct TempAliasTable
{
1098 StripForCompareFn
*stripForCompare
;
1102 STACK_ROW_CAPACITY
=500
1106 io_compareRows(const void *context
, const void *left
, const void *right
) {
1107 char strippedLeft
[UCNV_MAX_CONVERTER_NAME_LENGTH
],
1108 strippedRight
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
1110 TempAliasTable
*tempTable
=(TempAliasTable
*)context
;
1111 const char *chars
=tempTable
->chars
;
1113 return (int32_t)uprv_strcmp(tempTable
->stripForCompare(strippedLeft
, chars
+2*((const TempRow
*)left
)->strIndex
),
1114 tempTable
->stripForCompare(strippedRight
, chars
+2*((const TempRow
*)right
)->strIndex
));
1117 U_CAPI
int32_t U_EXPORT2
1118 ucnv_swapAliases(const UDataSwapper
*ds
,
1119 const void *inData
, int32_t length
, void *outData
,
1120 UErrorCode
*pErrorCode
) {
1121 const UDataInfo
*pInfo
;
1124 const uint16_t *inTable
;
1125 uint32_t toc
[offsetsCount
];
1126 uint32_t offsets
[offsetsCount
]; /* 16-bit-addressed offsets from inTable/outTable */
1127 uint32_t i
, count
, tocLength
, topOffset
;
1129 TempRow rows
[STACK_ROW_CAPACITY
];
1130 uint16_t resort
[STACK_ROW_CAPACITY
];
1131 TempAliasTable tempTable
;
1133 /* udata_swapDataHeader checks the arguments */
1134 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
1135 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1139 /* check data format and format version */
1140 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
1142 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CvAl" */
1143 pInfo
->dataFormat
[1]==0x76 &&
1144 pInfo
->dataFormat
[2]==0x41 &&
1145 pInfo
->dataFormat
[3]==0x6c &&
1146 pInfo
->formatVersion
[0]==3
1148 udata_printError(ds
, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1149 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
1150 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
1151 pInfo
->formatVersion
[0]);
1152 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1156 /* an alias table must contain at least the table of contents array */
1157 if(length
>=0 && (length
-headerSize
)<4*(1+minTocLength
)) {
1158 udata_printError(ds
, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1160 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1164 inTable
=(const uint16_t *)((const char *)inData
+headerSize
);
1165 uprv_memset(toc
, 0, sizeof(toc
));
1166 toc
[tocLengthIndex
]=tocLength
=ds
->readUInt32(((const uint32_t *)inTable
)[tocLengthIndex
]);
1167 if(tocLength
<minTocLength
|| offsetsCount
<=tocLength
) {
1168 udata_printError(ds
, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength
);
1169 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
1173 /* read the known part of the table of contents */
1174 for(i
=converterListIndex
; i
<=tocLength
; ++i
) {
1175 toc
[i
]=ds
->readUInt32(((const uint32_t *)inTable
)[i
]);
1178 /* compute offsets */
1179 uprv_memset(offsets
, 0, sizeof(offsets
));
1180 offsets
[converterListIndex
]=2*(1+tocLength
); /* count two 16-bit units per toc entry */
1181 for(i
=tagListIndex
; i
<=tocLength
; ++i
) {
1182 offsets
[i
]=offsets
[i
-1]+toc
[i
-1];
1185 /* compute the overall size of the after-header data, in numbers of 16-bit units */
1186 topOffset
=offsets
[i
-1]+toc
[i
-1];
1190 const uint16_t *p
, *p2
;
1194 if((length
-headerSize
)<(2*(int32_t)topOffset
)) {
1195 udata_printError(ds
, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1197 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1201 outTable
=(uint16_t *)((char *)outData
+headerSize
);
1203 /* swap the entire table of contents */
1204 ds
->swapArray32(ds
, inTable
, 4*(1+tocLength
), outTable
, pErrorCode
);
1206 /* swap unormalized strings & normalized strings */
1207 ds
->swapInvChars(ds
, inTable
+offsets
[stringTableIndex
], 2*(int32_t)(toc
[stringTableIndex
]+toc
[normalizedStringTableIndex
]),
1208 outTable
+offsets
[stringTableIndex
], pErrorCode
);
1209 if(U_FAILURE(*pErrorCode
)) {
1210 udata_printError(ds
, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1214 if(ds
->inCharset
==ds
->outCharset
) {
1215 /* no need to sort, just swap all 16-bit values together */
1217 inTable
+offsets
[converterListIndex
],
1218 2*(int32_t)(offsets
[stringTableIndex
]-offsets
[converterListIndex
]),
1219 outTable
+offsets
[converterListIndex
],
1222 /* allocate the temporary table for sorting */
1223 count
=toc
[aliasListIndex
];
1225 tempTable
.chars
=(const char *)(outTable
+offsets
[stringTableIndex
]); /* sort by outCharset */
1227 if(count
<=STACK_ROW_CAPACITY
) {
1228 tempTable
.rows
=rows
;
1229 tempTable
.resort
=resort
;
1231 tempTable
.rows
=(TempRow
*)uprv_malloc(count
*sizeof(TempRow
)+count
*2);
1232 if(tempTable
.rows
==NULL
) {
1233 udata_printError(ds
, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1235 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1238 tempTable
.resort
=(uint16_t *)(tempTable
.rows
+count
);
1241 if(ds
->outCharset
==U_ASCII_FAMILY
) {
1242 tempTable
.stripForCompare
=ucnv_io_stripASCIIForCompare
;
1243 } else /* U_EBCDIC_FAMILY */ {
1244 tempTable
.stripForCompare
=ucnv_io_stripEBCDICForCompare
;
1248 * Sort unique aliases+mapped names.
1250 * We need to sort the list again by outCharset strings because they
1251 * sort differently for different charset families.
1252 * First we set up a temporary table with the string indexes and
1253 * sorting indexes and sort that.
1254 * Then we permutate and copy/swap the actual values.
1256 p
=inTable
+offsets
[aliasListIndex
];
1257 q
=outTable
+offsets
[aliasListIndex
];
1259 p2
=inTable
+offsets
[untaggedConvArrayIndex
];
1260 q2
=outTable
+offsets
[untaggedConvArrayIndex
];
1262 for(i
=0; i
<count
; ++i
) {
1263 tempTable
.rows
[i
].strIndex
=ds
->readUInt16(p
[i
]);
1264 tempTable
.rows
[i
].sortIndex
=(uint16_t)i
;
1267 uprv_sortArray(tempTable
.rows
, (int32_t)count
, sizeof(TempRow
),
1268 io_compareRows
, &tempTable
,
1271 if(U_SUCCESS(*pErrorCode
)) {
1272 /* copy/swap/permutate items */
1274 for(i
=0; i
<count
; ++i
) {
1275 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1276 ds
->swapArray16(ds
, p
+oldIndex
, 2, q
+i
, pErrorCode
);
1277 ds
->swapArray16(ds
, p2
+oldIndex
, 2, q2
+i
, pErrorCode
);
1281 * If we swap in-place, then the permutation must use another
1282 * temporary array (tempTable.resort)
1283 * before the results are copied to the outBundle.
1285 uint16_t *r
=tempTable
.resort
;
1287 for(i
=0; i
<count
; ++i
) {
1288 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1289 ds
->swapArray16(ds
, p
+oldIndex
, 2, r
+i
, pErrorCode
);
1291 uprv_memcpy(q
, r
, 2*count
);
1293 for(i
=0; i
<count
; ++i
) {
1294 oldIndex
=tempTable
.rows
[i
].sortIndex
;
1295 ds
->swapArray16(ds
, p2
+oldIndex
, 2, r
+i
, pErrorCode
);
1297 uprv_memcpy(q2
, r
, 2*count
);
1301 if(tempTable
.rows
!=rows
) {
1302 uprv_free(tempTable
.rows
);
1305 if(U_FAILURE(*pErrorCode
)) {
1306 udata_printError(ds
, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1311 /* swap remaining 16-bit values */
1313 inTable
+offsets
[converterListIndex
],
1314 2*(int32_t)(offsets
[aliasListIndex
]-offsets
[converterListIndex
]),
1315 outTable
+offsets
[converterListIndex
],
1318 inTable
+offsets
[taggedAliasArrayIndex
],
1319 2*(int32_t)(offsets
[stringTableIndex
]-offsets
[taggedAliasArrayIndex
]),
1320 outTable
+offsets
[taggedAliasArrayIndex
],
1325 return headerSize
+2*(int32_t)topOffset
;
1331 * Hey, Emacs, please set the following:
1334 * indent-tabs-mode: nil