]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv_io.cpp
ICU-57165.0.1.tar.gz
[apple/icu.git] / icuSources / common / ucnv_io.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
2ca993e8 4* Copyright (C) 1999-2015, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9*
4388f060 10* ucnv_io.cpp:
73c04bcf
A
11* initializes global variables and defines functions pertaining to converter
12* name resolution aspect of the conversion code.
b75a7d8f
A
13*
14* new implementation:
15*
16* created on: 1999nov22
17* created by: Markus W. Scherer
18*
19* Use the binary cnvalias.icu (created from convrtrs.txt) to work
20* with aliases for converter names.
21*
22* Date Name Description
23* 11/22/1999 markus Created
24* 06/28/2002 grhoten Major overhaul of the converter alias design.
25* Now an alias can map to different converters
26* depending on the specified standard.
27*******************************************************************************
28*/
29
30#include "unicode/utypes.h"
374ca955
A
31
32#if !UCONFIG_NO_CONVERSION
33
73c04bcf 34#include "unicode/ucnv.h"
b75a7d8f
A
35#include "unicode/udata.h"
36
37#include "umutex.h"
374ca955 38#include "uarrsort.h"
57a6839d 39#include "uassert.h"
374ca955 40#include "udataswp.h"
b75a7d8f
A
41#include "cstring.h"
42#include "cmemory.h"
43#include "ucnv_io.h"
44#include "uenumimp.h"
45#include "ucln_cmn.h"
46
47/* Format of cnvalias.icu -----------------------------------------------------
48 *
49 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
50 * This binary form contains several tables. All indexes are to uint16_t
51 * units, and not to the bytes (uint8_t units). Addressing everything on
52 * 16-bit boundaries allows us to store more information with small index
53 * numbers, which are also 16-bit in size. The majority of the table (except
54 * the string table) are 16-bit numbers.
55 *
56 * First there is the size of the Table of Contents (TOC). The TOC
57 * entries contain the size of each section. In order to find the offset
58 * you just need to sum up the previous offsets.
374ca955
A
59 * The TOC length and entries are an array of uint32_t values.
60 * The first section after the TOC starts immediately after the TOC.
b75a7d8f
A
61 *
62 * 1) This section contains a list of converters. This list contains indexes
63 * into the string table for the converter name. The index of this list is
64 * also used by other sections, which are mentioned later on.
374ca955 65 * This list is not sorted.
b75a7d8f
A
66 *
67 * 2) This section contains a list of tags. This list contains indexes
68 * into the string table for the tag name. The index of this list is
69 * also used by other sections, which are mentioned later on.
374ca955 70 * This list is in priority order of standards.
b75a7d8f
A
71 *
72 * 3) This section contains a list of sorted unique aliases. This
73 * list contains indexes into the string table for the alias name. The
74 * index of this list is also used by other sections, like the 4th section.
75 * The index for the 3rd and 4th section is used to get the
76 * alias -> converter name mapping. Section 3 and 4 form a two column table.
73c04bcf
A
77 * Some of the most significant bits of each index may contain other
78 * information (see findConverter for details).
b75a7d8f
A
79 *
80 * 4) This section contains a list of mapped converter names. Consider this
81 * as a table that maps the 3rd section to the 1st section. This list contains
82 * indexes into the 1st section. The index of this list is the same index in
83 * the 3rd section. There is also some extra information in the high bits of
84 * each converter index in this table. Currently it's only used to say that
85 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
86 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
87 * the predigested form of the 5th section so that an alias lookup can be fast.
374ca955 88 *
b75a7d8f
A
89 * 5) This section contains a 2D array with indexes to the 6th section. This
90 * section is the full form of all alias mappings. The column index is the
91 * index into the converter list (column header). The row index is the index
92 * to tag list (row header). This 2D array is the top part of a 3D array. The
93 * third dimension is in the 6th section.
94 *
95 * 6) This is a blob of variable-length arrays. Each array starts with a size,
96 * and is followed by indexes to alias names in the string table. This is
97 * the third dimension to the section 5. No other section should be referencing
98 * this section.
99 *
73c04bcf
A
100 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
101 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
102 * what type of string normalization is used among other potential things in the
103 * future.
b75a7d8f
A
104 *
105 * 8) This is the string table. All strings are indexed on an even address.
106 * There are two reasons for this. First many chip architectures locate strings
107 * faster on even address boundaries. Second, since all indexes are 16-bit
108 * numbers, this string table can be 128KB in size instead of 64KB when we
109 * only have strings starting on an even address.
110 *
73c04bcf
A
111 * 9) When present this is a set of prenormalized strings from section 8. This
112 * table contains normalized strings with the dashes and spaces stripped out,
113 * and all strings lowercased. In the future, the options in section 7 may state
114 * other types of normalization.
b75a7d8f
A
115 *
116 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
117 * has a unique alias among all converters. That same alias can
118 * be mentioned in other standards on different converters,
119 * but only one alias per tag can be unique.
120 *
121 *
122 * Converter Names (Usually in TR22 form)
123 * -------------------------------------------.
124 * T / /|
125 * a / / |
126 * g / / |
127 * s / / |
128 * / / |
129 * ------------------------------------------/ |
130 * A | | |
131 * l | | |
132 * i | | /
133 * a | | /
134 * s | | /
135 * e | | /
136 * s | |/
137 * -------------------------------------------
138 *
139 *
140 *
141 * Here is what it really looks like. It's like swiss cheese.
142 * There are holes. Some converters aren't recognized by
143 * a standard, or they are really old converters that the
144 * standard doesn't recognize anymore.
145 *
146 * Converter Names (Usually in TR22 form)
147 * -------------------------------------------.
148 * T /##########################################/|
149 * a / # # /#
374ca955
A
150 * g / # ## ## ### # ### ### ### #/
151 * s / # ##### #### ## ## #/#
152 * / ### # # ## # # # ### # # #/##
b75a7d8f
A
153 * ------------------------------------------/# #
154 * A |### # # ## # # # ### # # #|# #
155 * l |# # # # # ## # #|# #
156 * i |# # # # # # #|#
157 * a |# #|#
158 * s | #|#
374ca955
A
159 * e
160 * s
161 *
b75a7d8f
A
162 */
163
/**
 * Per-enumeration state for the UEnumeration objects created by
 * ucnv_openStandardNames().
 */
typedef struct UAliasContext {
    /* Offset of the alias list inside gMainTable.taggedAliasLists; 0 means there is nothing to enumerate. */
    uint32_t listOffset;
    /* Index of the next alias that ucnv_io_nextStandardAliases() will return. */
    uint32_t listIdx;
} UAliasContext;
171
172static const char DATA_NAME[] = "cnvalias";
173static const char DATA_TYPE[] = "icu";
174
175static UDataMemory *gAliasData=NULL;
57a6839d 176static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
b75a7d8f 177
374ca955
A
/*
 * Positions of the entries in the uint32_t table of contents at the start of
 * cnvalias.icu. Entry 0 holds the TOC length; the following entries hold the
 * size of each section, in the order the sections appear in the file.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,

    /* length of the swapper's temporary offsets[] */
    offsetsCount               = 10,

    /* min. tocLength in the file, does not count the tocLengthIndex! */
    minTocLength               = 8
};
192
73c04bcf
A
193static const UConverterAliasOptions defaultTableOptions = {
194 UCNV_IO_UNNORMALIZED,
195 0 /* containsCnvOptionInfo */
196};
197static UConverterAlias gMainTable;
b75a7d8f 198
73c04bcf
A
199#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
200#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
b75a7d8f
A
201
202static UBool U_CALLCONV
4388f060
A
203isAcceptable(void * /*context*/,
204 const char * /*type*/, const char * /*name*/,
b75a7d8f
A
205 const UDataInfo *pInfo) {
206 return (UBool)(
207 pInfo->size>=20 &&
208 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
209 pInfo->charsetFamily==U_CHARSET_FAMILY &&
210 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
211 pInfo->dataFormat[1]==0x76 &&
212 pInfo->dataFormat[2]==0x41 &&
213 pInfo->dataFormat[3]==0x6c &&
214 pInfo->formatVersion[0]==3);
215}
216
374ca955
A
217static UBool U_CALLCONV ucnv_io_cleanup(void)
218{
219 if (gAliasData) {
220 udata_close(gAliasData);
221 gAliasData = NULL;
222 }
57a6839d 223 gAliasDataInitOnce.reset();
374ca955 224
73c04bcf 225 uprv_memset(&gMainTable, 0, sizeof(gMainTable));
374ca955
A
226
227 return TRUE; /* Everything was cleaned up */
228}
229
57a6839d
A
230static void U_CALLCONV initAliasData(UErrorCode &errCode) {
231 UDataMemory *data;
232 const uint16_t *table;
233 const uint32_t *sectionSizes;
234 uint32_t tableStart;
235 uint32_t currOffset;
b75a7d8f 236
57a6839d 237 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
b75a7d8f 238
57a6839d
A
239 U_ASSERT(gAliasData == NULL);
240 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
241 if(U_FAILURE(errCode)) {
242 return;
243 }
b75a7d8f 244
57a6839d
A
245 sectionSizes = (const uint32_t *)udata_getMemory(data);
246 table = (const uint16_t *)sectionSizes;
b75a7d8f 247
57a6839d
A
248 tableStart = sectionSizes[0];
249 if (tableStart < minTocLength) {
250 errCode = U_INVALID_FORMAT_ERROR;
251 udata_close(data);
252 return;
253 }
254 gAliasData = data;
255
256 gMainTable.converterListSize = sectionSizes[1];
257 gMainTable.tagListSize = sectionSizes[2];
258 gMainTable.aliasListSize = sectionSizes[3];
259 gMainTable.untaggedConvArraySize = sectionSizes[4];
260 gMainTable.taggedAliasArraySize = sectionSizes[5];
261 gMainTable.taggedAliasListsSize = sectionSizes[6];
262 gMainTable.optionTableSize = sectionSizes[7];
263 gMainTable.stringTableSize = sectionSizes[8];
264
265 if (tableStart > 8) {
266 gMainTable.normalizedStringTableSize = sectionSizes[9];
267 }
b75a7d8f 268
57a6839d
A
269 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
270 gMainTable.converterList = table + currOffset;
b75a7d8f 271
57a6839d
A
272 currOffset += gMainTable.converterListSize;
273 gMainTable.tagList = table + currOffset;
b75a7d8f 274
57a6839d
A
275 currOffset += gMainTable.tagListSize;
276 gMainTable.aliasList = table + currOffset;
b75a7d8f 277
57a6839d
A
278 currOffset += gMainTable.aliasListSize;
279 gMainTable.untaggedConvArray = table + currOffset;
b75a7d8f 280
57a6839d
A
281 currOffset += gMainTable.untaggedConvArraySize;
282 gMainTable.taggedAliasArray = table + currOffset;
b75a7d8f 283
57a6839d
A
284 /* aliasLists is a 1's based array, but it has a padding character */
285 currOffset += gMainTable.taggedAliasArraySize;
286 gMainTable.taggedAliasLists = table + currOffset;
73c04bcf 287
57a6839d
A
288 currOffset += gMainTable.taggedAliasListsSize;
289 if (gMainTable.optionTableSize > 0
290 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
291 {
292 /* Faster table */
293 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
294 }
295 else {
296 /* Smaller table, or I can't handle this normalization mode!
297 Use the original slower table lookup. */
298 gMainTable.optionTable = &defaultTableOptions;
299 }
b75a7d8f 300
57a6839d
A
301 currOffset += gMainTable.optionTableSize;
302 gMainTable.stringTable = table + currOffset;
729e4ab9 303
57a6839d
A
304 currOffset += gMainTable.stringTableSize;
305 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
306 ? gMainTable.stringTable : (table + currOffset));
307}
b75a7d8f 308
b75a7d8f 309
57a6839d
A
310static UBool
311haveAliasData(UErrorCode *pErrorCode) {
312 umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
313 return U_SUCCESS(*pErrorCode);
b75a7d8f
A
314}
315
4388f060 316static inline UBool
b75a7d8f
A
317isAlias(const char *alias, UErrorCode *pErrorCode) {
318 if(alias==NULL) {
319 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
320 return FALSE;
b75a7d8f 321 }
73c04bcf 322 return (UBool)(*alias!=0);
b75a7d8f
A
323}
324
b75a7d8f 325static uint32_t getTagNumber(const char *tagname) {
73c04bcf 326 if (gMainTable.tagList) {
b75a7d8f 327 uint32_t tagNum;
73c04bcf
A
328 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
329 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
b75a7d8f
A
330 return tagNum;
331 }
332 }
333 }
334
335 return UINT32_MAX;
336}
337
73c04bcf
A
/* character types relevant for ucnv_compareNames() */
enum {
    UIGNORE,    /* not a letter or digit: skipped during comparison */
    ZERO,       /* the digit '0' */
    NONZERO,    /* the digits '1'..'9' */
    MINLETTER   /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * Letters map to the code of their lowercase form, digits to ZERO/NONZERO,
 * everything else to 0 (UIGNORE).
 */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of a char under ASCII rules; bytes 80..FF are always UIGNORE.
 * The argument is fully parenthesized so that expression arguments
 * (e.g. "x >> 4") are cast as a whole before indexing the table.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)

/*
 * character types for EBCDIC 80..FF
 * Indexed with the low 7 bits of the byte; same value scheme as asciiTypes.
 */
static const uint8_t ebcdicTypes[128] = {
    0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/* Type of a char under EBCDIC rules; bytes 00..7F are always UIGNORE. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)

/* Select the classifier that matches the charset family this library is built for. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
381
b75a7d8f
A
382/* @see ucnv_compareNames */
383U_CFUNC char * U_EXPORT2
374ca955 384ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
b75a7d8f 385 char *dstItr = dst;
73c04bcf
A
386 uint8_t type, nextType;
387 char c1;
388 UBool afterDigit = FALSE;
389
390 while ((c1 = *name++) != 0) {
391 type = GET_ASCII_TYPE(c1);
392 switch (type) {
57a6839d 393 case UIGNORE:
73c04bcf
A
394 afterDigit = FALSE;
395 continue; /* ignore all but letters and digits */
396 case ZERO:
397 if (!afterDigit) {
398 nextType = GET_ASCII_TYPE(*name);
399 if (nextType == ZERO || nextType == NONZERO) {
400 continue; /* ignore leading zero before another digit */
401 }
402 }
403 break;
404 case NONZERO:
405 afterDigit = TRUE;
406 break;
407 default:
408 c1 = (char)type; /* lowercased letter */
409 afterDigit = FALSE;
410 break;
b75a7d8f 411 }
73c04bcf 412 *dstItr++ = c1;
374ca955 413 }
73c04bcf 414 *dstItr = 0;
374ca955
A
415 return dst;
416}
417
418U_CFUNC char * U_EXPORT2
419ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
374ca955 420 char *dstItr = dst;
73c04bcf
A
421 uint8_t type, nextType;
422 char c1;
423 UBool afterDigit = FALSE;
424
425 while ((c1 = *name++) != 0) {
426 type = GET_EBCDIC_TYPE(c1);
427 switch (type) {
57a6839d 428 case UIGNORE:
73c04bcf
A
429 afterDigit = FALSE;
430 continue; /* ignore all but letters and digits */
431 case ZERO:
432 if (!afterDigit) {
433 nextType = GET_EBCDIC_TYPE(*name);
434 if (nextType == ZERO || nextType == NONZERO) {
435 continue; /* ignore leading zero before another digit */
436 }
437 }
438 break;
439 case NONZERO:
440 afterDigit = TRUE;
441 break;
442 default:
443 c1 = (char)type; /* lowercased letter */
444 afterDigit = FALSE;
445 break;
374ca955 446 }
73c04bcf 447 *dstItr++ = c1;
b75a7d8f 448 }
73c04bcf 449 *dstItr = 0;
b75a7d8f
A
450 return dst;
451}
452
453/**
73c04bcf
A
454 * Do a fuzzy compare of two converter/alias names.
455 * The comparison is case-insensitive, ignores leading zeroes if they are not
456 * followed by further digits, and ignores all but letters and digits.
457 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
458 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
459 * at http://www.unicode.org/reports/tr22/
374ca955 460 *
b75a7d8f
A
461 * This is a symmetrical (commutative) operation; order of arguments
462 * is insignificant. This is an important property for sorting the
463 * list (when the list is preprocessed into binary form) and for
464 * performing binary searches on it at run time.
374ca955 465 *
b75a7d8f
A
466 * @param name1 a converter name or alias, zero-terminated
467 * @param name2 a converter name or alias, zero-terminated
468 * @return 0 if the names match, or a negative value if the name1
469 * lexically precedes name2, or a positive value if the name1
470 * lexically follows name2.
471 *
472 * @see ucnv_io_stripForCompare
473 */
474U_CAPI int U_EXPORT2
475ucnv_compareNames(const char *name1, const char *name2) {
476 int rc;
73c04bcf 477 uint8_t type, nextType;
b75a7d8f 478 char c1, c2;
73c04bcf 479 UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
b75a7d8f
A
480
481 for (;;) {
73c04bcf
A
482 while ((c1 = *name1++) != 0) {
483 type = GET_CHAR_TYPE(c1);
484 switch (type) {
57a6839d 485 case UIGNORE:
73c04bcf
A
486 afterDigit1 = FALSE;
487 continue; /* ignore all but letters and digits */
488 case ZERO:
489 if (!afterDigit1) {
490 nextType = GET_CHAR_TYPE(*name1);
491 if (nextType == ZERO || nextType == NONZERO) {
492 continue; /* ignore leading zero before another digit */
493 }
494 }
495 break;
496 case NONZERO:
497 afterDigit1 = TRUE;
498 break;
499 default:
500 c1 = (char)type; /* lowercased letter */
501 afterDigit1 = FALSE;
502 break;
503 }
504 break; /* deliver c1 */
b75a7d8f 505 }
73c04bcf
A
506 while ((c2 = *name2++) != 0) {
507 type = GET_CHAR_TYPE(c2);
508 switch (type) {
57a6839d 509 case UIGNORE:
73c04bcf
A
510 afterDigit2 = FALSE;
511 continue; /* ignore all but letters and digits */
512 case ZERO:
513 if (!afterDigit2) {
514 nextType = GET_CHAR_TYPE(*name2);
515 if (nextType == ZERO || nextType == NONZERO) {
516 continue; /* ignore leading zero before another digit */
517 }
518 }
519 break;
520 case NONZERO:
521 afterDigit2 = TRUE;
522 break;
523 default:
524 c2 = (char)type; /* lowercased letter */
525 afterDigit2 = FALSE;
526 break;
527 }
528 break; /* deliver c2 */
b75a7d8f
A
529 }
530
531 /* If we reach the ends of both strings then they match */
532 if ((c1|c2)==0) {
533 return 0;
534 }
374ca955 535
b75a7d8f 536 /* Case-insensitive comparison */
73c04bcf 537 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
b75a7d8f
A
538 if (rc != 0) {
539 return rc;
540 }
b75a7d8f
A
541 }
542}
543
544/*
545 * search for an alias
546 * return the converter number index for gConverterList
547 */
4388f060 548static inline uint32_t
73c04bcf 549findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
b75a7d8f 550 uint32_t mid, start, limit;
374ca955 551 uint32_t lastMid;
b75a7d8f 552 int result;
73c04bcf
A
553 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
554 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
555
556 if (!isUnnormalized) {
557 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
558 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
559 return UINT32_MAX;
560 }
561
562 /* Lower case and remove ignoreable characters. */
563 ucnv_io_stripForCompare(strippedName, alias);
564 alias = strippedName;
565 }
b75a7d8f
A
566
567 /* do a binary search for the alias */
568 start = 0;
73c04bcf 569 limit = gMainTable.untaggedConvArraySize;
b75a7d8f 570 mid = limit;
374ca955 571 lastMid = UINT32_MAX;
b75a7d8f
A
572
573 for (;;) {
574 mid = (uint32_t)((start + limit) / 2);
374ca955
A
575 if (lastMid == mid) { /* Have we moved? */
576 break; /* We haven't moved, and it wasn't found. */
577 }
578 lastMid = mid;
73c04bcf
A
579 if (isUnnormalized) {
580 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
581 }
582 else {
583 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
584 }
b75a7d8f
A
585
586 if (result < 0) {
587 limit = mid;
588 } else if (result > 0) {
589 start = mid;
590 } else {
591 /* Since the gencnval tool folds duplicates into one entry,
592 * this alias in gAliasList is unique, but different standards
593 * may map an alias to different converters.
594 */
73c04bcf 595 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
b75a7d8f
A
596 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
597 }
73c04bcf
A
598 /* State whether the canonical converter name contains an option.
599 This information is contained in this list in order to maintain backward & forward compatibility. */
600 if (containsOption) {
601 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
602 *containsOption = (UBool)((containsCnvOptionInfo
603 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
604 || !containsCnvOptionInfo);
605 }
606 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
b75a7d8f
A
607 }
608 }
609
610 return UINT32_MAX;
611}
612
613/*
614 * Is this alias in this list?
615 * alias and listOffset should be non-NULL.
616 */
4388f060 617static inline UBool
b75a7d8f
A
618isAliasInList(const char *alias, uint32_t listOffset) {
619 if (listOffset) {
620 uint32_t currAlias;
73c04bcf 621 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 622 /* +1 to skip listCount */
73c04bcf 623 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
624 for (currAlias = 0; currAlias < listCount; currAlias++) {
625 if (currList[currAlias]
626 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
627 {
628 return TRUE;
629 }
630 }
631 }
632 return FALSE;
633}
634
635/*
636 * Search for an standard name of an alias (what is the default name
637 * that this standard uses?)
638 * return the listOffset for gTaggedAliasLists. If it's 0,
639 * the it couldn't be found, but the parameters are valid.
640 */
641static uint32_t
642findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
643 uint32_t idx;
644 uint32_t listOffset;
645 uint32_t convNum;
646 UErrorCode myErr = U_ZERO_ERROR;
647 uint32_t tagNum = getTagNumber(standard);
648
649 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 650 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
651 if (myErr != U_ZERO_ERROR) {
652 *pErrorCode = myErr;
653 }
654
73c04bcf
A
655 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
656 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
657 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
b75a7d8f
A
658 return listOffset;
659 }
660 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
661 /* Uh Oh! They used an ambiguous alias.
662 We have to search the whole swiss cheese starting
663 at the highest standard affinity.
664 This may take a while.
665 */
73c04bcf
A
666 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
667 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f 668 if (listOffset && isAliasInList(alias, listOffset)) {
73c04bcf
A
669 uint32_t currTagNum = idx/gMainTable.converterListSize;
670 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
671 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
672 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
b75a7d8f
A
673 return tempListOffset;
674 }
675 /* else keep on looking */
676 /* We could speed this up by starting on the next row
677 because an alias is unique per row, right now.
678 This would change if alias versioning appears. */
679 }
680 }
681 /* The standard doesn't know about the alias */
682 }
683 /* else no default name */
684 return 0;
685 }
686 /* else converter or tag not found */
687
688 return UINT32_MAX;
689}
690
691/* Return the canonical name */
692static uint32_t
693findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
694 uint32_t idx;
695 uint32_t listOffset;
696 uint32_t convNum;
697 UErrorCode myErr = U_ZERO_ERROR;
698 uint32_t tagNum = getTagNumber(standard);
699
700 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 701 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
702 if (myErr != U_ZERO_ERROR) {
703 *pErrorCode = myErr;
704 }
705
73c04bcf
A
706 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
707 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
b75a7d8f
A
708 if (listOffset && isAliasInList(alias, listOffset)) {
709 return convNum;
710 }
711 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
712 /* Uh Oh! They used an ambiguous alias.
713 We have to search one slice of the swiss cheese.
714 We search only in the requested tag, not the whole thing.
715 This may take a while.
716 */
73c04bcf
A
717 uint32_t convStart = (tagNum)*gMainTable.converterListSize;
718 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
b75a7d8f 719 for (idx = convStart; idx < convLimit; idx++) {
73c04bcf 720 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f
A
721 if (listOffset && isAliasInList(alias, listOffset)) {
722 return idx-convStart;
723 }
724 }
725 /* The standard doesn't know about the alias */
726 }
727 /* else no canonical name */
728 }
729 /* else converter or tag not found */
730
731 return UINT32_MAX;
732}
733
734
735
736U_CFUNC const char *
73c04bcf 737ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
4388f060
A
738 const char *aliasTmp = alias;
739 int32_t i = 0;
740 for (i = 0; i < 2; i++) {
741 if (i == 1) {
742 /*
743 * After the first unsuccess converter lookup, check to see if
744 * the name begins with 'x-'. If it does, strip it off and try
745 * again. This behaviour is similar to how ICU4J does it.
746 */
2ca993e8 747 if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
4388f060
A
748 aliasTmp = aliasTmp+2;
749 } else {
750 break;
751 }
752 }
753 if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
754 uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
755 if (convNum < gMainTable.converterListSize) {
756 return GET_STRING(gMainTable.converterList[convNum]);
757 }
758 /* else converter not found */
759 } else {
760 break;
b75a7d8f 761 }
b75a7d8f 762 }
4388f060 763
b75a7d8f
A
764 return NULL;
765}
766
767static int32_t U_CALLCONV
4388f060 768ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
769 int32_t value = 0;
770 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
771 uint32_t listOffset = myContext->listOffset;
772
773 if (listOffset) {
73c04bcf 774 value = gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
775 }
776 return value;
777}
778
779static const char* U_CALLCONV
780ucnv_io_nextStandardAliases(UEnumeration *enumerator,
781 int32_t* resultLength,
4388f060 782 UErrorCode * /*pErrorCode*/)
b75a7d8f
A
783{
784 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
785 uint32_t listOffset = myContext->listOffset;
786
787 if (listOffset) {
73c04bcf
A
788 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
789 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
790
791 if (myContext->listIdx < listCount) {
792 const char *myStr = GET_STRING(currList[myContext->listIdx++]);
793 if (resultLength) {
374ca955 794 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
795 }
796 return myStr;
797 }
798 }
799 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
800 if (resultLength) {
801 *resultLength = 0;
802 }
b75a7d8f
A
803 return NULL;
804}
805
806static void U_CALLCONV
4388f060 807ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
808 ((UAliasContext *)(enumerator->context))->listIdx = 0;
809}
810
811static void U_CALLCONV
812ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
813 uprv_free(enumerator->context);
814 uprv_free(enumerator);
815}
816
817/* Enumerate the aliases for the specified converter and standard tag */
818static const UEnumeration gEnumAliases = {
819 NULL,
820 NULL,
821 ucnv_io_closeUEnumeration,
822 ucnv_io_countStandardAliases,
823 uenum_unextDefault,
824 ucnv_io_nextStandardAliases,
825 ucnv_io_resetStandardAliases
826};
827
828U_CAPI UEnumeration * U_EXPORT2
829ucnv_openStandardNames(const char *convName,
830 const char *standard,
831 UErrorCode *pErrorCode)
832{
833 UEnumeration *myEnum = NULL;
834 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
835 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
836
837 /* When listOffset == 0, we want to acknowledge that the
838 converter name and standard are okay, but there
839 is nothing to enumerate. */
73c04bcf 840 if (listOffset < gMainTable.taggedAliasListsSize) {
b75a7d8f
A
841 UAliasContext *myContext;
842
51004dcb 843 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
b75a7d8f
A
844 if (myEnum == NULL) {
845 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
846 return NULL;
847 }
848 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
51004dcb 849 myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
b75a7d8f
A
850 if (myContext == NULL) {
851 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852 uprv_free(myEnum);
853 return NULL;
854 }
855 myContext->listOffset = listOffset;
856 myContext->listIdx = 0;
857 myEnum->context = myContext;
858 }
859 /* else converter or tag not found */
860 }
861 return myEnum;
862}
863
73c04bcf 864static uint16_t
b75a7d8f
A
865ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
866 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
867 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
868 if (convNum < gMainTable.converterListSize) {
b75a7d8f 869 /* tagListNum - 1 is the ALL tag */
73c04bcf 870 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
871
872 if (listOffset) {
73c04bcf 873 return gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
874 }
875 /* else this shouldn't happen. internal program error */
876 }
877 /* else converter not found */
878 }
879 return 0;
880}
881
73c04bcf 882static uint16_t
b75a7d8f
A
883ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
884 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
885 uint32_t currAlias;
73c04bcf
A
886 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
887 if (convNum < gMainTable.converterListSize) {
b75a7d8f 888 /* tagListNum - 1 is the ALL tag */
73c04bcf 889 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
890
891 if (listOffset) {
73c04bcf 892 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 893 /* +1 to skip listCount */
73c04bcf 894 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
895
896 for (currAlias = start; currAlias < listCount; currAlias++) {
897 aliases[currAlias] = GET_STRING(currList[currAlias]);
898 }
899 }
900 /* else this shouldn't happen. internal program error */
901 }
902 /* else converter not found */
903 }
904 return 0;
905}
906
73c04bcf 907static const char *
b75a7d8f
A
908ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
909 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
910 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
911 if (convNum < gMainTable.converterListSize) {
b75a7d8f 912 /* tagListNum - 1 is the ALL tag */
73c04bcf 913 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
914
915 if (listOffset) {
73c04bcf 916 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 917 /* +1 to skip listCount */
73c04bcf 918 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
919
920 if (n < listCount) {
921 return GET_STRING(currList[n]);
922 }
923 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
924 }
925 /* else this shouldn't happen. internal program error */
926 }
927 /* else converter not found */
928 }
929 return NULL;
930}
931
73c04bcf 932static uint16_t
b75a7d8f
A
933ucnv_io_countStandards(UErrorCode *pErrorCode) {
934 if (haveAliasData(pErrorCode)) {
935 /* Don't include the empty list */
73c04bcf 936 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
b75a7d8f
A
937 }
938
939 return 0;
940}
941
942U_CAPI const char * U_EXPORT2
943ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
944 if (haveAliasData(pErrorCode)) {
73c04bcf
A
945 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
946 return GET_STRING(gMainTable.tagList[n]);
b75a7d8f
A
947 }
948 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
949 }
950
951 return NULL;
952}
953
954U_CAPI const char * U_EXPORT2
955ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
956 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
957 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
958
73c04bcf
A
959 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
960 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
961
962 /* Get the preferred name from this list */
963 if (currList[0]) {
964 return GET_STRING(currList[0]);
965 }
966 /* else someone screwed up the alias table. */
967 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
968 }
969 }
970
971 return NULL;
972}
973
73c04bcf
A
974U_CAPI uint16_t U_EXPORT2
975ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
976{
977 return ucnv_io_countAliases(alias, pErrorCode);
978}
b75a7d8f 979
b75a7d8f 980
73c04bcf
A
981U_CAPI const char* U_EXPORT2
982ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
983{
984 return ucnv_io_getAlias(alias, n, pErrorCode);
b75a7d8f
A
985}
986
73c04bcf
A
987U_CAPI void U_EXPORT2
988ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
989{
990 ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
b75a7d8f
A
991}
992
73c04bcf
A
993U_CAPI uint16_t U_EXPORT2
994ucnv_countStandards(void)
995{
996 UErrorCode err = U_ZERO_ERROR;
997 return ucnv_io_countStandards(&err);
b75a7d8f
A
998}
999
73c04bcf
A
1000U_CAPI const char * U_EXPORT2
1001ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1002 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1003 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
b75a7d8f 1004
73c04bcf
A
1005 if (convNum < gMainTable.converterListSize) {
1006 return GET_STRING(gMainTable.converterList[convNum]);
b75a7d8f 1007 }
b75a7d8f 1008 }
73c04bcf 1009
b75a7d8f
A
1010 return NULL;
1011}
1012
1013static int32_t U_CALLCONV
4388f060 1014ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
73c04bcf 1015 return gMainTable.converterListSize;
b75a7d8f
A
1016}
1017
1018static const char* U_CALLCONV
1019ucnv_io_nextAllConverters(UEnumeration *enumerator,
1020 int32_t* resultLength,
4388f060 1021 UErrorCode * /*pErrorCode*/)
b75a7d8f
A
1022{
1023 uint16_t *myContext = (uint16_t *)(enumerator->context);
1024
73c04bcf
A
1025 if (*myContext < gMainTable.converterListSize) {
1026 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
b75a7d8f 1027 if (resultLength) {
374ca955 1028 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
1029 }
1030 return myStr;
1031 }
1032 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
1033 if (resultLength) {
1034 *resultLength = 0;
1035 }
b75a7d8f
A
1036 return NULL;
1037}
1038
1039static void U_CALLCONV
4388f060 1040ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
1041 *((uint16_t *)(enumerator->context)) = 0;
1042}
1043
1044static const UEnumeration gEnumAllConverters = {
1045 NULL,
1046 NULL,
1047 ucnv_io_closeUEnumeration,
1048 ucnv_io_countAllConverters,
1049 uenum_unextDefault,
1050 ucnv_io_nextAllConverters,
1051 ucnv_io_resetAllConverters
1052};
1053
1054U_CAPI UEnumeration * U_EXPORT2
1055ucnv_openAllNames(UErrorCode *pErrorCode) {
1056 UEnumeration *myEnum = NULL;
1057 if (haveAliasData(pErrorCode)) {
1058 uint16_t *myContext;
1059
51004dcb 1060 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
b75a7d8f
A
1061 if (myEnum == NULL) {
1062 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1063 return NULL;
1064 }
1065 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
51004dcb 1066 myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
b75a7d8f
A
1067 if (myContext == NULL) {
1068 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1069 uprv_free(myEnum);
1070 return NULL;
1071 }
1072 *myContext = 0;
1073 myEnum->context = myContext;
1074 }
1075 return myEnum;
1076}
1077
1078U_CFUNC uint16_t
46f4442e 1079ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
b75a7d8f 1080 if (haveAliasData(pErrorCode)) {
46f4442e 1081 return (uint16_t)gMainTable.converterListSize;
b75a7d8f
A
1082 }
1083 return 0;
1084}
1085
374ca955
A
/* alias table swapping ----------------------------------------------------- */
1087
1088typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1089
/*
 * row of a temporary array
 *
 * gets platform-endian charset string indexes and sorting indexes;
 * after sorting this array by strings, the actual arrays are permuted
 * according to the sorting indexes
 */
typedef struct TempRow {
    uint16_t strIndex;  /* index of the alias string, in platform endianness */
    uint16_t sortIndex; /* position of this row before sorting */
} TempRow;
1100
1101typedef struct TempAliasTable {
1102 const char *chars;
1103 TempRow *rows;
1104 uint16_t *resort;
1105 StripForCompareFn *stripForCompare;
1106} TempAliasTable;
1107
/*
 * Number of rows that ucnv_swapAliases() keeps in stack arrays; larger
 * alias lists are sorted in a heap allocation instead.
 */
enum {
    STACK_ROW_CAPACITY=500
};
1111
1112static int32_t
1113io_compareRows(const void *context, const void *left, const void *right) {
1114 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1115 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1116
1117 TempAliasTable *tempTable=(TempAliasTable *)context;
1118 const char *chars=tempTable->chars;
1119
1120 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1121 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1122}
1123
1124U_CAPI int32_t U_EXPORT2
1125ucnv_swapAliases(const UDataSwapper *ds,
1126 const void *inData, int32_t length, void *outData,
1127 UErrorCode *pErrorCode) {
1128 const UDataInfo *pInfo;
1129 int32_t headerSize;
1130
1131 const uint16_t *inTable;
46f4442e 1132 const uint32_t *inSectionSizes;
374ca955
A
1133 uint32_t toc[offsetsCount];
1134 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1135 uint32_t i, count, tocLength, topOffset;
1136
1137 TempRow rows[STACK_ROW_CAPACITY];
1138 uint16_t resort[STACK_ROW_CAPACITY];
1139 TempAliasTable tempTable;
1140
1141 /* udata_swapDataHeader checks the arguments */
1142 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1143 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1144 return 0;
1145 }
1146
1147 /* check data format and format version */
1148 pInfo=(const UDataInfo *)((const char *)inData+4);
1149 if(!(
1150 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
1151 pInfo->dataFormat[1]==0x76 &&
1152 pInfo->dataFormat[2]==0x41 &&
1153 pInfo->dataFormat[3]==0x6c &&
1154 pInfo->formatVersion[0]==3
1155 )) {
1156 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1157 pInfo->dataFormat[0], pInfo->dataFormat[1],
1158 pInfo->dataFormat[2], pInfo->dataFormat[3],
1159 pInfo->formatVersion[0]);
1160 *pErrorCode=U_UNSUPPORTED_ERROR;
1161 return 0;
1162 }
1163
1164 /* an alias table must contain at least the table of contents array */
1165 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1166 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1167 length-headerSize);
1168 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1169 return 0;
1170 }
1171
46f4442e
A
1172 inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1173 inTable=(const uint16_t *)inSectionSizes;
73c04bcf 1174 uprv_memset(toc, 0, sizeof(toc));
46f4442e 1175 toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
73c04bcf
A
1176 if(tocLength<minTocLength || offsetsCount<=tocLength) {
1177 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
374ca955
A
1178 *pErrorCode=U_INVALID_FORMAT_ERROR;
1179 return 0;
1180 }
1181
1182 /* read the known part of the table of contents */
73c04bcf 1183 for(i=converterListIndex; i<=tocLength; ++i) {
46f4442e 1184 toc[i]=ds->readUInt32(inSectionSizes[i]);
374ca955
A
1185 }
1186
1187 /* compute offsets */
73c04bcf 1188 uprv_memset(offsets, 0, sizeof(offsets));
374ca955 1189 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
73c04bcf 1190 for(i=tagListIndex; i<=tocLength; ++i) {
374ca955
A
1191 offsets[i]=offsets[i-1]+toc[i-1];
1192 }
1193
1194 /* compute the overall size of the after-header data, in numbers of 16-bit units */
1195 topOffset=offsets[i-1]+toc[i-1];
1196
1197 if(length>=0) {
1198 uint16_t *outTable;
1199 const uint16_t *p, *p2;
1200 uint16_t *q, *q2;
1201 uint16_t oldIndex;
1202
1203 if((length-headerSize)<(2*(int32_t)topOffset)) {
1204 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1205 length-headerSize);
1206 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1207 return 0;
1208 }
1209
1210 outTable=(uint16_t *)((char *)outData+headerSize);
1211
1212 /* swap the entire table of contents */
1213 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1214
73c04bcf
A
1215 /* swap unormalized strings & normalized strings */
1216 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
374ca955
A
1217 outTable+offsets[stringTableIndex], pErrorCode);
1218 if(U_FAILURE(*pErrorCode)) {
73c04bcf 1219 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
374ca955
A
1220 return 0;
1221 }
1222
1223 if(ds->inCharset==ds->outCharset) {
1224 /* no need to sort, just swap all 16-bit values together */
1225 ds->swapArray16(ds,
1226 inTable+offsets[converterListIndex],
1227 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1228 outTable+offsets[converterListIndex],
1229 pErrorCode);
1230 } else {
1231 /* allocate the temporary table for sorting */
1232 count=toc[aliasListIndex];
1233
1234 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1235
1236 if(count<=STACK_ROW_CAPACITY) {
1237 tempTable.rows=rows;
1238 tempTable.resort=resort;
1239 } else {
1240 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1241 if(tempTable.rows==NULL) {
1242 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1243 count);
1244 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1245 return 0;
1246 }
1247 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1248 }
1249
1250 if(ds->outCharset==U_ASCII_FAMILY) {
1251 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1252 } else /* U_EBCDIC_FAMILY */ {
1253 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1254 }
1255
1256 /*
1257 * Sort unique aliases+mapped names.
1258 *
1259 * We need to sort the list again by outCharset strings because they
1260 * sort differently for different charset families.
1261 * First we set up a temporary table with the string indexes and
1262 * sorting indexes and sort that.
1263 * Then we permutate and copy/swap the actual values.
1264 */
1265 p=inTable+offsets[aliasListIndex];
1266 q=outTable+offsets[aliasListIndex];
1267
1268 p2=inTable+offsets[untaggedConvArrayIndex];
1269 q2=outTable+offsets[untaggedConvArrayIndex];
1270
1271 for(i=0; i<count; ++i) {
1272 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1273 tempTable.rows[i].sortIndex=(uint16_t)i;
1274 }
1275
1276 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1277 io_compareRows, &tempTable,
1278 FALSE, pErrorCode);
1279
1280 if(U_SUCCESS(*pErrorCode)) {
1281 /* copy/swap/permutate items */
1282 if(p!=q) {
1283 for(i=0; i<count; ++i) {
1284 oldIndex=tempTable.rows[i].sortIndex;
1285 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1286 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1287 }
1288 } else {
1289 /*
1290 * If we swap in-place, then the permutation must use another
1291 * temporary array (tempTable.resort)
1292 * before the results are copied to the outBundle.
1293 */
1294 uint16_t *r=tempTable.resort;
1295
1296 for(i=0; i<count; ++i) {
1297 oldIndex=tempTable.rows[i].sortIndex;
1298 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1299 }
a62d09fc 1300 uprv_memcpy(q, r, 2*(size_t)count);
374ca955
A
1301
1302 for(i=0; i<count; ++i) {
1303 oldIndex=tempTable.rows[i].sortIndex;
1304 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1305 }
a62d09fc 1306 uprv_memcpy(q2, r, 2*(size_t)count);
374ca955
A
1307 }
1308 }
1309
1310 if(tempTable.rows!=rows) {
1311 uprv_free(tempTable.rows);
1312 }
1313
1314 if(U_FAILURE(*pErrorCode)) {
73c04bcf
A
1315 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1316 count);
374ca955
A
1317 return 0;
1318 }
1319
1320 /* swap remaining 16-bit values */
1321 ds->swapArray16(ds,
1322 inTable+offsets[converterListIndex],
1323 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1324 outTable+offsets[converterListIndex],
1325 pErrorCode);
1326 ds->swapArray16(ds,
1327 inTable+offsets[taggedAliasArrayIndex],
1328 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1329 outTable+offsets[taggedAliasArrayIndex],
1330 pErrorCode);
1331 }
1332 }
1333
1334 return headerSize+2*(int32_t)topOffset;
1335}
1336
1337#endif
1338
57a6839d 1339
b75a7d8f
A
1340/*
1341 * Hey, Emacs, please set the following:
1342 *
1343 * Local Variables:
1344 * indent-tabs-mode: nil
1345 * End:
1346 *
1347 */