]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv_io.c
ICU-8.11.2.tar.gz
[apple/icu.git] / icuSources / common / ucnv_io.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
73c04bcf 4* Copyright (C) 1999-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9*
10* ucnv_io.c:
73c04bcf
A
11* initializes global variables and defines functions pertaining to converter
12* name resolution aspect of the conversion code.
b75a7d8f
A
13*
14* new implementation:
15*
16* created on: 1999nov22
17* created by: Markus W. Scherer
18*
19* Use the binary cnvalias.icu (created from convrtrs.txt) to work
20* with aliases for converter names.
21*
22* Date Name Description
23* 11/22/1999 markus Created
24* 06/28/2002 grhoten Major overhaul of the converter alias design.
25* Now an alias can map to different converters
26* depending on the specified standard.
27*******************************************************************************
28*/
29
30#include "unicode/utypes.h"
374ca955
A
31
32#if !UCONFIG_NO_CONVERSION
33
73c04bcf 34#include "unicode/ucnv.h"
b75a7d8f
A
35#include "unicode/udata.h"
36
37#include "umutex.h"
374ca955
A
38#include "uarrsort.h"
39#include "udataswp.h"
b75a7d8f
A
40#include "cstring.h"
41#include "cmemory.h"
42#include "ucnv_io.h"
43#include "uenumimp.h"
44#include "ucln_cmn.h"
45
46/* Format of cnvalias.icu -----------------------------------------------------
47 *
48 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
49 * This binary form contains several tables. All indexes are to uint16_t
50 * units, and not to the bytes (uint8_t units). Addressing everything on
51 * 16-bit boundaries allows us to store more information with small index
52 * numbers, which are also 16-bit in size. The majority of the table (except
53 * the string table) are 16-bit numbers.
54 *
55 * First there is the size of the Table of Contents (TOC). The TOC
56 * entries contain the size of each section. In order to find the offset
57 * you just need to sum up the previous offsets.
374ca955
A
58 * The TOC length and entries are an array of uint32_t values.
59 * The first section after the TOC starts immediately after the TOC.
b75a7d8f
A
60 *
61 * 1) This section contains a list of converters. This list contains indexes
62 * into the string table for the converter name. The index of this list is
63 * also used by other sections, which are mentioned later on.
374ca955 64 * This list is not sorted.
b75a7d8f
A
65 *
66 * 2) This section contains a list of tags. This list contains indexes
67 * into the string table for the tag name. The index of this list is
68 * also used by other sections, which are mentioned later on.
374ca955 69 * This list is in priority order of standards.
b75a7d8f
A
70 *
71 * 3) This section contains a list of sorted unique aliases. This
72 * list contains indexes into the string table for the alias name. The
73 * index of this list is also used by other sections, like the 4th section.
74 * The index for the 3rd and 4th section is used to get the
75 * alias -> converter name mapping. Section 3 and 4 form a two column table.
73c04bcf
A
76 * Some of the most significant bits of each index may contain other
77 * information (see findConverter for details).
b75a7d8f
A
78 *
79 * 4) This section contains a list of mapped converter names. Consider this
80 * as a table that maps the 3rd section to the 1st section. This list contains
81 * indexes into the 1st section. The index of this list is the same index in
82 * the 3rd section. There is also some extra information in the high bits of
83 * each converter index in this table. Currently it's only used to say that
84 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
85 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
86 * the predigested form of the 5th section so that an alias lookup can be fast.
374ca955 87 *
b75a7d8f
A
 * 5) This section contains a 2D array with indexes to the 6th section. This
 * section is the full form of all alias mappings. The column index is the
 * index into the converter list (column header). The row index is the index
 * into the tag list (row header). This 2D array is the top part of a 3D array.
 * The third dimension is in the 6th section.
93 *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
 * and is followed by indexes to alias names in the string table. This is
 * the third dimension to section 5. No other section should reference
 * this section.
98 *
73c04bcf
A
99 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
100 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
101 * what type of string normalization is used among other potential things in the
102 * future.
b75a7d8f
A
103 *
104 * 8) This is the string table. All strings are indexed on an even address.
105 * There are two reasons for this. First many chip architectures locate strings
106 * faster on even address boundaries. Second, since all indexes are 16-bit
107 * numbers, this string table can be 128KB in size instead of 64KB when we
108 * only have strings starting on an even address.
109 *
73c04bcf
A
110 * 9) When present this is a set of prenormalized strings from section 8. This
111 * table contains normalized strings with the dashes and spaces stripped out,
112 * and all strings lowercased. In the future, the options in section 7 may state
113 * other types of normalization.
b75a7d8f
A
114 *
115 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
116 * has a unique alias among all converters. That same alias can
117 * be mentioned in other standards on different converters,
118 * but only one alias per tag can be unique.
119 *
120 *
121 * Converter Names (Usually in TR22 form)
122 * -------------------------------------------.
123 * T / /|
124 * a / / |
125 * g / / |
126 * s / / |
127 * / / |
128 * ------------------------------------------/ |
129 * A | | |
130 * l | | |
131 * i | | /
132 * a | | /
133 * s | | /
134 * e | | /
135 * s | |/
136 * -------------------------------------------
137 *
138 *
139 *
140 * Here is what it really looks like. It's like swiss cheese.
141 * There are holes. Some converters aren't recognized by
142 * a standard, or they are really old converters that the
143 * standard doesn't recognize anymore.
144 *
145 * Converter Names (Usually in TR22 form)
146 * -------------------------------------------.
147 * T /##########################################/|
148 * a / # # /#
374ca955
A
149 * g / # ## ## ### # ### ### ### #/
150 * s / # ##### #### ## ## #/#
151 * / ### # # ## # # # ### # # #/##
b75a7d8f
A
152 * ------------------------------------------/# #
153 * A |### # # ## # # # ### # # #|# #
154 * l |# # # # # ## # #|# #
155 * i |# # # # # # #|#
156 * a |# #|#
157 * s | #|#
374ca955
A
158 * e
159 * s
160 *
b75a7d8f
A
161 */
162
/**
 * Iteration state used by the UEnumeration API when enumerating the
 * aliases of one converter for one standard.
 */
typedef struct UAliasContext {
    uint32_t listOffset; /* offset of the list in gMainTable.taggedAliasLists; 0 means nothing to enumerate */
    uint32_t listIdx;    /* index of the next alias to hand out from that list */
} UAliasContext;
170
171static const char DATA_NAME[] = "cnvalias";
172static const char DATA_TYPE[] = "icu";
173
174static UDataMemory *gAliasData=NULL;
175
374ca955
A
/*
 * Indexes of the uint32_t entries in the table of contents (TOC) of
 * cnvalias.icu. Entry 0 is the TOC length; entries 1..9 are section sizes.
 */
enum {
    tocLengthIndex=0,
    converterListIndex=1,
    tagListIndex=2,
    aliasListIndex=3,
    untaggedConvArrayIndex=4,
    taggedAliasArrayIndex=5,
    taggedAliasListsIndex=6,
    tableOptionsIndex=7,
    stringTableIndex=8,
    normalizedStringTableIndex=9,
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength=8   /* min. tocLength in the file, does not count the tocLengthIndex! */
};
190
73c04bcf
A
191static const UConverterAliasOptions defaultTableOptions = {
192 UCNV_IO_UNNORMALIZED,
193 0 /* containsCnvOptionInfo */
194};
195static UConverterAlias gMainTable;
b75a7d8f 196
73c04bcf
A
197#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
198#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
b75a7d8f
A
199
200static UBool U_CALLCONV
201isAcceptable(void *context,
202 const char *type, const char *name,
203 const UDataInfo *pInfo) {
204 return (UBool)(
205 pInfo->size>=20 &&
206 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
207 pInfo->charsetFamily==U_CHARSET_FAMILY &&
208 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
209 pInfo->dataFormat[1]==0x76 &&
210 pInfo->dataFormat[2]==0x41 &&
211 pInfo->dataFormat[3]==0x6c &&
212 pInfo->formatVersion[0]==3);
213}
214
374ca955
A
215static UBool U_CALLCONV ucnv_io_cleanup(void)
216{
217 if (gAliasData) {
218 udata_close(gAliasData);
219 gAliasData = NULL;
220 }
221
73c04bcf 222 uprv_memset(&gMainTable, 0, sizeof(gMainTable));
374ca955
A
223
224 return TRUE; /* Everything was cleaned up */
225}
226
b75a7d8f
A
227static UBool
228haveAliasData(UErrorCode *pErrorCode) {
73c04bcf 229 int needInit;
b75a7d8f
A
230
231 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
232 return FALSE;
233 }
234
73c04bcf 235 UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
b75a7d8f
A
236
237 /* load converter alias data from file if necessary */
73c04bcf 238 if (needInit) {
b75a7d8f
A
239 UDataMemory *data = NULL;
240 const uint16_t *table = NULL;
241 uint32_t tableStart;
242 uint32_t currOffset;
b75a7d8f
A
243
244 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
245 if(U_FAILURE(*pErrorCode)) {
246 return FALSE;
247 }
248
249 table = (const uint16_t *)udata_getMemory(data);
250
251 tableStart = ((const uint32_t *)(table))[0];
374ca955 252 if (tableStart < minTocLength) {
b75a7d8f
A
253 *pErrorCode = U_INVALID_FORMAT_ERROR;
254 udata_close(data);
255 return FALSE;
256 }
257
258 umtx_lock(NULL);
259 if(gAliasData==NULL) {
260 gAliasData = data;
261 data=NULL;
262
73c04bcf
A
263 gMainTable.converterListSize = ((const uint32_t *)(table))[1];
264 gMainTable.tagListSize = ((const uint32_t *)(table))[2];
265 gMainTable.aliasListSize = ((const uint32_t *)(table))[3];
266 gMainTable.untaggedConvArraySize = ((const uint32_t *)(table))[4];
267 gMainTable.taggedAliasArraySize = ((const uint32_t *)(table))[5];
268 gMainTable.taggedAliasListsSize = ((const uint32_t *)(table))[6];
269 gMainTable.optionTableSize = ((const uint32_t *)(table))[7];
270 gMainTable.stringTableSize = ((const uint32_t *)(table))[8];
271
272 if (((const uint32_t *)(table))[0] > 8) {
273 gMainTable.normalizedStringTableSize = ((const uint32_t *)(table))[9];
274 }
b75a7d8f
A
275
276 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
73c04bcf 277 gMainTable.converterList = table + currOffset;
b75a7d8f 278
73c04bcf
A
279 currOffset += gMainTable.converterListSize;
280 gMainTable.tagList = table + currOffset;
b75a7d8f 281
73c04bcf
A
282 currOffset += gMainTable.tagListSize;
283 gMainTable.aliasList = table + currOffset;
b75a7d8f 284
73c04bcf
A
285 currOffset += gMainTable.aliasListSize;
286 gMainTable.untaggedConvArray = table + currOffset;
b75a7d8f 287
73c04bcf
A
288 currOffset += gMainTable.untaggedConvArraySize;
289 gMainTable.taggedAliasArray = table + currOffset;
b75a7d8f
A
290
291 /* aliasLists is a 1's based array, but it has a padding character */
73c04bcf
A
292 currOffset += gMainTable.taggedAliasArraySize;
293 gMainTable.taggedAliasLists = table + currOffset;
b75a7d8f 294
73c04bcf
A
295 currOffset += gMainTable.taggedAliasListsSize;
296 if (gMainTable.optionTableSize > 0
297 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
298 {
299 /* Faster table */
300 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
301 }
302 else {
303 /* Smaller table, or I can't handle this normalization mode!
304 Use the original slower table lookup. */
305 gMainTable.optionTable = &defaultTableOptions;
306 }
b75a7d8f 307
73c04bcf
A
308 currOffset += gMainTable.optionTableSize;
309 gMainTable.stringTable = table + currOffset;
310
311 currOffset += gMainTable.stringTableSize;
312 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
313 ? gMainTable.stringTable : (table + currOffset));
b75a7d8f 314
374ca955 315 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
b75a7d8f
A
316 }
317 umtx_unlock(NULL);
318
319 /* if a different thread set it first, then close the extra data */
320 if(data!=NULL) {
321 udata_close(data); /* NULL if it was set correctly */
322 }
323 }
324
325 return TRUE;
326}
327
328static U_INLINE UBool
329isAlias(const char *alias, UErrorCode *pErrorCode) {
330 if(alias==NULL) {
331 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
332 return FALSE;
b75a7d8f 333 }
73c04bcf 334 return (UBool)(*alias!=0);
b75a7d8f
A
335}
336
b75a7d8f 337static uint32_t getTagNumber(const char *tagname) {
73c04bcf 338 if (gMainTable.tagList) {
b75a7d8f 339 uint32_t tagNum;
73c04bcf
A
340 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
341 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
b75a7d8f
A
342 return tagNum;
343 }
344 }
345 }
346
347 return UINT32_MAX;
348}
349
73c04bcf
A
/* character types relevant for ucnv_compareNames() */
enum {
    IGNORE,     /* neither a letter nor a digit: skipped when comparing */
    ZERO,       /* the digit zero */
    NONZERO,    /* the digits one through nine */
    MINLETTER   /* any values from here on are lowercase letter mappings */
};
357
358/* character types for ASCII 00..7F */
359static const uint8_t asciiTypes[128] = {
360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
363 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
364 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
365 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
366 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
367 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
368};
369
370#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)IGNORE)
371
372/* character types for EBCDIC 80..FF */
373static const uint8_t ebcdicTypes[128] = {
374 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
375 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
376 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
379 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
380 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
381 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
382};
383
384#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
385
386#if U_CHARSET_FAMILY==U_ASCII_FAMILY
387# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
388#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
389# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
390#else
391# error U_CHARSET_FAMILY is not valid
392#endif
393
b75a7d8f
A
394/* @see ucnv_compareNames */
395U_CFUNC char * U_EXPORT2
374ca955 396ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
b75a7d8f 397 char *dstItr = dst;
73c04bcf
A
398 uint8_t type, nextType;
399 char c1;
400 UBool afterDigit = FALSE;
401
402 while ((c1 = *name++) != 0) {
403 type = GET_ASCII_TYPE(c1);
404 switch (type) {
405 case IGNORE:
406 afterDigit = FALSE;
407 continue; /* ignore all but letters and digits */
408 case ZERO:
409 if (!afterDigit) {
410 nextType = GET_ASCII_TYPE(*name);
411 if (nextType == ZERO || nextType == NONZERO) {
412 continue; /* ignore leading zero before another digit */
413 }
414 }
415 break;
416 case NONZERO:
417 afterDigit = TRUE;
418 break;
419 default:
420 c1 = (char)type; /* lowercased letter */
421 afterDigit = FALSE;
422 break;
b75a7d8f 423 }
73c04bcf 424 *dstItr++ = c1;
374ca955 425 }
73c04bcf 426 *dstItr = 0;
374ca955
A
427 return dst;
428}
429
430U_CFUNC char * U_EXPORT2
431ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
374ca955 432 char *dstItr = dst;
73c04bcf
A
433 uint8_t type, nextType;
434 char c1;
435 UBool afterDigit = FALSE;
436
437 while ((c1 = *name++) != 0) {
438 type = GET_EBCDIC_TYPE(c1);
439 switch (type) {
440 case IGNORE:
441 afterDigit = FALSE;
442 continue; /* ignore all but letters and digits */
443 case ZERO:
444 if (!afterDigit) {
445 nextType = GET_EBCDIC_TYPE(*name);
446 if (nextType == ZERO || nextType == NONZERO) {
447 continue; /* ignore leading zero before another digit */
448 }
449 }
450 break;
451 case NONZERO:
452 afterDigit = TRUE;
453 break;
454 default:
455 c1 = (char)type; /* lowercased letter */
456 afterDigit = FALSE;
457 break;
374ca955 458 }
73c04bcf 459 *dstItr++ = c1;
b75a7d8f 460 }
73c04bcf 461 *dstItr = 0;
b75a7d8f
A
462 return dst;
463}
464
465/**
73c04bcf
A
466 * Do a fuzzy compare of two converter/alias names.
467 * The comparison is case-insensitive, ignores leading zeroes if they are not
468 * followed by further digits, and ignores all but letters and digits.
469 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
470 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
471 * at http://www.unicode.org/reports/tr22/
374ca955 472 *
b75a7d8f
A
473 * This is a symmetrical (commutative) operation; order of arguments
474 * is insignificant. This is an important property for sorting the
475 * list (when the list is preprocessed into binary form) and for
476 * performing binary searches on it at run time.
374ca955 477 *
b75a7d8f
A
478 * @param name1 a converter name or alias, zero-terminated
479 * @param name2 a converter name or alias, zero-terminated
480 * @return 0 if the names match, or a negative value if the name1
481 * lexically precedes name2, or a positive value if the name1
482 * lexically follows name2.
483 *
484 * @see ucnv_io_stripForCompare
485 */
486U_CAPI int U_EXPORT2
487ucnv_compareNames(const char *name1, const char *name2) {
488 int rc;
73c04bcf 489 uint8_t type, nextType;
b75a7d8f 490 char c1, c2;
73c04bcf 491 UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
b75a7d8f
A
492
493 for (;;) {
73c04bcf
A
494 while ((c1 = *name1++) != 0) {
495 type = GET_CHAR_TYPE(c1);
496 switch (type) {
497 case IGNORE:
498 afterDigit1 = FALSE;
499 continue; /* ignore all but letters and digits */
500 case ZERO:
501 if (!afterDigit1) {
502 nextType = GET_CHAR_TYPE(*name1);
503 if (nextType == ZERO || nextType == NONZERO) {
504 continue; /* ignore leading zero before another digit */
505 }
506 }
507 break;
508 case NONZERO:
509 afterDigit1 = TRUE;
510 break;
511 default:
512 c1 = (char)type; /* lowercased letter */
513 afterDigit1 = FALSE;
514 break;
515 }
516 break; /* deliver c1 */
b75a7d8f 517 }
73c04bcf
A
518 while ((c2 = *name2++) != 0) {
519 type = GET_CHAR_TYPE(c2);
520 switch (type) {
521 case IGNORE:
522 afterDigit2 = FALSE;
523 continue; /* ignore all but letters and digits */
524 case ZERO:
525 if (!afterDigit2) {
526 nextType = GET_CHAR_TYPE(*name2);
527 if (nextType == ZERO || nextType == NONZERO) {
528 continue; /* ignore leading zero before another digit */
529 }
530 }
531 break;
532 case NONZERO:
533 afterDigit2 = TRUE;
534 break;
535 default:
536 c2 = (char)type; /* lowercased letter */
537 afterDigit2 = FALSE;
538 break;
539 }
540 break; /* deliver c2 */
b75a7d8f
A
541 }
542
543 /* If we reach the ends of both strings then they match */
544 if ((c1|c2)==0) {
545 return 0;
546 }
374ca955 547
b75a7d8f 548 /* Case-insensitive comparison */
73c04bcf 549 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
b75a7d8f
A
550 if (rc != 0) {
551 return rc;
552 }
b75a7d8f
A
553 }
554}
555
556/*
557 * search for an alias
558 * return the converter number index for gConverterList
559 */
560static U_INLINE uint32_t
73c04bcf 561findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
b75a7d8f 562 uint32_t mid, start, limit;
374ca955 563 uint32_t lastMid;
b75a7d8f 564 int result;
73c04bcf
A
565 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
566 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
567
568 if (!isUnnormalized) {
569 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
570 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
571 return UINT32_MAX;
572 }
573
574 /* Lower case and remove ignoreable characters. */
575 ucnv_io_stripForCompare(strippedName, alias);
576 alias = strippedName;
577 }
b75a7d8f
A
578
579 /* do a binary search for the alias */
580 start = 0;
73c04bcf 581 limit = gMainTable.untaggedConvArraySize;
b75a7d8f 582 mid = limit;
374ca955 583 lastMid = UINT32_MAX;
b75a7d8f
A
584
585 for (;;) {
586 mid = (uint32_t)((start + limit) / 2);
374ca955
A
587 if (lastMid == mid) { /* Have we moved? */
588 break; /* We haven't moved, and it wasn't found. */
589 }
590 lastMid = mid;
73c04bcf
A
591 if (isUnnormalized) {
592 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
593 }
594 else {
595 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
596 }
b75a7d8f
A
597
598 if (result < 0) {
599 limit = mid;
600 } else if (result > 0) {
601 start = mid;
602 } else {
603 /* Since the gencnval tool folds duplicates into one entry,
604 * this alias in gAliasList is unique, but different standards
605 * may map an alias to different converters.
606 */
73c04bcf 607 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
b75a7d8f
A
608 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
609 }
73c04bcf
A
610 /* State whether the canonical converter name contains an option.
611 This information is contained in this list in order to maintain backward & forward compatibility. */
612 if (containsOption) {
613 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
614 *containsOption = (UBool)((containsCnvOptionInfo
615 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
616 || !containsCnvOptionInfo);
617 }
618 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
b75a7d8f
A
619 }
620 }
621
622 return UINT32_MAX;
623}
624
625/*
626 * Is this alias in this list?
627 * alias and listOffset should be non-NULL.
628 */
374ca955 629static U_INLINE UBool
b75a7d8f
A
630isAliasInList(const char *alias, uint32_t listOffset) {
631 if (listOffset) {
632 uint32_t currAlias;
73c04bcf 633 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 634 /* +1 to skip listCount */
73c04bcf 635 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
636 for (currAlias = 0; currAlias < listCount; currAlias++) {
637 if (currList[currAlias]
638 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
639 {
640 return TRUE;
641 }
642 }
643 }
644 return FALSE;
645}
646
647/*
648 * Search for an standard name of an alias (what is the default name
649 * that this standard uses?)
650 * return the listOffset for gTaggedAliasLists. If it's 0,
651 * the it couldn't be found, but the parameters are valid.
652 */
653static uint32_t
654findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
655 uint32_t idx;
656 uint32_t listOffset;
657 uint32_t convNum;
658 UErrorCode myErr = U_ZERO_ERROR;
659 uint32_t tagNum = getTagNumber(standard);
660
661 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 662 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
663 if (myErr != U_ZERO_ERROR) {
664 *pErrorCode = myErr;
665 }
666
73c04bcf
A
667 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
668 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
669 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
b75a7d8f
A
670 return listOffset;
671 }
672 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
673 /* Uh Oh! They used an ambiguous alias.
674 We have to search the whole swiss cheese starting
675 at the highest standard affinity.
676 This may take a while.
677 */
73c04bcf
A
678 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
679 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f 680 if (listOffset && isAliasInList(alias, listOffset)) {
73c04bcf
A
681 uint32_t currTagNum = idx/gMainTable.converterListSize;
682 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
683 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
684 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
b75a7d8f
A
685 return tempListOffset;
686 }
687 /* else keep on looking */
688 /* We could speed this up by starting on the next row
689 because an alias is unique per row, right now.
690 This would change if alias versioning appears. */
691 }
692 }
693 /* The standard doesn't know about the alias */
694 }
695 /* else no default name */
696 return 0;
697 }
698 /* else converter or tag not found */
699
700 return UINT32_MAX;
701}
702
703/* Return the canonical name */
704static uint32_t
705findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
706 uint32_t idx;
707 uint32_t listOffset;
708 uint32_t convNum;
709 UErrorCode myErr = U_ZERO_ERROR;
710 uint32_t tagNum = getTagNumber(standard);
711
712 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 713 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
714 if (myErr != U_ZERO_ERROR) {
715 *pErrorCode = myErr;
716 }
717
73c04bcf
A
718 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
719 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
b75a7d8f
A
720 if (listOffset && isAliasInList(alias, listOffset)) {
721 return convNum;
722 }
723 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
724 /* Uh Oh! They used an ambiguous alias.
725 We have to search one slice of the swiss cheese.
726 We search only in the requested tag, not the whole thing.
727 This may take a while.
728 */
73c04bcf
A
729 uint32_t convStart = (tagNum)*gMainTable.converterListSize;
730 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
b75a7d8f 731 for (idx = convStart; idx < convLimit; idx++) {
73c04bcf 732 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f
A
733 if (listOffset && isAliasInList(alias, listOffset)) {
734 return idx-convStart;
735 }
736 }
737 /* The standard doesn't know about the alias */
738 }
739 /* else no canonical name */
740 }
741 /* else converter or tag not found */
742
743 return UINT32_MAX;
744}
745
746
747
748U_CFUNC const char *
73c04bcf 749ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
b75a7d8f 750 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
751 uint32_t convNum = findConverter(alias, containsOption, pErrorCode);
752 if (convNum < gMainTable.converterListSize) {
753 return GET_STRING(gMainTable.converterList[convNum]);
b75a7d8f
A
754 }
755 /* else converter not found */
756 }
757 return NULL;
758}
759
760static int32_t U_CALLCONV
761ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
762 int32_t value = 0;
763 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
764 uint32_t listOffset = myContext->listOffset;
765
766 if (listOffset) {
73c04bcf 767 value = gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
768 }
769 return value;
770}
771
772static const char* U_CALLCONV
773ucnv_io_nextStandardAliases(UEnumeration *enumerator,
774 int32_t* resultLength,
775 UErrorCode *pErrorCode)
776{
777 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
778 uint32_t listOffset = myContext->listOffset;
779
780 if (listOffset) {
73c04bcf
A
781 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
782 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
783
784 if (myContext->listIdx < listCount) {
785 const char *myStr = GET_STRING(currList[myContext->listIdx++]);
786 if (resultLength) {
374ca955 787 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
788 }
789 return myStr;
790 }
791 }
792 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
793 if (resultLength) {
794 *resultLength = 0;
795 }
b75a7d8f
A
796 return NULL;
797}
798
799static void U_CALLCONV
800ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
801 ((UAliasContext *)(enumerator->context))->listIdx = 0;
802}
803
804static void U_CALLCONV
805ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
806 uprv_free(enumerator->context);
807 uprv_free(enumerator);
808}
809
810/* Enumerate the aliases for the specified converter and standard tag */
811static const UEnumeration gEnumAliases = {
812 NULL,
813 NULL,
814 ucnv_io_closeUEnumeration,
815 ucnv_io_countStandardAliases,
816 uenum_unextDefault,
817 ucnv_io_nextStandardAliases,
818 ucnv_io_resetStandardAliases
819};
820
821U_CAPI UEnumeration * U_EXPORT2
822ucnv_openStandardNames(const char *convName,
823 const char *standard,
824 UErrorCode *pErrorCode)
825{
826 UEnumeration *myEnum = NULL;
827 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
828 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
829
830 /* When listOffset == 0, we want to acknowledge that the
831 converter name and standard are okay, but there
832 is nothing to enumerate. */
73c04bcf 833 if (listOffset < gMainTable.taggedAliasListsSize) {
b75a7d8f
A
834 UAliasContext *myContext;
835
836 myEnum = uprv_malloc(sizeof(UEnumeration));
837 if (myEnum == NULL) {
838 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
839 return NULL;
840 }
841 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
842 myContext = uprv_malloc(sizeof(UAliasContext));
843 if (myContext == NULL) {
844 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
845 uprv_free(myEnum);
846 return NULL;
847 }
848 myContext->listOffset = listOffset;
849 myContext->listIdx = 0;
850 myEnum->context = myContext;
851 }
852 /* else converter or tag not found */
853 }
854 return myEnum;
855}
856
73c04bcf 857static uint16_t
b75a7d8f
A
858ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
859 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
860 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
861 if (convNum < gMainTable.converterListSize) {
b75a7d8f 862 /* tagListNum - 1 is the ALL tag */
73c04bcf 863 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
864
865 if (listOffset) {
73c04bcf 866 return gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
867 }
868 /* else this shouldn't happen. internal program error */
869 }
870 /* else converter not found */
871 }
872 return 0;
873}
874
73c04bcf 875static uint16_t
b75a7d8f
A
876ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
877 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
878 uint32_t currAlias;
73c04bcf
A
879 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
880 if (convNum < gMainTable.converterListSize) {
b75a7d8f 881 /* tagListNum - 1 is the ALL tag */
73c04bcf 882 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
883
884 if (listOffset) {
73c04bcf 885 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 886 /* +1 to skip listCount */
73c04bcf 887 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
888
889 for (currAlias = start; currAlias < listCount; currAlias++) {
890 aliases[currAlias] = GET_STRING(currList[currAlias]);
891 }
892 }
893 /* else this shouldn't happen. internal program error */
894 }
895 /* else converter not found */
896 }
897 return 0;
898}
899
73c04bcf 900static const char *
b75a7d8f
A
901ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
902 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
903 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
904 if (convNum < gMainTable.converterListSize) {
b75a7d8f 905 /* tagListNum - 1 is the ALL tag */
73c04bcf 906 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
907
908 if (listOffset) {
73c04bcf 909 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 910 /* +1 to skip listCount */
73c04bcf 911 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
912
913 if (n < listCount) {
914 return GET_STRING(currList[n]);
915 }
916 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
917 }
918 /* else this shouldn't happen. internal program error */
919 }
920 /* else converter not found */
921 }
922 return NULL;
923}
924
73c04bcf 925static uint16_t
b75a7d8f
A
926ucnv_io_countStandards(UErrorCode *pErrorCode) {
927 if (haveAliasData(pErrorCode)) {
928 /* Don't include the empty list */
73c04bcf 929 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
b75a7d8f
A
930 }
931
932 return 0;
933}
934
935U_CAPI const char * U_EXPORT2
936ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
937 if (haveAliasData(pErrorCode)) {
73c04bcf
A
938 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
939 return GET_STRING(gMainTable.tagList[n]);
b75a7d8f
A
940 }
941 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
942 }
943
944 return NULL;
945}
946
947U_CAPI const char * U_EXPORT2
948ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
949 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
950 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
951
73c04bcf
A
952 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
953 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
954
955 /* Get the preferred name from this list */
956 if (currList[0]) {
957 return GET_STRING(currList[0]);
958 }
959 /* else someone screwed up the alias table. */
960 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
961 }
962 }
963
964 return NULL;
965}
966
73c04bcf
A
967U_CAPI uint16_t U_EXPORT2
968ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
969{
970 return ucnv_io_countAliases(alias, pErrorCode);
971}
b75a7d8f 972
b75a7d8f 973
73c04bcf
A
974U_CAPI const char* U_EXPORT2
975ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
976{
977 return ucnv_io_getAlias(alias, n, pErrorCode);
b75a7d8f
A
978}
979
73c04bcf
A
980U_CAPI void U_EXPORT2
981ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
982{
983 ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
b75a7d8f
A
984}
985
73c04bcf
A
986U_CAPI uint16_t U_EXPORT2
987ucnv_countStandards(void)
988{
989 UErrorCode err = U_ZERO_ERROR;
990 return ucnv_io_countStandards(&err);
b75a7d8f
A
991}
992
73c04bcf
A
993U_CAPI const char * U_EXPORT2
994ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
995 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
996 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
b75a7d8f 997
73c04bcf
A
998 if (convNum < gMainTable.converterListSize) {
999 return GET_STRING(gMainTable.converterList[convNum]);
b75a7d8f 1000 }
b75a7d8f 1001 }
73c04bcf 1002
b75a7d8f
A
1003 return NULL;
1004}
1005
1006static int32_t U_CALLCONV
1007ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
73c04bcf 1008 return gMainTable.converterListSize;
b75a7d8f
A
1009}
1010
1011static const char* U_CALLCONV
1012ucnv_io_nextAllConverters(UEnumeration *enumerator,
1013 int32_t* resultLength,
1014 UErrorCode *pErrorCode)
1015{
1016 uint16_t *myContext = (uint16_t *)(enumerator->context);
1017
73c04bcf
A
1018 if (*myContext < gMainTable.converterListSize) {
1019 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
b75a7d8f 1020 if (resultLength) {
374ca955 1021 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
1022 }
1023 return myStr;
1024 }
1025 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
1026 if (resultLength) {
1027 *resultLength = 0;
1028 }
b75a7d8f
A
1029 return NULL;
1030}
1031
1032static void U_CALLCONV
1033ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
1034 *((uint16_t *)(enumerator->context)) = 0;
1035}
1036
1037static const UEnumeration gEnumAllConverters = {
1038 NULL,
1039 NULL,
1040 ucnv_io_closeUEnumeration,
1041 ucnv_io_countAllConverters,
1042 uenum_unextDefault,
1043 ucnv_io_nextAllConverters,
1044 ucnv_io_resetAllConverters
1045};
1046
1047U_CAPI UEnumeration * U_EXPORT2
1048ucnv_openAllNames(UErrorCode *pErrorCode) {
1049 UEnumeration *myEnum = NULL;
1050 if (haveAliasData(pErrorCode)) {
1051 uint16_t *myContext;
1052
1053 myEnum = uprv_malloc(sizeof(UEnumeration));
1054 if (myEnum == NULL) {
1055 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1056 return NULL;
1057 }
1058 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1059 myContext = uprv_malloc(sizeof(uint16_t));
1060 if (myContext == NULL) {
1061 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1062 uprv_free(myEnum);
1063 return NULL;
1064 }
1065 *myContext = 0;
1066 myEnum->context = myContext;
1067 }
1068 return myEnum;
1069}
1070
1071U_CFUNC uint16_t
73c04bcf 1072ucnv_io_countTotalAliases(UErrorCode *pErrorCode) {
b75a7d8f 1073 if (haveAliasData(pErrorCode)) {
73c04bcf 1074 return (uint16_t)gMainTable.aliasListSize;
b75a7d8f
A
1075 }
1076 return 0;
1077}
1078
374ca955
A
1079/* alias table swapping ----------------------------------------------------- */
1080
1081typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1082
/*
 * One row of the temporary array used for re-sorting the alias list.
 *
 * Each row gets a platform-endian charset string index and a sorting index;
 * after this array has been sorted by strings, the actual arrays are
 * permuted according to the sorting indexes.
 */
typedef struct TempRow {
    uint16_t strIndex;  /* platform-endian index of the row's string */
    uint16_t sortIndex; /* position the row had before sorting */
} TempRow;
1093
1094typedef struct TempAliasTable {
1095 const char *chars;
1096 TempRow *rows;
1097 uint16_t *resort;
1098 StripForCompareFn *stripForCompare;
1099} TempAliasTable;
1100
/*
 * Number of rows for which ucnv_swapAliases() uses its stack arrays;
 * larger tables are sorted in heap memory instead.
 */
enum { STACK_ROW_CAPACITY = 500 };
1104
1105static int32_t
1106io_compareRows(const void *context, const void *left, const void *right) {
1107 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1108 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1109
1110 TempAliasTable *tempTable=(TempAliasTable *)context;
1111 const char *chars=tempTable->chars;
1112
1113 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1114 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1115}
1116
/*
 * Swaps a converter alias table (data format "CvAl", format version 3)
 * between the byte order / charset family described by `ds`.
 *
 * ds         swapper providing the read/swap/print callbacks and the
 *            input and output charset families
 * inData     start of the data, including its header
 * length     number of bytes at inData; when negative, nothing is written
 *            and only the size is computed
 * outData    destination for the swapped data (may be the same memory as
 *            inData; the in-place case is handled separately below)
 * pErrorCode in/out error code
 *
 * Returns the header size plus twice the number of 16-bit units covered by
 * the table of contents, or 0 on failure. Failures set *pErrorCode to
 * U_UNSUPPORTED_ERROR (wrong data format/version),
 * U_INDEX_OUTOFBOUNDS_ERROR (too few bytes), U_INVALID_FORMAT_ERROR
 * (unsupported number of sections), U_MEMORY_ALLOCATION_ERROR, or to
 * whatever the swapper/sort callbacks report.
 *
 * When the input and output charset families differ, the alias list is
 * re-sorted by the output-charset strings and the untagged converter array
 * is permuted in parallel.
 */
1117U_CAPI int32_t U_EXPORT2
1118ucnv_swapAliases(const UDataSwapper *ds,
1119 const void *inData, int32_t length, void *outData,
1120 UErrorCode *pErrorCode) {
1121 const UDataInfo *pInfo;
1122 int32_t headerSize;
1123
1124 const uint16_t *inTable;
1125 uint32_t toc[offsetsCount];
1126 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1127 uint32_t i, count, tocLength, topOffset;
1128
1129 TempRow rows[STACK_ROW_CAPACITY];
1130 uint16_t resort[STACK_ROW_CAPACITY];
1131 TempAliasTable tempTable;
1132
1133 /* udata_swapDataHeader checks the arguments */
1134 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1135 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1136 return 0;
1137 }
1138
1139 /* check data format and format version */
1140 pInfo=(const UDataInfo *)((const char *)inData+4);
1141 if(!(
1142 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
1143 pInfo->dataFormat[1]==0x76 &&
1144 pInfo->dataFormat[2]==0x41 &&
1145 pInfo->dataFormat[3]==0x6c &&
1146 pInfo->formatVersion[0]==3
1147 )) {
1148 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1149 pInfo->dataFormat[0], pInfo->dataFormat[1],
1150 pInfo->dataFormat[2], pInfo->dataFormat[3],
1151 pInfo->formatVersion[0]);
1152 *pErrorCode=U_UNSUPPORTED_ERROR;
1153 return 0;
1154 }
1155
1156 /* an alias table must contain at least the table of contents array */
1157 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1158 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1159 length-headerSize);
1160 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1161 return 0;
1162 }
1163
1164 inTable=(const uint16_t *)((const char *)inData+headerSize);
/* zero first so that sections beyond tocLength count as empty below */
73c04bcf 1165 uprv_memset(toc, 0, sizeof(toc));
374ca955 1166 toc[tocLengthIndex]=tocLength=ds->readUInt32(((const uint32_t *)inTable)[tocLengthIndex]);
73c04bcf
A
1167 if(tocLength<minTocLength || offsetsCount<=tocLength) {
1168 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
374ca955
A
1169 *pErrorCode=U_INVALID_FORMAT_ERROR;
1170 return 0;
1171 }
1172
1173 /* read the known part of the table of contents */
73c04bcf 1174 for(i=converterListIndex; i<=tocLength; ++i) {
374ca955
A
1175 toc[i]=ds->readUInt32(((const uint32_t *)inTable)[i]);
1176 }
1177
1178 /* compute offsets */
73c04bcf 1179 uprv_memset(offsets, 0, sizeof(offsets));
374ca955 1180 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
/* each section starts where the previous one ends */
73c04bcf 1181 for(i=tagListIndex; i<=tocLength; ++i) {
374ca955
A
1182 offsets[i]=offsets[i-1]+toc[i-1];
1183 }
1184
1185 /* compute the overall size of the after-header data, in numbers of 16-bit units */
/* the loop above leaves i==tocLength+1, so i-1 is the last section present */
1186 topOffset=offsets[i-1]+toc[i-1];
1187
/* with a negative length nothing is swapped; only the size is returned */
1188 if(length>=0) {
1189 uint16_t *outTable;
1190 const uint16_t *p, *p2;
1191 uint16_t *q, *q2;
1192 uint16_t oldIndex;
1193
1194 if((length-headerSize)<(2*(int32_t)topOffset)) {
1195 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1196 length-headerSize);
1197 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1198 return 0;
1199 }
1200
1201 outTable=(uint16_t *)((char *)outData+headerSize);
1202
1203 /* swap the entire table of contents */
1204 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1205
73c04bcf
A
1206 /* swap unnormalized strings & normalized strings */
1207 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
374ca955
A
1208 outTable+offsets[stringTableIndex], pErrorCode);
1209 if(U_FAILURE(*pErrorCode)) {
73c04bcf 1210 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
374ca955
A
1211 return 0;
1212 }
1213
1214 if(ds->inCharset==ds->outCharset) {
1215 /* no need to sort, just swap all 16-bit values together */
1216 ds->swapArray16(ds,
1217 inTable+offsets[converterListIndex],
1218 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1219 outTable+offsets[converterListIndex],
1220 pErrorCode);
1221 } else {
1222 /* allocate the temporary table for sorting */
1223 count=toc[aliasListIndex];
1224
1225 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1226
1227 if(count<=STACK_ROW_CAPACITY) {
1228 tempTable.rows=rows;
1229 tempTable.resort=resort;
1230 } else {
/* one allocation: count TempRow items followed by count uint16_t resort slots */
1231 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1232 if(tempTable.rows==NULL) {
1233 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1234 count);
1235 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1236 return 0;
1237 }
1238 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1239 }
1240
1241 if(ds->outCharset==U_ASCII_FAMILY) {
1242 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1243 } else /* U_EBCDIC_FAMILY */ {
1244 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1245 }
1246
1247 /*
1248 * Sort unique aliases+mapped names.
1249 *
1250 * We need to sort the list again by outCharset strings because they
1251 * sort differently for different charset families.
1252 * First we set up a temporary table with the string indexes and
1253 * sorting indexes and sort that.
1254 * Then we permute and copy/swap the actual values.
1255 */
1256 p=inTable+offsets[aliasListIndex];
1257 q=outTable+offsets[aliasListIndex];
1258
1259 p2=inTable+offsets[untaggedConvArrayIndex];
1260 q2=outTable+offsets[untaggedConvArrayIndex];
1261
1262 for(i=0; i<count; ++i) {
1263 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1264 tempTable.rows[i].sortIndex=(uint16_t)i;
1265 }
1266
1267 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1268 io_compareRows, &tempTable,
1269 FALSE, pErrorCode);
1270
1271 if(U_SUCCESS(*pErrorCode)) {
1272 /* copy/swap/permute items */
1273 if(p!=q) {
/* separate output: write each item straight to its sorted position */
1274 for(i=0; i<count; ++i) {
1275 oldIndex=tempTable.rows[i].sortIndex;
1276 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1277 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1278 }
1279 } else {
1280 /*
1281 * If we swap in-place, then the permutation must use another
1282 * temporary array (tempTable.resort)
1283 * before the results are copied to the outBundle.
1284 */
1285 uint16_t *r=tempTable.resort;
1286
1287 for(i=0; i<count; ++i) {
1288 oldIndex=tempTable.rows[i].sortIndex;
1289 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1290 }
1291 uprv_memcpy(q, r, 2*count);
1292
1293 for(i=0; i<count; ++i) {
1294 oldIndex=tempTable.rows[i].sortIndex;
1295 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1296 }
1297 uprv_memcpy(q2, r, 2*count);
1298 }
1299 }
1300
/* free the heap table, if one was used, before any error return below */
1301 if(tempTable.rows!=rows) {
1302 uprv_free(tempTable.rows);
1303 }
1304
1305 if(U_FAILURE(*pErrorCode)) {
73c04bcf
A
1306 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1307 count);
374ca955
A
1308 return 0;
1309 }
1310
1311 /* swap remaining 16-bit values */
/* first the sections before the alias list, then those from the tagged alias array up to the string table */
1312 ds->swapArray16(ds,
1313 inTable+offsets[converterListIndex],
1314 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1315 outTable+offsets[converterListIndex],
1316 pErrorCode);
1317 ds->swapArray16(ds,
1318 inTable+offsets[taggedAliasArrayIndex],
1319 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1320 outTable+offsets[taggedAliasArrayIndex],
1321 pErrorCode);
1322 }
1323 }
1324
1325 return headerSize+2*(int32_t)topOffset;
1326}
1327
1328#endif
1329
b75a7d8f
A
1330/*
1331 * Hey, Emacs, please set the following:
1332 *
1333 * Local Variables:
1334 * indent-tabs-mode: nil
1335 * End:
1336 *
1337 */