]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv_io.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / ucnv_io.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
5*
2ca993e8 6* Copyright (C) 1999-2015, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11*
4388f060 12* ucnv_io.cpp:
73c04bcf
A
13* initializes global variables and defines functions pertaining to converter
14* name resolution aspect of the conversion code.
b75a7d8f
A
15*
16* new implementation:
17*
18* created on: 1999nov22
19* created by: Markus W. Scherer
20*
21* Use the binary cnvalias.icu (created from convrtrs.txt) to work
22* with aliases for converter names.
23*
24* Date Name Description
25* 11/22/1999 markus Created
26* 06/28/2002 grhoten Major overhaul of the converter alias design.
27* Now an alias can map to different converters
28* depending on the specified standard.
29*******************************************************************************
30*/
31
32#include "unicode/utypes.h"
374ca955
A
33
34#if !UCONFIG_NO_CONVERSION
35
73c04bcf 36#include "unicode/ucnv.h"
b75a7d8f
A
37#include "unicode/udata.h"
38
39#include "umutex.h"
374ca955 40#include "uarrsort.h"
57a6839d 41#include "uassert.h"
374ca955 42#include "udataswp.h"
b75a7d8f
A
43#include "cstring.h"
44#include "cmemory.h"
45#include "ucnv_io.h"
46#include "uenumimp.h"
47#include "ucln_cmn.h"
48
49/* Format of cnvalias.icu -----------------------------------------------------
50 *
51 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
52 * This binary form contains several tables. All indexes are to uint16_t
53 * units, and not to the bytes (uint8_t units). Addressing everything on
54 * 16-bit boundaries allows us to store more information with small index
55 * numbers, which are also 16-bit in size. The majority of the table (except
56 * the string table) are 16-bit numbers.
57 *
58 * First there is the size of the Table of Contents (TOC). The TOC
59 * entries contain the size of each section. In order to find the offset
60 * you just need to sum up the previous offsets.
374ca955
A
61 * The TOC length and entries are an array of uint32_t values.
62 * The first section after the TOC starts immediately after the TOC.
b75a7d8f
A
63 *
64 * 1) This section contains a list of converters. This list contains indexes
65 * into the string table for the converter name. The index of this list is
66 * also used by other sections, which are mentioned later on.
374ca955 67 * This list is not sorted.
b75a7d8f
A
68 *
69 * 2) This section contains a list of tags. This list contains indexes
70 * into the string table for the tag name. The index of this list is
71 * also used by other sections, which are mentioned later on.
374ca955 72 * This list is in priority order of standards.
b75a7d8f
A
73 *
74 * 3) This section contains a list of sorted unique aliases. This
75 * list contains indexes into the string table for the alias name. The
76 * index of this list is also used by other sections, like the 4th section.
77 * The index for the 3rd and 4th section is used to get the
78 * alias -> converter name mapping. Section 3 and 4 form a two column table.
73c04bcf
A
79 * Some of the most significant bits of each index may contain other
80 * information (see findConverter for details).
b75a7d8f
A
81 *
82 * 4) This section contains a list of mapped converter names. Consider this
83 * as a table that maps the 3rd section to the 1st section. This list contains
84 * indexes into the 1st section. The index of this list is the same index in
85 * the 3rd section. There is also some extra information in the high bits of
86 * each converter index in this table. Currently it's only used to say that
87 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
88 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
89 * the predigested form of the 5th section so that an alias lookup can be fast.
374ca955 90 *
b75a7d8f
A
91 * 5) This section contains a 2D array with indexes to the 6th section. This
92 * section is the full form of all alias mappings. The column index is the
93 * index into the converter list (column header). The row index is the index
94 * to tag list (row header). This 2D array is the top part of a 3D array. The
95 * third dimension is in the 6th section.
96 *
97 * 6) This is a blob of variable-length arrays. Each array starts with a size,
98 * and is followed by indexes to alias names in the string table. This is
99 * the third dimension to the section 5. No other section should be referencing
100 * this section.
101 *
73c04bcf
A
102 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
103 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
104 * what type of string normalization is used among other potential things in the
105 * future.
b75a7d8f
A
106 *
107 * 8) This is the string table. All strings are indexed on an even address.
108 * There are two reasons for this. First many chip architectures locate strings
109 * faster on even address boundaries. Second, since all indexes are 16-bit
110 * numbers, this string table can be 128KB in size instead of 64KB when we
111 * only have strings starting on an even address.
112 *
73c04bcf
A
113 * 9) When present this is a set of prenormalized strings from section 8. This
114 * table contains normalized strings with the dashes and spaces stripped out,
115 * and all strings lowercased. In the future, the options in section 7 may state
116 * other types of normalization.
b75a7d8f
A
117 *
118 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
119 * has a unique alias among all converters. That same alias can
120 * be mentioned in other standards on different converters,
121 * but only one alias per tag can be unique.
122 *
123 *
124 * Converter Names (Usually in TR22 form)
125 * -------------------------------------------.
126 * T / /|
127 * a / / |
128 * g / / |
129 * s / / |
130 * / / |
131 * ------------------------------------------/ |
132 * A | | |
133 * l | | |
134 * i | | /
135 * a | | /
136 * s | | /
137 * e | | /
138 * s | |/
139 * -------------------------------------------
140 *
141 *
142 *
143 * Here is what it really looks like. It's like swiss cheese.
144 * There are holes. Some converters aren't recognized by
145 * a standard, or they are really old converters that the
146 * standard doesn't recognize anymore.
147 *
148 * Converter Names (Usually in TR22 form)
149 * -------------------------------------------.
150 * T /##########################################/|
151 * a / # # /#
374ca955
A
152 * g / # ## ## ### # ### ### ### #/
153 * s / # ##### #### ## ## #/#
154 * / ### # # ## # # # ### # # #/##
b75a7d8f
A
155 * ------------------------------------------/# #
156 * A |### # # ## # # # ### # # #|# #
157 * l |# # # # # ## # #|# #
158 * i |# # # # # # #|#
159 * a |# #|#
160 * s | #|#
374ca955
A
161 * e
162 * s
163 *
b75a7d8f
A
164 */
165
/**
 * Iteration state kept behind the UEnumeration objects created by
 * ucnv_openStandardNames().
 */
typedef struct UAliasContext {
    /* Offset of the enumerated list inside gMainTable.taggedAliasLists;
       0 means there is nothing to enumerate. */
    uint32_t listOffset;
    /* Index of the next alias that the enumeration will hand out. */
    uint32_t listIdx;
} UAliasContext;
173
174static const char DATA_NAME[] = "cnvalias";
175static const char DATA_TYPE[] = "icu";
176
177static UDataMemory *gAliasData=NULL;
57a6839d 178static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
b75a7d8f 179
374ca955
A
/*
 * Indexes into the uint32_t table of contents at the start of the alias data.
 * Entry 0 is the TOC length; the following entries are the section sizes.
 */
enum {
    tocLengthIndex=0,
    converterListIndex=1,
    tagListIndex=2,
    aliasListIndex=3,
    untaggedConvArrayIndex=4,
    taggedAliasArrayIndex=5,
    taggedAliasListsIndex=6,
    tableOptionsIndex=7,
    stringTableIndex=8,
    normalizedStringTableIndex=9,
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength=8   /* min. tocLength in the file, does not count the tocLengthIndex! */
};
194
73c04bcf
A
195static const UConverterAliasOptions defaultTableOptions = {
196 UCNV_IO_UNNORMALIZED,
197 0 /* containsCnvOptionInfo */
198};
199static UConverterAlias gMainTable;
b75a7d8f 200
73c04bcf
A
/*
 * Turn a string-table index (counted in 16-bit units) into a char pointer.
 * The whole expansion is parenthesized so that postfix operators applied to
 * the macro result, e.g. GET_STRING(i)[0], act on the pointer and not on the
 * expression inside the cast.
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
b75a7d8f
A
203
204static UBool U_CALLCONV
4388f060
A
205isAcceptable(void * /*context*/,
206 const char * /*type*/, const char * /*name*/,
b75a7d8f
A
207 const UDataInfo *pInfo) {
208 return (UBool)(
209 pInfo->size>=20 &&
210 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
211 pInfo->charsetFamily==U_CHARSET_FAMILY &&
212 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */
213 pInfo->dataFormat[1]==0x76 &&
214 pInfo->dataFormat[2]==0x41 &&
215 pInfo->dataFormat[3]==0x6c &&
216 pInfo->formatVersion[0]==3);
217}
218
374ca955
A
219static UBool U_CALLCONV ucnv_io_cleanup(void)
220{
221 if (gAliasData) {
222 udata_close(gAliasData);
223 gAliasData = NULL;
224 }
57a6839d 225 gAliasDataInitOnce.reset();
374ca955 226
73c04bcf 227 uprv_memset(&gMainTable, 0, sizeof(gMainTable));
374ca955
A
228
229 return TRUE; /* Everything was cleaned up */
230}
231
57a6839d
A
232static void U_CALLCONV initAliasData(UErrorCode &errCode) {
233 UDataMemory *data;
234 const uint16_t *table;
235 const uint32_t *sectionSizes;
236 uint32_t tableStart;
237 uint32_t currOffset;
b75a7d8f 238
57a6839d 239 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
b75a7d8f 240
57a6839d
A
241 U_ASSERT(gAliasData == NULL);
242 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
243 if(U_FAILURE(errCode)) {
244 return;
245 }
b75a7d8f 246
57a6839d
A
247 sectionSizes = (const uint32_t *)udata_getMemory(data);
248 table = (const uint16_t *)sectionSizes;
b75a7d8f 249
57a6839d
A
250 tableStart = sectionSizes[0];
251 if (tableStart < minTocLength) {
252 errCode = U_INVALID_FORMAT_ERROR;
253 udata_close(data);
254 return;
255 }
256 gAliasData = data;
257
258 gMainTable.converterListSize = sectionSizes[1];
259 gMainTable.tagListSize = sectionSizes[2];
260 gMainTable.aliasListSize = sectionSizes[3];
261 gMainTable.untaggedConvArraySize = sectionSizes[4];
262 gMainTable.taggedAliasArraySize = sectionSizes[5];
263 gMainTable.taggedAliasListsSize = sectionSizes[6];
264 gMainTable.optionTableSize = sectionSizes[7];
265 gMainTable.stringTableSize = sectionSizes[8];
266
267 if (tableStart > 8) {
268 gMainTable.normalizedStringTableSize = sectionSizes[9];
269 }
b75a7d8f 270
57a6839d
A
271 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
272 gMainTable.converterList = table + currOffset;
b75a7d8f 273
57a6839d
A
274 currOffset += gMainTable.converterListSize;
275 gMainTable.tagList = table + currOffset;
b75a7d8f 276
57a6839d
A
277 currOffset += gMainTable.tagListSize;
278 gMainTable.aliasList = table + currOffset;
b75a7d8f 279
57a6839d
A
280 currOffset += gMainTable.aliasListSize;
281 gMainTable.untaggedConvArray = table + currOffset;
b75a7d8f 282
57a6839d
A
283 currOffset += gMainTable.untaggedConvArraySize;
284 gMainTable.taggedAliasArray = table + currOffset;
b75a7d8f 285
57a6839d
A
286 /* aliasLists is a 1's based array, but it has a padding character */
287 currOffset += gMainTable.taggedAliasArraySize;
288 gMainTable.taggedAliasLists = table + currOffset;
73c04bcf 289
57a6839d
A
290 currOffset += gMainTable.taggedAliasListsSize;
291 if (gMainTable.optionTableSize > 0
292 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
293 {
294 /* Faster table */
295 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
296 }
297 else {
298 /* Smaller table, or I can't handle this normalization mode!
299 Use the original slower table lookup. */
300 gMainTable.optionTable = &defaultTableOptions;
301 }
b75a7d8f 302
57a6839d
A
303 currOffset += gMainTable.optionTableSize;
304 gMainTable.stringTable = table + currOffset;
729e4ab9 305
57a6839d
A
306 currOffset += gMainTable.stringTableSize;
307 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
308 ? gMainTable.stringTable : (table + currOffset));
309}
b75a7d8f 310
b75a7d8f 311
57a6839d
A
312static UBool
313haveAliasData(UErrorCode *pErrorCode) {
314 umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
315 return U_SUCCESS(*pErrorCode);
b75a7d8f
A
316}
317
4388f060 318static inline UBool
b75a7d8f
A
319isAlias(const char *alias, UErrorCode *pErrorCode) {
320 if(alias==NULL) {
321 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
322 return FALSE;
b75a7d8f 323 }
73c04bcf 324 return (UBool)(*alias!=0);
b75a7d8f
A
325}
326
b75a7d8f 327static uint32_t getTagNumber(const char *tagname) {
73c04bcf 328 if (gMainTable.tagList) {
b75a7d8f 329 uint32_t tagNum;
73c04bcf
A
330 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
331 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
b75a7d8f
A
332 return tagNum;
333 }
334 }
335 }
336
337 return UINT32_MAX;
338}
339
73c04bcf
A
/* character types relevant for ucnv_compareNames() */
enum {
    UIGNORE,    /* not a letter or digit: skipped by the name comparison */
    ZERO,       /* the digit zero */
    NONZERO,    /* the digits one through nine */
    MINLETTER   /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * Letters (upper and lower case) map to their lowercase ASCII code.
 */
static const uint8_t asciiTypes[128] = {
    /* 00..0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F: digits */
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40..4F: uppercase letters */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 50..5F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60..6F: lowercase letters */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 70..7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Character type of an ASCII char; bytes with the high bit set are UIGNORE.
 * The argument is evaluated more than once and is fully parenthesized so
 * that an expression argument is cast as a whole.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
73c04bcf
A
361
362/* character types for EBCDIC 80..FF */
363static const uint8_t ebcdicTypes[128] = {
364 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
365 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
366 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
369 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
370 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
371 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
372};
373
57a6839d 374#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
73c04bcf
A
375
376#if U_CHARSET_FAMILY==U_ASCII_FAMILY
377# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
378#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
379# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
380#else
381# error U_CHARSET_FAMILY is not valid
382#endif
383
f3c0d7a5 384
b75a7d8f 385/* @see ucnv_compareNames */
f3c0d7a5 386U_CAPI char * U_CALLCONV
374ca955 387ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
b75a7d8f 388 char *dstItr = dst;
73c04bcf
A
389 uint8_t type, nextType;
390 char c1;
391 UBool afterDigit = FALSE;
392
393 while ((c1 = *name++) != 0) {
394 type = GET_ASCII_TYPE(c1);
395 switch (type) {
57a6839d 396 case UIGNORE:
73c04bcf
A
397 afterDigit = FALSE;
398 continue; /* ignore all but letters and digits */
399 case ZERO:
400 if (!afterDigit) {
401 nextType = GET_ASCII_TYPE(*name);
402 if (nextType == ZERO || nextType == NONZERO) {
403 continue; /* ignore leading zero before another digit */
404 }
405 }
406 break;
407 case NONZERO:
408 afterDigit = TRUE;
409 break;
410 default:
411 c1 = (char)type; /* lowercased letter */
412 afterDigit = FALSE;
413 break;
b75a7d8f 414 }
73c04bcf 415 *dstItr++ = c1;
374ca955 416 }
73c04bcf 417 *dstItr = 0;
374ca955
A
418 return dst;
419}
420
f3c0d7a5 421U_CAPI char * U_CALLCONV
374ca955 422ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
374ca955 423 char *dstItr = dst;
73c04bcf
A
424 uint8_t type, nextType;
425 char c1;
426 UBool afterDigit = FALSE;
427
428 while ((c1 = *name++) != 0) {
429 type = GET_EBCDIC_TYPE(c1);
430 switch (type) {
57a6839d 431 case UIGNORE:
73c04bcf
A
432 afterDigit = FALSE;
433 continue; /* ignore all but letters and digits */
434 case ZERO:
435 if (!afterDigit) {
436 nextType = GET_EBCDIC_TYPE(*name);
437 if (nextType == ZERO || nextType == NONZERO) {
438 continue; /* ignore leading zero before another digit */
439 }
440 }
441 break;
442 case NONZERO:
443 afterDigit = TRUE;
444 break;
445 default:
446 c1 = (char)type; /* lowercased letter */
447 afterDigit = FALSE;
448 break;
374ca955 449 }
73c04bcf 450 *dstItr++ = c1;
b75a7d8f 451 }
73c04bcf 452 *dstItr = 0;
b75a7d8f
A
453 return dst;
454}
455
456/**
73c04bcf
A
457 * Do a fuzzy compare of two converter/alias names.
458 * The comparison is case-insensitive, ignores leading zeroes if they are not
459 * followed by further digits, and ignores all but letters and digits.
460 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
461 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
462 * at http://www.unicode.org/reports/tr22/
374ca955 463 *
b75a7d8f
A
464 * This is a symmetrical (commutative) operation; order of arguments
465 * is insignificant. This is an important property for sorting the
466 * list (when the list is preprocessed into binary form) and for
467 * performing binary searches on it at run time.
374ca955 468 *
b75a7d8f
A
469 * @param name1 a converter name or alias, zero-terminated
470 * @param name2 a converter name or alias, zero-terminated
471 * @return 0 if the names match, or a negative value if the name1
472 * lexically precedes name2, or a positive value if the name1
473 * lexically follows name2.
474 *
475 * @see ucnv_io_stripForCompare
476 */
477U_CAPI int U_EXPORT2
478ucnv_compareNames(const char *name1, const char *name2) {
479 int rc;
73c04bcf 480 uint8_t type, nextType;
b75a7d8f 481 char c1, c2;
73c04bcf 482 UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
b75a7d8f
A
483
484 for (;;) {
73c04bcf
A
485 while ((c1 = *name1++) != 0) {
486 type = GET_CHAR_TYPE(c1);
487 switch (type) {
57a6839d 488 case UIGNORE:
73c04bcf
A
489 afterDigit1 = FALSE;
490 continue; /* ignore all but letters and digits */
491 case ZERO:
492 if (!afterDigit1) {
493 nextType = GET_CHAR_TYPE(*name1);
494 if (nextType == ZERO || nextType == NONZERO) {
495 continue; /* ignore leading zero before another digit */
496 }
497 }
498 break;
499 case NONZERO:
500 afterDigit1 = TRUE;
501 break;
502 default:
503 c1 = (char)type; /* lowercased letter */
504 afterDigit1 = FALSE;
505 break;
506 }
507 break; /* deliver c1 */
b75a7d8f 508 }
73c04bcf
A
509 while ((c2 = *name2++) != 0) {
510 type = GET_CHAR_TYPE(c2);
511 switch (type) {
57a6839d 512 case UIGNORE:
73c04bcf
A
513 afterDigit2 = FALSE;
514 continue; /* ignore all but letters and digits */
515 case ZERO:
516 if (!afterDigit2) {
517 nextType = GET_CHAR_TYPE(*name2);
518 if (nextType == ZERO || nextType == NONZERO) {
519 continue; /* ignore leading zero before another digit */
520 }
521 }
522 break;
523 case NONZERO:
524 afterDigit2 = TRUE;
525 break;
526 default:
527 c2 = (char)type; /* lowercased letter */
528 afterDigit2 = FALSE;
529 break;
530 }
531 break; /* deliver c2 */
b75a7d8f
A
532 }
533
534 /* If we reach the ends of both strings then they match */
535 if ((c1|c2)==0) {
536 return 0;
537 }
374ca955 538
b75a7d8f 539 /* Case-insensitive comparison */
73c04bcf 540 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
b75a7d8f
A
541 if (rc != 0) {
542 return rc;
543 }
b75a7d8f
A
544 }
545}
546
547/*
548 * search for an alias
549 * return the converter number index for gConverterList
550 */
4388f060 551static inline uint32_t
73c04bcf 552findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
b75a7d8f 553 uint32_t mid, start, limit;
374ca955 554 uint32_t lastMid;
b75a7d8f 555 int result;
73c04bcf
A
556 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
557 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
558
559 if (!isUnnormalized) {
560 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
561 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
562 return UINT32_MAX;
563 }
564
565 /* Lower case and remove ignoreable characters. */
566 ucnv_io_stripForCompare(strippedName, alias);
567 alias = strippedName;
568 }
b75a7d8f
A
569
570 /* do a binary search for the alias */
571 start = 0;
73c04bcf 572 limit = gMainTable.untaggedConvArraySize;
b75a7d8f 573 mid = limit;
374ca955 574 lastMid = UINT32_MAX;
b75a7d8f
A
575
576 for (;;) {
577 mid = (uint32_t)((start + limit) / 2);
374ca955
A
578 if (lastMid == mid) { /* Have we moved? */
579 break; /* We haven't moved, and it wasn't found. */
580 }
581 lastMid = mid;
73c04bcf
A
582 if (isUnnormalized) {
583 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
584 }
585 else {
586 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
587 }
b75a7d8f
A
588
589 if (result < 0) {
590 limit = mid;
591 } else if (result > 0) {
592 start = mid;
593 } else {
594 /* Since the gencnval tool folds duplicates into one entry,
595 * this alias in gAliasList is unique, but different standards
596 * may map an alias to different converters.
597 */
73c04bcf 598 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
b75a7d8f
A
599 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
600 }
73c04bcf
A
601 /* State whether the canonical converter name contains an option.
602 This information is contained in this list in order to maintain backward & forward compatibility. */
603 if (containsOption) {
604 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
605 *containsOption = (UBool)((containsCnvOptionInfo
606 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
607 || !containsCnvOptionInfo);
608 }
609 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
b75a7d8f
A
610 }
611 }
612
613 return UINT32_MAX;
614}
615
616/*
617 * Is this alias in this list?
618 * alias and listOffset should be non-NULL.
619 */
4388f060 620static inline UBool
b75a7d8f
A
621isAliasInList(const char *alias, uint32_t listOffset) {
622 if (listOffset) {
623 uint32_t currAlias;
73c04bcf 624 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 625 /* +1 to skip listCount */
73c04bcf 626 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
627 for (currAlias = 0; currAlias < listCount; currAlias++) {
628 if (currList[currAlias]
629 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
630 {
631 return TRUE;
632 }
633 }
634 }
635 return FALSE;
636}
637
638/*
639 * Search for an standard name of an alias (what is the default name
640 * that this standard uses?)
641 * return the listOffset for gTaggedAliasLists. If it's 0,
642 * the it couldn't be found, but the parameters are valid.
643 */
644static uint32_t
645findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
646 uint32_t idx;
647 uint32_t listOffset;
648 uint32_t convNum;
649 UErrorCode myErr = U_ZERO_ERROR;
650 uint32_t tagNum = getTagNumber(standard);
651
652 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 653 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
654 if (myErr != U_ZERO_ERROR) {
655 *pErrorCode = myErr;
656 }
657
73c04bcf
A
658 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
659 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
660 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
b75a7d8f
A
661 return listOffset;
662 }
663 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
664 /* Uh Oh! They used an ambiguous alias.
665 We have to search the whole swiss cheese starting
666 at the highest standard affinity.
667 This may take a while.
668 */
73c04bcf
A
669 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
670 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f 671 if (listOffset && isAliasInList(alias, listOffset)) {
73c04bcf
A
672 uint32_t currTagNum = idx/gMainTable.converterListSize;
673 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
674 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
675 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
b75a7d8f
A
676 return tempListOffset;
677 }
678 /* else keep on looking */
679 /* We could speed this up by starting on the next row
680 because an alias is unique per row, right now.
681 This would change if alias versioning appears. */
682 }
683 }
684 /* The standard doesn't know about the alias */
685 }
686 /* else no default name */
687 return 0;
688 }
689 /* else converter or tag not found */
690
691 return UINT32_MAX;
692}
693
694/* Return the canonical name */
695static uint32_t
696findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
697 uint32_t idx;
698 uint32_t listOffset;
699 uint32_t convNum;
700 UErrorCode myErr = U_ZERO_ERROR;
701 uint32_t tagNum = getTagNumber(standard);
702
703 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
73c04bcf 704 convNum = findConverter(alias, NULL, &myErr);
b75a7d8f
A
705 if (myErr != U_ZERO_ERROR) {
706 *pErrorCode = myErr;
707 }
708
73c04bcf
A
709 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
710 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
b75a7d8f
A
711 if (listOffset && isAliasInList(alias, listOffset)) {
712 return convNum;
713 }
714 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
715 /* Uh Oh! They used an ambiguous alias.
716 We have to search one slice of the swiss cheese.
717 We search only in the requested tag, not the whole thing.
718 This may take a while.
719 */
73c04bcf
A
720 uint32_t convStart = (tagNum)*gMainTable.converterListSize;
721 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
b75a7d8f 722 for (idx = convStart; idx < convLimit; idx++) {
73c04bcf 723 listOffset = gMainTable.taggedAliasArray[idx];
b75a7d8f
A
724 if (listOffset && isAliasInList(alias, listOffset)) {
725 return idx-convStart;
726 }
727 }
728 /* The standard doesn't know about the alias */
729 }
730 /* else no canonical name */
731 }
732 /* else converter or tag not found */
733
734 return UINT32_MAX;
735}
736
f3c0d7a5 737U_CAPI const char *
73c04bcf 738ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
4388f060
A
739 const char *aliasTmp = alias;
740 int32_t i = 0;
741 for (i = 0; i < 2; i++) {
742 if (i == 1) {
743 /*
744 * After the first unsuccess converter lookup, check to see if
745 * the name begins with 'x-'. If it does, strip it off and try
746 * again. This behaviour is similar to how ICU4J does it.
747 */
2ca993e8 748 if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
4388f060
A
749 aliasTmp = aliasTmp+2;
750 } else {
751 break;
752 }
753 }
754 if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
755 uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
756 if (convNum < gMainTable.converterListSize) {
757 return GET_STRING(gMainTable.converterList[convNum]);
758 }
759 /* else converter not found */
760 } else {
761 break;
b75a7d8f 762 }
b75a7d8f 763 }
4388f060 764
b75a7d8f
A
765 return NULL;
766}
767
f3c0d7a5
A
768U_CDECL_BEGIN
769
770
b75a7d8f 771static int32_t U_CALLCONV
4388f060 772ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
773 int32_t value = 0;
774 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
775 uint32_t listOffset = myContext->listOffset;
776
777 if (listOffset) {
73c04bcf 778 value = gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
779 }
780 return value;
781}
782
f3c0d7a5 783static const char * U_CALLCONV
b75a7d8f
A
784ucnv_io_nextStandardAliases(UEnumeration *enumerator,
785 int32_t* resultLength,
4388f060 786 UErrorCode * /*pErrorCode*/)
b75a7d8f
A
787{
788 UAliasContext *myContext = (UAliasContext *)(enumerator->context);
789 uint32_t listOffset = myContext->listOffset;
790
791 if (listOffset) {
73c04bcf
A
792 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
793 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
794
795 if (myContext->listIdx < listCount) {
796 const char *myStr = GET_STRING(currList[myContext->listIdx++]);
797 if (resultLength) {
374ca955 798 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
799 }
800 return myStr;
801 }
802 }
803 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
804 if (resultLength) {
805 *resultLength = 0;
806 }
b75a7d8f
A
807 return NULL;
808}
809
810static void U_CALLCONV
4388f060 811ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
812 ((UAliasContext *)(enumerator->context))->listIdx = 0;
813}
814
815static void U_CALLCONV
816ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
817 uprv_free(enumerator->context);
818 uprv_free(enumerator);
819}
820
f3c0d7a5
A
821U_CDECL_END
822
b75a7d8f
A
823/* Enumerate the aliases for the specified converter and standard tag */
824static const UEnumeration gEnumAliases = {
825 NULL,
826 NULL,
827 ucnv_io_closeUEnumeration,
828 ucnv_io_countStandardAliases,
829 uenum_unextDefault,
830 ucnv_io_nextStandardAliases,
831 ucnv_io_resetStandardAliases
832};
833
834U_CAPI UEnumeration * U_EXPORT2
835ucnv_openStandardNames(const char *convName,
836 const char *standard,
837 UErrorCode *pErrorCode)
838{
839 UEnumeration *myEnum = NULL;
840 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
841 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
842
843 /* When listOffset == 0, we want to acknowledge that the
844 converter name and standard are okay, but there
845 is nothing to enumerate. */
73c04bcf 846 if (listOffset < gMainTable.taggedAliasListsSize) {
b75a7d8f
A
847 UAliasContext *myContext;
848
51004dcb 849 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
b75a7d8f
A
850 if (myEnum == NULL) {
851 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852 return NULL;
853 }
854 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
51004dcb 855 myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
b75a7d8f
A
856 if (myContext == NULL) {
857 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
858 uprv_free(myEnum);
859 return NULL;
860 }
861 myContext->listOffset = listOffset;
862 myContext->listIdx = 0;
863 myEnum->context = myContext;
864 }
865 /* else converter or tag not found */
866 }
867 return myEnum;
868}
869
73c04bcf 870static uint16_t
b75a7d8f
A
871ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
872 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
873 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
874 if (convNum < gMainTable.converterListSize) {
b75a7d8f 875 /* tagListNum - 1 is the ALL tag */
73c04bcf 876 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
877
878 if (listOffset) {
73c04bcf 879 return gMainTable.taggedAliasLists[listOffset];
b75a7d8f
A
880 }
881 /* else this shouldn't happen. internal program error */
882 }
883 /* else converter not found */
884 }
885 return 0;
886}
887
73c04bcf 888static uint16_t
b75a7d8f
A
889ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
890 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
891 uint32_t currAlias;
73c04bcf
A
892 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
893 if (convNum < gMainTable.converterListSize) {
b75a7d8f 894 /* tagListNum - 1 is the ALL tag */
73c04bcf 895 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
896
897 if (listOffset) {
73c04bcf 898 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 899 /* +1 to skip listCount */
73c04bcf 900 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
901
902 for (currAlias = start; currAlias < listCount; currAlias++) {
903 aliases[currAlias] = GET_STRING(currList[currAlias]);
904 }
905 }
906 /* else this shouldn't happen. internal program error */
907 }
908 /* else converter not found */
909 }
910 return 0;
911}
912
73c04bcf 913static const char *
b75a7d8f
A
914ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
915 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
73c04bcf
A
916 uint32_t convNum = findConverter(alias, NULL, pErrorCode);
917 if (convNum < gMainTable.converterListSize) {
b75a7d8f 918 /* tagListNum - 1 is the ALL tag */
73c04bcf 919 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
b75a7d8f
A
920
921 if (listOffset) {
73c04bcf 922 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
b75a7d8f 923 /* +1 to skip listCount */
73c04bcf 924 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
925
926 if (n < listCount) {
927 return GET_STRING(currList[n]);
928 }
929 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
930 }
931 /* else this shouldn't happen. internal program error */
932 }
933 /* else converter not found */
934 }
935 return NULL;
936}
937
73c04bcf 938static uint16_t
b75a7d8f
A
939ucnv_io_countStandards(UErrorCode *pErrorCode) {
940 if (haveAliasData(pErrorCode)) {
941 /* Don't include the empty list */
73c04bcf 942 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
b75a7d8f
A
943 }
944
945 return 0;
946}
947
948U_CAPI const char * U_EXPORT2
949ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
950 if (haveAliasData(pErrorCode)) {
73c04bcf
A
951 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
952 return GET_STRING(gMainTable.tagList[n]);
b75a7d8f
A
953 }
954 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
955 }
956
957 return NULL;
958}
959
960U_CAPI const char * U_EXPORT2
961ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
962 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
963 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
964
73c04bcf
A
965 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
966 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
b75a7d8f
A
967
968 /* Get the preferred name from this list */
969 if (currList[0]) {
970 return GET_STRING(currList[0]);
971 }
972 /* else someone screwed up the alias table. */
973 /* *pErrorCode = U_INVALID_FORMAT_ERROR */
974 }
975 }
976
977 return NULL;
978}
979
73c04bcf
A
980U_CAPI uint16_t U_EXPORT2
981ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
982{
983 return ucnv_io_countAliases(alias, pErrorCode);
984}
b75a7d8f 985
b75a7d8f 986
73c04bcf
A
987U_CAPI const char* U_EXPORT2
988ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
989{
990 return ucnv_io_getAlias(alias, n, pErrorCode);
b75a7d8f
A
991}
992
73c04bcf
A
993U_CAPI void U_EXPORT2
994ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
995{
996 ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
b75a7d8f
A
997}
998
73c04bcf
A
999U_CAPI uint16_t U_EXPORT2
1000ucnv_countStandards(void)
1001{
1002 UErrorCode err = U_ZERO_ERROR;
1003 return ucnv_io_countStandards(&err);
b75a7d8f
A
1004}
1005
73c04bcf
A
1006U_CAPI const char * U_EXPORT2
1007ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1008 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1009 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
b75a7d8f 1010
73c04bcf
A
1011 if (convNum < gMainTable.converterListSize) {
1012 return GET_STRING(gMainTable.converterList[convNum]);
b75a7d8f 1013 }
b75a7d8f 1014 }
73c04bcf 1015
b75a7d8f
A
1016 return NULL;
1017}
1018
f3c0d7a5
A
1019U_CDECL_BEGIN
1020
1021
b75a7d8f 1022static int32_t U_CALLCONV
4388f060 1023ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
73c04bcf 1024 return gMainTable.converterListSize;
b75a7d8f
A
1025}
1026
f3c0d7a5 1027static const char * U_CALLCONV
b75a7d8f
A
1028ucnv_io_nextAllConverters(UEnumeration *enumerator,
1029 int32_t* resultLength,
4388f060 1030 UErrorCode * /*pErrorCode*/)
b75a7d8f
A
1031{
1032 uint16_t *myContext = (uint16_t *)(enumerator->context);
1033
73c04bcf
A
1034 if (*myContext < gMainTable.converterListSize) {
1035 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
b75a7d8f 1036 if (resultLength) {
374ca955 1037 *resultLength = (int32_t)uprv_strlen(myStr);
b75a7d8f
A
1038 }
1039 return myStr;
1040 }
1041 /* Either we accessed a zero length list, or we enumerated too far. */
73c04bcf
A
1042 if (resultLength) {
1043 *resultLength = 0;
1044 }
b75a7d8f
A
1045 return NULL;
1046}
1047
1048static void U_CALLCONV
4388f060 1049ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
b75a7d8f
A
1050 *((uint16_t *)(enumerator->context)) = 0;
1051}
f3c0d7a5 1052U_CDECL_END
b75a7d8f
A
/*
 * Template for enumerations returned by ucnv_openAllNames(): that function
 * copies this struct into a freshly allocated UEnumeration and then points
 * the copy's `context` at a uint16_t cursor.  The two leading pointer slots
 * are therefore left NULL here.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countAllConverters,
    uenum_unextDefault,
    ucnv_io_nextAllConverters,
    ucnv_io_resetAllConverters
};
1062
1063U_CAPI UEnumeration * U_EXPORT2
1064ucnv_openAllNames(UErrorCode *pErrorCode) {
1065 UEnumeration *myEnum = NULL;
1066 if (haveAliasData(pErrorCode)) {
1067 uint16_t *myContext;
1068
51004dcb 1069 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
b75a7d8f
A
1070 if (myEnum == NULL) {
1071 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1072 return NULL;
1073 }
1074 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
51004dcb 1075 myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
b75a7d8f
A
1076 if (myContext == NULL) {
1077 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1078 uprv_free(myEnum);
1079 return NULL;
1080 }
1081 *myContext = 0;
1082 myEnum->context = myContext;
1083 }
1084 return myEnum;
1085}
1086
f3c0d7a5 1087U_CAPI uint16_t
46f4442e 1088ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
b75a7d8f 1089 if (haveAliasData(pErrorCode)) {
46f4442e 1090 return (uint16_t)gMainTable.converterListSize;
b75a7d8f
A
1091 }
1092 return 0;
1093}
1094
374ca955
A
1095/* alias table swapping ----------------------------------------------------- */
1096
f3c0d7a5
A
1097U_CDECL_BEGIN
1098
/*
 * Signature of the name-stripping functions used while re-sorting aliases:
 * takes a source name and a destination buffer and returns a char pointer
 * that io_compareRows() feeds to uprv_strcmp().  ucnv_swapAliases() selects
 * ucnv_io_stripASCIIForCompare or ucnv_io_stripEBCDICForCompare for it.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
f3c0d7a5
A
1100U_CDECL_END
1101
374ca955
A
1102
/*
 * One row of the temporary sorting array used by ucnv_swapAliases().
 *
 * Each row holds a platform-endian charset string index together with the
 * row's original position.  After the rows are sorted by their strings, the
 * real arrays are permuted according to the recorded original positions.
 */
typedef struct TempRow {
    uint16_t strIndex;  /* string index, in 16-bit units */
    uint16_t sortIndex; /* position of this row before sorting */
} TempRow;
1113
/*
 * Working state for re-sorting the alias list in ucnv_swapAliases(); a
 * pointer to it is handed to io_compareRows() as the sort context.
 */
typedef struct TempAliasTable {
    const char *chars;                  /* base of the output-charset string table */
    TempRow *rows;                      /* rows being sorted */
    uint16_t *resort;                   /* scratch array for the in-place permutation */
    StripForCompareFn *stripForCompare; /* ASCII or EBCDIC name stripper */
} TempAliasTable;
1120
/*
 * Largest alias count for which ucnv_swapAliases() uses its on-stack sorting
 * arrays; larger tables are sorted in heap-allocated storage.
 */
enum { STACK_ROW_CAPACITY = 500 };
1124
f3c0d7a5 1125static int32_t U_CALLCONV
374ca955
A
1126io_compareRows(const void *context, const void *left, const void *right) {
1127 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1128 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1129
1130 TempAliasTable *tempTable=(TempAliasTable *)context;
1131 const char *chars=tempTable->chars;
1132
1133 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1134 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1135}
1136
/*
 * Swap a converter alias table ("CvAl", format version 3) from the input
 * byte order/charset of `ds` to its output byte order/charset.
 *
 * ds         swapper describing input and output endianness and charset
 * inData     start of the data, including the data header
 * length     number of bytes at inData; when negative nothing is written and
 *            only the required size is computed
 * outData    destination; may be the same buffer as inData (in-place swap)
 * pErrorCode in/out status
 *
 * Returns the header size plus the size of the table in bytes, or 0 on error.
 * Errors set by this function:
 *   U_UNSUPPORTED_ERROR        the data is not a version-3 "CvAl" table
 *   U_INDEX_OUTOFBOUNDS_ERROR  length is too small for the table
 *   U_INVALID_FORMAT_ERROR     the table of contents has an unsupported
 *                              number of sections
 *   U_MEMORY_ALLOCATION_ERROR  the temporary sorting table cannot be allocated
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack storage used when the alias count fits in STACK_ROW_CAPACITY */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the table of contents is an array of 32-bit section sizes right after the header */
    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    /* (the loop above leaves i==tocLength+1, so i-1 is the last section) */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation: count TempRow entries followed by count uint16_t resort slots */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==NULL) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permutate and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           FALSE, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permutate items */
                if(p!=q) {
                    /* separate buffers: write each item straight to its sorted position */
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*(size_t)count);

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*(size_t)count);
                }
            }

            /* release the heap table, if one was needed, before any error return */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            /* sections before the alias list ... */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            /* ... and sections from the tagged alias array up to the string table */
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    return headerSize+2*(int32_t)topOffset;
}
1349
1350#endif
1351
57a6839d 1352
b75a7d8f
A
1353/*
1354 * Hey, Emacs, please set the following:
1355 *
1356 * Local Variables:
1357 * indent-tabs-mode: nil
1358 * End:
1359 *
1360 */