]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ****************************************************************************** | |
3 | * | |
4 | * Copyright (C) 1999-2004, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ****************************************************************************** | |
8 | * | |
9 | * | |
10 | * ucnv_io.c: | |
11 | * initializes global variables and defines functions pertaining to file | |
12 | * access, and name resolution aspect of the library. | |
13 | * | |
14 | * new implementation: | |
15 | * | |
16 | * created on: 1999nov22 | |
17 | * created by: Markus W. Scherer | |
18 | * | |
19 | * Use the binary cnvalias.icu (created from convrtrs.txt) to work | |
20 | * with aliases for converter names. | |
21 | * | |
22 | * Date Name Description | |
23 | * 11/22/1999 markus Created | |
24 | * 06/28/2002 grhoten Major overhaul of the converter alias design. | |
25 | * Now an alias can map to different converters | |
26 | * depending on the specified standard. | |
27 | ******************************************************************************* | |
28 | */ | |
29 | ||
30 | #include "unicode/utypes.h" | |
31 | ||
32 | #if !UCONFIG_NO_CONVERSION | |
33 | ||
34 | #include "unicode/putil.h" | |
35 | #include "unicode/ucnv.h" /* This file implements ucnv_xXXX() APIs */ | |
36 | #include "unicode/udata.h" | |
37 | ||
38 | #include "umutex.h" | |
39 | #include "uarrsort.h" | |
40 | #include "udataswp.h" | |
41 | #include "cstring.h" | |
42 | #include "cmemory.h" | |
43 | #include "ucnv_io.h" | |
44 | #include "uenumimp.h" | |
45 | #include "ucln_cmn.h" | |
46 | ||
47 | /* Format of cnvalias.icu ----------------------------------------------------- | |
48 | * | |
49 | * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. | |
50 | * This binary form contains several tables. All indexes are to uint16_t | |
51 | * units, and not to the bytes (uint8_t units). Addressing everything on | |
52 | * 16-bit boundaries allows us to store more information with small index | |
53 | * numbers, which are also 16-bit in size. The majority of the table (except | |
54 | * the string table) are 16-bit numbers. | |
55 | * | |
56 | * First there is the size of the Table of Contents (TOC). The TOC | |
57 | * entries contain the size of each section. In order to find the offset | |
58 | * you just need to sum up the previous offsets. | |
59 | * The TOC length and entries are an array of uint32_t values. | |
60 | * The first section after the TOC starts immediately after the TOC. | |
61 | * | |
62 | * 1) This section contains a list of converters. This list contains indexes | |
63 | * into the string table for the converter name. The index of this list is | |
64 | * also used by other sections, which are mentioned later on. | |
65 | * This list is not sorted. | |
66 | * | |
67 | * 2) This section contains a list of tags. This list contains indexes | |
68 | * into the string table for the tag name. The index of this list is | |
69 | * also used by other sections, which are mentioned later on. | |
70 | * This list is in priority order of standards. | |
71 | * | |
72 | * 3) This section contains a list of sorted unique aliases. This | |
73 | * list contains indexes into the string table for the alias name. The | |
74 | * index of this list is also used by other sections, like the 4th section. | |
75 | * The index for the 3rd and 4th section is used to get the | |
76 | * alias -> converter name mapping. Section 3 and 4 form a two column table. | |
77 | * | |
78 | * 4) This section contains a list of mapped converter names. Consider this | |
79 | * as a table that maps the 3rd section to the 1st section. This list contains | |
80 | * indexes into the 1st section. The index of this list is the same index in | |
81 | * the 3rd section. There is also some extra information in the high bits of | |
82 | * each converter index in this table. Currently it's only used to say that | |
83 | * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK | |
84 | * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is | |
85 | * the predigested form of the 5th section so that an alias lookup can be fast. | |
86 | * | |
87 | * 5) This section contains a 2D array with indexes to the 6th section. This | |
88 | * section is the full form of all alias mappings. The column index is the | |
89 | * index into the converter list (column header). The row index is the index | |
90 | * to tag list (row header). This 2D array is the top part of a 3D array. The | |
91 | * third dimension is in the 6th section. | |
92 | * | |
93 | * 6) This is a blob of variable length arrays. Each array starts with a size, | |
94 | * and is followed by indexes to alias names in the string table. This is | |
95 | * the third dimension to the section 5. No other section should be referencing | |
96 | * this section. | |
97 | * | |
98 | * 7) Reserved at this time (There is no information). This _usually_ has a | |
99 | * size of 0. Future versions may add more information here. | |
100 | * | |
101 | * 8) This is the string table. All strings are indexed on an even address. | |
102 | * There are two reasons for this. First many chip architectures locate strings | |
103 | * faster on even address boundaries. Second, since all indexes are 16-bit | |
104 | * numbers, this string table can be 128KB in size instead of 64KB when we | |
105 | * only have strings starting on an even address. | |
106 | * | |
107 | * | |
108 | * Here is the concept of section 5 and 6. It's a 3D cube. Each tag | |
109 | * has a unique alias among all converters. That same alias can | |
110 | * be mentioned in other standards on different converters, | |
111 | * but only one alias per tag can be unique. | |
112 | * | |
113 | * | |
114 | * Converter Names (Usually in TR22 form) | |
115 | * -------------------------------------------. | |
116 | * T / /| | |
117 | * a / / | | |
118 | * g / / | | |
119 | * s / / | | |
120 | * / / | | |
121 | * ------------------------------------------/ | | |
122 | * A | | | | |
123 | * l | | | | |
124 | * i | | / | |
125 | * a | | / | |
126 | * s | | / | |
127 | * e | | / | |
128 | * s | |/ | |
129 | * ------------------------------------------- | |
130 | * | |
131 | * | |
132 | * | |
133 | * Here is what it really looks like. It's like swiss cheese. | |
134 | * There are holes. Some converters aren't recognized by | |
135 | * a standard, or they are really old converters that the | |
136 | * standard doesn't recognize anymore. | |
137 | * | |
138 | * Converter Names (Usually in TR22 form) | |
139 | * -------------------------------------------. | |
140 | * T /##########################################/| | |
141 | * a / # # /# | |
142 | * g / # ## ## ### # ### ### ### #/ | |
143 | * s / # ##### #### ## ## #/# | |
144 | * / ### # # ## # # # ### # # #/## | |
145 | * ------------------------------------------/# # | |
146 | * A |### # # ## # # # ### # # #|# # | |
147 | * l |# # # # # ## # #|# # | |
148 | * i |# # # # # # #|# | |
149 | * a |# #|# | |
150 | * s | #|# | |
151 | * e | |
152 | * s | |
153 | * | |
154 | */ | |
155 | ||
/**
 * Iteration state stored in the context pointer of the UEnumeration
 * objects created by ucnv_openStandardNames().
 */
typedef struct UAliasContext {
    /* Index into gTaggedAliasLists of the list being enumerated; 0 means "nothing to enumerate". */
    uint32_t listOffset;
    /* Index of the next alias to be returned from that list. */
    uint32_t listIdx;
} UAliasContext;
163 | ||
164 | static const char DATA_NAME[] = "cnvalias"; | |
165 | static const char DATA_TYPE[] = "icu"; | |
166 | ||
167 | static UDataMemory *gAliasData=NULL; | |
168 | ||
169 | enum { | |
170 | tocLengthIndex=0, | |
171 | converterListIndex=1, | |
172 | tagListIndex=2, | |
173 | aliasListIndex=3, | |
174 | untaggedConvArrayIndex=4, | |
175 | taggedAliasArrayIndex=5, | |
176 | taggedAliasListsIndex=6, | |
177 | reservedIndex1=7, | |
178 | stringTableIndex=8, | |
179 | minTocLength=8, /* min. tocLength in the file, does not count the tocLengthIndex! */ | |
180 | offsetsCount /* length of the swapper's temporary offsets[] */ | |
181 | }; | |
182 | ||
183 | static const uint16_t *gConverterList = NULL; | |
184 | static const uint16_t *gTagList = NULL; | |
185 | static const uint16_t *gAliasList = NULL; | |
186 | static const uint16_t *gUntaggedConvArray = NULL; | |
187 | static const uint16_t *gTaggedAliasArray = NULL; | |
188 | static const uint16_t *gTaggedAliasLists = NULL; | |
189 | static const uint16_t *gStringTable = NULL; | |
190 | ||
191 | static uint32_t gConverterListSize; | |
192 | static uint32_t gTagListSize; | |
193 | static uint32_t gAliasListSize; | |
194 | static uint32_t gUntaggedConvArraySize; | |
195 | static uint32_t gTaggedAliasArraySize; | |
196 | static uint32_t gTaggedAliasListsSize; | |
197 | static uint32_t gStringTableSize; | |
198 | ||
199 | static const char **gAvailableConverters = NULL; | |
200 | static uint16_t gAvailableConverterCount = 0; | |
201 | ||
202 | static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ | |
203 | static const char *gDefaultConverterName = NULL; | |
204 | ||
/*
 * Turn a string-table index (counted in uint16_t units from gStringTable)
 * into a pointer to the zero-terminated string stored there.
 * The whole expansion is parenthesized so that the cast cannot be split
 * from its operand by a neighbouring operator at the call site.
 */
#define GET_STRING(idx) ((const char *)(gStringTable + (idx)))
206 | ||
207 | static UBool U_CALLCONV | |
208 | isAcceptable(void *context, | |
209 | const char *type, const char *name, | |
210 | const UDataInfo *pInfo) { | |
211 | return (UBool)( | |
212 | pInfo->size>=20 && | |
213 | pInfo->isBigEndian==U_IS_BIG_ENDIAN && | |
214 | pInfo->charsetFamily==U_CHARSET_FAMILY && | |
215 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ | |
216 | pInfo->dataFormat[1]==0x76 && | |
217 | pInfo->dataFormat[2]==0x41 && | |
218 | pInfo->dataFormat[3]==0x6c && | |
219 | pInfo->formatVersion[0]==3); | |
220 | } | |
221 | ||
222 | static UBool U_CALLCONV ucnv_io_cleanup(void) | |
223 | { | |
224 | if (gAliasData) { | |
225 | udata_close(gAliasData); | |
226 | gAliasData = NULL; | |
227 | } | |
228 | ||
229 | ucnv_io_flushAvailableConverterCache(); | |
230 | ||
231 | gConverterListSize = 0; | |
232 | gTagListSize = 0; | |
233 | gAliasListSize = 0; | |
234 | gUntaggedConvArraySize = 0; | |
235 | gTaggedAliasArraySize = 0; | |
236 | gTaggedAliasListsSize = 0; | |
237 | gStringTableSize = 0; | |
238 | ||
239 | gConverterList = NULL; | |
240 | gTagList = NULL; | |
241 | gAliasList = NULL; | |
242 | gUntaggedConvArray = NULL; | |
243 | gTaggedAliasArray = NULL; | |
244 | gTaggedAliasLists = NULL; | |
245 | gStringTable = NULL; | |
246 | ||
247 | gDefaultConverterName = NULL; | |
248 | gDefaultConverterNameBuffer[0] = 0; | |
249 | ||
250 | return TRUE; /* Everything was cleaned up */ | |
251 | } | |
252 | ||
253 | static UBool | |
254 | haveAliasData(UErrorCode *pErrorCode) { | |
255 | int haveData; | |
256 | ||
257 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
258 | return FALSE; | |
259 | } | |
260 | ||
261 | umtx_lock(NULL); | |
262 | haveData = (int)(gAliasData==NULL); | |
263 | umtx_unlock(NULL); | |
264 | ||
265 | /* load converter alias data from file if necessary */ | |
266 | if (haveData) { | |
267 | UDataMemory *data = NULL; | |
268 | const uint16_t *table = NULL; | |
269 | uint32_t tableStart; | |
270 | uint32_t currOffset; | |
271 | uint32_t reservedSize1; | |
272 | ||
273 | data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); | |
274 | if(U_FAILURE(*pErrorCode)) { | |
275 | return FALSE; | |
276 | } | |
277 | ||
278 | table = (const uint16_t *)udata_getMemory(data); | |
279 | ||
280 | tableStart = ((const uint32_t *)(table))[0]; | |
281 | if (tableStart < minTocLength) { | |
282 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
283 | udata_close(data); | |
284 | return FALSE; | |
285 | } | |
286 | ||
287 | umtx_lock(NULL); | |
288 | if(gAliasData==NULL) { | |
289 | gAliasData = data; | |
290 | data=NULL; | |
291 | ||
292 | gConverterListSize = ((const uint32_t *)(table))[1]; | |
293 | gTagListSize = ((const uint32_t *)(table))[2]; | |
294 | gAliasListSize = ((const uint32_t *)(table))[3]; | |
295 | gUntaggedConvArraySize = ((const uint32_t *)(table))[4]; | |
296 | gTaggedAliasArraySize = ((const uint32_t *)(table))[5]; | |
297 | gTaggedAliasListsSize = ((const uint32_t *)(table))[6]; | |
298 | reservedSize1 = ((const uint32_t *)(table))[7]; /* reserved */ | |
299 | gStringTableSize = ((const uint32_t *)(table))[8]; | |
300 | ||
301 | currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); | |
302 | gConverterList = table + currOffset; | |
303 | ||
304 | currOffset += gConverterListSize; | |
305 | gTagList = table + currOffset; | |
306 | ||
307 | currOffset += gTagListSize; | |
308 | gAliasList = table + currOffset; | |
309 | ||
310 | currOffset += gAliasListSize; | |
311 | gUntaggedConvArray = table + currOffset; | |
312 | ||
313 | currOffset += gUntaggedConvArraySize; | |
314 | gTaggedAliasArray = table + currOffset; | |
315 | ||
316 | /* aliasLists is a 1's based array, but it has a padding character */ | |
317 | currOffset += gTaggedAliasArraySize; | |
318 | gTaggedAliasLists = table + currOffset; | |
319 | ||
320 | currOffset += gTaggedAliasListsSize; | |
321 | /* reserved */ | |
322 | ||
323 | currOffset += reservedSize1; | |
324 | gStringTable = table + currOffset; | |
325 | ||
326 | ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); | |
327 | } | |
328 | umtx_unlock(NULL); | |
329 | ||
330 | /* if a different thread set it first, then close the extra data */ | |
331 | if(data!=NULL) { | |
332 | udata_close(data); /* NULL if it was set correctly */ | |
333 | } | |
334 | } | |
335 | ||
336 | return TRUE; | |
337 | } | |
338 | ||
339 | static U_INLINE UBool | |
340 | isAlias(const char *alias, UErrorCode *pErrorCode) { | |
341 | if(alias==NULL) { | |
342 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
343 | return FALSE; | |
344 | } else if(*alias==0) { | |
345 | return FALSE; | |
346 | } else { | |
347 | return TRUE; | |
348 | } | |
349 | } | |
350 | ||
351 | static uint32_t getTagNumber(const char *tagname) { | |
352 | if (gTagList) { | |
353 | uint32_t tagNum; | |
354 | for (tagNum = 0; tagNum < gTagListSize; tagNum++) { | |
355 | if (!uprv_stricmp(GET_STRING(gTagList[tagNum]), tagname)) { | |
356 | return tagNum; | |
357 | } | |
358 | } | |
359 | } | |
360 | ||
361 | return UINT32_MAX; | |
362 | } | |
363 | ||
364 | /* @see ucnv_compareNames */ | |
365 | U_CFUNC char * U_EXPORT2 | |
366 | ucnv_io_stripASCIIForCompare(char *dst, const char *name) { | |
367 | char c1 = *name; | |
368 | char *dstItr = dst; | |
369 | ||
370 | while (c1) { | |
371 | /* Ignore delimiters '-', '_', and ' ' */ | |
372 | while ((c1 = *name) == 0x2d || c1 == 0x5f || c1 == 0x20) { | |
373 | ++name; | |
374 | } | |
375 | ||
376 | /* lowercase for case-insensitive comparison */ | |
377 | *(dstItr++) = uprv_asciitolower(c1); | |
378 | ++name; | |
379 | } | |
380 | return dst; | |
381 | } | |
382 | ||
383 | U_CFUNC char * U_EXPORT2 | |
384 | ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { | |
385 | char c1 = *name; | |
386 | char *dstItr = dst; | |
387 | ||
388 | while (c1) { | |
389 | /* Ignore delimiters '-', '_', and ' ' */ | |
390 | while ((c1 = *name) == 0x60 || c1 == 0x6d || c1 == 0x40) { | |
391 | ++name; | |
392 | } | |
393 | ||
394 | /* lowercase for case-insensitive comparison */ | |
395 | *(dstItr++) = uprv_ebcdictolower(c1); | |
396 | ++name; | |
397 | } | |
398 | return dst; | |
399 | } | |
400 | ||
401 | /** | |
402 | * Do a fuzzy compare of a two converter/alias names. The comparison | |
403 | * is case-insensitive. It also ignores the characters '-', '_', and | |
404 | * ' ' (dash, underscore, and space). Thus the strings "UTF-8", | |
405 | * "utf_8", and "Utf 8" are exactly equivalent. | |
406 | * | |
407 | * This is a symmetrical (commutative) operation; order of arguments | |
408 | * is insignificant. This is an important property for sorting the | |
409 | * list (when the list is preprocessed into binary form) and for | |
410 | * performing binary searches on it at run time. | |
411 | * | |
412 | * @param name1 a converter name or alias, zero-terminated | |
413 | * @param name2 a converter name or alias, zero-terminated | |
414 | * @return 0 if the names match, or a negative value if the name1 | |
415 | * lexically precedes name2, or a positive value if the name1 | |
416 | * lexically follows name2. | |
417 | * | |
418 | * @see ucnv_io_stripForCompare | |
419 | */ | |
420 | U_CAPI int U_EXPORT2 | |
421 | ucnv_compareNames(const char *name1, const char *name2) { | |
422 | int rc; | |
423 | char c1, c2; | |
424 | ||
425 | for (;;) { | |
426 | /* Ignore delimiters '-', '_', and ' ' */ | |
427 | while ((c1 = *name1) == '-' || c1 == '_' || c1 == ' ') { | |
428 | ++name1; | |
429 | } | |
430 | while ((c2 = *name2) == '-' || c2 == '_' || c2 == ' ') { | |
431 | ++name2; | |
432 | } | |
433 | ||
434 | /* If we reach the ends of both strings then they match */ | |
435 | if ((c1|c2)==0) { | |
436 | return 0; | |
437 | } | |
438 | ||
439 | /* Case-insensitive comparison */ | |
440 | rc = (int)(unsigned char)uprv_tolower(c1) - | |
441 | (int)(unsigned char)uprv_tolower(c2); | |
442 | if (rc != 0) { | |
443 | return rc; | |
444 | } | |
445 | ++name1; | |
446 | ++name2; | |
447 | } | |
448 | } | |
449 | ||
450 | /* | |
451 | * search for an alias | |
452 | * return the converter number index for gConverterList | |
453 | */ | |
454 | static U_INLINE uint32_t | |
455 | findConverter(const char *alias, UErrorCode *pErrorCode) { | |
456 | uint32_t mid, start, limit; | |
457 | uint32_t lastMid; | |
458 | int result; | |
459 | ||
460 | /* do a binary search for the alias */ | |
461 | start = 0; | |
462 | limit = gUntaggedConvArraySize; | |
463 | mid = limit; | |
464 | lastMid = UINT32_MAX; | |
465 | ||
466 | for (;;) { | |
467 | mid = (uint32_t)((start + limit) / 2); | |
468 | if (lastMid == mid) { /* Have we moved? */ | |
469 | break; /* We haven't moved, and it wasn't found. */ | |
470 | } | |
471 | lastMid = mid; | |
472 | result = ucnv_compareNames(alias, GET_STRING(gAliasList[mid])); | |
473 | ||
474 | if (result < 0) { | |
475 | limit = mid; | |
476 | } else if (result > 0) { | |
477 | start = mid; | |
478 | } else { | |
479 | /* Since the gencnval tool folds duplicates into one entry, | |
480 | * this alias in gAliasList is unique, but different standards | |
481 | * may map an alias to different converters. | |
482 | */ | |
483 | if (gUntaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { | |
484 | *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; | |
485 | } | |
486 | return gUntaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; | |
487 | } | |
488 | } | |
489 | ||
490 | return UINT32_MAX; | |
491 | } | |
492 | ||
493 | /* | |
494 | * Is this alias in this list? | |
495 | * alias and listOffset should be non-NULL. | |
496 | */ | |
497 | static U_INLINE UBool | |
498 | isAliasInList(const char *alias, uint32_t listOffset) { | |
499 | if (listOffset) { | |
500 | uint32_t currAlias; | |
501 | uint32_t listCount = gTaggedAliasLists[listOffset]; | |
502 | /* +1 to skip listCount */ | |
503 | const uint16_t *currList = gTaggedAliasLists + listOffset + 1; | |
504 | for (currAlias = 0; currAlias < listCount; currAlias++) { | |
505 | if (currList[currAlias] | |
506 | && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) | |
507 | { | |
508 | return TRUE; | |
509 | } | |
510 | } | |
511 | } | |
512 | return FALSE; | |
513 | } | |
514 | ||
515 | /* | |
516 | * Search for an standard name of an alias (what is the default name | |
517 | * that this standard uses?) | |
518 | * return the listOffset for gTaggedAliasLists. If it's 0, | |
519 | * the it couldn't be found, but the parameters are valid. | |
520 | */ | |
521 | static uint32_t | |
522 | findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { | |
523 | uint32_t idx; | |
524 | uint32_t listOffset; | |
525 | uint32_t convNum; | |
526 | UErrorCode myErr = U_ZERO_ERROR; | |
527 | uint32_t tagNum = getTagNumber(standard); | |
528 | ||
529 | /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ | |
530 | convNum = findConverter(alias, &myErr); | |
531 | if (myErr != U_ZERO_ERROR) { | |
532 | *pErrorCode = myErr; | |
533 | } | |
534 | ||
535 | if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) { | |
536 | listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum]; | |
537 | if (listOffset && gTaggedAliasLists[listOffset + 1]) { | |
538 | return listOffset; | |
539 | } | |
540 | if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { | |
541 | /* Uh Oh! They used an ambiguous alias. | |
542 | We have to search the whole swiss cheese starting | |
543 | at the highest standard affinity. | |
544 | This may take a while. | |
545 | */ | |
546 | for (idx = 0; idx < gTaggedAliasArraySize; idx++) { | |
547 | listOffset = gTaggedAliasArray[idx]; | |
548 | if (listOffset && isAliasInList(alias, listOffset)) { | |
549 | uint32_t currTagNum = idx/gConverterListSize; | |
550 | uint32_t currConvNum = (idx - currTagNum*gConverterListSize); | |
551 | uint32_t tempListOffset = gTaggedAliasArray[tagNum*gConverterListSize + currConvNum]; | |
552 | if (tempListOffset && gTaggedAliasLists[tempListOffset + 1]) { | |
553 | return tempListOffset; | |
554 | } | |
555 | /* else keep on looking */ | |
556 | /* We could speed this up by starting on the next row | |
557 | because an alias is unique per row, right now. | |
558 | This would change if alias versioning appears. */ | |
559 | } | |
560 | } | |
561 | /* The standard doesn't know about the alias */ | |
562 | } | |
563 | /* else no default name */ | |
564 | return 0; | |
565 | } | |
566 | /* else converter or tag not found */ | |
567 | ||
568 | return UINT32_MAX; | |
569 | } | |
570 | ||
571 | /* Return the canonical name */ | |
572 | static uint32_t | |
573 | findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { | |
574 | uint32_t idx; | |
575 | uint32_t listOffset; | |
576 | uint32_t convNum; | |
577 | UErrorCode myErr = U_ZERO_ERROR; | |
578 | uint32_t tagNum = getTagNumber(standard); | |
579 | ||
580 | /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ | |
581 | convNum = findConverter(alias, &myErr); | |
582 | if (myErr != U_ZERO_ERROR) { | |
583 | *pErrorCode = myErr; | |
584 | } | |
585 | ||
586 | if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) { | |
587 | listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum]; | |
588 | if (listOffset && isAliasInList(alias, listOffset)) { | |
589 | return convNum; | |
590 | } | |
591 | if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { | |
592 | /* Uh Oh! They used an ambiguous alias. | |
593 | We have to search one slice of the swiss cheese. | |
594 | We search only in the requested tag, not the whole thing. | |
595 | This may take a while. | |
596 | */ | |
597 | uint32_t convStart = (tagNum)*gConverterListSize; | |
598 | uint32_t convLimit = (tagNum+1)*gConverterListSize; | |
599 | for (idx = convStart; idx < convLimit; idx++) { | |
600 | listOffset = gTaggedAliasArray[idx]; | |
601 | if (listOffset && isAliasInList(alias, listOffset)) { | |
602 | return idx-convStart; | |
603 | } | |
604 | } | |
605 | /* The standard doesn't know about the alias */ | |
606 | } | |
607 | /* else no canonical name */ | |
608 | } | |
609 | /* else converter or tag not found */ | |
610 | ||
611 | return UINT32_MAX; | |
612 | } | |
613 | ||
614 | ||
615 | ||
616 | U_CFUNC const char * | |
617 | ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) { | |
618 | if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
619 | uint32_t convNum = findConverter(alias, pErrorCode); | |
620 | if (convNum < gConverterListSize) { | |
621 | return GET_STRING(gConverterList[convNum]); | |
622 | } | |
623 | /* else converter not found */ | |
624 | } | |
625 | return NULL; | |
626 | } | |
627 | ||
628 | static int32_t U_CALLCONV | |
629 | ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) { | |
630 | int32_t value = 0; | |
631 | UAliasContext *myContext = (UAliasContext *)(enumerator->context); | |
632 | uint32_t listOffset = myContext->listOffset; | |
633 | ||
634 | if (listOffset) { | |
635 | value = gTaggedAliasLists[listOffset]; | |
636 | } | |
637 | return value; | |
638 | } | |
639 | ||
640 | static const char* U_CALLCONV | |
641 | ucnv_io_nextStandardAliases(UEnumeration *enumerator, | |
642 | int32_t* resultLength, | |
643 | UErrorCode *pErrorCode) | |
644 | { | |
645 | UAliasContext *myContext = (UAliasContext *)(enumerator->context); | |
646 | uint32_t listOffset = myContext->listOffset; | |
647 | ||
648 | if (listOffset) { | |
649 | uint32_t listCount = gTaggedAliasLists[listOffset]; | |
650 | const uint16_t *currList = gTaggedAliasLists + listOffset + 1; | |
651 | ||
652 | if (myContext->listIdx < listCount) { | |
653 | const char *myStr = GET_STRING(currList[myContext->listIdx++]); | |
654 | if (resultLength) { | |
655 | *resultLength = (int32_t)uprv_strlen(myStr); | |
656 | } | |
657 | return myStr; | |
658 | } | |
659 | } | |
660 | /* Either we accessed a zero length list, or we enumerated too far. */ | |
661 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
662 | return NULL; | |
663 | } | |
664 | ||
665 | static void U_CALLCONV | |
666 | ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) { | |
667 | ((UAliasContext *)(enumerator->context))->listIdx = 0; | |
668 | } | |
669 | ||
670 | static void U_CALLCONV | |
671 | ucnv_io_closeUEnumeration(UEnumeration *enumerator) { | |
672 | uprv_free(enumerator->context); | |
673 | uprv_free(enumerator); | |
674 | } | |
675 | ||
676 | /* Enumerate the aliases for the specified converter and standard tag */ | |
677 | static const UEnumeration gEnumAliases = { | |
678 | NULL, | |
679 | NULL, | |
680 | ucnv_io_closeUEnumeration, | |
681 | ucnv_io_countStandardAliases, | |
682 | uenum_unextDefault, | |
683 | ucnv_io_nextStandardAliases, | |
684 | ucnv_io_resetStandardAliases | |
685 | }; | |
686 | ||
687 | U_CAPI UEnumeration * U_EXPORT2 | |
688 | ucnv_openStandardNames(const char *convName, | |
689 | const char *standard, | |
690 | UErrorCode *pErrorCode) | |
691 | { | |
692 | UEnumeration *myEnum = NULL; | |
693 | if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { | |
694 | uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); | |
695 | ||
696 | /* When listOffset == 0, we want to acknowledge that the | |
697 | converter name and standard are okay, but there | |
698 | is nothing to enumerate. */ | |
699 | if (listOffset < gTaggedAliasListsSize) { | |
700 | UAliasContext *myContext; | |
701 | ||
702 | myEnum = uprv_malloc(sizeof(UEnumeration)); | |
703 | if (myEnum == NULL) { | |
704 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
705 | return NULL; | |
706 | } | |
707 | uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); | |
708 | myContext = uprv_malloc(sizeof(UAliasContext)); | |
709 | if (myContext == NULL) { | |
710 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
711 | uprv_free(myEnum); | |
712 | return NULL; | |
713 | } | |
714 | myContext->listOffset = listOffset; | |
715 | myContext->listIdx = 0; | |
716 | myEnum->context = myContext; | |
717 | } | |
718 | /* else converter or tag not found */ | |
719 | } | |
720 | return myEnum; | |
721 | } | |
722 | ||
723 | U_CFUNC uint16_t | |
724 | ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { | |
725 | if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
726 | uint32_t convNum = findConverter(alias, pErrorCode); | |
727 | if (convNum < gConverterListSize) { | |
728 | /* tagListNum - 1 is the ALL tag */ | |
729 | int32_t listOffset = gTaggedAliasArray[(gTagListSize - 1)*gConverterListSize + convNum]; | |
730 | ||
731 | if (listOffset) { | |
732 | return gTaggedAliasLists[listOffset]; | |
733 | } | |
734 | /* else this shouldn't happen. internal program error */ | |
735 | } | |
736 | /* else converter not found */ | |
737 | } | |
738 | return 0; | |
739 | } | |
740 | ||
741 | U_CFUNC uint16_t | |
742 | ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { | |
743 | if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
744 | uint32_t currAlias; | |
745 | uint32_t convNum = findConverter(alias, pErrorCode); | |
746 | if (convNum < gConverterListSize) { | |
747 | /* tagListNum - 1 is the ALL tag */ | |
748 | int32_t listOffset = gTaggedAliasArray[(gTagListSize - 1)*gConverterListSize + convNum]; | |
749 | ||
750 | if (listOffset) { | |
751 | uint32_t listCount = gTaggedAliasLists[listOffset]; | |
752 | /* +1 to skip listCount */ | |
753 | const uint16_t *currList = gTaggedAliasLists + listOffset + 1; | |
754 | ||
755 | for (currAlias = start; currAlias < listCount; currAlias++) { | |
756 | aliases[currAlias] = GET_STRING(currList[currAlias]); | |
757 | } | |
758 | } | |
759 | /* else this shouldn't happen. internal program error */ | |
760 | } | |
761 | /* else converter not found */ | |
762 | } | |
763 | return 0; | |
764 | } | |
765 | ||
766 | U_CFUNC const char * | |
767 | ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { | |
768 | if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
769 | uint32_t convNum = findConverter(alias, pErrorCode); | |
770 | if (convNum < gConverterListSize) { | |
771 | /* tagListNum - 1 is the ALL tag */ | |
772 | int32_t listOffset = gTaggedAliasArray[(gTagListSize - 1)*gConverterListSize + convNum]; | |
773 | ||
774 | if (listOffset) { | |
775 | uint32_t listCount = gTaggedAliasLists[listOffset]; | |
776 | /* +1 to skip listCount */ | |
777 | const uint16_t *currList = gTaggedAliasLists + listOffset + 1; | |
778 | ||
779 | if (n < listCount) { | |
780 | return GET_STRING(currList[n]); | |
781 | } | |
782 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
783 | } | |
784 | /* else this shouldn't happen. internal program error */ | |
785 | } | |
786 | /* else converter not found */ | |
787 | } | |
788 | return NULL; | |
789 | } | |
790 | ||
791 | U_CFUNC uint16_t | |
792 | ucnv_io_countStandards(UErrorCode *pErrorCode) { | |
793 | if (haveAliasData(pErrorCode)) { | |
794 | /* Don't include the empty list */ | |
795 | return (uint16_t)(gTagListSize - UCNV_NUM_HIDDEN_TAGS); | |
796 | } | |
797 | ||
798 | return 0; | |
799 | } | |
800 | ||
801 | U_CAPI const char * U_EXPORT2 | |
802 | ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { | |
803 | if (haveAliasData(pErrorCode)) { | |
804 | if (n < gTagListSize - UCNV_NUM_HIDDEN_TAGS) { | |
805 | return GET_STRING(gTagList[n]); | |
806 | } | |
807 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
808 | } | |
809 | ||
810 | return NULL; | |
811 | } | |
812 | ||
813 | U_CAPI const char * U_EXPORT2 | |
814 | ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { | |
815 | if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
816 | uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); | |
817 | ||
818 | if (0 < listOffset && listOffset < gTaggedAliasListsSize) { | |
819 | const uint16_t *currList = gTaggedAliasLists + listOffset + 1; | |
820 | ||
821 | /* Get the preferred name from this list */ | |
822 | if (currList[0]) { | |
823 | return GET_STRING(currList[0]); | |
824 | } | |
825 | /* else someone screwed up the alias table. */ | |
826 | /* *pErrorCode = U_INVALID_FORMAT_ERROR */ | |
827 | } | |
828 | } | |
829 | ||
830 | return NULL; | |
831 | } | |
832 | ||
833 | U_CAPI const char * U_EXPORT2 | |
834 | ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { | |
835 | if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { | |
836 | uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); | |
837 | ||
838 | if (convNum < gConverterListSize) { | |
839 | return GET_STRING(gConverterList[convNum]); | |
840 | } | |
841 | } | |
842 | ||
843 | return NULL; | |
844 | } | |
845 | ||
846 | void | |
847 | ucnv_io_flushAvailableConverterCache() { | |
848 | if (gAvailableConverters) { | |
849 | umtx_lock(NULL); | |
850 | gAvailableConverterCount = 0; | |
851 | uprv_free((char **)gAvailableConverters); | |
852 | gAvailableConverters = NULL; | |
853 | umtx_unlock(NULL); | |
854 | } | |
855 | } | |
856 | ||
857 | static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { | |
858 | if (gAvailableConverters == NULL) { | |
859 | uint16_t idx; | |
860 | uint16_t localConverterCount; | |
861 | UErrorCode status; | |
862 | const char *converterName; | |
863 | const char **localConverterList; | |
864 | ||
865 | if (!haveAliasData(pErrorCode)) { | |
866 | return FALSE; | |
867 | } | |
868 | ||
869 | /* We can't have more than "*converterTable" converters to open */ | |
870 | localConverterList = (const char **) uprv_malloc(gConverterListSize * sizeof(char*)); | |
871 | if (!localConverterList) { | |
872 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
873 | return FALSE; | |
874 | } | |
875 | ||
876 | localConverterCount = 0; | |
877 | ||
878 | for (idx = 0; idx < gConverterListSize; idx++) { | |
879 | status = U_ZERO_ERROR; | |
880 | converterName = GET_STRING(gConverterList[idx]); | |
881 | ucnv_close(ucnv_open(converterName, &status)); | |
882 | if (U_SUCCESS(status)) { | |
883 | localConverterList[localConverterCount++] = converterName; | |
884 | } | |
885 | } | |
886 | ||
887 | umtx_lock(NULL); | |
888 | if (gAvailableConverters == NULL) { | |
889 | gAvailableConverters = localConverterList; | |
890 | gAvailableConverterCount = localConverterCount; | |
891 | /* haveData should have already registered the cleanup function */ | |
892 | } | |
893 | else { | |
894 | uprv_free((char **)localConverterList); | |
895 | } | |
896 | umtx_unlock(NULL); | |
897 | } | |
898 | return TRUE; | |
899 | } | |
900 | ||
901 | U_CFUNC uint16_t | |
902 | ucnv_io_countAvailableConverters(UErrorCode *pErrorCode) { | |
903 | if (haveAvailableConverterList(pErrorCode)) { | |
904 | return gAvailableConverterCount; | |
905 | } | |
906 | return 0; | |
907 | } | |
908 | ||
909 | U_CFUNC const char * | |
910 | ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { | |
911 | if (haveAvailableConverterList(pErrorCode)) { | |
912 | if (n < gAvailableConverterCount) { | |
913 | return gAvailableConverters[n]; | |
914 | } | |
915 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
916 | } | |
917 | return NULL; | |
918 | } | |
919 | ||
920 | static int32_t U_CALLCONV | |
921 | ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) { | |
922 | return gConverterListSize; | |
923 | } | |
924 | ||
925 | static const char* U_CALLCONV | |
926 | ucnv_io_nextAllConverters(UEnumeration *enumerator, | |
927 | int32_t* resultLength, | |
928 | UErrorCode *pErrorCode) | |
929 | { | |
930 | uint16_t *myContext = (uint16_t *)(enumerator->context); | |
931 | ||
932 | if (*myContext < gConverterListSize) { | |
933 | const char *myStr = GET_STRING(gConverterList[(*myContext)++]); | |
934 | if (resultLength) { | |
935 | *resultLength = (int32_t)uprv_strlen(myStr); | |
936 | } | |
937 | return myStr; | |
938 | } | |
939 | /* Either we accessed a zero length list, or we enumerated too far. */ | |
940 | *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; | |
941 | return NULL; | |
942 | } | |
943 | ||
944 | static void U_CALLCONV | |
945 | ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) { | |
946 | *((uint16_t *)(enumerator->context)) = 0; | |
947 | } | |
948 | ||
949 | static const UEnumeration gEnumAllConverters = { | |
950 | NULL, | |
951 | NULL, | |
952 | ucnv_io_closeUEnumeration, | |
953 | ucnv_io_countAllConverters, | |
954 | uenum_unextDefault, | |
955 | ucnv_io_nextAllConverters, | |
956 | ucnv_io_resetAllConverters | |
957 | }; | |
958 | ||
959 | U_CAPI UEnumeration * U_EXPORT2 | |
960 | ucnv_openAllNames(UErrorCode *pErrorCode) { | |
961 | UEnumeration *myEnum = NULL; | |
962 | if (haveAliasData(pErrorCode)) { | |
963 | uint16_t *myContext; | |
964 | ||
965 | myEnum = uprv_malloc(sizeof(UEnumeration)); | |
966 | if (myEnum == NULL) { | |
967 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
968 | return NULL; | |
969 | } | |
970 | uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); | |
971 | myContext = uprv_malloc(sizeof(uint16_t)); | |
972 | if (myContext == NULL) { | |
973 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
974 | uprv_free(myEnum); | |
975 | return NULL; | |
976 | } | |
977 | *myContext = 0; | |
978 | myEnum->context = myContext; | |
979 | } | |
980 | return myEnum; | |
981 | } | |
982 | ||
983 | U_CFUNC uint16_t | |
984 | ucnv_io_countAvailableAliases(UErrorCode *pErrorCode) { | |
985 | if (haveAliasData(pErrorCode)) { | |
986 | return (uint16_t)gAliasListSize; | |
987 | } | |
988 | return 0; | |
989 | } | |
990 | ||
991 | /* default converter name --------------------------------------------------- */ | |
992 | ||
993 | /* | |
994 | * In order to be really thread-safe, the get function would have to take | |
995 | * a buffer parameter and copy the current string inside a mutex block. | |
996 | * This implementation only tries to be really thread-safe while | |
997 | * setting the name. | |
998 | * It assumes that setting a pointer is atomic. | |
999 | */ | |
1000 | ||
1001 | U_CFUNC const char * | |
1002 | ucnv_io_getDefaultConverterName() { | |
1003 | /* local variable to be thread-safe */ | |
1004 | const char *name; | |
1005 | ||
1006 | umtx_lock(NULL); | |
1007 | name=gDefaultConverterName; | |
1008 | umtx_unlock(NULL); | |
1009 | ||
1010 | if(name==NULL) { | |
1011 | UErrorCode errorCode = U_ZERO_ERROR; | |
1012 | UConverter *cnv = NULL; | |
1013 | int32_t length = 0; | |
1014 | ||
1015 | name = uprv_getDefaultCodepage(); | |
1016 | ||
1017 | /* if the name is there, test it out and get the canonical name with options */ | |
1018 | if(name != NULL) { | |
1019 | cnv = ucnv_open(name, &errorCode); | |
1020 | if(U_SUCCESS(errorCode) && cnv != NULL) { | |
1021 | name = ucnv_getName(cnv, &errorCode); | |
1022 | } | |
1023 | } | |
1024 | ||
1025 | if(name == NULL || name[0] == 0 | |
1026 | || U_FAILURE(errorCode) || cnv == NULL | |
1027 | || length>=sizeof(gDefaultConverterNameBuffer)) | |
1028 | { | |
1029 | /* Panic time, let's use a fallback. */ | |
1030 | #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) | |
1031 | name = "US-ASCII"; | |
1032 | /* there is no 'algorithmic' converter for EBCDIC */ | |
1033 | #elif defined(OS390) | |
1034 | name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; | |
1035 | #else | |
1036 | name = "ibm-37_P100-1995"; | |
1037 | #endif | |
1038 | } | |
1039 | ||
1040 | length=(int32_t)(uprv_strlen(name)); | |
1041 | ||
1042 | /* Copy the name before we close the converter. */ | |
1043 | umtx_lock(NULL); | |
1044 | uprv_memcpy(gDefaultConverterNameBuffer, name, length); | |
1045 | gDefaultConverterNameBuffer[length]=0; | |
1046 | gDefaultConverterName = gDefaultConverterNameBuffer; | |
1047 | name = gDefaultConverterName; | |
1048 | ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); | |
1049 | umtx_unlock(NULL); | |
1050 | ||
1051 | /* The close may make the current name go away. */ | |
1052 | ucnv_close(cnv); | |
1053 | } | |
1054 | ||
1055 | return name; | |
1056 | } | |
1057 | ||
1058 | U_CFUNC void | |
1059 | ucnv_io_setDefaultConverterName(const char *converterName) { | |
1060 | if(converterName==NULL) { | |
1061 | /* reset to the default codepage */ | |
1062 | umtx_lock(NULL); | |
1063 | gDefaultConverterName=NULL; | |
1064 | umtx_unlock(NULL); | |
1065 | } else { | |
1066 | UErrorCode errorCode=U_ZERO_ERROR; | |
1067 | const char *name=ucnv_io_getConverterName(converterName, &errorCode); | |
1068 | ||
1069 | umtx_lock(NULL); | |
1070 | ||
1071 | if(U_SUCCESS(errorCode) && name!=NULL) { | |
1072 | gDefaultConverterName=name; | |
1073 | } else { | |
1074 | /* do not set the name if the alias lookup failed and it is too long */ | |
1075 | int32_t length=(int32_t)(uprv_strlen(converterName)); | |
1076 | if(length<sizeof(gDefaultConverterNameBuffer)) { | |
1077 | /* it was not found as an alias, so copy it - accept an empty name */ | |
1078 | uprv_memcpy(gDefaultConverterNameBuffer, converterName, length); | |
1079 | gDefaultConverterNameBuffer[length]=0; | |
1080 | gDefaultConverterName=gDefaultConverterNameBuffer; | |
1081 | } | |
1082 | } | |
1083 | umtx_unlock(NULL); | |
1084 | } | |
1085 | } | |
1086 | ||
1087 | /* alias table swapping ----------------------------------------------------- */ | |
1088 | ||
1089 | typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); | |
1090 | ||
/*
 * One row of the temporary array used for re-sorting the alias list.
 *
 * Each row holds a platform-endian charset string index together with the
 * row's original position. The array is sorted by the strings; afterwards
 * the actual arrays are permuted according to the recorded positions.
 */
typedef struct TempRow {
    uint16_t strIndex;  /* string index, read from the alias list */
    uint16_t sortIndex; /* position of this row before sorting */
} TempRow;
1101 | ||
1102 | typedef struct TempAliasTable { | |
1103 | const char *chars; | |
1104 | TempRow *rows; | |
1105 | uint16_t *resort; | |
1106 | StripForCompareFn *stripForCompare; | |
1107 | } TempAliasTable; | |
1108 | ||
/* Number of rows for which ucnv_swapAliases() uses stack arrays instead of allocating. */
enum { STACK_ROW_CAPACITY = 500 };
1112 | ||
1113 | static int32_t | |
1114 | io_compareRows(const void *context, const void *left, const void *right) { | |
1115 | char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], | |
1116 | strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; | |
1117 | ||
1118 | TempAliasTable *tempTable=(TempAliasTable *)context; | |
1119 | const char *chars=tempTable->chars; | |
1120 | ||
1121 | return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), | |
1122 | tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); | |
1123 | } | |
1124 | ||
1125 | U_CAPI int32_t U_EXPORT2 | |
1126 | ucnv_swapAliases(const UDataSwapper *ds, | |
1127 | const void *inData, int32_t length, void *outData, | |
1128 | UErrorCode *pErrorCode) { | |
1129 | const UDataInfo *pInfo; | |
1130 | int32_t headerSize; | |
1131 | ||
1132 | const uint16_t *inTable; | |
1133 | uint32_t toc[offsetsCount]; | |
1134 | uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ | |
1135 | uint32_t i, count, tocLength, topOffset; | |
1136 | ||
1137 | TempRow rows[STACK_ROW_CAPACITY]; | |
1138 | uint16_t resort[STACK_ROW_CAPACITY]; | |
1139 | TempAliasTable tempTable; | |
1140 | ||
1141 | /* udata_swapDataHeader checks the arguments */ | |
1142 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); | |
1143 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
1144 | return 0; | |
1145 | } | |
1146 | ||
1147 | /* check data format and format version */ | |
1148 | pInfo=(const UDataInfo *)((const char *)inData+4); | |
1149 | if(!( | |
1150 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ | |
1151 | pInfo->dataFormat[1]==0x76 && | |
1152 | pInfo->dataFormat[2]==0x41 && | |
1153 | pInfo->dataFormat[3]==0x6c && | |
1154 | pInfo->formatVersion[0]==3 | |
1155 | )) { | |
1156 | udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", | |
1157 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
1158 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
1159 | pInfo->formatVersion[0]); | |
1160 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
1161 | return 0; | |
1162 | } | |
1163 | ||
1164 | /* an alias table must contain at least the table of contents array */ | |
1165 | if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { | |
1166 | udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", | |
1167 | length-headerSize); | |
1168 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1169 | return 0; | |
1170 | } | |
1171 | ||
1172 | inTable=(const uint16_t *)((const char *)inData+headerSize); | |
1173 | toc[tocLengthIndex]=tocLength=ds->readUInt32(((const uint32_t *)inTable)[tocLengthIndex]); | |
1174 | if(tocLength<minTocLength) { | |
1175 | udata_printError(ds, "ucnv_swapAliases(): table of contents too short (%u sections)\n", tocLength); | |
1176 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
1177 | return 0; | |
1178 | } | |
1179 | ||
1180 | /* read the known part of the table of contents */ | |
1181 | for(i=converterListIndex; i<=minTocLength; ++i) { | |
1182 | toc[i]=ds->readUInt32(((const uint32_t *)inTable)[i]); | |
1183 | } | |
1184 | ||
1185 | /* compute offsets */ | |
1186 | offsets[tocLengthIndex]=0; | |
1187 | offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ | |
1188 | for(i=tagListIndex; i<=stringTableIndex; ++i) { | |
1189 | offsets[i]=offsets[i-1]+toc[i-1]; | |
1190 | } | |
1191 | ||
1192 | /* compute the overall size of the after-header data, in numbers of 16-bit units */ | |
1193 | topOffset=offsets[i-1]+toc[i-1]; | |
1194 | ||
1195 | if(length>=0) { | |
1196 | uint16_t *outTable; | |
1197 | const uint16_t *p, *p2; | |
1198 | uint16_t *q, *q2; | |
1199 | uint16_t oldIndex; | |
1200 | ||
1201 | if((length-headerSize)<(2*(int32_t)topOffset)) { | |
1202 | udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", | |
1203 | length-headerSize); | |
1204 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
1205 | return 0; | |
1206 | } | |
1207 | ||
1208 | outTable=(uint16_t *)((char *)outData+headerSize); | |
1209 | ||
1210 | /* swap the entire table of contents */ | |
1211 | ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); | |
1212 | ||
1213 | /* swap strings */ | |
1214 | ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)toc[stringTableIndex], | |
1215 | outTable+offsets[stringTableIndex], pErrorCode); | |
1216 | if(U_FAILURE(*pErrorCode)) { | |
1217 | udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed - %s\n", | |
1218 | u_errorName(*pErrorCode)); | |
1219 | return 0; | |
1220 | } | |
1221 | ||
1222 | if(ds->inCharset==ds->outCharset) { | |
1223 | /* no need to sort, just swap all 16-bit values together */ | |
1224 | ds->swapArray16(ds, | |
1225 | inTable+offsets[converterListIndex], | |
1226 | 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), | |
1227 | outTable+offsets[converterListIndex], | |
1228 | pErrorCode); | |
1229 | } else { | |
1230 | /* allocate the temporary table for sorting */ | |
1231 | count=toc[aliasListIndex]; | |
1232 | ||
1233 | tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ | |
1234 | ||
1235 | if(count<=STACK_ROW_CAPACITY) { | |
1236 | tempTable.rows=rows; | |
1237 | tempTable.resort=resort; | |
1238 | } else { | |
1239 | tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); | |
1240 | if(tempTable.rows==NULL) { | |
1241 | udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", | |
1242 | count); | |
1243 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
1244 | return 0; | |
1245 | } | |
1246 | tempTable.resort=(uint16_t *)(tempTable.rows+count); | |
1247 | } | |
1248 | ||
1249 | if(ds->outCharset==U_ASCII_FAMILY) { | |
1250 | tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; | |
1251 | } else /* U_EBCDIC_FAMILY */ { | |
1252 | tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; | |
1253 | } | |
1254 | ||
1255 | /* | |
1256 | * Sort unique aliases+mapped names. | |
1257 | * | |
1258 | * We need to sort the list again by outCharset strings because they | |
1259 | * sort differently for different charset families. | |
1260 | * First we set up a temporary table with the string indexes and | |
1261 | * sorting indexes and sort that. | |
1262 | * Then we permutate and copy/swap the actual values. | |
1263 | */ | |
1264 | p=inTable+offsets[aliasListIndex]; | |
1265 | q=outTable+offsets[aliasListIndex]; | |
1266 | ||
1267 | p2=inTable+offsets[untaggedConvArrayIndex]; | |
1268 | q2=outTable+offsets[untaggedConvArrayIndex]; | |
1269 | ||
1270 | for(i=0; i<count; ++i) { | |
1271 | tempTable.rows[i].strIndex=ds->readUInt16(p[i]); | |
1272 | tempTable.rows[i].sortIndex=(uint16_t)i; | |
1273 | } | |
1274 | ||
1275 | uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), | |
1276 | io_compareRows, &tempTable, | |
1277 | FALSE, pErrorCode); | |
1278 | ||
1279 | if(U_SUCCESS(*pErrorCode)) { | |
1280 | /* copy/swap/permutate items */ | |
1281 | if(p!=q) { | |
1282 | for(i=0; i<count; ++i) { | |
1283 | oldIndex=tempTable.rows[i].sortIndex; | |
1284 | ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); | |
1285 | ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); | |
1286 | } | |
1287 | } else { | |
1288 | /* | |
1289 | * If we swap in-place, then the permutation must use another | |
1290 | * temporary array (tempTable.resort) | |
1291 | * before the results are copied to the outBundle. | |
1292 | */ | |
1293 | uint16_t *r=tempTable.resort; | |
1294 | ||
1295 | for(i=0; i<count; ++i) { | |
1296 | oldIndex=tempTable.rows[i].sortIndex; | |
1297 | ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); | |
1298 | } | |
1299 | uprv_memcpy(q, r, 2*count); | |
1300 | ||
1301 | for(i=0; i<count; ++i) { | |
1302 | oldIndex=tempTable.rows[i].sortIndex; | |
1303 | ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); | |
1304 | } | |
1305 | uprv_memcpy(q2, r, 2*count); | |
1306 | } | |
1307 | } | |
1308 | ||
1309 | if(tempTable.rows!=rows) { | |
1310 | uprv_free(tempTable.rows); | |
1311 | } | |
1312 | ||
1313 | if(U_FAILURE(*pErrorCode)) { | |
1314 | udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed - %s\n", | |
1315 | count, u_errorName(*pErrorCode)); | |
1316 | return 0; | |
1317 | } | |
1318 | ||
1319 | /* swap remaining 16-bit values */ | |
1320 | ds->swapArray16(ds, | |
1321 | inTable+offsets[converterListIndex], | |
1322 | 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), | |
1323 | outTable+offsets[converterListIndex], | |
1324 | pErrorCode); | |
1325 | ds->swapArray16(ds, | |
1326 | inTable+offsets[taggedAliasArrayIndex], | |
1327 | 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), | |
1328 | outTable+offsets[taggedAliasArrayIndex], | |
1329 | pErrorCode); | |
1330 | } | |
1331 | } | |
1332 | ||
1333 | return headerSize+2*(int32_t)topOffset; | |
1334 | } | |
1335 | ||
1336 | #endif | |
1337 | ||
1338 | /* | |
1339 | * Hey, Emacs, please set the following: | |
1340 | * | |
1341 | * Local Variables: | |
1342 | * indent-tabs-mode: nil | |
1343 | * End: | |
1344 | * | |
1345 | */ |