2 *******************************************************************************
4 * Copyright (C) 1999-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: gencnval.c
10 * tab size: 8 (not used)
13 * created on: 1999nov05
14 * created by: Markus W. Scherer
16 * This program reads convrtrs.txt and writes a memory-mappable
17 * converter name alias table to cnvalias.dat .
19 * This program currently writes version 2.1 of the data format. See
20 * ucnv_io.c for more details on the format. Note that version 2.1
21 * is written in such a way that a 2.0 reader will be able to use it,
22 * and a 2.1 reader will be able to read 2.0.
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
39 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
41 /* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
42 That is the maximum size for the string stores combined
43 because the strings are indexed at 16-bit boundaries by a
44 16-bit index, and there is only one section for the
47 #define STRING_STORE_SIZE 0x1FBFE /* 130046 */
48 #define TAG_STORE_SIZE 0x400 /* 1024 */
50 /* The combined tag and converter count can affect the number of lists
51 created. The size of all lists must be less than (2^17 - 1)
52 because the lists are indexed as a 16-bit array with a 16-bit index.
54 #define MAX_TAG_COUNT 0x3F /* 63 */
55 #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
56 #define MAX_ALIAS_COUNT 0xFFFF /* 65535 */
58 /* The maximum number of aliases that a standard tag/converter combination can have.
59 At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
60 this value. I don't recommend more than 31 for this value.
62 #define MAX_TC_ALIAS_COUNT 0x1F /* 31 */
64 #define MAX_LINE_SIZE 0x7FFF /* 32767 */
65 #define MAX_LIST_SIZE 0xFFFF /* 65535 */
67 #define DATA_NAME "cnvalias"
68 #define DATA_TYPE "icu" /* ICU alias table */
70 #define ALL_TAG_STR "ALL"
72 #define EMPTY_TAG_NUM 0
74 /* UDataInfo cf. udata.h */
75 static const UDataInfo dataInfo
={
84 {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
85 {3, 0, 0, 0}, /* formatVersion */
86 {1, 4, 2, 0} /* dataVersion */
95 static char stringStore
[STRING_STORE_SIZE
];
96 static StringBlock stringBlock
= { stringStore
, 0, STRING_STORE_SIZE
};
100 uint16_t *aliases
; /* Index into stringStore */
104 uint16_t converter
; /* Index into stringStore */
105 uint16_t totalAliasCount
; /* Total aliases in this column */
108 static Converter converters
[MAX_CONV_COUNT
];
109 static uint16_t converterCount
=0;
111 static char tagStore
[TAG_STORE_SIZE
];
112 static StringBlock tagBlock
= { tagStore
, 0, TAG_STORE_SIZE
};
115 uint16_t tag
; /* Index into tagStore */
116 uint16_t totalAliasCount
; /* Total aliases in this row */
117 AliasList aliasList
[MAX_CONV_COUNT
];
120 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
121 static Tag tags
[MAX_TAG_COUNT
];
122 static uint16_t tagCount
= 0;
124 /* Used for storing all aliases */
125 static uint16_t knownAliases
[MAX_ALIAS_COUNT
];
126 static uint16_t knownAliasesCount
= 0;
127 /*static uint16_t duplicateKnownAliasesCount = 0;*/
129 /* Used for storing the lists section that point to aliases */
130 static uint16_t aliasLists
[MAX_LIST_SIZE
];
131 static uint16_t aliasListsSize
= 0;
133 /* Were the standard tags declared before the aliases. */
134 static UBool standardTagsUsed
= FALSE
;
135 static UBool verbose
= FALSE
;
136 static int32_t lineNum
= 1;
138 /* prototypes --------------------------------------------------------------- */
141 parseLine(const char *line
);
144 parseFile(FileStream
*in
);
150 addOfficialTaggedStandards(char *line
, int32_t lineLen
);
153 addAlias(const char *alias
, uint16_t standard
, uint16_t converter
, UBool defaultName
);
156 addConverter(const char *converter
);
159 allocString(StringBlock
*block
, uint32_t length
);
162 addToKnownAliases(const char *alias
);
165 compareAliases(const void *alias1
, const void *alias2
);
168 getTagNumber(const char *tag
, uint16_t tagLen
);
171 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
174 writeAliasTable(UNewDataMemory
*out
);
176 /* -------------------------------------------------------------------------- */
/* Turn a 16-bit store index back into a pointer into the store.  Indices
   count 2-byte units from the start of the store, hence the shift by one.
   Presumes that you used allocString(). */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/* Turn a pointer into the store into its 16-bit index (byte offset halved).
   Presumes that you used allocString().  The argument is parenthesized so
   that any pointer expression, not just a plain name, expands correctly. */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
196 static UOption options
[]={
198 UOPTION_HELP_QUESTION_MARK
,
206 main(int argc
, char* argv
[]) {
211 UErrorCode errorCode
=U_ZERO_ERROR
;
213 U_MAIN_INIT_ARGS(argc
, argv
);
215 /* preset then read command line options */
216 options
[3].value
=options
[4].value
=u_getDataDirectory();
217 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
219 /* error handling, printing usage message */
222 "error in command line argument \"%s\"\n",
225 if(argc
<0 || options
[HELP1
].doesOccur
|| options
[HELP2
].doesOccur
) {
227 "usage: %s [-options] [convrtrs.txt]\n"
228 "\tread convrtrs.txt and create " U_ICUDATA_NAME
"_" DATA_NAME
"." DATA_TYPE
"\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-v or --verbose prints out extra information about the alias table\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
234 "\t-s or --sourcedir source directory, followed by the path\n",
236 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
239 if(options
[VERBOSE
].doesOccur
) {
246 path
=options
[4].value
;
247 if(path
!=NULL
&& *path
!=0) {
248 char *end
= pathBuf
+uprv_strlen(pathBuf
);
249 uprv_strcpy(pathBuf
, path
);
250 if(*(end
-1)!=U_FILE_SEP_CHAR
) {
251 *(end
++)=U_FILE_SEP_CHAR
;
253 uprv_strcpy(end
, "convrtrs.txt");
256 path
= "convrtrs.txt";
260 uprv_memset(stringStore
, 0, sizeof(stringStore
));
261 uprv_memset(tagStore
, 0, sizeof(tagStore
));
262 uprv_memset(converters
, 0, sizeof(converters
));
263 uprv_memset(tags
, 0, sizeof(tags
));
264 uprv_memset(aliasLists
, 0, sizeof(aliasLists
));
265 uprv_memset(knownAliases
, 0, sizeof(aliasLists
));
268 in
=T_FileStream_open(path
, "r");
270 fprintf(stderr
, "gencnval: unable to open input file convrtrs.txt\n");
271 exit(U_FILE_ACCESS_ERROR
);
274 T_FileStream_close(in
);
276 /* create the output file */
277 out
=udata_create(options
[3].value
, DATA_TYPE
, U_ICUDATA_NAME
"_" DATA_NAME
, &dataInfo
,
278 options
[2].doesOccur
? U_COPYRIGHT_STRING
: NULL
, &errorCode
);
279 if(U_FAILURE(errorCode
)) {
280 fprintf(stderr
, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode
));
284 /* write the table of aliases based on a tag/converter name combination */
285 writeAliasTable(out
);
288 udata_finish(out
, &errorCode
);
289 if(U_FAILURE(errorCode
)) {
290 fprintf(stderr
, "gencnval: error finishing output file - %s\n", u_errorName(errorCode
));
298 parseFile(FileStream
*in
) {
299 char line
[MAX_LINE_SIZE
];
300 char lastLine
[MAX_LINE_SIZE
];
301 int32_t lineSize
= 0;
302 int32_t lastLineSize
= 0;
303 UBool validParse
= TRUE
;
307 /* Add the empty tag, which is for untagged aliases */
309 getTagNumber(ALL_TAG_STR
, 3);
310 allocString(&stringBlock
, 1);
312 /* read the list of aliases */
316 /* Read non-empty lines that don't start with a space character. */
317 while (T_FileStream_readLine(in
, lastLine
, MAX_LINE_SIZE
) != NULL
) {
318 lastLineSize
= chomp(lastLine
);
319 if (lineSize
== 0 || (lastLineSize
> 0 && isspace(*lastLine
))) {
320 uprv_strcpy(line
+ lineSize
, lastLine
);
321 lineSize
+= lastLineSize
;
322 } else if (lineSize
> 0) {
329 if (validParse
|| lineSize
> 0) {
330 if (isspace(*line
)) {
331 fprintf(stderr
, "error(line %d): cannot start an alias with a space\n", lineNum
-1);
333 } else if (line
[0] == '{') {
334 if (!standardTagsUsed
&& line
[lineSize
- 1] != '}') {
335 fprintf(stderr
, "error(line %d): alias needs to start with a converter name\n", lineNum
);
338 addOfficialTaggedStandards(line
, lineSize
);
339 standardTagsUsed
= TRUE
;
341 if (standardTagsUsed
) {
345 fprintf(stderr
, "error(line %d): alias table needs to start a list of standard tags\n", lineNum
);
349 /* Was the last line consumed */
350 if (lastLineSize
> 0) {
351 uprv_strcpy(line
, lastLine
);
352 lineSize
= lastLineSize
;
362 /* This works almost like the Perl chomp.
363 It removes the newlines, comments and trailing whitespace (not preceding whitespace).
368 char *lastNonSpace
= line
;
370 /* truncate at a newline or a comment */
371 if(*s
== '\r' || *s
== '\n' || *s
== '#') {
380 if (lastNonSpace
++ > line
) {
384 return (int32_t)(s
- line
);
388 parseLine(const char *line
) {
389 uint16_t pos
=0, start
, limit
, length
, cnv
;
390 char *converter
, *alias
;
392 /* skip leading white space */
393 /* There is no whitespace at the beginning anymore */
394 /* while(line[pos]!=0 && isspace(line[pos])) {
399 /* is there nothing on this line? */
404 /* get the converter name */
406 while(line
[pos
]!=0 && !isspace(line
[pos
])) {
411 /* store the converter name */
412 length
=(uint16_t)(limit
-start
);
413 converter
=allocString(&stringBlock
, length
+1);
414 uprv_memcpy(converter
, line
+start
, length
);
417 /* add the converter to the converter table */
418 cnv
=addConverter(converter
);
420 /* The name itself may be tagged, so let's add it to the aliases list properly */
423 /* get all the real aliases */
426 /* skip white space */
427 while(line
[pos
]!=0 && isspace(line
[pos
])) {
431 /* is there no more alias name on this line? */
436 /* get an alias name */
438 while(line
[pos
]!=0 && line
[pos
]!='{' && !isspace(line
[pos
])) {
443 /* store the alias name */
444 length
=(uint16_t)(limit
-start
);
446 /* add the converter as its own alias to the alias table */
448 addAlias(alias
, ALL_TAG_NUM
, cnv
, TRUE
);
451 alias
=allocString(&stringBlock
, length
+1);
452 uprv_memcpy(alias
, line
+start
, length
);
454 addAlias(alias
, ALL_TAG_NUM
, cnv
, FALSE
);
456 addToKnownAliases(alias
);
458 /* add the alias/converter pair to the alias table */
459 /* addAlias(alias, 0, cnv, FALSE);*/
461 /* skip whitespace */
462 while (line
[pos
] && isspace(line
[pos
])) {
466 /* handle tags if they are present */
467 if (line
[pos
] == '{') {
471 while (line
[pos
] && line
[pos
] != '}' && !isspace( line
[pos
])) {
476 if (start
!= limit
) {
477 /* add the tag to the tag table */
478 uint16_t tag
= getTagNumber(line
+ start
, (uint16_t)(limit
- start
));
479 addAlias(alias
, tag
, cnv
, (UBool
)(line
[limit
-1] == '*'));
482 while (line
[pos
] && isspace(line
[pos
])) {
485 } while (line
[pos
] && line
[pos
] != '}');
487 if (line
[pos
] == '}') {
490 fprintf(stderr
, "error(line %d): Unterminated tag list\n", lineNum
);
491 exit(U_UNMATCHED_BRACES
);
494 addAlias(alias
, EMPTY_TAG_NUM
, cnv
, (UBool
)(tags
[0].aliasList
[cnv
].aliasCount
== 0));
500 getTagNumber(const char *tag
, uint16_t tagLen
) {
503 UBool preferredName
= ((tagLen
> 0) ? (tag
[tagLen
- 1] == '*') : (FALSE
));
505 if (tagCount
>= MAX_TAG_COUNT
) {
506 fprintf(stderr
, "error(line %d): too many tags\n", lineNum
);
507 exit(U_BUFFER_OVERFLOW_ERROR
);
515 for (t
= 0; t
< tagCount
; ++t
) {
516 const char *currTag
= GET_TAG_STR(tags
[t
].tag
);
517 if (uprv_strlen(currTag
) == tagLen
&& !uprv_strnicmp(currTag
, tag
, tagLen
)) {
522 /* we need to add this tag */
523 if (tagCount
>= MAX_TAG_COUNT
) {
524 fprintf(stderr
, "error(line %d): too many tags\n", lineNum
);
525 exit(U_BUFFER_OVERFLOW_ERROR
);
528 /* allocate a new entry in the tag table */
529 atag
= allocString(&tagBlock
, tagLen
+ 1);
530 uprv_memcpy(atag
, tag
, tagLen
);
533 if (standardTagsUsed
) {
534 fprintf(stderr
, "error(line %d): Tag \"%s\" is not declared at the beginning of the alias table.\n",
538 else if (tagLen
> 0 && strcmp(tag
, ALL_TAG_STR
) != 0) {
539 fprintf(stderr
, "warning(line %d): Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
543 /* add the tag to the tag table */
544 tags
[tagCount
].tag
= GET_TAG_NUM(atag
);
545 /* The aliasList should be set to 0's already */
551 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
552 tags[tag].aliases[converter] = alias;
557 addOfficialTaggedStandards(char *line
, int32_t lineLen
) {
559 char *tag
= strchr(line
, '{') + 1;
561 static const char WHITESPACE
[] = " \t";
563 if (tagCount
> UCNV_NUM_RESERVED_TAGS
) {
564 fprintf(stderr
, "error(line %d): official tags already added\n", lineNum
);
565 exit(U_BUFFER_OVERFLOW_ERROR
);
567 strchr(tag
, '}')[0] = 0;
569 tag
= strtok(tag
, WHITESPACE
);
570 while (tag
!= NULL
) {
571 /* printf("Adding original tag \"%s\"\n", tag);*/
573 tagSize
= strlen(tag
) + 1;
574 /* allocate a new entry in the tag table */
576 atag
= allocString(&tagBlock
, tagSize
);
577 uprv_memcpy(atag
, tag
, tagSize
);
579 /* add the tag to the tag table */
580 tags
[tagCount
++].tag
= (uint16_t)((atag
- tagStore
) >> 1);
582 /* The aliasList should already be set to 0's */
585 tag
= strtok(NULL
, WHITESPACE
);
590 addToKnownAliases(const char *alias
) {
592 /* strict matching */
593 /* for (idx = 0; idx < knownAliasesCount; idx++) {
594 uint16_t num = GET_ALIAS_NUM(alias);
595 if (knownAliases[idx] != num
596 && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
598 fprintf(stderr, "warning(line %d): duplicate alias %s and %s found\n",
599 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
600 duplicateKnownAliasesCount++;
603 else if (knownAliases[idx] != num
604 && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
607 fprintf(stderr, "information(line %d): duplicate alias %s and %s found\n",
608 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
610 duplicateKnownAliasesCount++;
615 if (knownAliasesCount
>= MAX_ALIAS_COUNT
) {
616 fprintf(stderr
, "warning(line %d): Too many aliases defined for all converters\n",
618 exit(U_BUFFER_OVERFLOW_ERROR
);
620 /* TODO: We could try to unlist exact duplicates. */
621 return knownAliases
[knownAliasesCount
++] = GET_ALIAS_NUM(alias
);
625 @param standard When standard is 0, then it's the "empty" tag.
628 addAlias(const char *alias
, uint16_t standard
, uint16_t converter
, UBool defaultName
) {
630 UBool dupFound
= FALSE
;
631 UBool startEmptyWithoutDefault
= FALSE
;
632 AliasList
*aliasList
;
634 if(standard
>=MAX_TAG_COUNT
) {
635 fprintf(stderr
, "error(line %d): too many standard tags\n", lineNum
);
636 exit(U_BUFFER_OVERFLOW_ERROR
);
638 if(converter
>=MAX_CONV_COUNT
) {
639 fprintf(stderr
, "error(line %d): too many converter names\n", lineNum
);
640 exit(U_BUFFER_OVERFLOW_ERROR
);
642 aliasList
= &tags
[standard
].aliasList
[converter
];
644 if (strchr(alias
, '}')) {
645 fprintf(stderr
, "error(line %d): unmatched } found\n",
649 if(aliasList
->aliasCount
+ 1 >= MAX_TC_ALIAS_COUNT
) {
650 fprintf(stderr
, "error(line %d): too many aliases for alias %s and converter %s\n",
651 lineNum
, alias
, GET_ALIAS_STR(converters
[converter
].converter
));
652 exit(U_BUFFER_OVERFLOW_ERROR
);
655 /* Show this warning only once. All aliases are added to the "ALL" tag. */
656 if (standard
== ALL_TAG_NUM
&& GET_ALIAS_STR(converters
[converter
].converter
) != alias
) {
657 /* Normally these option values are parsed at runtime, and they can
658 be discarded when the alias is a default converter. Options should
659 only be on a converter and not an alias. */
660 if (uprv_strchr(alias
, UCNV_OPTION_SEP_CHAR
) != 0)
662 fprintf(stderr
, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING
"\". Options are parsed at run-time and do not need to be in the alias table.\n",
665 if (uprv_strchr(alias
, UCNV_VALUE_SEP_CHAR
) != 0)
667 fprintf(stderr
, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING
"\". Options are parsed at run-time and do not need to be in the alias table.\n",
672 /* Check for duplicates in a tag/converter combination */
673 for (idx
= 0; idx
< aliasList
->aliasCount
; idx
++) {
674 uint16_t aliasNum
= tags
[standard
].aliasList
[converter
].aliases
[idx
];
675 if (aliasNum
&& ucnv_compareNames(alias
, GET_ALIAS_STR(aliasNum
)) == 0 && standard
!= ALL_TAG_NUM
)
677 fprintf(stderr
, "warning(line %d): duplicate alias %s and %s found for standard %s\n",
678 lineNum
, alias
, GET_ALIAS_STR(aliasNum
), GET_TAG_STR(tags
[standard
].tag
));
684 if (!dupFound
&& standard
!= ALL_TAG_NUM
) {
685 /* Check for duplicate aliases for this tag on all converters */
686 for (idx
= 0; idx
< converterCount
; idx
++) {
687 for (idx2
= 0; idx2
< tags
[standard
].aliasList
[idx
].aliasCount
; idx2
++) {
688 uint16_t aliasNum
= tags
[standard
].aliasList
[idx
].aliases
[idx2
];
690 && ucnv_compareNames(alias
, GET_ALIAS_STR(aliasNum
)) == 0)
692 fprintf(stderr
, "warning(line %d): duplicate alias %s found for standard tag %s between converter %s and converter %s\n",
693 lineNum
, alias
, GET_TAG_STR(tags
[standard
].tag
), GET_ALIAS_STR(converters
[converter
].converter
), GET_ALIAS_STR(converters
[idx
].converter
));
700 /* Check for duplicate default aliases for this converter on all tags */
701 /* It's okay to have multiple standards prefer the same name */
702 /* if (verbose && !dupFound) {
703 for (idx = 0; idx < tagCount; idx++) {
704 if (tags[idx].aliasList[converter].aliases) {
705 uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
707 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
709 fprintf(stderr, "warning(line %d): duplicate alias %s found for converter %s and standard tag %s\n",
710 lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
718 if (aliasList
->aliasCount
<= 0) {
719 aliasList
->aliasCount
++;
720 startEmptyWithoutDefault
= TRUE
;
722 aliasList
->aliases
= (uint16_t *)uprv_realloc(aliasList
->aliases
, (aliasList
->aliasCount
+ 1) * sizeof(aliasList
->aliases
[0]));
723 if (startEmptyWithoutDefault
) {
724 aliasList
->aliases
[0] = 0;
727 if (aliasList
->aliases
[0] != 0) {
728 fprintf(stderr
, "error(line %d): Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n",
731 GET_ALIAS_STR(aliasList
->aliases
[0]),
732 GET_TAG_STR(tags
[standard
].tag
),
733 GET_ALIAS_STR(converters
[converter
].converter
));
736 aliasList
->aliases
[0] = GET_ALIAS_NUM(alias
);
738 aliasList
->aliases
[aliasList
->aliasCount
++] = GET_ALIAS_NUM(alias
);
740 /* aliasList->converter = converter;*/
742 converters
[converter
].totalAliasCount
++; /* One more to the column */
743 tags
[standard
].totalAliasCount
++; /* One more to the row */
745 return aliasList
->aliasCount
;
749 addConverter(const char *converter
) {
751 if(converterCount
>=MAX_CONV_COUNT
) {
752 fprintf(stderr
, "error(line %d): too many converters\n", lineNum
);
753 exit(U_BUFFER_OVERFLOW_ERROR
);
756 for (idx
= 0; idx
< converterCount
; idx
++) {
757 if (ucnv_compareNames(converter
, GET_ALIAS_STR(converters
[idx
].converter
)) == 0) {
758 fprintf(stderr
, "error(line %d): duplicate converter %s found!\n", lineNum
, converter
);
764 converters
[converterCount
].converter
= GET_ALIAS_NUM(converter
);
765 converters
[converterCount
].totalAliasCount
= 0;
767 return converterCount
++;
770 /* resolve this alias based on the prioritization of the standard tags. */
772 resolveAliasToConverter(uint16_t alias
, uint16_t *tagNum
, uint16_t *converterNum
) {
773 uint16_t idx
, idx2
, idx3
;
775 for (idx
= UCNV_NUM_RESERVED_TAGS
; idx
< tagCount
; idx
++) {
776 for (idx2
= 0; idx2
< converterCount
; idx2
++) {
777 for (idx3
= 0; idx3
< tags
[idx
].aliasList
[idx2
].aliasCount
; idx3
++) {
778 uint16_t aliasNum
= tags
[idx
].aliasList
[idx2
].aliases
[idx3
];
779 if (aliasNum
== alias
) {
781 *converterNum
= idx2
;
787 /* Do the leftovers last, just in case */
788 /* There is no need to do the ALL tag */
790 for (idx2
= 0; idx2
< converterCount
; idx2
++) {
791 for (idx3
= 0; idx3
< tags
[idx
].aliasList
[idx2
].aliasCount
; idx3
++) {
792 uint16_t aliasNum
= tags
[idx
].aliasList
[idx2
].aliases
[idx3
];
793 if (aliasNum
== alias
) {
795 *converterNum
= idx2
;
800 *tagNum
= UINT16_MAX
;
801 *converterNum
= UINT16_MAX
;
802 fprintf(stderr
, "warning: alias %s not found\n",
803 GET_ALIAS_STR(alias
));
807 /* The knownAliases should be sorted before calling this function */
809 resolveAliases(uint16_t *uniqueAliasArr
, uint16_t *uniqueAliasToConverterArr
, uint16_t aliasOffset
) {
810 uint32_t uniqueAliasIdx
= 0;
812 uint16_t currTagNum
, oldTagNum
;
813 uint16_t currConvNum
, oldConvNum
;
814 const char *lastName
;
816 resolveAliasToConverter(knownAliases
[0], &oldTagNum
, &currConvNum
);
817 uniqueAliasToConverterArr
[uniqueAliasIdx
] = currConvNum
;
818 oldConvNum
= currConvNum
;
819 uniqueAliasArr
[uniqueAliasIdx
] = knownAliases
[0] + aliasOffset
;
821 lastName
= GET_ALIAS_STR(knownAliases
[0]);
823 for (idx
= 1; idx
< knownAliasesCount
; idx
++) {
824 resolveAliasToConverter(knownAliases
[idx
], &currTagNum
, &currConvNum
);
825 if (ucnv_compareNames(lastName
, GET_ALIAS_STR(knownAliases
[idx
])) == 0) {
826 /* duplicate found */
827 if ((currTagNum
< oldTagNum
&& currTagNum
>= UCNV_NUM_RESERVED_TAGS
)
829 oldTagNum
= currTagNum
;
830 uniqueAliasToConverterArr
[uniqueAliasIdx
- 1] = currConvNum
;
831 uniqueAliasArr
[uniqueAliasIdx
- 1] = knownAliases
[idx
] + aliasOffset
;
833 printf("using %s instead of %s -> %s",
834 GET_ALIAS_STR(knownAliases
[idx
]),
836 GET_ALIAS_STR(converters
[currConvNum
].converter
));
837 if (oldConvNum
!= currConvNum
) {
838 printf(" (alias conflict)");
846 printf("folding %s into %s -> %s",
847 GET_ALIAS_STR(knownAliases
[idx
]),
849 GET_ALIAS_STR(converters
[oldConvNum
].converter
));
850 if (oldConvNum
!= currConvNum
) {
851 printf(" (alias conflict)");
856 if (oldConvNum
!= currConvNum
) {
857 uniqueAliasToConverterArr
[uniqueAliasIdx
- 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT
;
861 uniqueAliasToConverterArr
[uniqueAliasIdx
] = currConvNum
;
862 oldConvNum
= currConvNum
;
863 uniqueAliasArr
[uniqueAliasIdx
] = knownAliases
[idx
] + aliasOffset
;
865 lastName
= GET_ALIAS_STR(knownAliases
[idx
]);
866 oldTagNum
= currTagNum
;
867 /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
870 return uniqueAliasIdx
;
874 createOneAliasList(uint16_t *aliasArrLists
, uint32_t tag
, uint32_t converter
, uint16_t offset
) {
876 AliasList
*aliasList
= &tags
[tag
].aliasList
[converter
];
878 if (aliasList
->aliasCount
== 0) {
879 aliasArrLists
[tag
*converterCount
+ converter
] = 0;
882 aliasLists
[aliasListsSize
++] = aliasList
->aliasCount
;
884 /* write into the array area a 1's based index. */
885 aliasArrLists
[tag
*converterCount
+ converter
] = aliasListsSize
;
887 /* printf("tag %s converter %s\n",
888 GET_TAG_STR(tags[tag].tag),
889 GET_ALIAS_STR(converters[converter].converter));*/
890 for (aliasNum
= 0; aliasNum
< aliasList
->aliasCount
; aliasNum
++) {
893 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
894 if (aliasList
->aliases
[aliasNum
]) {
895 value
= aliasList
->aliases
[aliasNum
] + offset
;
898 if (tag
!= 0) { /* Only show the warning when it's not the leftover tag. */
899 printf("warning: tag %s does not have a default alias for %s\n",
900 GET_TAG_STR(tags
[tag
].tag
),
901 GET_ALIAS_STR(converters
[converter
].converter
));
904 aliasLists
[aliasListsSize
++] = value
;
905 if (aliasListsSize
>= MAX_LIST_SIZE
) {
906 fprintf(stderr
, "error: Too many alias lists\n");
907 exit(U_BUFFER_OVERFLOW_ERROR
);
915 writeAliasTable(UNewDataMemory
*out
) {
917 uint32_t uniqueAliasesSize
;
918 uint16_t aliasOffset
= (uint16_t)(tagBlock
.top
/sizeof(uint16_t));
919 uint16_t *aliasArrLists
= (uint16_t *)uprv_malloc(tagCount
* converterCount
* sizeof(uint16_t));
920 uint16_t *uniqueAliases
= (uint16_t *)uprv_malloc(knownAliasesCount
* sizeof(uint16_t));
921 uint16_t *uniqueAliasesToConverter
= (uint16_t *)uprv_malloc(knownAliasesCount
* sizeof(uint16_t));
923 qsort(knownAliases
, knownAliasesCount
, sizeof(knownAliases
[0]), compareAliases
);
924 uniqueAliasesSize
= resolveAliases(uniqueAliases
, uniqueAliasesToConverter
, aliasOffset
);
926 /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
929 /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
930 for (i
= 0; i
< tagCount
; ++i
) {
931 for (j
= 0; j
< converterCount
; ++j
) {
932 createOneAliasList(aliasArrLists
, i
, j
, aliasOffset
);
936 /* Write the size of the TOC */
937 udata_write32(out
, 8);
939 /* Write the sizes of each section */
940 /* All sizes are the number of uint16_t units, not bytes */
941 udata_write32(out
, converterCount
);
942 udata_write32(out
, tagCount
);
943 udata_write32(out
, uniqueAliasesSize
); /* list of aliases */
944 udata_write32(out
, uniqueAliasesSize
); /* The preresolved form of mapping an untagged the alias to a converter */
945 udata_write32(out
, tagCount
* converterCount
);
946 udata_write32(out
, aliasListsSize
+ 1);
947 udata_write32(out
, 0); /* Reserved space. */
948 udata_write32(out
, (tagBlock
.top
+ stringBlock
.top
) / sizeof(uint16_t));
950 /* write the table of converters */
951 /* Think of this as the column headers */
952 for(i
=0; i
<converterCount
; ++i
) {
953 udata_write16(out
, (uint16_t)(converters
[i
].converter
+ aliasOffset
));
956 /* write the table of tags */
957 /* Think of this as the row headers */
958 for(i
=UCNV_NUM_RESERVED_TAGS
; i
<tagCount
; ++i
) {
959 udata_write16(out
, tags
[i
].tag
);
961 /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
962 udata_write16(out
, tags
[EMPTY_TAG_NUM
].tag
);
963 udata_write16(out
, tags
[ALL_TAG_NUM
].tag
);
965 /* Write the unique list of aliases */
966 udata_writeBlock(out
, uniqueAliases
, uniqueAliasesSize
* sizeof(uint16_t));
968 /* Write the converter that each unique alias maps to */
969 udata_writeBlock(out
, uniqueAliasesToConverter
, uniqueAliasesSize
* sizeof(uint16_t));
971 /* Write the array to the lists */
972 udata_writeBlock(out
, (const void *)(aliasArrLists
+ (2*converterCount
)), (((tagCount
- 2) * converterCount
) * sizeof(uint16_t)));
973 /* Now write the leftover part of the array for the EMPTY and ALL lists */
974 udata_writeBlock(out
, (const void *)aliasArrLists
, (2 * converterCount
* sizeof(uint16_t)));
976 /* Offset the next array to make the index start at 1. */
977 udata_write16(out
, 0xDEAD);
979 /* Write the lists */
980 udata_writeBlock(out
, (const void *)aliasLists
, aliasListsSize
* sizeof(uint16_t));
982 /* write the tags strings */
983 udata_writeString(out
, tagBlock
.store
, tagBlock
.top
);
985 /* write the aliases strings */
986 udata_writeString(out
, stringBlock
.store
, stringBlock
.top
);
/* Release all three scratch arrays allocated at the top of this function. */
uprv_free(aliasArrLists);
uprv_free(uniqueAliases);
uprv_free(uniqueAliasesToConverter);
993 allocString(StringBlock
*block
, uint32_t length
) {
994 /* The (length&1) is used to keep the addresses on a 16-bit boundary */
995 uint32_t top
=block
->top
+ length
+ (length
&1);
998 if(top
>= block
->max
) {
999 fprintf(stderr
, "error(line %d): out of memory\n", lineNum
);
1000 exit(U_MEMORY_ALLOCATION_ERROR
);
1002 p
= block
->store
+ block
->top
;
1008 compareAliases(const void *alias1
, const void *alias2
) {
1009 /* Names like IBM850 and ibm-850 need to be sorted together */
1010 int result
= ucnv_compareNames(GET_ALIAS_STR(*(uint16_t*)alias1
), GET_ALIAS_STR(*(uint16_t*)alias2
));
1012 /* Sort the shortest first */
1013 return uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias1
)) - uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias2
));
1019 * Hey, Emacs, please set the following:
1022 * indent-tabs-mode: nil