1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1999-2016 International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: gencnval.c
12 * tab size: 8 (not used)
15 * created on: 1999nov05
16 * created by: Markus W. Scherer
18 * This program reads convrtrs.txt and writes a memory-mappable
19 * converter name alias table to cnvalias.dat .
21 * This program currently writes version 2.1 of the data format. See
22 * ucnv_io.c for more details on the format. Note that version 2.1
23 * is written in such a way that a 2.0 reader will be able to use it,
24 * and a 2.1 reader will be able to read 2.0.
27 #include "unicode/utypes.h"
28 #include "unicode/putil.h"
29 #include "unicode/ucnv.h" /* ucnv_compareNames() */
35 #include "unicode/uclean.h"
43 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
45 /* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
46 That is the maximum size for the string stores combined
47 because the strings are indexed at 16-bit boundaries by a
48 16-bit index, and there is only one section for the
51 #define STRING_STORE_SIZE 0x1FBFE /* 130046 */
52 #define TAG_STORE_SIZE 0x400 /* 1024 */
54 /* The combined tag and converter count can affect the number of lists
55 created. The size of all lists must be less than (2^17 - 1)
56 because the lists are indexed as a 16-bit array with a 16-bit index.
58 #define MAX_TAG_COUNT 0x3F /* 63 */
59 #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
60 #define MAX_ALIAS_COUNT 0xFFFF /* 65535 */
62 /* The maximum number of aliases that a standard tag/converter combination can have.
63 At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
64 this value. I don't recommend more than 31 for this value.
66 #define MAX_TC_ALIAS_COUNT 0x1F /* 31 */
68 #define MAX_LINE_SIZE 0x7FFF /* 32767 */
69 #define MAX_LIST_SIZE 0xFFFF /* 65535 */
71 #define DATA_NAME "cnvalias"
72 #define DATA_TYPE "icu" /* ICU alias table */
74 #define ALL_TAG_STR "ALL"
76 #define EMPTY_TAG_NUM 0
78 /* UDataInfo cf. udata.h */
79 static const UDataInfo dataInfo
={
88 {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
89 {3, 0, 1, 0}, /* formatVersion */
90 {1, 4, 2, 0} /* dataVersion */
99 static char stringStore
[STRING_STORE_SIZE
];
100 static StringBlock stringBlock
= { stringStore
, 0, STRING_STORE_SIZE
};
104 uint16_t *aliases
; /* Index into stringStore */
108 uint16_t converter
; /* Index into stringStore */
109 uint16_t totalAliasCount
; /* Total aliases in this column */
112 static Converter converters
[MAX_CONV_COUNT
];
113 static uint16_t converterCount
=0;
115 static char tagStore
[TAG_STORE_SIZE
];
116 static StringBlock tagBlock
= { tagStore
, 0, TAG_STORE_SIZE
};
119 uint16_t tag
; /* Index into tagStore */
120 uint16_t totalAliasCount
; /* Total aliases in this row */
121 AliasList aliasList
[MAX_CONV_COUNT
];
124 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
125 static Tag tags
[MAX_TAG_COUNT
];
126 static uint16_t tagCount
= 0;
128 /* Used for storing all aliases */
129 static uint16_t knownAliases
[MAX_ALIAS_COUNT
];
130 static uint16_t knownAliasesCount
= 0;
131 /*static uint16_t duplicateKnownAliasesCount = 0;*/
133 /* Used for storing the lists section that point to aliases */
134 static uint16_t aliasLists
[MAX_LIST_SIZE
];
135 static uint16_t aliasListsSize
= 0;
137 /* Were the standard tags declared before the aliases. */
138 static UBool standardTagsUsed
= FALSE
;
139 static UBool verbose
= FALSE
;
140 static UBool quiet
= FALSE
;
141 static int lineNum
= 1;
143 static UConverterAliasOptions tableOptions
= {
144 UCNV_IO_STD_NORMALIZED
,
145 1 /* containsCnvOptionInfo */
150 * path to convrtrs.txt
154 /* prototypes --------------------------------------------------------------- */
157 parseLine(const char *line
);
160 parseFile(FileStream
*in
);
166 addOfficialTaggedStandards(char *line
, int32_t lineLen
);
169 addAlias(const char *alias
, uint16_t standard
, uint16_t converter
, UBool defaultName
);
172 addConverter(const char *converter
);
175 allocString(StringBlock
*block
, const char *s
, int32_t length
);
178 addToKnownAliases(const char *alias
);
181 compareAliases(const void *alias1
, const void *alias2
);
184 getTagNumber(const char *tag
, uint16_t tagLen
);
187 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
190 writeAliasTable(UNewDataMemory
*out
);
192 /* -------------------------------------------------------------------------- */
/*
 * Conversions between a 16-bit store index and a pointer into the
 * corresponding character store.  An index counts 2-byte units, so it is
 * shifted left by one to obtain the byte offset.
 * Presumes that the string was placed in the store by allocString().
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/*
 * Inverse of the macros above: turn a pointer into the store back into its
 * 16-bit index.  The argument is parenthesized so that any pointer
 * expression (for example a conditional expression) expands correctly.
 * Presumes that the string was placed in the store by allocString().
 */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
213 static UOption options
[]={
215 UOPTION_HELP_QUESTION_MARK
,
224 main(int argc
, char* argv
[]) {
229 UErrorCode errorCode
=U_ZERO_ERROR
;
231 U_MAIN_INIT_ARGS(argc
, argv
);
233 /* preset then read command line options */
234 options
[DESTDIR
].value
=options
[SOURCEDIR
].value
=u_getDataDirectory();
235 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
237 /* error handling, printing usage message */
240 "error in command line argument \"%s\"\n",
243 if(argc
<0 || options
[HELP1
].doesOccur
|| options
[HELP2
].doesOccur
) {
245 "usage: %s [-options] [convrtrs.txt]\n"
246 "\tread convrtrs.txt and create " U_ICUDATA_NAME
"_" DATA_NAME
"." DATA_TYPE
"\n"
248 "\t-h or -? or --help this usage text\n"
249 "\t-v or --verbose prints out extra information about the alias table\n"
250 "\t-q or --quiet do not display warnings and progress\n"
251 "\t-c or --copyright include a copyright notice\n"
252 "\t-d or --destdir destination directory, followed by the path\n"
253 "\t-s or --sourcedir source directory, followed by the path\n",
255 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
258 if(options
[VERBOSE
].doesOccur
) {
262 if(options
[QUIET
].doesOccur
) {
269 path
= "convrtrs.txt";
272 const char* sourcedir
= options
[SOURCEDIR
].value
;
273 if (sourcedir
!= NULL
&& *sourcedir
!= 0) {
275 uprv_strcpy(pathBuf
, sourcedir
);
276 end
= uprv_strchr(pathBuf
, 0);
277 if(*(end
-1)!=U_FILE_SEP_CHAR
) {
278 *(end
++)=U_FILE_SEP_CHAR
;
280 uprv_strcpy(end
, path
);
284 uprv_memset(stringStore
, 0, sizeof(stringStore
));
285 uprv_memset(tagStore
, 0, sizeof(tagStore
));
286 uprv_memset(converters
, 0, sizeof(converters
));
287 uprv_memset(tags
, 0, sizeof(tags
));
288 uprv_memset(aliasLists
, 0, sizeof(aliasLists
));
289 uprv_memset(knownAliases
, 0, sizeof(aliasLists
));
292 in
=T_FileStream_open(path
, "r");
294 fprintf(stderr
, "gencnval: unable to open input file %s\n", path
);
295 exit(U_FILE_ACCESS_ERROR
);
298 T_FileStream_close(in
);
300 /* create the output file */
301 out
=udata_create(options
[DESTDIR
].value
, DATA_TYPE
, DATA_NAME
, &dataInfo
,
302 options
[COPYRIGHT
].doesOccur
? U_COPYRIGHT_STRING
: NULL
, &errorCode
);
303 if(U_FAILURE(errorCode
)) {
304 fprintf(stderr
, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode
));
308 /* write the table of aliases based on a tag/converter name combination */
309 writeAliasTable(out
);
312 udata_finish(out
, &errorCode
);
313 if(U_FAILURE(errorCode
)) {
314 fprintf(stderr
, "gencnval: error finishing output file - %s\n", u_errorName(errorCode
));
319 for (i
= 0; i
< MAX_TAG_COUNT
; i
++) {
320 for (n
= 0; n
< MAX_CONV_COUNT
; n
++) {
321 if (tags
[i
].aliasList
[n
].aliases
!=NULL
) {
322 uprv_free(tags
[i
].aliasList
[n
].aliases
);
331 parseFile(FileStream
*in
) {
332 char line
[MAX_LINE_SIZE
];
333 char lastLine
[MAX_LINE_SIZE
];
334 int32_t lineSize
= 0;
335 int32_t lastLineSize
= 0;
336 UBool validParse
= TRUE
;
340 /* Add the empty tag, which is for untagged aliases */
342 getTagNumber(ALL_TAG_STR
, 3);
343 allocString(&stringBlock
, "", 0);
345 /* read the list of aliases */
349 /* Read non-empty lines that don't start with a space character. */
350 while (T_FileStream_readLine(in
, lastLine
, MAX_LINE_SIZE
) != NULL
) {
351 lastLineSize
= chomp(lastLine
);
352 if (lineSize
== 0 || (lastLineSize
> 0 && isspace((int)*lastLine
))) {
353 uprv_strcpy(line
+ lineSize
, lastLine
);
354 lineSize
+= lastLineSize
;
355 } else if (lineSize
> 0) {
362 if (validParse
|| lineSize
> 0) {
363 if (isspace((int)*line
)) {
364 fprintf(stderr
, "%s:%d: error: cannot start an alias with a space\n", path
, lineNum
-1);
366 } else if (line
[0] == '{') {
367 if (!standardTagsUsed
&& line
[lineSize
- 1] != '}') {
368 fprintf(stderr
, "%s:%d: error: alias needs to start with a converter name\n", path
, lineNum
);
371 addOfficialTaggedStandards(line
, lineSize
);
372 standardTagsUsed
= TRUE
;
374 if (standardTagsUsed
) {
378 fprintf(stderr
, "%s:%d: error: alias table needs to start a list of standard tags\n", path
, lineNum
);
382 /* Was the last line consumed */
383 if (lastLineSize
> 0) {
384 uprv_strcpy(line
, lastLine
);
385 lineSize
= lastLineSize
;
395 /* This works almost like the Perl chomp.
396 It removes the newlines, comments and trailing whitespace (not preceding whitespace).
401 char *lastNonSpace
= line
;
403 /* truncate at a newline or a comment */
404 if(*s
== '\r' || *s
== '\n' || *s
== '#') {
408 if (!isspace((int)*s
)) {
413 if (lastNonSpace
++ > line
) {
417 return (int32_t)(s
- line
);
421 parseLine(const char *line
) {
422 uint16_t pos
=0, start
, limit
, length
, cnv
;
423 char *converter
, *alias
;
425 /* skip leading white space */
426 /* There is no whitespace at the beginning anymore */
427 /* while(line[pos]!=0 && isspace(line[pos])) {
432 /* is there nothing on this line? */
437 /* get the converter name */
439 while(line
[pos
]!=0 && !isspace((int)line
[pos
])) {
444 /* store the converter name */
445 length
=(uint16_t)(limit
-start
);
446 converter
=allocString(&stringBlock
, line
+start
, length
);
448 /* add the converter to the converter table */
449 cnv
=addConverter(converter
);
451 /* The name itself may be tagged, so let's add it to the aliases list properly */
454 /* get all the real aliases */
457 /* skip white space */
458 while(line
[pos
]!=0 && isspace((int)line
[pos
])) {
462 /* is there no more alias name on this line? */
467 /* get an alias name */
469 while(line
[pos
]!=0 && line
[pos
]!='{' && !isspace((int)line
[pos
])) {
474 /* store the alias name */
475 length
=(uint16_t)(limit
-start
);
477 /* add the converter as its own alias to the alias table */
479 addAlias(alias
, ALL_TAG_NUM
, cnv
, TRUE
);
482 alias
=allocString(&stringBlock
, line
+start
, length
);
483 addAlias(alias
, ALL_TAG_NUM
, cnv
, FALSE
);
485 addToKnownAliases(alias
);
487 /* add the alias/converter pair to the alias table */
488 /* addAlias(alias, 0, cnv, FALSE);*/
490 /* skip whitespace */
491 while (line
[pos
] && isspace((int)line
[pos
])) {
495 /* handle tags if they are present */
496 if (line
[pos
] == '{') {
500 while (line
[pos
] && line
[pos
] != '}' && !isspace((int)line
[pos
])) {
505 if (start
!= limit
) {
506 /* add the tag to the tag table */
507 uint16_t tag
= getTagNumber(line
+ start
, (uint16_t)(limit
- start
));
508 addAlias(alias
, tag
, cnv
, (UBool
)(line
[limit
-1] == '*'));
511 while (line
[pos
] && isspace((int)line
[pos
])) {
514 } while (line
[pos
] && line
[pos
] != '}');
516 if (line
[pos
] == '}') {
519 fprintf(stderr
, "%s:%d: Unterminated tag list\n", path
, lineNum
);
520 exit(U_UNMATCHED_BRACES
);
523 addAlias(alias
, EMPTY_TAG_NUM
, cnv
, (UBool
)(tags
[0].aliasList
[cnv
].aliasCount
== 0));
529 getTagNumber(const char *tag
, uint16_t tagLen
) {
532 UBool preferredName
= ((tagLen
> 0) ? (tag
[tagLen
- 1] == '*') : (FALSE
));
534 if (tagCount
>= MAX_TAG_COUNT
) {
535 fprintf(stderr
, "%s:%d: too many tags\n", path
, lineNum
);
536 exit(U_BUFFER_OVERFLOW_ERROR
);
544 for (t
= 0; t
< tagCount
; ++t
) {
545 const char *currTag
= GET_TAG_STR(tags
[t
].tag
);
546 if (uprv_strlen(currTag
) == tagLen
&& !uprv_strnicmp(currTag
, tag
, tagLen
)) {
551 /* we need to add this tag */
552 if (tagCount
>= MAX_TAG_COUNT
) {
553 fprintf(stderr
, "%s:%d: error: too many tags\n", path
, lineNum
);
554 exit(U_BUFFER_OVERFLOW_ERROR
);
557 /* allocate a new entry in the tag table */
558 atag
= allocString(&tagBlock
, tag
, tagLen
);
560 if (standardTagsUsed
) {
561 fprintf(stderr
, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
562 path
, lineNum
, atag
);
565 else if (tagLen
> 0 && strcmp(tag
, ALL_TAG_STR
) != 0) {
566 fprintf(stderr
, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
567 path
, lineNum
, atag
);
570 /* add the tag to the tag table */
571 tags
[tagCount
].tag
= GET_TAG_NUM(atag
);
572 /* The aliasList should be set to 0's already */
578 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
579 tags[tag].aliases[converter] = alias;
584 addOfficialTaggedStandards(char *line
, int32_t lineLen
) {
585 (void) lineLen
; // suppress compiler warnings about unused variable
589 static const char WHITESPACE
[] = " \t";
591 if (tagCount
> UCNV_NUM_RESERVED_TAGS
) {
592 fprintf(stderr
, "%s:%d: error: official tags already added\n", path
, lineNum
);
593 exit(U_BUFFER_OVERFLOW_ERROR
);
595 tag
= strchr(line
, '{');
597 /* Why were we called? */
598 fprintf(stderr
, "%s:%d: error: Missing start of tag group\n", path
, lineNum
);
602 endTagExp
= strchr(tag
, '}');
603 if (endTagExp
== NULL
) {
604 fprintf(stderr
, "%s:%d: error: Missing end of tag group\n", path
, lineNum
);
609 tag
= strtok(tag
, WHITESPACE
);
610 while (tag
!= NULL
) {
611 /* printf("Adding original tag \"%s\"\n", tag);*/
613 /* allocate a new entry in the tag table */
614 atag
= allocString(&tagBlock
, tag
, -1);
616 /* add the tag to the tag table */
617 tags
[tagCount
++].tag
= (uint16_t)((atag
- tagStore
) >> 1);
619 /* The aliasList should already be set to 0's */
622 tag
= strtok(NULL
, WHITESPACE
);
627 addToKnownAliases(const char *alias
) {
629 /* strict matching */
630 /* for (idx = 0; idx < knownAliasesCount; idx++) {
631 uint16_t num = GET_ALIAS_NUM(alias);
632 if (knownAliases[idx] != num
633 && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
635 fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path,
636 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
637 duplicateKnownAliasesCount++;
640 else if (knownAliases[idx] != num
641 && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
644 fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path,
645 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
647 duplicateKnownAliasesCount++;
652 if (knownAliasesCount
>= MAX_ALIAS_COUNT
) {
653 fprintf(stderr
, "%s:%d: warning: Too many aliases defined for all converters\n",
655 exit(U_BUFFER_OVERFLOW_ERROR
);
657 /* TODO: We could try to unlist exact duplicates. */
658 return knownAliases
[knownAliasesCount
++] = GET_ALIAS_NUM(alias
);
662 @param standard When standard is 0, then it's the "empty" tag.
665 addAlias(const char *alias
, uint16_t standard
, uint16_t converter
, UBool defaultName
) {
667 UBool startEmptyWithoutDefault
= FALSE
;
668 AliasList
*aliasList
;
670 if(standard
>=MAX_TAG_COUNT
) {
671 fprintf(stderr
, "%s:%d: error: too many standard tags\n", path
, lineNum
);
672 exit(U_BUFFER_OVERFLOW_ERROR
);
674 if(converter
>=MAX_CONV_COUNT
) {
675 fprintf(stderr
, "%s:%d: error: too many converter names\n", path
, lineNum
);
676 exit(U_BUFFER_OVERFLOW_ERROR
);
678 aliasList
= &tags
[standard
].aliasList
[converter
];
680 if (strchr(alias
, '}')) {
681 fprintf(stderr
, "%s:%d: error: unmatched } found\n", path
,
685 if(aliasList
->aliasCount
+ 1 >= MAX_TC_ALIAS_COUNT
) {
686 fprintf(stderr
, "%s:%d: error: too many aliases for alias %s and converter %s\n", path
,
687 lineNum
, alias
, GET_ALIAS_STR(converters
[converter
].converter
));
688 exit(U_BUFFER_OVERFLOW_ERROR
);
691 /* Show this warning only once. All aliases are added to the "ALL" tag. */
692 if (standard
== ALL_TAG_NUM
&& GET_ALIAS_STR(converters
[converter
].converter
) != alias
) {
693 /* Normally these option values are parsed at runtime, and they can
694 be discarded when the alias is a default converter. Options should
695 only be on a converter and not an alias. */
696 if (uprv_strchr(alias
, UCNV_OPTION_SEP_CHAR
) != 0)
698 fprintf(stderr
, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING
"\". Options are parsed at run-time and do not need to be in the alias table.\n",
701 if (uprv_strchr(alias
, UCNV_VALUE_SEP_CHAR
) != 0)
703 fprintf(stderr
, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING
"\". Options are parsed at run-time and do not need to be in the alias table.\n",
708 if (standard
!= ALL_TAG_NUM
) {
709 /* Check for duplicate aliases for this tag on all converters */
710 for (idx
= 0; idx
< converterCount
; idx
++) {
711 for (idx2
= 0; idx2
< tags
[standard
].aliasList
[idx
].aliasCount
; idx2
++) {
712 uint16_t aliasNum
= tags
[standard
].aliasList
[idx
].aliases
[idx2
];
714 && ucnv_compareNames(alias
, GET_ALIAS_STR(aliasNum
)) == 0)
716 if (idx
== converter
) {
718 * (alias, standard) duplicates are harmless if they map to the same converter.
719 * Only print a warning in verbose mode, or if the alias is a precise duplicate,
720 * not just a lenient-match duplicate.
722 if (verbose
|| 0 == uprv_strcmp(alias
, GET_ALIAS_STR(aliasNum
))) {
723 fprintf(stderr
, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path
,
724 lineNum
, alias
, GET_ALIAS_STR(aliasNum
),
725 GET_TAG_STR(tags
[standard
].tag
),
726 GET_ALIAS_STR(converters
[converter
].converter
));
729 fprintf(stderr
, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path
,
730 lineNum
, alias
, GET_ALIAS_STR(aliasNum
),
731 GET_TAG_STR(tags
[standard
].tag
),
732 GET_ALIAS_STR(converters
[converter
].converter
),
733 GET_ALIAS_STR(converters
[idx
].converter
));
740 /* Check for duplicate default aliases for this converter on all tags */
741 /* It's okay to have multiple standards prefer the same name */
742 /* if (verbose && !dupFound) {
743 for (idx = 0; idx < tagCount; idx++) {
744 if (tags[idx].aliasList[converter].aliases) {
745 uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
747 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
749 fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path,
750 lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
758 if (aliasList
->aliasCount
<= 0) {
759 aliasList
->aliasCount
++;
760 startEmptyWithoutDefault
= TRUE
;
762 aliasList
->aliases
= (uint16_t *)uprv_realloc(aliasList
->aliases
, (aliasList
->aliasCount
+ 1) * sizeof(aliasList
->aliases
[0]));
763 if (startEmptyWithoutDefault
) {
764 aliasList
->aliases
[0] = 0;
767 if (aliasList
->aliases
[0] != 0) {
768 fprintf(stderr
, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path
,
771 GET_ALIAS_STR(aliasList
->aliases
[0]),
772 GET_TAG_STR(tags
[standard
].tag
),
773 GET_ALIAS_STR(converters
[converter
].converter
));
776 aliasList
->aliases
[0] = GET_ALIAS_NUM(alias
);
778 aliasList
->aliases
[aliasList
->aliasCount
++] = GET_ALIAS_NUM(alias
);
780 /* aliasList->converter = converter;*/
782 converters
[converter
].totalAliasCount
++; /* One more to the column */
783 tags
[standard
].totalAliasCount
++; /* One more to the row */
785 return aliasList
->aliasCount
;
789 addConverter(const char *converter
) {
791 if(converterCount
>=MAX_CONV_COUNT
) {
792 fprintf(stderr
, "%s:%d: error: too many converters\n", path
, lineNum
);
793 exit(U_BUFFER_OVERFLOW_ERROR
);
796 for (idx
= 0; idx
< converterCount
; idx
++) {
797 if (ucnv_compareNames(converter
, GET_ALIAS_STR(converters
[idx
].converter
)) == 0) {
798 fprintf(stderr
, "%s:%d: error: duplicate converter %s found!\n", path
, lineNum
, converter
);
804 converters
[converterCount
].converter
= GET_ALIAS_NUM(converter
);
805 converters
[converterCount
].totalAliasCount
= 0;
807 return converterCount
++;
810 /* resolve this alias based on the prioritization of the standard tags. */
812 resolveAliasToConverter(uint16_t alias
, uint16_t *tagNum
, uint16_t *converterNum
) {
813 uint16_t idx
, idx2
, idx3
;
815 for (idx
= UCNV_NUM_RESERVED_TAGS
; idx
< tagCount
; idx
++) {
816 for (idx2
= 0; idx2
< converterCount
; idx2
++) {
817 for (idx3
= 0; idx3
< tags
[idx
].aliasList
[idx2
].aliasCount
; idx3
++) {
818 uint16_t aliasNum
= tags
[idx
].aliasList
[idx2
].aliases
[idx3
];
819 if (aliasNum
== alias
) {
821 *converterNum
= idx2
;
827 /* Do the leftovers last, just in case */
828 /* There is no need to do the ALL tag */
830 for (idx2
= 0; idx2
< converterCount
; idx2
++) {
831 for (idx3
= 0; idx3
< tags
[idx
].aliasList
[idx2
].aliasCount
; idx3
++) {
832 uint16_t aliasNum
= tags
[idx
].aliasList
[idx2
].aliases
[idx3
];
833 if (aliasNum
== alias
) {
835 *converterNum
= idx2
;
840 *tagNum
= UINT16_MAX
;
841 *converterNum
= UINT16_MAX
;
842 fprintf(stderr
, "%s: warning: alias %s not found\n",
844 GET_ALIAS_STR(alias
));
848 /* The knownAliases should be sorted before calling this function */
850 resolveAliases(uint16_t *uniqueAliasArr
, uint16_t *uniqueAliasToConverterArr
, uint16_t aliasOffset
) {
851 uint32_t uniqueAliasIdx
= 0;
853 uint16_t currTagNum
, oldTagNum
;
854 uint16_t currConvNum
, oldConvNum
;
855 const char *lastName
;
857 if (knownAliasesCount
!= 0) {
858 resolveAliasToConverter(knownAliases
[0], &oldTagNum
, &currConvNum
);
859 uniqueAliasToConverterArr
[uniqueAliasIdx
] = currConvNum
;
860 oldConvNum
= currConvNum
;
861 uniqueAliasArr
[uniqueAliasIdx
] = knownAliases
[0] + aliasOffset
;
863 lastName
= GET_ALIAS_STR(knownAliases
[0]);
865 for (idx
= 1; idx
< knownAliasesCount
; idx
++) {
866 resolveAliasToConverter(knownAliases
[idx
], &currTagNum
, &currConvNum
);
867 if (ucnv_compareNames(lastName
, GET_ALIAS_STR(knownAliases
[idx
])) == 0) {
868 /* duplicate found */
869 if ((currTagNum
< oldTagNum
&& currTagNum
>= UCNV_NUM_RESERVED_TAGS
)
871 oldTagNum
= currTagNum
;
872 uniqueAliasToConverterArr
[uniqueAliasIdx
- 1] = currConvNum
;
873 uniqueAliasArr
[uniqueAliasIdx
- 1] = knownAliases
[idx
] + aliasOffset
;
875 printf("using %s instead of %s -> %s",
876 GET_ALIAS_STR(knownAliases
[idx
]),
878 GET_ALIAS_STR(converters
[currConvNum
].converter
));
879 if (oldConvNum
!= currConvNum
) {
880 printf(" (alias conflict)");
888 printf("folding %s into %s -> %s",
889 GET_ALIAS_STR(knownAliases
[idx
]),
891 GET_ALIAS_STR(converters
[oldConvNum
].converter
));
892 if (oldConvNum
!= currConvNum
) {
893 printf(" (alias conflict)");
898 if (oldConvNum
!= currConvNum
) {
899 uniqueAliasToConverterArr
[uniqueAliasIdx
- 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT
;
903 uniqueAliasToConverterArr
[uniqueAliasIdx
] = currConvNum
;
904 oldConvNum
= currConvNum
;
905 uniqueAliasArr
[uniqueAliasIdx
] = knownAliases
[idx
] + aliasOffset
;
907 lastName
= GET_ALIAS_STR(knownAliases
[idx
]);
908 oldTagNum
= currTagNum
;
909 /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
911 if (uprv_strchr(GET_ALIAS_STR(converters
[currConvNum
].converter
), UCNV_OPTION_SEP_CHAR
) != NULL
) {
912 uniqueAliasToConverterArr
[uniqueAliasIdx
-1] |= UCNV_CONTAINS_OPTION_BIT
;
916 return uniqueAliasIdx
;
920 createOneAliasList(uint16_t *aliasArrLists
, uint32_t tag
, uint32_t converter
, uint16_t offset
) {
922 AliasList
*aliasList
= &tags
[tag
].aliasList
[converter
];
924 if (aliasList
->aliasCount
== 0) {
925 aliasArrLists
[tag
*converterCount
+ converter
] = 0;
928 aliasLists
[aliasListsSize
++] = aliasList
->aliasCount
;
930 /* write into the array area a 1's based index. */
931 aliasArrLists
[tag
*converterCount
+ converter
] = aliasListsSize
;
933 /* printf("tag %s converter %s\n",
934 GET_TAG_STR(tags[tag].tag),
935 GET_ALIAS_STR(converters[converter].converter));*/
936 for (aliasNum
= 0; aliasNum
< aliasList
->aliasCount
; aliasNum
++) {
939 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
940 if (aliasList
->aliases
[aliasNum
]) {
941 value
= aliasList
->aliases
[aliasNum
] + offset
;
944 if (tag
!= 0 && !quiet
) { /* Only show the warning when it's not the leftover tag. */
945 fprintf(stderr
, "%s: warning: tag %s does not have a default alias for %s\n",
947 GET_TAG_STR(tags
[tag
].tag
),
948 GET_ALIAS_STR(converters
[converter
].converter
));
951 aliasLists
[aliasListsSize
++] = value
;
952 if (aliasListsSize
>= MAX_LIST_SIZE
) {
953 fprintf(stderr
, "%s: error: Too many alias lists\n", path
);
954 exit(U_BUFFER_OVERFLOW_ERROR
);
/*
 * Fill normalizedStrings with a same-layout copy of a string block in which
 * every non-empty string is replaced by the output of
 * ucnv_io_stripForCompare().
 *
 * The block is first copied verbatim.  It is then walked one NUL-terminated
 * string at a time; each string occupies strlen + 1 bytes in both buffers,
 * so the two cursors always advance together.  When the stripped form is
 * non-empty, the rest of its slot is zero-filled.
 * NOTE(review): assumes ucnv_io_stripForCompare(dst, src) writes a
 * NUL-terminated result no longer than src - confirm against ucnv_io.
 *
 * @param normalizedStrings destination, at least stringBlockLength bytes
 * @param origStringBlock   source block of NUL-terminated strings
 * @param stringBlockLength number of bytes in the block
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    int32_t nameLen;

    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        int32_t slotSize;

        nameLen = (int32_t)uprv_strlen(origStringBlock);
        if (nameLen >= stringBlockLength) {
            break;
        }
        slotSize = nameLen + 1; /* the string plus its terminating NUL */

        if (nameLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
            strippedLen = (int32_t)uprv_strlen(normalizedStrings);
            if (strippedLen > 0) {
                /* wipe whatever the verbatim copy left behind the shorter name */
                uprv_memset(normalizedStrings + strippedLen, 0, slotSize - strippedLen);
            }
        }

        /* step all three cursors over the slot just handled */
        origStringBlock += slotSize;
        normalizedStrings += slotSize;
        stringBlockLength -= slotSize;
    }
}
982 writeAliasTable(UNewDataMemory
*out
) {
984 uint32_t uniqueAliasesSize
;
985 uint16_t aliasOffset
= (uint16_t)(tagBlock
.top
/sizeof(uint16_t));
986 uint16_t *aliasArrLists
= (uint16_t *)uprv_malloc(tagCount
* converterCount
* sizeof(uint16_t));
987 uint16_t *uniqueAliases
= (uint16_t *)uprv_malloc(knownAliasesCount
* sizeof(uint16_t));
988 uint16_t *uniqueAliasesToConverter
= (uint16_t *)uprv_malloc(knownAliasesCount
* sizeof(uint16_t));
990 qsort(knownAliases
, knownAliasesCount
, sizeof(knownAliases
[0]), compareAliases
);
991 uniqueAliasesSize
= resolveAliases(uniqueAliases
, uniqueAliasesToConverter
, aliasOffset
);
993 /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
996 /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
997 for (i
= 0; i
< tagCount
; ++i
) {
998 for (j
= 0; j
< converterCount
; ++j
) {
999 createOneAliasList(aliasArrLists
, i
, j
, aliasOffset
);
1003 /* Write the size of the TOC */
1004 if (tableOptions
.stringNormalizationType
== UCNV_IO_UNNORMALIZED
) {
1005 udata_write32(out
, 8);
1008 udata_write32(out
, 9);
1011 /* Write the sizes of each section */
1012 /* All sizes are the number of uint16_t units, not bytes */
1013 udata_write32(out
, converterCount
);
1014 udata_write32(out
, tagCount
);
1015 udata_write32(out
, uniqueAliasesSize
); /* list of aliases */
1016 udata_write32(out
, uniqueAliasesSize
); /* The preresolved form of mapping an untagged the alias to a converter */
1017 udata_write32(out
, tagCount
* converterCount
);
1018 udata_write32(out
, aliasListsSize
+ 1);
1019 udata_write32(out
, sizeof(tableOptions
) / sizeof(uint16_t));
1020 udata_write32(out
, (tagBlock
.top
+ stringBlock
.top
) / sizeof(uint16_t));
1021 if (tableOptions
.stringNormalizationType
!= UCNV_IO_UNNORMALIZED
) {
1022 udata_write32(out
, (tagBlock
.top
+ stringBlock
.top
) / sizeof(uint16_t));
1025 /* write the table of converters */
1026 /* Think of this as the column headers */
1027 for(i
=0; i
<converterCount
; ++i
) {
1028 udata_write16(out
, (uint16_t)(converters
[i
].converter
+ aliasOffset
));
1031 /* write the table of tags */
1032 /* Think of this as the row headers */
1033 for(i
=UCNV_NUM_RESERVED_TAGS
; i
<tagCount
; ++i
) {
1034 udata_write16(out
, tags
[i
].tag
);
1036 /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
1037 udata_write16(out
, tags
[EMPTY_TAG_NUM
].tag
);
1038 udata_write16(out
, tags
[ALL_TAG_NUM
].tag
);
1040 /* Write the unique list of aliases */
1041 udata_writeBlock(out
, uniqueAliases
, uniqueAliasesSize
* sizeof(uint16_t));
1043 /* Write the converter that each unique alias resolves to */
1044 udata_writeBlock(out
, uniqueAliasesToConverter
, uniqueAliasesSize
* sizeof(uint16_t));
1046 /* Write the array to the lists */
1047 udata_writeBlock(out
, (const void *)(aliasArrLists
+ (2*converterCount
)), (((tagCount
- 2) * converterCount
) * sizeof(uint16_t)));
1048 /* Now write the leftover part of the array for the EMPTY and ALL lists */
1049 udata_writeBlock(out
, (const void *)aliasArrLists
, (2 * converterCount
* sizeof(uint16_t)));
1051 /* Offset the next array to make the index start at 1. */
1052 udata_write16(out
, 0xDEAD);
1054 /* Write the lists */
1055 udata_writeBlock(out
, (const void *)aliasLists
, aliasListsSize
* sizeof(uint16_t));
1057 /* Write any options for the alias table. */
1058 udata_writeBlock(out
, (const void *)&tableOptions
, sizeof(tableOptions
));
1060 /* write the tags strings */
1061 udata_writeString(out
, tagBlock
.store
, tagBlock
.top
);
1063 /* write the aliases strings */
1064 udata_writeString(out
, stringBlock
.store
, stringBlock
.top
);
1066 /* write the normalized aliases strings */
1067 if (tableOptions
.stringNormalizationType
!= UCNV_IO_UNNORMALIZED
) {
1068 char *normalizedStrings
= (char *)uprv_malloc(tagBlock
.top
+ stringBlock
.top
);
1069 createNormalizedAliasStrings(normalizedStrings
, tagBlock
.store
, tagBlock
.top
);
1070 createNormalizedAliasStrings(normalizedStrings
+ tagBlock
.top
, stringBlock
.store
, stringBlock
.top
);
1072 /* Write out the complete normalized array. */
1073 udata_writeString(out
, normalizedStrings
, tagBlock
.top
+ stringBlock
.top
);
1074 uprv_free(normalizedStrings
);
1077 uprv_free(uniqueAliasesToConverter
);
1078 uprv_free(uniqueAliases
);
1079 uprv_free(aliasArrLists
);
1083 allocString(StringBlock
*block
, const char *s
, int32_t length
) {
1088 length
=(int32_t)uprv_strlen(s
);
1092 * add 1 for the terminating NUL
1093 * and round up (+1 &~1)
1094 * to keep the addresses on a 16-bit boundary
1096 top
=block
->top
+ (uint32_t)((length
+ 1 + 1) & ~1);
1098 if(top
>= block
->max
) {
1099 fprintf(stderr
, "%s:%d: error: out of memory\n", path
, lineNum
);
1100 exit(U_MEMORY_ALLOCATION_ERROR
);
1103 /* get the pointer and copy the string */
1104 p
= block
->store
+ block
->top
;
1105 uprv_memcpy(p
, s
, length
);
1106 p
[length
] = 0; /* NUL-terminate it */
1107 if((length
& 1) == 0) {
1108 p
[length
+ 1] = 0; /* set the padding byte */
1111 /* check for invariant characters now that we have a NUL-terminated string for easy output */
1112 if(!uprv_isInvariantString(p
, length
)) {
1113 fprintf(stderr
, "%s:%d: error: the name %s contains not just invariant characters\n", path
, lineNum
, p
);
1114 exit(U_INVALID_TABLE_FORMAT
);
/*
 * qsort() comparator for knownAliases.  Each element is a uint16_t index
 * into stringStore.
 *
 * The primary key is ucnv_compareNames(), so that names like IBM850 and
 * ibm-850 end up next to each other.  Names that compare equal are ordered
 * by their raw string length, shortest first.
 *
 * @param alias1 pointer to the first uint16_t alias index
 * @param alias2 pointer to the second uint16_t alias index
 * @return negative, zero or positive, as qsort() expects
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    /* keep the const that qsort() hands us instead of casting it away */
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);

    /* Names like IBM850 and ibm-850 need to be sorted together */
    int result = ucnv_compareNames(name1, name2);
    if (result == 0) {
        /* Sort the shortest first */
        return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
    }
    return result;
}
1133 * Hey, Emacs, please set the following:
1136 * indent-tabs-mode: nil