]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencnval/gencnval.c
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / gencnval / gencnval.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: gencnval.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov05
14 * created by: Markus W. Scherer
15 *
16 * This program reads convrtrs.txt and writes a memory-mappable
17 * converter name alias table to cnvalias.dat .
18 *
19 * This program currently writes version 2.1 of the data format. See
20 * ucnv_io.c for more details on the format. Note that version 2.1
21 * is written in such a way that a 2.0 reader will be able to use it,
22 * and a 2.1 reader will be able to read 2.0.
23 */
24
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
28 #include "ucnv_io.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uinvchar.h"
32 #include "filestrm.h"
33 #include "unicode/uclean.h"
34 #include "unewdata.h"
35 #include "uoptions.h"
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <ctype.h>
40
41 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
42
43 /* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
44 That is the maximum size for the string stores combined
45 because the strings are indexed at 16-bit boundaries by a
46 16-bit index, and there is only one section for the
47 strings.
48 */
49 #define STRING_STORE_SIZE 0x1FBFE /* 130046; size in bytes of stringStore[] */
50 #define TAG_STORE_SIZE 0x400 /* 1024; size in bytes of tagStore[] */
51
52 /* The combined tag and converter count can affect the number of lists
53 created. The size of all lists must be less than (2^17 - 1)
54 because the lists are indexed as a 16-bit array with a 16-bit index.
55 */
56 #define MAX_TAG_COUNT 0x3F /* 63; capacity of tags[] */
57 #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK /* capacity of converters[] and of each Tag's aliasList[] */
58 #define MAX_ALIAS_COUNT 0xFFFF /* 65535; capacity of knownAliases[] */
59
60 /* The maximum number of aliases that a standard tag/converter combination can have.
61 As of 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
62 this value. More than 31 is not recommended for this value.
63 */
64 #define MAX_TC_ALIAS_COUNT 0x1F /* 31 */
65
66 #define MAX_LINE_SIZE 0x7FFF /* 32767; size of the line buffers used by parseFile() */
67 #define MAX_LIST_SIZE 0xFFFF /* 65535; capacity of aliasLists[] */
68
69 #define DATA_NAME "cnvalias" /* name passed to udata_create() for the output item */
70 #define DATA_TYPE "icu" /* ICU alias table */
71
72 #define ALL_TAG_STR "ALL" /* name of the tag under which every alias is registered */
73 #define ALL_TAG_NUM 1 /* index of the ALL tag in tags[]; parseFile() registers it second */
74 #define EMPTY_TAG_NUM 0 /* index of the empty tag (untagged aliases); registered first */
75
76 /* UDataInfo cf. udata.h
   Header description handed to udata_create() in main() for the output file.
   NOTE(review): the comment at the top of this file says the tool writes data
   format version 2.1, while formatVersion below is 3.0.1 - confirm which is current. */
77 static const UDataInfo dataInfo={
78 sizeof(UDataInfo), /* size of this structure */
79 0, /* presumably a reserved field - see udata.h */
80
81 U_IS_BIG_ENDIAN,
82 U_CHARSET_FAMILY,
83 sizeof(UChar),
84 0, /* presumably a reserved field - see udata.h */
85
86 {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
87 {3, 0, 1, 0}, /* formatVersion */
88 {1, 4, 2, 0} /* dataVersion */
89 };
90
/* A bump allocator over a fixed character array; see allocString(). */
91 typedef struct {
92 char *store; /* start of the backing array */
93 uint32_t top; /* number of bytes already handed out */
94 uint32_t max; /* size of the backing array in bytes */
95 } StringBlock;
96
/* Storage for converter names and aliases. */
97 static char stringStore[STRING_STORE_SIZE];
98 static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
99
/* The aliases of one tag/converter combination. addAlias() reserves slot 0
   for the default alias (0 when none was declared). */
100 typedef struct {
101 uint16_t aliasCount;
102 uint16_t *aliases; /* Index into stringStore */
103 } AliasList;
104
105 typedef struct {
106 uint16_t converter; /* Index into stringStore */
107 uint16_t totalAliasCount; /* Total aliases in this column */
108 } Converter;
109
110 static Converter converters[MAX_CONV_COUNT];
111 static uint16_t converterCount=0;
112
/* Storage for the standard tag names. */
113 static char tagStore[TAG_STORE_SIZE];
114 static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };
115
116 typedef struct {
117 uint16_t tag; /* Index into tagStore */
118 uint16_t totalAliasCount; /* Total aliases in this row */
119 AliasList aliasList[MAX_CONV_COUNT]; /* one list per converter index */
120 } Tag;
121
122 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
123 static Tag tags[MAX_TAG_COUNT];
124 static uint16_t tagCount = 0;
125
126 /* Used for storing all aliases (as indexes into stringStore); sorted in writeAliasTable() */
127 static uint16_t knownAliases[MAX_ALIAS_COUNT];
128 static uint16_t knownAliasesCount = 0;
129 /*static uint16_t duplicateKnownAliasesCount = 0;*/
130
131 /* Used for storing the lists section that points to aliases */
132 static uint16_t aliasLists[MAX_LIST_SIZE];
133 static uint16_t aliasListsSize = 0;
134
135 /* Were the standard tags declared before the aliases. */
136 static UBool standardTagsUsed = FALSE;
137 static UBool verbose = FALSE; /* set by the -v option in main() */
138 static int lineNum = 1; /* current input line, used in diagnostics */
139
/* Options block written verbatim into the output by writeAliasTable(). */
140 static UConverterAliasOptions tableOptions = {
141 UCNV_IO_STD_NORMALIZED,
142 1 /* containsCnvOptionInfo */
143 };
144
145 /* prototypes --------------------------------------------------------------- */
146
147 static void
148 parseLine(const char *line); /* parses one logical line: a converter name followed by its aliases */
149
150 static void
151 parseFile(FileStream *in); /* reads the input, joins continuation lines and dispatches each logical line */
152
153 static int32_t
154 chomp(char *line); /* strips newline, comment and trailing whitespace; returns the new length */
155
156 static void
157 addOfficialTaggedStandards(char *line, int32_t lineLen); /* registers the tags of a "{ ... }" declaration line */
158
159 static uint16_t
160 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName); /* adds an alias to one tag/converter list */
161
162 static uint16_t
163 addConverter(const char *converter); /* appends a converter and returns its index */
164
165 static char *
166 allocString(StringBlock *block, const char *s, int32_t length); /* copies s into the block at an even offset */
167
168 static uint16_t
169 addToKnownAliases(const char *alias); /* appends the alias index to knownAliases[] */
170
171 static int
172 compareAliases(const void *alias1, const void *alias2); /* qsort() comparator over knownAliases[] */
173
174 static uint16_t
175 getTagNumber(const char *tag, uint16_t tagLen); /* looks up a tag by name, adding it when it is new */
176
177 /*static void
178 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
179
180 static void
181 writeAliasTable(UNewDataMemory *out); /* serializes all tables to the output file */
182
183 /* -------------------------------------------------------------------------- */
184
/*
 * Conversions between 16-bit string indexes and pointers into the string
 * stores. An index counts 16-bit units, hence the shift by one.
 * These presume that the string was stored with allocString(), which keeps
 * every string at an even byte offset.
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/* The argument is parenthesized so that any pointer expression can be passed. */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
192
/* Indexes into options[] below; the order of the two lists must match. */
193 enum
194 {
195 HELP1,
196 HELP2,
197 VERBOSE,
198 COPYRIGHT,
199 DESTDIR,
200 SOURCEDIR
201 };
202
/* Command line options, parsed by u_parseArgs() in main(). See the usage
   text in main() for the corresponding switches. */
203 static UOption options[]={
204 UOPTION_HELP_H,
205 UOPTION_HELP_QUESTION_MARK,
206 UOPTION_VERBOSE,
207 UOPTION_COPYRIGHT,
208 UOPTION_DESTDIR,
209 UOPTION_SOURCEDIR
210 };
211
212 extern int
213 main(int argc, char* argv[]) {
214 char pathBuf[512];
215 const char *path;
216 FileStream *in;
217 UNewDataMemory *out;
218 UErrorCode errorCode=U_ZERO_ERROR;
219
220 U_MAIN_INIT_ARGS(argc, argv);
221
222 /* preset then read command line options */
223 options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
224 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
225
226 /* error handling, printing usage message */
227 if(argc<0) {
228 fprintf(stderr,
229 "error in command line argument \"%s\"\n",
230 argv[-argc]);
231 }
232 if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
233 fprintf(stderr,
234 "usage: %s [-options] [convrtrs.txt]\n"
235 "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
236 "options:\n"
237 "\t-h or -? or --help this usage text\n"
238 "\t-v or --verbose prints out extra information about the alias table\n"
239 "\t-c or --copyright include a copyright notice\n"
240 "\t-d or --destdir destination directory, followed by the path\n"
241 "\t-s or --sourcedir source directory, followed by the path\n",
242 argv[0]);
243 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
244 }
245
246 if(options[VERBOSE].doesOccur) {
247 verbose = TRUE;
248 }
249
250 if(argc>=2) {
251 path=argv[1];
252 } else {
253 path=options[SOURCEDIR].value;
254 if(path!=NULL && *path!=0) {
255 char *end;
256
257 uprv_strcpy(pathBuf, path);
258 end = uprv_strchr(pathBuf, 0);
259 if(*(end-1)!=U_FILE_SEP_CHAR) {
260 *(end++)=U_FILE_SEP_CHAR;
261 }
262 uprv_strcpy(end, "convrtrs.txt");
263 path=pathBuf;
264 } else {
265 path = "convrtrs.txt";
266 }
267 }
268
269 uprv_memset(stringStore, 0, sizeof(stringStore));
270 uprv_memset(tagStore, 0, sizeof(tagStore));
271 uprv_memset(converters, 0, sizeof(converters));
272 uprv_memset(tags, 0, sizeof(tags));
273 uprv_memset(aliasLists, 0, sizeof(aliasLists));
274 uprv_memset(knownAliases, 0, sizeof(aliasLists));
275
276
277 in=T_FileStream_open(path, "r");
278 if(in==NULL) {
279 fprintf(stderr, "gencnval: unable to open input file convrtrs.txt\n");
280 exit(U_FILE_ACCESS_ERROR);
281 }
282 parseFile(in);
283 T_FileStream_close(in);
284
285 /* create the output file */
286 out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
287 options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
288 if(U_FAILURE(errorCode)) {
289 fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
290 exit(errorCode);
291 }
292
293 /* write the table of aliases based on a tag/converter name combination */
294 writeAliasTable(out);
295
296 /* finish */
297 udata_finish(out, &errorCode);
298 if(U_FAILURE(errorCode)) {
299 fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
300 exit(errorCode);
301 }
302
303 return 0;
304 }
305
306 static void
307 parseFile(FileStream *in) {
308 char line[MAX_LINE_SIZE];
309 char lastLine[MAX_LINE_SIZE];
310 int32_t lineSize = 0;
311 int32_t lastLineSize = 0;
312 UBool validParse = TRUE;
313
314 lineNum = 0;
315
316 /* Add the empty tag, which is for untagged aliases */
317 getTagNumber("", 0);
318 getTagNumber(ALL_TAG_STR, 3);
319 allocString(&stringBlock, "", 0);
320
321 /* read the list of aliases */
322 while (validParse) {
323 validParse = FALSE;
324
325 /* Read non-empty lines that don't start with a space character. */
326 while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
327 lastLineSize = chomp(lastLine);
328 if (lineSize == 0 || (lastLineSize > 0 && isspace(*lastLine))) {
329 uprv_strcpy(line + lineSize, lastLine);
330 lineSize += lastLineSize;
331 } else if (lineSize > 0) {
332 validParse = TRUE;
333 break;
334 }
335 lineNum++;
336 }
337
338 if (validParse || lineSize > 0) {
339 if (isspace(*line)) {
340 fprintf(stderr, "error(line %d): cannot start an alias with a space\n", lineNum-1);
341 exit(U_PARSE_ERROR);
342 } else if (line[0] == '{') {
343 if (!standardTagsUsed && line[lineSize - 1] != '}') {
344 fprintf(stderr, "error(line %d): alias needs to start with a converter name\n", lineNum);
345 exit(U_PARSE_ERROR);
346 }
347 addOfficialTaggedStandards(line, lineSize);
348 standardTagsUsed = TRUE;
349 } else {
350 if (standardTagsUsed) {
351 parseLine(line);
352 }
353 else {
354 fprintf(stderr, "error(line %d): alias table needs to start a list of standard tags\n", lineNum);
355 exit(U_PARSE_ERROR);
356 }
357 }
358 /* Was the last line consumed */
359 if (lastLineSize > 0) {
360 uprv_strcpy(line, lastLine);
361 lineSize = lastLineSize;
362 }
363 else {
364 lineSize = 0;
365 }
366 }
367 lineNum++;
368 }
369 }
370
/*
 * This works almost like the Perl chomp.
 * It truncates the line at the first newline, carriage return or '#'
 * comment and then removes trailing white space. Leading white space is
 * kept because it marks a continuation line.
 *
 * A line that consists only of white space is left untouched (apart from
 * the truncation above), so that it still counts as a continuation line.
 *
 * @param line NUL-terminated line, modified in place.
 * @return The length of the resulting string.
 */
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = NULL;  /* NULL until a non-space character is seen */

    while(*s!=0) {
        /* truncate at a newline or a comment */
        if(*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* cast: isspace() is only defined for unsigned char values and EOF */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }
    if (lastNonSpace != NULL) {
        /* cut right behind the last non-space character, even at index 0 */
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
395
396 static void
397 parseLine(const char *line) {
398 uint16_t pos=0, start, limit, length, cnv;
399 char *converter, *alias;
400
401 /* skip leading white space */
402 /* There is no whitespace at the beginning anymore */
403 /* while(line[pos]!=0 && isspace(line[pos])) {
404 ++pos;
405 }
406 */
407
408 /* is there nothing on this line? */
409 if(line[pos]==0) {
410 return;
411 }
412
413 /* get the converter name */
414 start=pos;
415 while(line[pos]!=0 && !isspace(line[pos])) {
416 ++pos;
417 }
418 limit=pos;
419
420 /* store the converter name */
421 length=(uint16_t)(limit-start);
422 converter=allocString(&stringBlock, line+start, length);
423
424 /* add the converter to the converter table */
425 cnv=addConverter(converter);
426
427 /* The name itself may be tagged, so let's added it to the aliases list properly */
428 pos = start;
429
430 /* get all the real aliases */
431 for(;;) {
432
433 /* skip white space */
434 while(line[pos]!=0 && isspace(line[pos])) {
435 ++pos;
436 }
437
438 /* is there no more alias name on this line? */
439 if(line[pos]==0) {
440 break;
441 }
442
443 /* get an alias name */
444 start=pos;
445 while(line[pos]!=0 && line[pos]!='{' && !isspace(line[pos])) {
446 ++pos;
447 }
448 limit=pos;
449
450 /* store the alias name */
451 length=(uint16_t)(limit-start);
452 if (start == 0) {
453 /* add the converter as its own alias to the alias table */
454 alias = converter;
455 addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
456 }
457 else {
458 alias=allocString(&stringBlock, line+start, length);
459 addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
460 }
461 addToKnownAliases(alias);
462
463 /* add the alias/converter pair to the alias table */
464 /* addAlias(alias, 0, cnv, FALSE);*/
465
466 /* skip whitespace */
467 while (line[pos] && isspace(line[pos])) {
468 ++pos;
469 }
470
471 /* handle tags if they are present */
472 if (line[pos] == '{') {
473 ++pos;
474 do {
475 start = pos;
476 while (line[pos] && line[pos] != '}' && !isspace( line[pos])) {
477 ++pos;
478 }
479 limit = pos;
480
481 if (start != limit) {
482 /* add the tag to the tag table */
483 uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
484 addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
485 }
486
487 while (line[pos] && isspace(line[pos])) {
488 ++pos;
489 }
490 } while (line[pos] && line[pos] != '}');
491
492 if (line[pos] == '}') {
493 ++pos;
494 } else {
495 fprintf(stderr, "error(line %d): Unterminated tag list\n", lineNum);
496 exit(U_UNMATCHED_BRACES);
497 }
498 } else {
499 addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
500 }
501 }
502 }
503
504 static uint16_t
505 getTagNumber(const char *tag, uint16_t tagLen) {
506 char *atag;
507 uint16_t t;
508 UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
509
510 if (tagCount >= MAX_TAG_COUNT) {
511 fprintf(stderr, "error(line %d): too many tags\n", lineNum);
512 exit(U_BUFFER_OVERFLOW_ERROR);
513 }
514
515 if (preferredName) {
516 /* puts(tag);*/
517 tagLen--;
518 }
519
520 for (t = 0; t < tagCount; ++t) {
521 const char *currTag = GET_TAG_STR(tags[t].tag);
522 if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
523 return t;
524 }
525 }
526
527 /* we need to add this tag */
528 if (tagCount >= MAX_TAG_COUNT) {
529 fprintf(stderr, "error(line %d): too many tags\n", lineNum);
530 exit(U_BUFFER_OVERFLOW_ERROR);
531 }
532
533 /* allocate a new entry in the tag table */
534 atag = allocString(&tagBlock, tag, tagLen);
535
536 if (standardTagsUsed) {
537 fprintf(stderr, "error(line %d): Tag \"%s\" is not declared at the beginning of the alias table.\n",
538 lineNum, atag);
539 exit(1);
540 }
541 else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
542 fprintf(stderr, "warning(line %d): Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
543 lineNum, atag);
544 }
545
546 /* add the tag to the tag table */
547 tags[tagCount].tag = GET_TAG_NUM(atag);
548 /* The aliasList should be set to 0's already */
549
550 return tagCount++;
551 }
552
553 /*static void
554 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
555 tags[tag].aliases[converter] = alias;
556 }
557 */
558
559 static void
560 addOfficialTaggedStandards(char *line, int32_t lineLen) {
561 char *atag;
562 char *tag = strchr(line, '{') + 1;
563 static const char WHITESPACE[] = " \t";
564
565 if (tagCount > UCNV_NUM_RESERVED_TAGS) {
566 fprintf(stderr, "error(line %d): official tags already added\n", lineNum);
567 exit(U_BUFFER_OVERFLOW_ERROR);
568 }
569 strchr(tag, '}')[0] = 0;
570
571 tag = strtok(tag, WHITESPACE);
572 while (tag != NULL) {
573 /* printf("Adding original tag \"%s\"\n", tag);*/
574
575 /* allocate a new entry in the tag table */
576 atag = allocString(&tagBlock, tag, -1);
577
578 /* add the tag to the tag table */
579 tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
580
581 /* The aliasList should already be set to 0's */
582
583 /* Get next tag */
584 tag = strtok(NULL, WHITESPACE);
585 }
586 }
587
588 static uint16_t
589 addToKnownAliases(const char *alias) {
590 /* uint32_t idx; */
591 /* strict matching */
592 /* for (idx = 0; idx < knownAliasesCount; idx++) {
593 uint16_t num = GET_ALIAS_NUM(alias);
594 if (knownAliases[idx] != num
595 && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
596 {
597 fprintf(stderr, "warning(line %d): duplicate alias %s and %s found\n",
598 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
599 duplicateKnownAliasesCount++;
600 break;
601 }
602 else if (knownAliases[idx] != num
603 && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
604 {
605 if (verbose) {
606 fprintf(stderr, "information(line %d): duplicate alias %s and %s found\n",
607 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
608 }
609 duplicateKnownAliasesCount++;
610 break;
611 }
612 }
613 */
614 if (knownAliasesCount >= MAX_ALIAS_COUNT) {
615 fprintf(stderr, "warning(line %d): Too many aliases defined for all converters\n",
616 lineNum);
617 exit(U_BUFFER_OVERFLOW_ERROR);
618 }
619 /* TODO: We could try to unlist exact duplicates. */
620 return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
621 }
622
623 /*
624 @param standard When standard is 0, then it's the "empty" tag.
625 */
626 static uint16_t
627 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
628 uint32_t idx, idx2;
629 UBool dupFound = FALSE;
630 UBool startEmptyWithoutDefault = FALSE;
631 AliasList *aliasList;
632
633 if(standard>=MAX_TAG_COUNT) {
634 fprintf(stderr, "error(line %d): too many standard tags\n", lineNum);
635 exit(U_BUFFER_OVERFLOW_ERROR);
636 }
637 if(converter>=MAX_CONV_COUNT) {
638 fprintf(stderr, "error(line %d): too many converter names\n", lineNum);
639 exit(U_BUFFER_OVERFLOW_ERROR);
640 }
641 aliasList = &tags[standard].aliasList[converter];
642
643 if (strchr(alias, '}')) {
644 fprintf(stderr, "error(line %d): unmatched } found\n",
645 lineNum);
646 }
647
648 if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
649 fprintf(stderr, "error(line %d): too many aliases for alias %s and converter %s\n",
650 lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
651 exit(U_BUFFER_OVERFLOW_ERROR);
652 }
653
654 /* Show this warning only once. All aliases are added to the "ALL" tag. */
655 if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
656 /* Normally these option values are parsed at runtime, and they can
657 be discarded when the alias is a default converter. Options should
658 only be on a converter and not an alias. */
659 if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
660 {
661 fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
662 lineNum, alias);
663 }
664 if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
665 {
666 fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
667 lineNum, alias);
668 }
669 }
670
671 if (standard != ALL_TAG_NUM) {
672 /* Check for duplicate aliases for this tag on all converters */
673 for (idx = 0; idx < converterCount; idx++) {
674 for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
675 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
676 if (aliasNum
677 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
678 {
679 if (idx == converter) {
680 /*
681 * (alias, standard) duplicates are harmless if they map to the same converter.
682 * Only print a warning in verbose mode, or if the alias is a precise duplicate,
683 * not just a lenient-match duplicate.
684 */
685 if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
686 fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard %s and converter %s\n",
687 lineNum, alias, GET_ALIAS_STR(aliasNum),
688 GET_TAG_STR(tags[standard].tag),
689 GET_ALIAS_STR(converters[converter].converter));
690 }
691 } else {
692 fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n",
693 lineNum, alias, GET_ALIAS_STR(aliasNum),
694 GET_TAG_STR(tags[standard].tag),
695 GET_ALIAS_STR(converters[converter].converter),
696 GET_ALIAS_STR(converters[idx].converter));
697 }
698 dupFound = TRUE;
699 break;
700 }
701 }
702 }
703
704 /* Check for duplicate default aliases for this converter on all tags */
705 /* It's okay to have multiple standards prefer the same name */
706 /* if (verbose && !dupFound) {
707 for (idx = 0; idx < tagCount; idx++) {
708 if (tags[idx].aliasList[converter].aliases) {
709 uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
710 if (aliasNum
711 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
712 {
713 fprintf(stderr, "warning(line %d): duplicate alias %s found for converter %s and standard tag %s\n",
714 lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
715 break;
716 }
717 }
718 }
719 }*/
720 }
721
722 if (aliasList->aliasCount <= 0) {
723 aliasList->aliasCount++;
724 startEmptyWithoutDefault = TRUE;
725 }
726 aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
727 if (startEmptyWithoutDefault) {
728 aliasList->aliases[0] = 0;
729 }
730 if (defaultName) {
731 if (aliasList->aliases[0] != 0) {
732 fprintf(stderr, "error(line %d): Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n",
733 lineNum,
734 alias,
735 GET_ALIAS_STR(aliasList->aliases[0]),
736 GET_TAG_STR(tags[standard].tag),
737 GET_ALIAS_STR(converters[converter].converter));
738 exit(U_PARSE_ERROR);
739 }
740 aliasList->aliases[0] = GET_ALIAS_NUM(alias);
741 } else {
742 aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
743 }
744 /* aliasList->converter = converter;*/
745
746 converters[converter].totalAliasCount++; /* One more to the column */
747 tags[standard].totalAliasCount++; /* One more to the row */
748
749 return aliasList->aliasCount;
750 }
751
752 static uint16_t
753 addConverter(const char *converter) {
754 uint32_t idx;
755 if(converterCount>=MAX_CONV_COUNT) {
756 fprintf(stderr, "error(line %d): too many converters\n", lineNum);
757 exit(U_BUFFER_OVERFLOW_ERROR);
758 }
759
760 for (idx = 0; idx < converterCount; idx++) {
761 if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
762 fprintf(stderr, "error(line %d): duplicate converter %s found!\n", lineNum, converter);
763 exit(U_PARSE_ERROR);
764 break;
765 }
766 }
767
768 converters[converterCount].converter = GET_ALIAS_NUM(converter);
769 converters[converterCount].totalAliasCount = 0;
770
771 return converterCount++;
772 }
773
774 /* resolve this alias based on the prioritization of the standard tags. */
775 static void
776 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
777 uint16_t idx, idx2, idx3;
778
779 for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
780 for (idx2 = 0; idx2 < converterCount; idx2++) {
781 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
782 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
783 if (aliasNum == alias) {
784 *tagNum = idx;
785 *converterNum = idx2;
786 return;
787 }
788 }
789 }
790 }
791 /* Do the leftovers last, just in case */
792 /* There is no need to do the ALL tag */
793 idx = 0;
794 for (idx2 = 0; idx2 < converterCount; idx2++) {
795 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
796 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
797 if (aliasNum == alias) {
798 *tagNum = idx;
799 *converterNum = idx2;
800 return;
801 }
802 }
803 }
804 *tagNum = UINT16_MAX;
805 *converterNum = UINT16_MAX;
806 fprintf(stderr, "warning: alias %s not found\n",
807 GET_ALIAS_STR(alias));
808 return;
809 }
810
811 /* The knownAliases should be sorted before calling this function */
812 static uint32_t
813 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
814 uint32_t uniqueAliasIdx = 0;
815 uint32_t idx;
816 uint16_t currTagNum, oldTagNum;
817 uint16_t currConvNum, oldConvNum;
818 const char *lastName;
819
820 resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
821 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
822 oldConvNum = currConvNum;
823 uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
824 uniqueAliasIdx++;
825 lastName = GET_ALIAS_STR(knownAliases[0]);
826
827 for (idx = 1; idx < knownAliasesCount; idx++) {
828 resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
829 if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
830 /* duplicate found */
831 if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
832 || oldTagNum == 0) {
833 oldTagNum = currTagNum;
834 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
835 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
836 if (verbose) {
837 printf("using %s instead of %s -> %s",
838 GET_ALIAS_STR(knownAliases[idx]),
839 lastName,
840 GET_ALIAS_STR(converters[currConvNum].converter));
841 if (oldConvNum != currConvNum) {
842 printf(" (alias conflict)");
843 }
844 puts("");
845 }
846 }
847 else {
848 /* else ignore it */
849 if (verbose) {
850 printf("folding %s into %s -> %s",
851 GET_ALIAS_STR(knownAliases[idx]),
852 lastName,
853 GET_ALIAS_STR(converters[oldConvNum].converter));
854 if (oldConvNum != currConvNum) {
855 printf(" (alias conflict)");
856 }
857 puts("");
858 }
859 }
860 if (oldConvNum != currConvNum) {
861 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
862 }
863 }
864 else {
865 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
866 oldConvNum = currConvNum;
867 uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
868 uniqueAliasIdx++;
869 lastName = GET_ALIAS_STR(knownAliases[idx]);
870 oldTagNum = currTagNum;
871 /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
872 }
873 if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
874 uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
875 }
876 }
877 return uniqueAliasIdx;
878 }
879
880 static void
881 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
882 uint32_t aliasNum;
883 AliasList *aliasList = &tags[tag].aliasList[converter];
884
885 if (aliasList->aliasCount == 0) {
886 aliasArrLists[tag*converterCount + converter] = 0;
887 }
888 else {
889 aliasLists[aliasListsSize++] = aliasList->aliasCount;
890
891 /* write into the array area a 1's based index. */
892 aliasArrLists[tag*converterCount + converter] = aliasListsSize;
893
894 /* printf("tag %s converter %s\n",
895 GET_TAG_STR(tags[tag].tag),
896 GET_ALIAS_STR(converters[converter].converter));*/
897 for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
898 uint16_t value;
899 /* printf(" %s\n",
900 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
901 if (aliasList->aliases[aliasNum]) {
902 value = aliasList->aliases[aliasNum] + offset;
903 } else {
904 value = 0;
905 if (tag != 0) { /* Only show the warning when it's not the leftover tag. */
906 printf("warning: tag %s does not have a default alias for %s\n",
907 GET_TAG_STR(tags[tag].tag),
908 GET_ALIAS_STR(converters[converter].converter));
909 }
910 }
911 aliasLists[aliasListsSize++] = value;
912 if (aliasListsSize >= MAX_LIST_SIZE) {
913 fprintf(stderr, "error: Too many alias lists\n");
914 exit(U_BUFFER_OVERFLOW_ERROR);
915 }
916
917 }
918 }
919 }
920
/*
 * Creates a copy of a string block in which every string is replaced by the
 * result of ucnv_io_stripForCompare(), at the same offset as the original.
 * (Presumably that is the normalized form used for lenient name comparison;
 * see ucnv_io.h.) The bytes behind a shortened string are set to 0.
 *
 * @param normalizedStrings Destination with room for stringBlockLength bytes.
 * @param origStringBlock   Source block of consecutive NUL-terminated strings.
 * @param stringBlockLength Size of the source block in bytes.
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    /* start from an exact copy; empty strings and padding are never rewritten */
    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        int32_t origLen = (int32_t)uprv_strlen(origStringBlock);
        int32_t origSize;

        if (origLen >= stringBlockLength) {
            break;
        }
        origSize = origLen + 1;

        if (origLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
            strippedLen = uprv_strlen(normalizedStrings);
            if (strippedLen > 0) {
                /* clear the tail that the shorter string no longer covers */
                uprv_memset(normalizedStrings + strippedLen, 0, origSize - strippedLen);
            }
        }

        /* step to the next string in both blocks */
        stringBlockLength -= origSize;
        normalizedStrings += origSize;
        origStringBlock += origSize;
    }
}
940
941 static void
942 writeAliasTable(UNewDataMemory *out) {
943 uint32_t i, j;
944 uint32_t uniqueAliasesSize;
945 uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
946 uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
947 uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
948 uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
949
950 qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
951 uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
952
953 /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
954 aliasListsSize = 0;
955
956 /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
957 for (i = 0; i < tagCount; ++i) {
958 for (j = 0; j < converterCount; ++j) {
959 createOneAliasList(aliasArrLists, i, j, aliasOffset);
960 }
961 }
962
963 /* Write the size of the TOC */
964 if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
965 udata_write32(out, 8);
966 }
967 else {
968 udata_write32(out, 9);
969 }
970
971 /* Write the sizes of each section */
972 /* All sizes are the number of uint16_t units, not bytes */
973 udata_write32(out, converterCount);
974 udata_write32(out, tagCount);
975 udata_write32(out, uniqueAliasesSize); /* list of aliases */
976 udata_write32(out, uniqueAliasesSize); /* The preresolved form of mapping an untagged the alias to a converter */
977 udata_write32(out, tagCount * converterCount);
978 udata_write32(out, aliasListsSize + 1);
979 udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
980 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
981 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
982 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
983 }
984
985 /* write the table of converters */
986 /* Think of this as the column headers */
987 for(i=0; i<converterCount; ++i) {
988 udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
989 }
990
991 /* write the table of tags */
992 /* Think of this as the row headers */
993 for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
994 udata_write16(out, tags[i].tag);
995 }
996 /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
997 udata_write16(out, tags[EMPTY_TAG_NUM].tag);
998 udata_write16(out, tags[ALL_TAG_NUM].tag);
999
1000 /* Write the unique list of aliases */
1001 udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
1002
1003 /* Write the unique list of aliases */
1004 udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
1005
1006 /* Write the array to the lists */
1007 udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
1008 /* Now write the leftover part of the array for the EMPTY and ALL lists */
1009 udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
1010
1011 /* Offset the next array to make the index start at 1. */
1012 udata_write16(out, 0xDEAD);
1013
1014 /* Write the lists */
1015 udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
1016
1017 /* Write any options for the alias table. */
1018 udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
1019
1020 /* write the tags strings */
1021 udata_writeString(out, tagBlock.store, tagBlock.top);
1022
1023 /* write the aliases strings */
1024 udata_writeString(out, stringBlock.store, stringBlock.top);
1025
1026 /* write the normalized aliases strings */
1027 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
1028 char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
1029 createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
1030 createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
1031
1032 /* Write out the complete normalized array. */
1033 udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
1034 uprv_free(normalizedStrings);
1035 }
1036
1037 uprv_free(aliasArrLists);
1038 uprv_free(uniqueAliases);
1039 }
1040
1041 static char *
1042 allocString(StringBlock *block, const char *s, int32_t length) {
1043 uint32_t top;
1044 char *p;
1045
1046 if(length<0) {
1047 length=(int32_t)uprv_strlen(s);
1048 }
1049
1050 /*
1051 * add 1 for the terminating NUL
1052 * and round up (+1 &~1)
1053 * to keep the addresses on a 16-bit boundary
1054 */
1055 top=block->top + (uint32_t)((length + 1 + 1) & ~1);
1056
1057 if(top >= block->max) {
1058 fprintf(stderr, "error(line %d): out of memory\n", lineNum);
1059 exit(U_MEMORY_ALLOCATION_ERROR);
1060 }
1061
1062 /* get the pointer and copy the string */
1063 p = block->store + block->top;
1064 uprv_memcpy(p, s, length);
1065 p[length] = 0; /* NUL-terminate it */
1066 if((length & 1) == 0) {
1067 p[length + 1] = 0; /* set the padding byte */
1068 }
1069
1070 /* check for invariant characters now that we have a NUL-terminated string for easy output */
1071 if(!uprv_isInvariantString(p, length)) {
1072 fprintf(stderr, "error(line %d): the name %s contains not just invariant characters\n", lineNum, p);
1073 exit(U_INVALID_TABLE_FORMAT);
1074 }
1075
1076 block->top = top;
1077 return p;
1078 }
1079
/*
 * qsort() comparator for knownAliases[]: each element is a string index.
 * Names are ordered by lenient name comparison, so that names like IBM850
 * and ibm-850 are sorted together; among those the shortest comes first.
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(name1, name2);

    if (order != 0) {
        return order;
    }
    /* Sort the shortest first */
    return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
}
1090
1091 /*
1092 * Hey, Emacs, please set the following:
1093 *
1094 * Local Variables:
1095 * indent-tabs-mode: nil
1096 * End:
1097 *
1098 */
1099