]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencnval/gencnval.c
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / tools / gencnval / gencnval.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: gencnval.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov05
14 * created by: Markus W. Scherer
15 *
16 * This program reads convrtrs.txt and writes a memory-mappable
17 * converter name alias table to cnvalias.dat .
18 *
19 * This program currently writes version 2.1 of the data format. See
20 * ucnv_io.c for more details on the format. Note that version 2.1
21 * is written in such a way that a 2.0 reader will be able to use it,
22 * and a 2.1 reader will be able to read 2.0.
23 */
24
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
28 #include "ucnv_io.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uinvchar.h"
32 #include "filestrm.h"
33 #include "unicode/uclean.h"
34 #include "unewdata.h"
35 #include "uoptions.h"
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <ctype.h>
40
41 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
42
43 /* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
44 That is the maximum size for the string stores combined
45 because the strings are indexed at 16-bit boundaries by a
46 16-bit index, and there is only one section for the
47 strings.
48 */
49 #define STRING_STORE_SIZE 0x1FBFE /* 130046 */
50 #define TAG_STORE_SIZE 0x400 /* 1024 */
51
52 /* The combined tag and converter count can affect the number of lists
53 created. The size of all lists must be less than (2^17 - 1)
54 because the lists are indexed as a 16-bit array with a 16-bit index.
55 */
56 #define MAX_TAG_COUNT 0x3F /* 63 */
57 #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
58 #define MAX_ALIAS_COUNT 0xFFFF /* 65535 */
59
60 /* The maximum number of aliases that a standard tag/converter combination can have.
61 At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
62 this value. I don't recommend more than 31 for this value.
63 */
64 #define MAX_TC_ALIAS_COUNT 0x1F /* 31 */
65
66 #define MAX_LINE_SIZE 0x7FFF /* 32767 */
67 #define MAX_LIST_SIZE 0xFFFF /* 65535 */
68
69 #define DATA_NAME "cnvalias"
70 #define DATA_TYPE "icu" /* ICU alias table */
71
72 #define ALL_TAG_STR "ALL"
73 #define ALL_TAG_NUM 1
74 #define EMPTY_TAG_NUM 0
75
76 /* UDataInfo cf. udata.h */
77 static const UDataInfo dataInfo={
78 sizeof(UDataInfo),
79 0,
80
81 U_IS_BIG_ENDIAN,
82 U_CHARSET_FAMILY,
83 sizeof(UChar),
84 0,
85
86 {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
87 {3, 0, 1, 0}, /* formatVersion */
88 {1, 4, 2, 0} /* dataVersion */
89 };
90
91 typedef struct {
92 char *store;
93 uint32_t top;
94 uint32_t max;
95 } StringBlock;
96
97 static char stringStore[STRING_STORE_SIZE];
98 static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
99
100 typedef struct {
101 uint16_t aliasCount;
102 uint16_t *aliases; /* Index into stringStore */
103 } AliasList;
104
105 typedef struct {
106 uint16_t converter; /* Index into stringStore */
107 uint16_t totalAliasCount; /* Total aliases in this column */
108 } Converter;
109
110 static Converter converters[MAX_CONV_COUNT];
111 static uint16_t converterCount=0;
112
113 static char tagStore[TAG_STORE_SIZE];
114 static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };
115
116 typedef struct {
117 uint16_t tag; /* Index into tagStore */
118 uint16_t totalAliasCount; /* Total aliases in this row */
119 AliasList aliasList[MAX_CONV_COUNT];
120 } Tag;
121
122 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
123 static Tag tags[MAX_TAG_COUNT];
124 static uint16_t tagCount = 0;
125
126 /* Used for storing all aliases */
127 static uint16_t knownAliases[MAX_ALIAS_COUNT];
128 static uint16_t knownAliasesCount = 0;
129 /*static uint16_t duplicateKnownAliasesCount = 0;*/
130
131 /* Used for storing the lists section that point to aliases */
132 static uint16_t aliasLists[MAX_LIST_SIZE];
133 static uint16_t aliasListsSize = 0;
134
135 /* Were the standard tags declared before the aliases. */
136 static UBool standardTagsUsed = FALSE;
137 static UBool verbose = FALSE;
138 static int lineNum = 1;
139
140 static UConverterAliasOptions tableOptions = {
141 UCNV_IO_STD_NORMALIZED,
142 1 /* containsCnvOptionInfo */
143 };
144
145 /* prototypes --------------------------------------------------------------- */
146
147 static void
148 parseLine(const char *line);
149
150 static void
151 parseFile(FileStream *in);
152
153 static int32_t
154 chomp(char *line);
155
156 static void
157 addOfficialTaggedStandards(char *line, int32_t lineLen);
158
159 static uint16_t
160 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);
161
162 static uint16_t
163 addConverter(const char *converter);
164
165 static char *
166 allocString(StringBlock *block, const char *s, int32_t length);
167
168 static uint16_t
169 addToKnownAliases(const char *alias);
170
171 static int
172 compareAliases(const void *alias1, const void *alias2);
173
174 static uint16_t
175 getTagNumber(const char *tag, uint16_t tagLen);
176
177 /*static void
178 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
179
180 static void
181 writeAliasTable(UNewDataMemory *out);
182
183 /* -------------------------------------------------------------------------- */
184
/*
 * Convert a 16-bit string index into a pointer into the alias/tag store.
 * Indexes count 2-byte units. Presumes that the string was stored with
 * allocString().
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/*
 * Convert a pointer into the alias/tag store back into its 16-bit index.
 * Presumes that the string was stored with allocString().
 * The argument is parenthesized so that any pointer expression may be passed.
 */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
192
193 enum
194 {
195 HELP1,
196 HELP2,
197 VERBOSE,
198 COPYRIGHT,
199 DESTDIR,
200 SOURCEDIR
201 };
202
203 static UOption options[]={
204 UOPTION_HELP_H,
205 UOPTION_HELP_QUESTION_MARK,
206 UOPTION_VERBOSE,
207 UOPTION_COPYRIGHT,
208 UOPTION_DESTDIR,
209 UOPTION_SOURCEDIR
210 };
211
212 extern int
213 main(int argc, char* argv[]) {
214 char pathBuf[512];
215 const char *path;
216 FileStream *in;
217 UNewDataMemory *out;
218 UErrorCode errorCode=U_ZERO_ERROR;
219
220 U_MAIN_INIT_ARGS(argc, argv);
221
222 /* preset then read command line options */
223 options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
224 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
225
226 /* error handling, printing usage message */
227 if(argc<0) {
228 fprintf(stderr,
229 "error in command line argument \"%s\"\n",
230 argv[-argc]);
231 }
232 if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
233 fprintf(stderr,
234 "usage: %s [-options] [convrtrs.txt]\n"
235 "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
236 "options:\n"
237 "\t-h or -? or --help this usage text\n"
238 "\t-v or --verbose prints out extra information about the alias table\n"
239 "\t-c or --copyright include a copyright notice\n"
240 "\t-d or --destdir destination directory, followed by the path\n"
241 "\t-s or --sourcedir source directory, followed by the path\n",
242 argv[0]);
243 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
244 }
245
246 if(options[VERBOSE].doesOccur) {
247 verbose = TRUE;
248 }
249
250 if(argc>=2) {
251 path=argv[1];
252 } else {
253 path=options[SOURCEDIR].value;
254 if(path!=NULL && *path!=0) {
255 char *end;
256
257 uprv_strcpy(pathBuf, path);
258 end = uprv_strchr(pathBuf, 0);
259 if(*(end-1)!=U_FILE_SEP_CHAR) {
260 *(end++)=U_FILE_SEP_CHAR;
261 }
262 uprv_strcpy(end, "convrtrs.txt");
263 path=pathBuf;
264 } else {
265 path = "convrtrs.txt";
266 }
267 }
268
269 uprv_memset(stringStore, 0, sizeof(stringStore));
270 uprv_memset(tagStore, 0, sizeof(tagStore));
271 uprv_memset(converters, 0, sizeof(converters));
272 uprv_memset(tags, 0, sizeof(tags));
273 uprv_memset(aliasLists, 0, sizeof(aliasLists));
274 uprv_memset(knownAliases, 0, sizeof(aliasLists));
275
276
277 in=T_FileStream_open(path, "r");
278 if(in==NULL) {
279 fprintf(stderr, "gencnval: unable to open input file convrtrs.txt\n");
280 exit(U_FILE_ACCESS_ERROR);
281 }
282 parseFile(in);
283 T_FileStream_close(in);
284
285 /* create the output file */
286 out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
287 options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
288 if(U_FAILURE(errorCode)) {
289 fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
290 exit(errorCode);
291 }
292
293 /* write the table of aliases based on a tag/converter name combination */
294 writeAliasTable(out);
295
296 /* finish */
297 udata_finish(out, &errorCode);
298 if(U_FAILURE(errorCode)) {
299 fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
300 exit(errorCode);
301 }
302
303 return 0;
304 }
305
306 static void
307 parseFile(FileStream *in) {
308 char line[MAX_LINE_SIZE];
309 char lastLine[MAX_LINE_SIZE];
310 int32_t lineSize = 0;
311 int32_t lastLineSize = 0;
312 UBool validParse = TRUE;
313
314 lineNum = 0;
315
316 /* Add the empty tag, which is for untagged aliases */
317 getTagNumber("", 0);
318 getTagNumber(ALL_TAG_STR, 3);
319 allocString(&stringBlock, "", 0);
320
321 /* read the list of aliases */
322 while (validParse) {
323 validParse = FALSE;
324
325 /* Read non-empty lines that don't start with a space character. */
326 while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
327 lastLineSize = chomp(lastLine);
328 if (lineSize == 0 || (lastLineSize > 0 && isspace(*lastLine))) {
329 uprv_strcpy(line + lineSize, lastLine);
330 lineSize += lastLineSize;
331 } else if (lineSize > 0) {
332 validParse = TRUE;
333 break;
334 }
335 lineNum++;
336 }
337
338 if (validParse || lineSize > 0) {
339 if (isspace(*line)) {
340 fprintf(stderr, "error(line %d): cannot start an alias with a space\n", lineNum-1);
341 exit(U_PARSE_ERROR);
342 } else if (line[0] == '{') {
343 if (!standardTagsUsed && line[lineSize - 1] != '}') {
344 fprintf(stderr, "error(line %d): alias needs to start with a converter name\n", lineNum);
345 exit(U_PARSE_ERROR);
346 }
347 addOfficialTaggedStandards(line, lineSize);
348 standardTagsUsed = TRUE;
349 } else {
350 if (standardTagsUsed) {
351 parseLine(line);
352 }
353 else {
354 fprintf(stderr, "error(line %d): alias table needs to start a list of standard tags\n", lineNum);
355 exit(U_PARSE_ERROR);
356 }
357 }
358 /* Was the last line consumed */
359 if (lastLineSize > 0) {
360 uprv_strcpy(line, lastLine);
361 lineSize = lastLineSize;
362 }
363 else {
364 lineSize = 0;
365 }
366 }
367 lineNum++;
368 }
369 }
370
/*
 * This works almost like the Perl chomp.
 * The line is truncated in place at the first newline, carriage return or
 * '#' comment character, and whitespace that follows the last non-space
 * character is removed. Leading whitespace is kept.
 *
 * A line that contains nothing but whitespace is left untrimmed, so its
 * returned length stays non-zero; parseFile() tests the length together
 * with the first character of the line to recognise continuation lines.
 *
 * @param line NUL-terminated line, modified in place
 * @return the length of the resulting string
 */
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = NULL;  /* NULL until a non-space character is seen */

    while(*s!=0) {
        /* truncate at a newline or a comment */
        if(*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* cast: <ctype.h> functions are undefined for negative char values */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }
    if (lastNonSpace != NULL) {
        /* cut right after the last non-space character, even if it is line[0] */
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
395
396 static void
397 parseLine(const char *line) {
398 uint16_t pos=0, start, limit, length, cnv;
399 char *converter, *alias;
400
401 /* skip leading white space */
402 /* There is no whitespace at the beginning anymore */
403 /* while(line[pos]!=0 && isspace(line[pos])) {
404 ++pos;
405 }
406 */
407
408 /* is there nothing on this line? */
409 if(line[pos]==0) {
410 return;
411 }
412
413 /* get the converter name */
414 start=pos;
415 while(line[pos]!=0 && !isspace(line[pos])) {
416 ++pos;
417 }
418 limit=pos;
419
420 /* store the converter name */
421 length=(uint16_t)(limit-start);
422 converter=allocString(&stringBlock, line+start, length);
423
424 /* add the converter to the converter table */
425 cnv=addConverter(converter);
426
427 /* The name itself may be tagged, so let's added it to the aliases list properly */
428 pos = start;
429
430 /* get all the real aliases */
431 for(;;) {
432
433 /* skip white space */
434 while(line[pos]!=0 && isspace(line[pos])) {
435 ++pos;
436 }
437
438 /* is there no more alias name on this line? */
439 if(line[pos]==0) {
440 break;
441 }
442
443 /* get an alias name */
444 start=pos;
445 while(line[pos]!=0 && line[pos]!='{' && !isspace(line[pos])) {
446 ++pos;
447 }
448 limit=pos;
449
450 /* store the alias name */
451 length=(uint16_t)(limit-start);
452 if (start == 0) {
453 /* add the converter as its own alias to the alias table */
454 alias = converter;
455 addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
456 }
457 else {
458 alias=allocString(&stringBlock, line+start, length);
459 addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
460 }
461 addToKnownAliases(alias);
462
463 /* add the alias/converter pair to the alias table */
464 /* addAlias(alias, 0, cnv, FALSE);*/
465
466 /* skip whitespace */
467 while (line[pos] && isspace(line[pos])) {
468 ++pos;
469 }
470
471 /* handle tags if they are present */
472 if (line[pos] == '{') {
473 ++pos;
474 do {
475 start = pos;
476 while (line[pos] && line[pos] != '}' && !isspace( line[pos])) {
477 ++pos;
478 }
479 limit = pos;
480
481 if (start != limit) {
482 /* add the tag to the tag table */
483 uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
484 addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
485 }
486
487 while (line[pos] && isspace(line[pos])) {
488 ++pos;
489 }
490 } while (line[pos] && line[pos] != '}');
491
492 if (line[pos] == '}') {
493 ++pos;
494 } else {
495 fprintf(stderr, "error(line %d): Unterminated tag list\n", lineNum);
496 exit(U_UNMATCHED_BRACES);
497 }
498 } else {
499 addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
500 }
501 }
502 }
503
504 static uint16_t
505 getTagNumber(const char *tag, uint16_t tagLen) {
506 char *atag;
507 uint16_t t;
508 UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
509
510 if (tagCount >= MAX_TAG_COUNT) {
511 fprintf(stderr, "error(line %d): too many tags\n", lineNum);
512 exit(U_BUFFER_OVERFLOW_ERROR);
513 }
514
515 if (preferredName) {
516 /* puts(tag);*/
517 tagLen--;
518 }
519
520 for (t = 0; t < tagCount; ++t) {
521 const char *currTag = GET_TAG_STR(tags[t].tag);
522 if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
523 return t;
524 }
525 }
526
527 /* we need to add this tag */
528 if (tagCount >= MAX_TAG_COUNT) {
529 fprintf(stderr, "error(line %d): too many tags\n", lineNum);
530 exit(U_BUFFER_OVERFLOW_ERROR);
531 }
532
533 /* allocate a new entry in the tag table */
534 atag = allocString(&tagBlock, tag, tagLen);
535
536 if (standardTagsUsed) {
537 fprintf(stderr, "error(line %d): Tag \"%s\" is not declared at the beginning of the alias table.\n",
538 lineNum, atag);
539 exit(1);
540 }
541 else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
542 fprintf(stderr, "warning(line %d): Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
543 lineNum, atag);
544 }
545
546 /* add the tag to the tag table */
547 tags[tagCount].tag = GET_TAG_NUM(atag);
548 /* The aliasList should be set to 0's already */
549
550 return tagCount++;
551 }
552
553 /*static void
554 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
555 tags[tag].aliases[converter] = alias;
556 }
557 */
558
559 static void
560 addOfficialTaggedStandards(char *line, int32_t lineLen) {
561 char *atag;
562 char *endTagExp;
563 char *tag;
564 static const char WHITESPACE[] = " \t";
565
566 if (tagCount > UCNV_NUM_RESERVED_TAGS) {
567 fprintf(stderr, "error(line %d): official tags already added\n", lineNum);
568 exit(U_BUFFER_OVERFLOW_ERROR);
569 }
570 tag = strchr(line, '{');
571 if (tag == NULL) {
572 /* Why were we called? */
573 fprintf(stderr, "error(line %d): Missing start of tag group\n", lineNum);
574 exit(U_PARSE_ERROR);
575 }
576 tag++;
577 endTagExp = strchr(tag, '}');
578 if (endTagExp == NULL) {
579 fprintf(stderr, "error(line %d): Missing end of tag group\n", lineNum);
580 exit(U_PARSE_ERROR);
581 }
582 endTagExp[0] = 0;
583
584 tag = strtok(tag, WHITESPACE);
585 while (tag != NULL) {
586 /* printf("Adding original tag \"%s\"\n", tag);*/
587
588 /* allocate a new entry in the tag table */
589 atag = allocString(&tagBlock, tag, -1);
590
591 /* add the tag to the tag table */
592 tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
593
594 /* The aliasList should already be set to 0's */
595
596 /* Get next tag */
597 tag = strtok(NULL, WHITESPACE);
598 }
599 }
600
601 static uint16_t
602 addToKnownAliases(const char *alias) {
603 /* uint32_t idx; */
604 /* strict matching */
605 /* for (idx = 0; idx < knownAliasesCount; idx++) {
606 uint16_t num = GET_ALIAS_NUM(alias);
607 if (knownAliases[idx] != num
608 && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
609 {
610 fprintf(stderr, "warning(line %d): duplicate alias %s and %s found\n",
611 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
612 duplicateKnownAliasesCount++;
613 break;
614 }
615 else if (knownAliases[idx] != num
616 && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
617 {
618 if (verbose) {
619 fprintf(stderr, "information(line %d): duplicate alias %s and %s found\n",
620 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
621 }
622 duplicateKnownAliasesCount++;
623 break;
624 }
625 }
626 */
627 if (knownAliasesCount >= MAX_ALIAS_COUNT) {
628 fprintf(stderr, "warning(line %d): Too many aliases defined for all converters\n",
629 lineNum);
630 exit(U_BUFFER_OVERFLOW_ERROR);
631 }
632 /* TODO: We could try to unlist exact duplicates. */
633 return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
634 }
635
636 /*
637 @param standard When standard is 0, then it's the "empty" tag.
638 */
639 static uint16_t
640 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
641 uint32_t idx, idx2;
642 UBool dupFound = FALSE;
643 UBool startEmptyWithoutDefault = FALSE;
644 AliasList *aliasList;
645
646 if(standard>=MAX_TAG_COUNT) {
647 fprintf(stderr, "error(line %d): too many standard tags\n", lineNum);
648 exit(U_BUFFER_OVERFLOW_ERROR);
649 }
650 if(converter>=MAX_CONV_COUNT) {
651 fprintf(stderr, "error(line %d): too many converter names\n", lineNum);
652 exit(U_BUFFER_OVERFLOW_ERROR);
653 }
654 aliasList = &tags[standard].aliasList[converter];
655
656 if (strchr(alias, '}')) {
657 fprintf(stderr, "error(line %d): unmatched } found\n",
658 lineNum);
659 }
660
661 if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
662 fprintf(stderr, "error(line %d): too many aliases for alias %s and converter %s\n",
663 lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
664 exit(U_BUFFER_OVERFLOW_ERROR);
665 }
666
667 /* Show this warning only once. All aliases are added to the "ALL" tag. */
668 if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
669 /* Normally these option values are parsed at runtime, and they can
670 be discarded when the alias is a default converter. Options should
671 only be on a converter and not an alias. */
672 if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
673 {
674 fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
675 lineNum, alias);
676 }
677 if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
678 {
679 fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
680 lineNum, alias);
681 }
682 }
683
684 if (standard != ALL_TAG_NUM) {
685 /* Check for duplicate aliases for this tag on all converters */
686 for (idx = 0; idx < converterCount; idx++) {
687 for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
688 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
689 if (aliasNum
690 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
691 {
692 if (idx == converter) {
693 /*
694 * (alias, standard) duplicates are harmless if they map to the same converter.
695 * Only print a warning in verbose mode, or if the alias is a precise duplicate,
696 * not just a lenient-match duplicate.
697 */
698 if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
699 fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard %s and converter %s\n",
700 lineNum, alias, GET_ALIAS_STR(aliasNum),
701 GET_TAG_STR(tags[standard].tag),
702 GET_ALIAS_STR(converters[converter].converter));
703 }
704 } else {
705 fprintf(stderr, "warning(line %d): duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n",
706 lineNum, alias, GET_ALIAS_STR(aliasNum),
707 GET_TAG_STR(tags[standard].tag),
708 GET_ALIAS_STR(converters[converter].converter),
709 GET_ALIAS_STR(converters[idx].converter));
710 }
711 dupFound = TRUE;
712 break;
713 }
714 }
715 }
716
717 /* Check for duplicate default aliases for this converter on all tags */
718 /* It's okay to have multiple standards prefer the same name */
719 /* if (verbose && !dupFound) {
720 for (idx = 0; idx < tagCount; idx++) {
721 if (tags[idx].aliasList[converter].aliases) {
722 uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
723 if (aliasNum
724 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
725 {
726 fprintf(stderr, "warning(line %d): duplicate alias %s found for converter %s and standard tag %s\n",
727 lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
728 break;
729 }
730 }
731 }
732 }*/
733 }
734
735 if (aliasList->aliasCount <= 0) {
736 aliasList->aliasCount++;
737 startEmptyWithoutDefault = TRUE;
738 }
739 aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
740 if (startEmptyWithoutDefault) {
741 aliasList->aliases[0] = 0;
742 }
743 if (defaultName) {
744 if (aliasList->aliases[0] != 0) {
745 fprintf(stderr, "error(line %d): Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n",
746 lineNum,
747 alias,
748 GET_ALIAS_STR(aliasList->aliases[0]),
749 GET_TAG_STR(tags[standard].tag),
750 GET_ALIAS_STR(converters[converter].converter));
751 exit(U_PARSE_ERROR);
752 }
753 aliasList->aliases[0] = GET_ALIAS_NUM(alias);
754 } else {
755 aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
756 }
757 /* aliasList->converter = converter;*/
758
759 converters[converter].totalAliasCount++; /* One more to the column */
760 tags[standard].totalAliasCount++; /* One more to the row */
761
762 return aliasList->aliasCount;
763 }
764
765 static uint16_t
766 addConverter(const char *converter) {
767 uint32_t idx;
768 if(converterCount>=MAX_CONV_COUNT) {
769 fprintf(stderr, "error(line %d): too many converters\n", lineNum);
770 exit(U_BUFFER_OVERFLOW_ERROR);
771 }
772
773 for (idx = 0; idx < converterCount; idx++) {
774 if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
775 fprintf(stderr, "error(line %d): duplicate converter %s found!\n", lineNum, converter);
776 exit(U_PARSE_ERROR);
777 break;
778 }
779 }
780
781 converters[converterCount].converter = GET_ALIAS_NUM(converter);
782 converters[converterCount].totalAliasCount = 0;
783
784 return converterCount++;
785 }
786
787 /* resolve this alias based on the prioritization of the standard tags. */
788 static void
789 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
790 uint16_t idx, idx2, idx3;
791
792 for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
793 for (idx2 = 0; idx2 < converterCount; idx2++) {
794 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
795 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
796 if (aliasNum == alias) {
797 *tagNum = idx;
798 *converterNum = idx2;
799 return;
800 }
801 }
802 }
803 }
804 /* Do the leftovers last, just in case */
805 /* There is no need to do the ALL tag */
806 idx = 0;
807 for (idx2 = 0; idx2 < converterCount; idx2++) {
808 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
809 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
810 if (aliasNum == alias) {
811 *tagNum = idx;
812 *converterNum = idx2;
813 return;
814 }
815 }
816 }
817 *tagNum = UINT16_MAX;
818 *converterNum = UINT16_MAX;
819 fprintf(stderr, "warning: alias %s not found\n",
820 GET_ALIAS_STR(alias));
821 return;
822 }
823
824 /* The knownAliases should be sorted before calling this function */
825 static uint32_t
826 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
827 uint32_t uniqueAliasIdx = 0;
828 uint32_t idx;
829 uint16_t currTagNum, oldTagNum;
830 uint16_t currConvNum, oldConvNum;
831 const char *lastName;
832
833 resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
834 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
835 oldConvNum = currConvNum;
836 uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
837 uniqueAliasIdx++;
838 lastName = GET_ALIAS_STR(knownAliases[0]);
839
840 for (idx = 1; idx < knownAliasesCount; idx++) {
841 resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
842 if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
843 /* duplicate found */
844 if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
845 || oldTagNum == 0) {
846 oldTagNum = currTagNum;
847 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
848 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
849 if (verbose) {
850 printf("using %s instead of %s -> %s",
851 GET_ALIAS_STR(knownAliases[idx]),
852 lastName,
853 GET_ALIAS_STR(converters[currConvNum].converter));
854 if (oldConvNum != currConvNum) {
855 printf(" (alias conflict)");
856 }
857 puts("");
858 }
859 }
860 else {
861 /* else ignore it */
862 if (verbose) {
863 printf("folding %s into %s -> %s",
864 GET_ALIAS_STR(knownAliases[idx]),
865 lastName,
866 GET_ALIAS_STR(converters[oldConvNum].converter));
867 if (oldConvNum != currConvNum) {
868 printf(" (alias conflict)");
869 }
870 puts("");
871 }
872 }
873 if (oldConvNum != currConvNum) {
874 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
875 }
876 }
877 else {
878 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
879 oldConvNum = currConvNum;
880 uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
881 uniqueAliasIdx++;
882 lastName = GET_ALIAS_STR(knownAliases[idx]);
883 oldTagNum = currTagNum;
884 /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
885 }
886 if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
887 uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
888 }
889 }
890 return uniqueAliasIdx;
891 }
892
893 static void
894 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
895 uint32_t aliasNum;
896 AliasList *aliasList = &tags[tag].aliasList[converter];
897
898 if (aliasList->aliasCount == 0) {
899 aliasArrLists[tag*converterCount + converter] = 0;
900 }
901 else {
902 aliasLists[aliasListsSize++] = aliasList->aliasCount;
903
904 /* write into the array area a 1's based index. */
905 aliasArrLists[tag*converterCount + converter] = aliasListsSize;
906
907 /* printf("tag %s converter %s\n",
908 GET_TAG_STR(tags[tag].tag),
909 GET_ALIAS_STR(converters[converter].converter));*/
910 for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
911 uint16_t value;
912 /* printf(" %s\n",
913 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
914 if (aliasList->aliases[aliasNum]) {
915 value = aliasList->aliases[aliasNum] + offset;
916 } else {
917 value = 0;
918 if (tag != 0) { /* Only show the warning when it's not the leftover tag. */
919 printf("warning: tag %s does not have a default alias for %s\n",
920 GET_TAG_STR(tags[tag].tag),
921 GET_ALIAS_STR(converters[converter].converter));
922 }
923 }
924 aliasLists[aliasListsSize++] = value;
925 if (aliasListsSize >= MAX_LIST_SIZE) {
926 fprintf(stderr, "error: Too many alias lists\n");
927 exit(U_BUFFER_OVERFLOW_ERROR);
928 }
929
930 }
931 }
932 }
933
/*
 * Fills normalizedStrings with a copy of a string block in which every
 * non-empty string has been passed through ucnv_io_stripForCompare().
 *
 * Each string keeps its original starting offset. When the stripped form is
 * non-empty, the rest of the string's original space (including its NUL)
 * is zero-filled.
 *
 * @param normalizedStrings destination, at least stringBlockLength bytes
 * @param origStringBlock   block of consecutive NUL-terminated strings
 * @param stringBlockLength length of the block in bytes
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    const char *src = origStringBlock;
    char *dst = normalizedStrings;
    int32_t remaining = stringBlockLength;

    /* start from a verbatim copy, then rewrite the strings one at a time */
    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        int32_t len = (int32_t)uprv_strlen(src);
        int32_t size;

        if (len >= remaining) {
            break;
        }
        size = len + 1;

        if (len > 0) {
            int32_t normLen;
            ucnv_io_stripForCompare(dst, src);
            normLen = uprv_strlen(dst);
            if (normLen > 0) {
                uprv_memset(dst + normLen, 0, size - normLen);
            }
        }

        remaining -= size;
        dst += size;
        src += size;
    }
}
953
954 static void
955 writeAliasTable(UNewDataMemory *out) {
956 uint32_t i, j;
957 uint32_t uniqueAliasesSize;
958 uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
959 uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
960 uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
961 uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
962
963 qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
964 uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
965
966 /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
967 aliasListsSize = 0;
968
969 /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
970 for (i = 0; i < tagCount; ++i) {
971 for (j = 0; j < converterCount; ++j) {
972 createOneAliasList(aliasArrLists, i, j, aliasOffset);
973 }
974 }
975
976 /* Write the size of the TOC */
977 if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
978 udata_write32(out, 8);
979 }
980 else {
981 udata_write32(out, 9);
982 }
983
984 /* Write the sizes of each section */
985 /* All sizes are the number of uint16_t units, not bytes */
986 udata_write32(out, converterCount);
987 udata_write32(out, tagCount);
988 udata_write32(out, uniqueAliasesSize); /* list of aliases */
989 udata_write32(out, uniqueAliasesSize); /* The preresolved form of mapping an untagged the alias to a converter */
990 udata_write32(out, tagCount * converterCount);
991 udata_write32(out, aliasListsSize + 1);
992 udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
993 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
994 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
995 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
996 }
997
998 /* write the table of converters */
999 /* Think of this as the column headers */
1000 for(i=0; i<converterCount; ++i) {
1001 udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
1002 }
1003
1004 /* write the table of tags */
1005 /* Think of this as the row headers */
1006 for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
1007 udata_write16(out, tags[i].tag);
1008 }
1009 /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
1010 udata_write16(out, tags[EMPTY_TAG_NUM].tag);
1011 udata_write16(out, tags[ALL_TAG_NUM].tag);
1012
1013 /* Write the unique list of aliases */
1014 udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
1015
1016 /* Write the unique list of aliases */
1017 udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
1018
1019 /* Write the array to the lists */
1020 udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
1021 /* Now write the leftover part of the array for the EMPTY and ALL lists */
1022 udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
1023
1024 /* Offset the next array to make the index start at 1. */
1025 udata_write16(out, 0xDEAD);
1026
1027 /* Write the lists */
1028 udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
1029
1030 /* Write any options for the alias table. */
1031 udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
1032
1033 /* write the tags strings */
1034 udata_writeString(out, tagBlock.store, tagBlock.top);
1035
1036 /* write the aliases strings */
1037 udata_writeString(out, stringBlock.store, stringBlock.top);
1038
1039 /* write the normalized aliases strings */
1040 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
1041 char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
1042 createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
1043 createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
1044
1045 /* Write out the complete normalized array. */
1046 udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
1047 uprv_free(normalizedStrings);
1048 }
1049
1050 uprv_free(aliasArrLists);
1051 uprv_free(uniqueAliases);
1052 }
1053
1054 static char *
1055 allocString(StringBlock *block, const char *s, int32_t length) {
1056 uint32_t top;
1057 char *p;
1058
1059 if(length<0) {
1060 length=(int32_t)uprv_strlen(s);
1061 }
1062
1063 /*
1064 * add 1 for the terminating NUL
1065 * and round up (+1 &~1)
1066 * to keep the addresses on a 16-bit boundary
1067 */
1068 top=block->top + (uint32_t)((length + 1 + 1) & ~1);
1069
1070 if(top >= block->max) {
1071 fprintf(stderr, "error(line %d): out of memory\n", lineNum);
1072 exit(U_MEMORY_ALLOCATION_ERROR);
1073 }
1074
1075 /* get the pointer and copy the string */
1076 p = block->store + block->top;
1077 uprv_memcpy(p, s, length);
1078 p[length] = 0; /* NUL-terminate it */
1079 if((length & 1) == 0) {
1080 p[length + 1] = 0; /* set the padding byte */
1081 }
1082
1083 /* check for invariant characters now that we have a NUL-terminated string for easy output */
1084 if(!uprv_isInvariantString(p, length)) {
1085 fprintf(stderr, "error(line %d): the name %s contains not just invariant characters\n", lineNum, p);
1086 exit(U_INVALID_TABLE_FORMAT);
1087 }
1088
1089 block->top = top;
1090 return p;
1091 }
1092
/*
 * qsort() comparison function for entries of knownAliases.
 *
 * Aliases are ordered by ucnv_compareNames(), so that names like IBM850
 * and ibm-850 are sorted together; names that compare equal are ordered
 * by length, shortest first.
 *
 * @param alias1 pointer to the first alias string index (uint16_t)
 * @param alias2 pointer to the second alias string index (uint16_t)
 * @return negative, zero or positive, as qsort() expects
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *left = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *right = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(left, right);

    if (order != 0) {
        return order;
    }
    /* Sort the shortest first */
    return (int)uprv_strlen(left) - (int)uprv_strlen(right);
}
1103
1104 /*
1105 * Hey, Emacs, please set the following:
1106 *
1107 * Local Variables:
1108 * indent-tabs-mode: nil
1109 * End:
1110 *
1111 */
1112