]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/tools/makeconv/makeconv.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / tools / makeconv / makeconv.c
... / ...
CommitLineData
1/*
2 ********************************************************************************
3 *
4 * Copyright (C) 1998-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18#include <stdio.h>
19#include "unicode/putil.h"
20#include "ucnv_io.h"
21#include "unicode/ucnv_err.h"
22#include "ucnv_bld.h"
23#include "ucnv_imp.h"
24#include "ucnv_cnv.h"
25#include "cstring.h"
26#include "cmemory.h"
27#include "filestrm.h"
28#include "toolutil.h"
29#include "uoptions.h"
30#include "unicode/udata.h"
31#include "unewdata.h"
32#include "ucmpwrit.h"
33#include "makeconv.h"
34#include "genmbcs.h"
35
36#define DEBUG 0
37
38/*
39 * from ucnvstat.c - static prototypes of data-based converters
40 */
41extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
42
43/*
44 * Global - verbosity
45 */
46UBool VERBOSE = FALSE;
47UBool TOUCHFILE = FALSE;
48
49/*Reads the header of the table file and fills in basic knowledge about the converter
50 *in "converter"
51 */
52static void readHeaderFromFile(UConverterSharedData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err);
53
/* Reads the rest of the file (the mapping section) and adds the mappings to the
 * table held by the shared data. Problems are reported through err; nothing is returned. */
56static void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err);
57
58/* creates a UConverterSharedData from a mapping file.
59 * Fills in: *staticData, *table. Converter is NOT otherwise useful.
60 */
61static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err);
62
63/*
64 * Set up the UNewData and write the converter..
65 */
66void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status);
67
/*
 * Character sets used when tokenizing mapping lines. Each array is a
 * NUL-terminated list of the characters treated as separators.
 */
static const char NLTC_SEPARATORS[9] = "\r\n\t <>\"U";
static const char FALLBACK_SEPARATOR = '|';
static const char CODEPOINT_SEPARATORS[8] = "\r>\\x\n \t";
static const char UNICODE_CODEPOINT_SEPARATORS[6] = "<>U \t";
72
/*
 * Step over any leading spaces and tabs in s.
 * Returns a pointer to the first character that is neither a space nor a tab
 * (which may be the terminating NUL).
 */
static const char *
skipWhitespace(const char *s) {
    for(;;) {
        switch(*s) {
        case ' ':
        case '\t':
            ++s;
            continue;
        default:
            return s;
        }
    }
}
80
/* Returns the value (0..15) of a hexadecimal digit, or -1 if c is not one. */
static int32_t
hexDigitValue(char c) {
    if('0'<=c && c<='9') {
        return c-'0';
    }
    if('a'<=c && c<='f') {
        return c-'a'+10;
    }
    if('A'<=c && c<='F') {
        return c-'A'+10;
    }
    return -1;
}

/*
 * Parse a sequence of one to four codepage bytes written as \xhh\xhh...
 *
 * s       points at the first backslash
 * pBytes  receives the bytes combined big-endian into one integer
 * pEnd    if not NULL, receives a pointer to the first character after the
 *         last \xhh group (only set on success)
 *
 * Returns the number of bytes parsed (1..4), or -1 if there is no \x group,
 * there are more than four groups, or a group is not exactly two hex digits.
 *
 * Each group is checked digit by digit rather than with strtoul(), which
 * would also accept leading whitespace or a sign (e.g. "\x 1" or "\x-1") and
 * would require stepping the pointer past a possibly truncated string.
 */
static int32_t
parseCodepageBytes(const char *s, uint32_t *pBytes, const char **pEnd) {
    int32_t length=0;
    uint32_t bytes=0;

    while(s[0]=='\\' && s[1]=='x') {
        int32_t high, low;

        if(length==4) {
            return -1;
        }
        /* the checks stop at the first non-digit, so a NUL is never stepped over */
        high=hexDigitValue(s[2]);
        if(high<0) {
            return -1;
        }
        low=hexDigitValue(s[3]);
        if(low<0) {
            return -1;
        }
        /* a third hex digit makes the group malformed */
        if(hexDigitValue(s[4])>=0) {
            return -1;
        }
        bytes=(bytes<<8)|(uint32_t)((high<<4)|low);
        s+=4;
        ++length;
    }
    if(length==0) {
        return -1;
    }
    if(pEnd!=NULL) {
        *pEnd=s;
    }
    *pBytes=bytes;
    return length;
}
108
109/* Remove all characters followed by '#'. There is an exception if there
110 * is a fallback sign '|' after the comment and the comment does not
111 * start in column 0. In this case, we just blank from '#' to just
112 * before the '|' in order to support the fact that IBM official .ucm
113 * files have the fallback information in comments!
114 */
115static char *
116 removeComments (char *line)
117{
118 char *pound;
119
120 line = (char*)skipWhitespace(line);
121 pound = uprv_strchr (line, '#');
122 if (pound != NULL)
123 {
124 char *fallback = pound == line ? 0 : uprv_strchr(pound + 1, '|');
125 if (fallback != NULL)
126 {
127 uprv_memset(pound, ' ', fallback-pound);
128 }
129 else
130 {
131 *pound = '\0';
132 }
133 }
134 return line;
135}
136
137/* Returns true in c is a in set 'setOfChars', false otherwise
138 */
139static UBool
140 isInSet (char c, const char *setOfChars)
141{
142 uint8_t i = 0;
143
144 while (setOfChars[i] != '\0')
145 {
146 if (c == setOfChars[i++])
147 return TRUE;
148 }
149
150 return FALSE;
151}
152
153/* Returns pointer to the next non-whitespace (or non-separator)
154 */
155static int32_t
156 nextTokenOffset (const char *line, const char *separators)
157{
158 int32_t i = 0;
159
160 while (line[i] && isInSet(line[i], separators))
161 i++;
162
163 return i;
164}
165
166/* Returns pointer to the next token based on the set of separators
167 */
168static char *
169 getToken (char *token, char *line, const char *separators)
170{
171 int32_t i = nextTokenOffset (line, separators);
172 int8_t j = 0;
173
174 while (line[i] && (!isInSet(line[i], separators)))
175 token[j++] = line[i++];
176 token[j] = '\0';
177
178 return line + i;
179}
180
181UBool haveCopyright=TRUE;
182
183static UDataInfo dataInfo={
184 sizeof(UDataInfo),
185 0,
186
187 U_IS_BIG_ENDIAN,
188 U_CHARSET_FAMILY,
189 sizeof(UChar),
190 0,
191
192 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
193 {6, 2, 0, 0}, /* formatVersion */
194 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
195};
196
197void writeConverterData(UConverterSharedData *mySharedData,
198 const char *cnvName,
199 const char *cnvDir,
200 UErrorCode *status)
201{
202 UNewDataMemory *mem = NULL;
203 uint32_t sz2;
204 uint32_t size = 0;
205
206 if(U_FAILURE(*status))
207 {
208 return;
209 }
210
211 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
212
213 if(U_FAILURE(*status))
214 {
215 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
216 cnvName,
217 "cnv",
218 u_errorName(*status));
219 return;
220 }
221
222 if(VERBOSE)
223 {
224 fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv");
225 }
226
227 /* all read only, clean, platform independent data. Mmmm. :) */
228 udata_writeBlock(mem, mySharedData->staticData, sizeof(UConverterStaticData));
229 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
230 /* Now, write the table */
231 size += ((NewConverter *)mySharedData->table)->write((NewConverter *)mySharedData->table, mySharedData->staticData, mem);
232
233 sz2 = udata_finish(mem, status);
234 if(size != sz2)
235 {
236 fprintf(stderr, "error: wrote %d bytes to the .cnv file but counted %d bytes\n", sz2, size);
237 *status=U_INTERNAL_PROGRAM_ERROR;
238 }
239 if(VERBOSE)
240 {
241 fprintf(stderr, "- Wrote %d bytes to the udata.\n", sz2);
242 }
243}
244
245static UOption options[]={
246 UOPTION_HELP_H, /* 0 Numbers for those who*/
247 UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
248 UOPTION_COPYRIGHT, /* 2 */
249 UOPTION_VERSION, /* 3 */
250 UOPTION_DESTDIR, /* 4 */
251 UOPTION_VERBOSE, /* 5 */
252 UOPTION_PACKAGE_NAME, /* 6 */
253 UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */
254};
255
256int main(int argc, char* argv[])
257{
258 UConverterSharedData* mySharedData = NULL;
259 UErrorCode err = U_ZERO_ERROR;
260 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
261 char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
262 const char* destdir, *arg;
263 const char *pkgName = NULL;
264 size_t destdirlen;
265 char* dot = NULL, *outBasename;
266 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
267 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
268 UVersionInfo icuVersion;
269
270 U_MAIN_INIT_ARGS(argc, argv);
271
272 /* Set up the ICU version number */
273 u_getVersion(icuVersion);
274 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
275
276 /* preset then read command line options */
277 options[4].value=u_getDataDirectory();
278 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
279
280 /* error handling, printing usage message */
281 if(argc<0) {
282 fprintf(stderr,
283 "error in command line argument \"%s\"\n",
284 argv[-argc]);
285 } else if(argc<2) {
286 argc=-1;
287 }
288 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
289 fprintf(stderr,
290 "usage: %s [-options] files...\n"
291 "\tread .ucm codepage mapping files and write .cnv files\n"
292 "options:\n"
293 "\t-h or -? or --help this usage text\n"
294 "\t-V or --version show a version message\n"
295 "\t-c or --copyright include a copyright notice\n"
296 "\t-d or --destdir destination directory, followed by the path\n"
297 "\t-v or --verbose Turn on verbose output\n",
298 argv[0]);
299 fprintf(stderr,
300 "\t-p or --pkgname sets the 'package' name for output files.\n"
301 "\t If name is ICUDATA, then the default icu package\n"
302 "\t name will be used.\n"
303 "\t-t or --touchfile Generate additional small file without packagename, for nmake\n");
304 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
305 }
306
307 if(options[3].doesOccur) {
308 fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
309 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
310 fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n");
311 fprintf(stderr,"Corporation and others. All Rights Reserved.\n");
312 exit(0);
313 }
314
315 TOUCHFILE = options[7].doesOccur;
316
317 if(!options[6].doesOccur)
318 {
319 fprintf(stderr, "%s : option -p (package name) is required.\n",
320 argv[0]);
321 exit(1);
322 }
323 else
324 {
325 pkgName =options[6].value;
326 if(!strcmp(pkgName, "ICUDATA"))
327 {
328 pkgName = U_ICUDATA_NAME;
329 }
330 if(pkgName[0] == 0)
331 {
332 pkgName = NULL;
333
334 if(TOUCHFILE)
335 {
336 fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n",
337 argv[0]);
338 exit(1);
339 }
340 }
341 }
342
343 /* get the options values */
344 haveCopyright = options[2].doesOccur;
345 destdir = options[4].value;
346 VERBOSE = options[5].doesOccur;
347
348 if (destdir != NULL && *destdir != 0) {
349 uprv_strcpy(outFileName, destdir);
350 destdirlen = uprv_strlen(destdir);
351 outBasename = outFileName + destdirlen;
352 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
353 *outBasename++ = U_FILE_SEP_CHAR;
354 ++destdirlen;
355 }
356 } else {
357 destdirlen = 0;
358 outBasename = outFileName;
359 }
360
361#if DEBUG
362 {
363 int i;
364 printf("makeconv: processing %d files...\n", argc - 1);
365 for(i=1; i<argc; ++i) {
366 printf("%s ", argv[i]);
367 }
368 printf("\n");
369 fflush(stdout);
370 }
371#endif
372
373 for (++argv; --argc; ++argv)
374 {
375 err = U_ZERO_ERROR;
376 arg = getLongPathname(*argv);
377
378 /*produces the right destination path for display*/
379 if (destdirlen != 0)
380 {
381 const char *basename;
382
383 /* find the last file sepator */
384 basename = uprv_strrchr(arg, U_FILE_SEP_CHAR);
385 if (basename == NULL) {
386 basename = arg;
387 } else {
388 ++basename;
389 }
390
391 uprv_strcpy(outBasename, basename);
392 }
393 else
394 {
395 uprv_strcpy(outFileName, arg);
396 }
397
398 /*removes the extension if any is found*/
399 dot = uprv_strrchr(outBasename, '.');
400 if (dot)
401 {
402 *dot = '\0';
403 }
404
405 /* the basename without extension is the converter name */
406 uprv_strcpy(cnvName, outBasename);
407
408 if(TOUCHFILE)
409 {
410 uprv_strcpy(touchFileName, outBasename);
411 uprv_strcat(touchFileName, ".cnv");
412 }
413
414 if(pkgName != NULL)
415 {
416 /* changes both baename and filename */
417 uprv_strcpy(outBasename, pkgName);
418 uprv_strcat(outBasename, "_");
419 uprv_strcat(outBasename, cnvName);
420 }
421
422
423 /*Adds the target extension*/
424 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
425
426#if DEBUG
427 printf("makeconv: processing %s ...\n", arg);
428 fflush(stdout);
429#endif
430 mySharedData = createConverterFromTableFile(arg, &err);
431
432 if (U_FAILURE(err) || (mySharedData == NULL))
433 {
434 /* if an error is found, print out an error msg and keep going */
435 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err,
436 u_errorName(err));
437 err = U_ZERO_ERROR;
438 }
439 else
440 {
441 /* Make the static data name equal to the file name */
442 if( /*VERBOSE && */ uprv_stricmp(cnvName,mySharedData->staticData->name))
443 {
444 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
445 cnvName,
446 CONVERTER_FILE_EXTENSION,
447 mySharedData->staticData->name);
448 }
449
450 uprv_strcpy((char*)mySharedData->staticData->name, cnvName);
451
452 if(pkgName == NULL)
453 {
454 uprv_strcpy(cnvNameWithPkg, cnvName);
455 }
456 else
457 {
458 uprv_strcpy(cnvNameWithPkg, pkgName);
459 uprv_strcat(cnvNameWithPkg, "_");
460 uprv_strcat(cnvNameWithPkg, cnvName);
461 }
462
463 writeConverterData(mySharedData, cnvNameWithPkg, destdir, &err);
464 ((NewConverter *)mySharedData->table)->close((NewConverter *)mySharedData->table);
465 if(TOUCHFILE)
466 {
467 FileStream *q;
468 char msg[1024];
469
470 sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n",
471 cnvName, pkgName);
472
473 q = T_FileStream_open(touchFileName, "w");
474 if(q == NULL)
475 {
476 fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName);
477 err = U_FILE_ACCESS_ERROR;
478 }
479
480 else
481 {
482 T_FileStream_write(q, msg, uprv_strlen(msg));
483 T_FileStream_close(q);
484 }
485 }
486
487 /* write the information data */
488 uprv_free((UConverterStaticData *)mySharedData->staticData);
489 uprv_free(mySharedData);
490
491 if(U_FAILURE(err))
492 {
493 /* if an error is found, print out an error msg and keep going*/
494 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err,
495 u_errorName(err));
496 }
497 else
498 {
499 puts(outFileName);
500 }
501 }
502 fflush(stdout);
503 fflush(stderr);
504 }
505
506 return err;
507}
508
509static void
510getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
511 if( (name[0]=='i' || name[0]=='I') &&
512 (name[1]=='b' || name[1]=='B') &&
513 (name[2]=='m' || name[2]=='M')
514 ) {
515 name+=3;
516 if(*name=='-') {
517 ++name;
518 }
519 *pPlatform=UCNV_IBM;
520 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
521 } else {
522 *pPlatform=UCNV_UNKNOWN;
523 *pCCSID=0;
524 }
525}
526
527/*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/
528void readHeaderFromFile(UConverterSharedData* mySharedData,
529 FileStream* convFile,
530 const char* converterName,
531 UErrorCode *pErrorCode)
532{
533 char line[200];
534 char *s, *end, *key, *value;
535 UConverterStaticData *staticData;
536 char c;
537
538 if(U_FAILURE(*pErrorCode)) {
539 return;
540 }
541
542 staticData=(UConverterStaticData *)mySharedData->staticData;
543 staticData->conversionType=UCNV_UNSUPPORTED_CONVERTER;
544 staticData->platform=UCNV_IBM;
545 staticData->subCharLen=0;
546
547 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
548 /* remove comments and trailing CR and LF and remove whitespace from the end */
549 for(end=line; (c=*end)!=0; ++end) {
550 if(c=='#' || c=='\r' || c=='\n') {
551 break;
552 }
553 }
554 while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) {
555 --end;
556 }
557 *end=0;
558
559 /* skip leading white space and ignore empty lines */
560 s=(char *)skipWhitespace(line);
561 if(*s==0) {
562 continue;
563 }
564
565 /* stop at the beginning of the mapping section */
566 if(uprv_memcmp(s, "CHARMAP", 7)==0) {
567 break;
568 }
569
570 /* get the key name, bracketed in <> */
571 if(*s!='<') {
572 fprintf(stderr, "error: no header field <key> in line \"%s\"\n", line);
573 *pErrorCode=U_INVALID_TABLE_FORMAT;
574 return;
575 }
576 key=++s;
577 while(*s!='>') {
578 if(*s==0) {
579 fprintf(stderr, "error: incomplete header field <key> in line \"%s\"\n", line);
580 *pErrorCode=U_INVALID_TABLE_FORMAT;
581 return;
582 }
583 ++s;
584 }
585 *s=0;
586
587 /* get the value string, possibly quoted */
588 s=(char *)skipWhitespace(s+1);
589 if(*s!='"') {
590 value=s;
591 } else {
592 /* remove the quotes */
593 value=s+1;
594 if(end>value && *(end-1)=='"') {
595 *--end=0;
596 }
597 }
598
599 /* collect the information from the header field, ignore unknown keys */
600 if(uprv_strcmp(key, "code_set_name")==0) {
601 if(*value!=0) {
602 uprv_strcpy((char*)staticData->name, value);
603 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
604 }
605 } else if(uprv_strcmp(key, "uconv_class")==0) {
606 const UConverterStaticData *prototype;
607
608 if(uprv_strcmp(value, "DBCS")==0) {
609 staticData->conversionType=UCNV_DBCS;
610 } else if(uprv_strcmp(value, "SBCS")==0) {
611 staticData->conversionType = UCNV_SBCS;
612 } else if(uprv_strcmp(value, "MBCS")==0) {
613 staticData->conversionType = UCNV_MBCS;
614 } else if(uprv_strcmp(value, "EBCDIC_STATEFUL")==0) {
615 staticData->conversionType = UCNV_EBCDIC_STATEFUL;
616 } else {
617 fprintf(stderr, "error: unknown <uconv_class> %s\n", value);
618 *pErrorCode=U_INVALID_TABLE_FORMAT;
619 return;
620 }
621
622 /* Now that we know the type, copy any 'default' values from the table. */
623 prototype=ucnv_converterStaticData[staticData->conversionType];
624 if(prototype!=NULL) {
625 if(staticData->name[0]==0) {
626 uprv_strcpy((char*)staticData->name, prototype->name);
627 }
628
629 if(staticData->codepage==0) {
630 staticData->codepage = prototype->codepage;
631 }
632
633 if(staticData->platform==0) {
634 staticData->platform = prototype->platform;
635 }
636
637 if(staticData->minBytesPerChar==0) {
638 staticData->minBytesPerChar = prototype->minBytesPerChar;
639 }
640
641 if(staticData->maxBytesPerChar==0) {
642 staticData->maxBytesPerChar = prototype->maxBytesPerChar;
643 }
644
645 if(staticData->subCharLen==0) {
646 staticData->subCharLen=prototype->subCharLen;
647 if(prototype->subCharLen>0) {
648 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
649 }
650 }
651 }
652 } else if(uprv_strcmp(key, "mb_cur_max")==0) {
653 if('1'<=*value && *value<='4' && value[1]==0) {
654 staticData->maxBytesPerChar=(int8_t)(*value-'0');
655 } else {
656 fprintf(stderr, "error: illegal <mb_cur_max> %s\n", value);
657 *pErrorCode=U_INVALID_TABLE_FORMAT;
658 return;
659 }
660 } else if(uprv_strcmp(key, "mb_cur_min")==0) {
661 if('1'<=*value && *value<='4' && value[1]==0) {
662 staticData->minBytesPerChar=(int8_t)(*value-'0');
663 } else {
664 fprintf(stderr, "error: illegal <mb_cur_min> %s\n", value);
665 *pErrorCode=U_INVALID_TABLE_FORMAT;
666 return;
667 }
668 } else if(uprv_strcmp(key, "subchar")==0) {
669 uint32_t bytes;
670 int32_t length;
671
672 length=parseCodepageBytes(value, &bytes, (const char **)&end);
673 if(length>0 && *end==0) {
674 staticData->subCharLen=(int8_t)length;
675 do {
676 staticData->subChar[--length]=(uint8_t)bytes;
677 bytes>>=8;
678 } while(length>0);
679 } else {
680 fprintf(stderr, "error: illegal <subchar> %s\n", value);
681 *pErrorCode=U_INVALID_TABLE_FORMAT;
682 return;
683 }
684 } else if(uprv_strcmp(key, "subchar1")==0) {
685 uint32_t bytes;
686
687 if(1==parseCodepageBytes(value, &bytes, (const char **)&end) && *end==0) {
688 staticData->subChar1=(uint8_t)bytes;
689 } else {
690 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
691 *pErrorCode=U_INVALID_TABLE_FORMAT;
692 return;
693 }
694 } else if(uprv_strcmp(key, "icu:state")==0) {
695 /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */
696 switch(staticData->conversionType) {
697 case UCNV_SBCS:
698 case UCNV_DBCS:
699 case UCNV_EBCDIC_STATEFUL:
700 staticData->conversionType = UCNV_MBCS;
701 break;
702 case UCNV_MBCS:
703 break;
704 default:
705 fprintf(stderr, "error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n");
706 *pErrorCode=U_INVALID_TABLE_FORMAT;
707 return;
708 }
709
710 if(staticData->maxBytesPerChar==0) {
711 fprintf(stderr, "error: <icu:state> before the <mb_cur_max> line\n");
712 *pErrorCode=U_INVALID_TABLE_FORMAT;
713 return;
714 }
715 if(mySharedData->table==NULL) {
716 mySharedData->table=(UConverterTable *)MBCSOpen(staticData->maxBytesPerChar);
717 if(mySharedData->table==NULL) {
718 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
719 return;
720 }
721 }
722 if(!MBCSAddState((NewConverter *)mySharedData->table, value)) {
723 *pErrorCode=U_INVALID_TABLE_FORMAT;
724 return;
725 }
726 }
727 }
728
729 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
730 *pErrorCode=U_INVALID_TABLE_FORMAT;
731 } else if(staticData->conversionType==UCNV_MBCS && mySharedData->table==NULL) {
732 fprintf(stderr, "error: missing state table information (<icu:state>) for MBCS\n");
733 *pErrorCode=U_INVALID_TABLE_FORMAT;
734 } else if(staticData->subChar1!=0 &&
735 !staticData->conversionType==UCNV_MBCS &&
736 !staticData->conversionType==UCNV_EBCDIC_STATEFUL
737 ) {
738 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
739 *pErrorCode=U_INVALID_TABLE_FORMAT;
740 }
741}
742
743void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err)
744{
745 char storageLine[200];
746 char* line = NULL;
747 UConverterStaticData *staticData=(UConverterStaticData *)sharedData->staticData;
748 NewConverter *cnvData = (NewConverter *)sharedData->table;
749 UChar32 unicodeValue, codepageValue;
750 uint8_t mbcsBytes[8];
751 int32_t mbcsLength;
752 char codepointBytes[20];
753 UBool isOK = TRUE;
754 uint8_t precisionMask = 0, unicodeMask = 0;
755 char endOfLine;
756
757 if(cnvData->startMappings!=NULL)
758 {
759 if(!cnvData->startMappings(cnvData)) {
760 *err = U_INVALID_TABLE_FORMAT;
761 return;
762 }
763 }
764
765 if(cnvData->isValid!=NULL)
766 {
767 const uint8_t *p = staticData->subChar;
768 codepageValue = 0;
769 switch(staticData->subCharLen) {
770 case 4: codepageValue = (codepageValue << 8) | *p++;
771 case 3: codepageValue = (codepageValue << 8) | *p++;
772 case 2: codepageValue = (codepageValue << 8) | *p++;
773 case 1: codepageValue = (codepageValue << 8) | *p;
774 default: break; /* must never occur */
775 }
776 if(!cnvData->isValid(cnvData, staticData->subChar, staticData->subCharLen, codepageValue)) {
777 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
778 *err = U_INVALID_TABLE_FORMAT;
779 isOK = FALSE;
780 }
781 }
782
783 staticData->hasFromUnicodeFallback = staticData->hasToUnicodeFallback = FALSE;
784
785 while (T_FileStream_readLine(convFile, storageLine, sizeof(storageLine)))
786 {
787 removeComments(storageLine);
788 line = storageLine;
789 if (line[nextTokenOffset(line, NLTC_SEPARATORS)] != '\0')
790 {
791 /* get the Unicode code point */
792 line = getToken(codepointBytes, line, UNICODE_CODEPOINT_SEPARATORS);
793 if (uprv_strcmp(codepointBytes, "END") == 0)
794 {
795 break;
796 }
797 unicodeValue = (UChar32)T_CString_stringToInteger(codepointBytes, 16);
798
799 /* get the codepage bytes */
800 codepageValue = 0;
801 mbcsLength = 0;
802 do
803 {
804 line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS);
805 mbcsBytes[mbcsLength] = (uint8_t)T_CString_stringToInteger(codepointBytes, 16);
806 codepageValue = codepageValue << 8 | mbcsBytes[mbcsLength++];
807
808 /* End of line could be \0 or | (if fallback) */
809 endOfLine= line[nextTokenOffset(line, CODEPOINT_SEPARATORS)];
810 } while((endOfLine != '\0') && (endOfLine != FALLBACK_SEPARATOR));
811
812 if(unicodeValue>=0x10000) {
813 unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */
814 } else if(UTF_IS_SURROGATE(unicodeValue)) {
815 unicodeMask|=UCNV_HAS_SURROGATES; /* there are single surrogates */
816 }
817
818 if((uint32_t)unicodeValue > 0x10ffff)
819 {
820 fprintf(stderr, "error: Unicode code point > U+10ffff in '%s'\n", storageLine);
821 isOK = FALSE;
822 }
823 else if(endOfLine == FALLBACK_SEPARATOR)
824 {
825 /* we know that there is a fallback separator */
826 precisionMask |= 1;
827 line = uprv_strchr(line, FALLBACK_SEPARATOR) + 1;
828 switch(*line)
829 {
830 case '0':
831 /* set roundtrip mappings */
832 isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0) &&
833 cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0);
834 break;
835 case '1':
836 /* set only a fallback mapping from Unicode to codepage */
837 staticData->hasFromUnicodeFallback = TRUE;
838 isOK &= cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
839 break;
840 case '2':
841 /* skip subchar mappings */
842 break;
843 case '3':
844 /* set only a fallback mapping from codepage to Unicode */
845 staticData->hasToUnicodeFallback = TRUE;
846 isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1);
847 break;
848 default:
849 fprintf(stderr, "error: illegal fallback indicator '%s' in '%s'\n", line - 1, storageLine);
850 *err = U_INVALID_TABLE_FORMAT;
851 break;
852 }
853 }
854 else
855 {
856 precisionMask |= 2;
857 /* set the mappings */
858 isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1) &&
859 cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1);
860 }
861 }
862 }
863
864 if(unicodeMask == 3)
865 {
866 fprintf(stderr, "warning: contains mappings to both supplementary code points and single surrogates\n");
867 }
868 staticData->unicodeMask = unicodeMask;
869
870 if(cnvData->finishMappings!=NULL)
871 {
872 cnvData->finishMappings(cnvData, staticData);
873 }
874
875 if(!isOK)
876 {
877 *err = U_INVALID_TABLE_FORMAT;
878 }
879 else if(precisionMask == 3)
880 {
881 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
882 *err = U_INVALID_TABLE_FORMAT;
883 }
884}
885
886/*creates a UConverterStaticData, fills in necessary links to it the appropriate function pointers*/
887UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err)
888{
889 FileStream* convFile = NULL;
890 UConverterSharedData* mySharedData = NULL;
891 UConverterStaticData* myStaticData = NULL;
892
893 if (U_FAILURE(*err)) return NULL;
894
895 convFile = T_FileStream_open(converterName, "r");
896 if (convFile == NULL)
897 {
898 *err = U_FILE_ACCESS_ERROR;
899 return NULL;
900 }
901
902
903 mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
904 if (mySharedData == NULL)
905 {
906 *err = U_MEMORY_ALLOCATION_ERROR;
907 T_FileStream_close(convFile);
908 return NULL;
909 }
910
911 uprv_memset(mySharedData, 0, sizeof(UConverterSharedData));
912
913 mySharedData->structSize = sizeof(UConverterSharedData);
914
915 myStaticData = (UConverterStaticData*) uprv_malloc(sizeof(UConverterStaticData));
916 if (myStaticData == NULL)
917 {
918 *err = U_MEMORY_ALLOCATION_ERROR;
919 T_FileStream_close(convFile);
920 return NULL;
921 }
922 uprv_memset(myStaticData, 0, sizeof(UConverterStaticData));
923 mySharedData->staticData = myStaticData;
924 myStaticData->structSize = sizeof(UConverterStaticData);
925 /* mySharedData->staticDataOwned = FALSE; */ /* not owned if in udata */
926 mySharedData->sharedDataCached = FALSE;
927
928 mySharedData->dataMemory = NULL; /* for init */
929
930 readHeaderFromFile(mySharedData, convFile, converterName, err);
931
932 if (U_FAILURE(*err)) return NULL;
933
934 switch (myStaticData->conversionType)
935 {
936 case UCNV_SBCS:
937 {
938 /* SBCS: use MBCS data structure with a default state table */
939 if(mySharedData->staticData->maxBytesPerChar!=1) {
940 fprintf(stderr, "error: SBCS codepage with max bytes/char!=1\n");
941 *err = U_INVALID_TABLE_FORMAT;
942 break;
943 }
944 myStaticData->conversionType = UCNV_MBCS;
945 if(mySharedData->table == NULL) {
946 NewConverter *sharedDataTable = MBCSOpen(1);
947 if(sharedDataTable != NULL) {
948 if(!MBCSAddState(sharedDataTable, "0-ff")) {
949 *err = U_INVALID_TABLE_FORMAT;
950 sharedDataTable->close(sharedDataTable);
951 } else {
952 mySharedData->table = (UConverterTable *)sharedDataTable;
953 }
954 } else {
955 *err = U_MEMORY_ALLOCATION_ERROR;
956 }
957 }
958 break;
959 }
960 case UCNV_MBCS:
961 {
962 /* MBCSOpen() was called by readHeaderFromFile() */
963 break;
964 }
965 case UCNV_EBCDIC_STATEFUL:
966 {
967 /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */
968 if(mySharedData->staticData->maxBytesPerChar!=2) {
969 fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
970 *err = U_INVALID_TABLE_FORMAT;
971 break;
972 }
973 myStaticData->conversionType = UCNV_MBCS;
974 if(mySharedData->table == NULL) {
975 NewConverter *sharedDataTable = MBCSOpen(2);
976 if(sharedDataTable != NULL) {
977 if( !MBCSAddState(sharedDataTable, "0-ff, e:1.s, f:0.s") ||
978 !MBCSAddState(sharedDataTable, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4") ||
979 !MBCSAddState(sharedDataTable, "0-40:1.i, 41-fe:1., ff:1.i") ||
980 !MBCSAddState(sharedDataTable, "0-ff:1.i, 40:1.") ||
981 !MBCSAddState(sharedDataTable, "0-ff:1.i")
982 ) {
983 *err = U_INVALID_TABLE_FORMAT;
984 sharedDataTable->close(sharedDataTable);
985 } else {
986 mySharedData->table = (UConverterTable *)sharedDataTable;
987 }
988 } else {
989 *err = U_MEMORY_ALLOCATION_ERROR;
990 }
991 }
992 break;
993 }
994 case UCNV_DBCS:
995 {
996 /* DBCS: use MBCS data structure with a default state table */
997 if(mySharedData->staticData->maxBytesPerChar!=2) {
998 fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n");
999 *err = U_INVALID_TABLE_FORMAT;
1000 break;
1001 }
1002 myStaticData->conversionType = UCNV_MBCS;
1003 if(mySharedData->table == NULL) {
1004 NewConverter *sharedDataTable = MBCSOpen(2);
1005 if(sharedDataTable != NULL) {
1006 if( !MBCSAddState(sharedDataTable, "0-3f:3, 40:2, 41-fe:1, ff:3") ||
1007 !MBCSAddState(sharedDataTable, "41-fe") ||
1008 !MBCSAddState(sharedDataTable, "40") ||
1009 !MBCSAddState(sharedDataTable, "")
1010 ) {
1011 *err = U_INVALID_TABLE_FORMAT;
1012 sharedDataTable->close(sharedDataTable);
1013 } else {
1014 mySharedData->table = (UConverterTable *)sharedDataTable;
1015 }
1016 } else {
1017 *err = U_MEMORY_ALLOCATION_ERROR;
1018 }
1019 }
1020 break;
1021 }
1022
1023 default :
1024 fprintf(stderr, "error: <uconv_class> omitted\n");
1025 *err = U_INVALID_TABLE_FORMAT;
1026 mySharedData->table = NULL;
1027 break;
1028 };
1029
1030 if(U_SUCCESS(*err) && mySharedData->table != NULL)
1031 {
1032 loadTableFromFile(convFile, mySharedData, err);
1033 }
1034
1035 T_FileStream_close(convFile);
1036
1037 return mySharedData;
1038}
1039
1040/*
1041 * Hey, Emacs, please set the following:
1042 *
1043 * Local Variables:
1044 * indent-tabs-mode: nil
1045 * End:
1046 *
1047 */