]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************** | |
3 | * | |
4 | * Copyright (C) 1998-2003, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************** | |
8 | * | |
9 | * | |
10 | * makeconv.c: | |
11 | * tool creating a binary (compressed) representation of the conversion mapping | |
12 | * table (IBM NLTC ucmap format). | |
13 | * | |
14 | * 05/04/2000 helena Added fallback mapping into the picture... | |
15 | * 06/29/2000 helena Major rewrite of the callback APIs. | |
16 | */ | |
17 | ||
18 | #include <stdio.h> | |
19 | #include "unicode/putil.h" | |
20 | #include "ucnv_io.h" | |
21 | #include "unicode/ucnv_err.h" | |
22 | #include "ucnv_bld.h" | |
23 | #include "ucnv_imp.h" | |
24 | #include "ucnv_cnv.h" | |
25 | #include "cstring.h" | |
26 | #include "cmemory.h" | |
27 | #include "filestrm.h" | |
28 | #include "toolutil.h" | |
29 | #include "uoptions.h" | |
30 | #include "unicode/udata.h" | |
31 | #include "unewdata.h" | |
32 | #include "ucmpwrit.h" | |
33 | #include "makeconv.h" | |
34 | #include "genmbcs.h" | |
35 | ||
36 | #define DEBUG 0 | |
37 | ||
38 | /* | |
39 | * from ucnvstat.c - static prototypes of data-based converters | |
40 | */ | |
41 | extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; | |
42 | ||
43 | /* | |
44 | * Global - verbosity | |
45 | */ | |
46 | UBool VERBOSE = FALSE; | |
47 | UBool TOUCHFILE = FALSE; | |
48 | ||
49 | /*Reads the header of the table file and fills in basic knowledge about the converter | |
50 | *in "converter" | |
51 | */ | |
52 | static void readHeaderFromFile(UConverterSharedData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err); | |
53 | ||
54 | /*Reads the rest of the file and fills in the conversion table of the shared data. | |
55 | Does not return a value; failures are reported through err. */ | |
56 | static void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err); | |
57 | ||
58 | /* creates a UConverterSharedData from a mapping file. | |
59 | * Fills in: *staticData, *table. Converter is NOT otherwise useful. | |
60 | */ | |
61 | static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err); | |
62 | ||
63 | /* | |
64 | * Set up the UNewData and write the converter.. | |
65 | */ | |
66 | void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status); | |
67 | ||
/*
 * Separator sets used while tokenizing mapping lines. Each array is a
 * NUL-terminated list of characters that isInSet() is tested against.
 */
static const char NLTC_SEPARATORS[9] = "\r\n\t <>\"U";
static const char FALLBACK_SEPARATOR = '|';
static const char CODEPOINT_SEPARATORS[8] = "\r>\\x\n \t";
static const char UNICODE_CODEPOINT_SEPARATORS[6] = "<>U \t";
72 | ||
/*
 * Returns a pointer to the first character of s that is neither a space
 * nor a tab. If s contains nothing else, the result points at its
 * terminating NUL.
 */
static const char *
skipWhitespace(const char *s) {
    for(; *s==' ' || *s=='\t'; ++s) {
        /* nothing to do, just advance */
    }
    return s;
}
80 | ||
/* Returns the value (0..15) of the hexadecimal digit c, or -1 if c is not one. */
static int32_t
hexDigitValue(char c) {
    if('0'<=c && c<='9') {
        return c-'0';
    }
    if('a'<=c && c<='f') {
        return c-'a'+10;
    }
    if('A'<=c && c<='F') {
        return c-'A'+10;
    }
    return -1;
}

/*
 * Parses a sequence of one to four codepage bytes written as \xNN\xNN...
 * where each NN is exactly two hexadecimal digits.
 *
 * s       start of the text to parse
 * pBytes  receives the bytes packed big-endian into one integer
 *         (first byte in the most significant used position)
 * pEnd    if not NULL, receives a pointer to the first character after
 *         the last \xNN that was consumed
 *
 * Returns the number of bytes parsed (1..4), or -1 if there is no \xNN at
 * s, if a \x is not followed by two hex digits, or if there are more than
 * four bytes. On failure *pBytes and *pEnd are not modified.
 */
static int32_t
parseCodepageBytes(const char *s, uint32_t *pBytes, const char **pEnd) {
    int32_t length=0;
    uint32_t bytes=0;

    while(s[0]=='\\' && s[1]=='x') {
        int32_t high, low;

        if(length==4) {
            return -1;
        }
        /*
         * Check the two digits one at a time: a general number parser would
         * also accept leading whitespace or a sign here, and s[3] must not
         * be read when s[2] is already the terminator.
         */
        high=hexDigitValue(s[2]);
        if(high<0) {
            return -1;
        }
        low=hexDigitValue(s[3]);
        if(low<0) {
            return -1;
        }
        bytes=(bytes<<8)|(uint32_t)((high<<4)|low);
        s+=4;
        ++length;
    }
    if(length==0) {
        return -1;
    }
    if(pEnd!=NULL) {
        *pEnd=s;
    }
    *pBytes=bytes;
    return length;
}
108 | ||
/*
 * Strips the comment that starts at the first '#' from a line, in place.
 *
 * Normally the line is simply terminated at the '#'. The exception: if the
 * '#' is not the first non-blank character of the line and a fallback sign
 * '|' follows it, only the text from '#' up to (not including) the '|' is
 * overwritten with spaces. This keeps the fallback indicator readable in
 * IBM's official .ucm files, which carry it inside the comment.
 *
 * Returns a pointer to the first character of line that is not a space or
 * a tab.
 */
static char *
removeComments (char *line)
{
    char *start = (char *)skipWhitespace(line);
    char *hash = uprv_strchr(start, '#');
    char *bar = NULL;

    if (hash == NULL)
    {
        /* no comment on this line */
        return start;
    }

    /* a comment that opens the line never carries a fallback indicator */
    if (hash != start)
    {
        bar = uprv_strchr(hash + 1, '|');
    }

    if (bar == NULL)
    {
        *hash = '\0';
    }
    else
    {
        uprv_memset(hash, ' ', bar - hash);
    }
    return start;
}
136 | ||
137 | /* Returns true in c is a in set 'setOfChars', false otherwise | |
138 | */ | |
139 | static UBool | |
140 | isInSet (char c, const char *setOfChars) | |
141 | { | |
142 | uint8_t i = 0; | |
143 | ||
144 | while (setOfChars[i] != '\0') | |
145 | { | |
146 | if (c == setOfChars[i++]) | |
147 | return TRUE; | |
148 | } | |
149 | ||
150 | return FALSE; | |
151 | } | |
152 | ||
/*
 * Returns the offset of the first character in line that is not one of
 * the separators. If line consists only of separators, the offset of its
 * terminating NUL is returned.
 */
static int32_t
nextTokenOffset (const char *line, const char *separators)
{
    int32_t offset;

    for (offset = 0; line[offset] != '\0'; ++offset)
    {
        if (!isInSet(line[offset], separators))
        {
            break;
        }
    }

    return offset;
}
165 | ||
/*
 * Copies the next token of line into token and NUL-terminates it.
 *
 * Leading separator characters are skipped; the token then runs up to the
 * next separator or the end of line. token becomes an empty string when
 * nothing but separators is left.
 *
 * Returns a pointer to the character in line just after the token.
 *
 * NOTE(review): the size of token is not passed in, so no bounds check is
 * possible here; the caller must supply a buffer that can hold the longest
 * token that may occur plus the terminating NUL.
 */
static char *
getToken (char *token, char *line, const char *separators)
{
    int32_t i = nextTokenOffset (line, separators);
    /* same width as i: a narrower index would wrap on long tokens */
    int32_t j = 0;

    while (line[i] != '\0' && !isInSet(line[i], separators))
    {
        token[j++] = line[i++];
    }
    token[j] = '\0';

    return line + i;
}
180 | ||
181 | UBool haveCopyright=TRUE; | |
182 | ||
183 | static UDataInfo dataInfo={ | |
184 | sizeof(UDataInfo), | |
185 | 0, | |
186 | ||
187 | U_IS_BIG_ENDIAN, | |
188 | U_CHARSET_FAMILY, | |
189 | sizeof(UChar), | |
190 | 0, | |
191 | ||
192 | {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ | |
193 | {6, 2, 0, 0}, /* formatVersion */ | |
194 | {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ | |
195 | }; | |
196 | ||
197 | void writeConverterData(UConverterSharedData *mySharedData, | |
198 | const char *cnvName, | |
199 | const char *cnvDir, | |
200 | UErrorCode *status) | |
201 | { | |
202 | UNewDataMemory *mem = NULL; | |
203 | uint32_t sz2; | |
204 | uint32_t size = 0; | |
205 | ||
206 | if(U_FAILURE(*status)) | |
207 | { | |
208 | return; | |
209 | } | |
210 | ||
211 | mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); | |
212 | ||
213 | if(U_FAILURE(*status)) | |
214 | { | |
215 | fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", | |
216 | cnvName, | |
217 | "cnv", | |
218 | u_errorName(*status)); | |
219 | return; | |
220 | } | |
221 | ||
222 | if(VERBOSE) | |
223 | { | |
224 | fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv"); | |
225 | } | |
226 | ||
227 | /* all read only, clean, platform independent data. Mmmm. :) */ | |
228 | udata_writeBlock(mem, mySharedData->staticData, sizeof(UConverterStaticData)); | |
229 | size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ | |
230 | /* Now, write the table */ | |
231 | size += ((NewConverter *)mySharedData->table)->write((NewConverter *)mySharedData->table, mySharedData->staticData, mem); | |
232 | ||
233 | sz2 = udata_finish(mem, status); | |
234 | if(size != sz2) | |
235 | { | |
236 | fprintf(stderr, "error: wrote %d bytes to the .cnv file but counted %d bytes\n", sz2, size); | |
237 | *status=U_INTERNAL_PROGRAM_ERROR; | |
238 | } | |
239 | if(VERBOSE) | |
240 | { | |
241 | fprintf(stderr, "- Wrote %d bytes to the udata.\n", sz2); | |
242 | } | |
243 | } | |
244 | ||
245 | static UOption options[]={ | |
246 | UOPTION_HELP_H, /* 0 Numbers for those who*/ | |
247 | UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */ | |
248 | UOPTION_COPYRIGHT, /* 2 */ | |
249 | UOPTION_VERSION, /* 3 */ | |
250 | UOPTION_DESTDIR, /* 4 */ | |
251 | UOPTION_VERBOSE, /* 5 */ | |
252 | UOPTION_PACKAGE_NAME, /* 6 */ | |
253 | UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG) /* 7 */ | |
254 | }; | |
255 | ||
256 | int main(int argc, char* argv[]) | |
257 | { | |
258 | UConverterSharedData* mySharedData = NULL; | |
259 | UErrorCode err = U_ZERO_ERROR; | |
260 | char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
261 | char touchFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
262 | const char* destdir, *arg; | |
263 | const char *pkgName = NULL; | |
264 | size_t destdirlen; | |
265 | char* dot = NULL, *outBasename; | |
266 | char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
267 | char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | |
268 | UVersionInfo icuVersion; | |
269 | ||
270 | U_MAIN_INIT_ARGS(argc, argv); | |
271 | ||
272 | /* Set up the ICU version number */ | |
273 | u_getVersion(icuVersion); | |
274 | uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); | |
275 | ||
276 | /* preset then read command line options */ | |
277 | options[4].value=u_getDataDirectory(); | |
278 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); | |
279 | ||
280 | /* error handling, printing usage message */ | |
281 | if(argc<0) { | |
282 | fprintf(stderr, | |
283 | "error in command line argument \"%s\"\n", | |
284 | argv[-argc]); | |
285 | } else if(argc<2) { | |
286 | argc=-1; | |
287 | } | |
288 | if(argc<0 || options[0].doesOccur || options[1].doesOccur) { | |
289 | fprintf(stderr, | |
290 | "usage: %s [-options] files...\n" | |
291 | "\tread .ucm codepage mapping files and write .cnv files\n" | |
292 | "options:\n" | |
293 | "\t-h or -? or --help this usage text\n" | |
294 | "\t-V or --version show a version message\n" | |
295 | "\t-c or --copyright include a copyright notice\n" | |
296 | "\t-d or --destdir destination directory, followed by the path\n" | |
297 | "\t-v or --verbose Turn on verbose output\n", | |
298 | argv[0]); | |
299 | fprintf(stderr, | |
300 | "\t-p or --pkgname sets the 'package' name for output files.\n" | |
301 | "\t If name is ICUDATA, then the default icu package\n" | |
302 | "\t name will be used.\n" | |
303 | "\t-t or --touchfile Generate additional small file without packagename, for nmake\n"); | |
304 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; | |
305 | } | |
306 | ||
307 | if(options[3].doesOccur) { | |
308 | fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", | |
309 | dataInfo.formatVersion[0], dataInfo.formatVersion[1]); | |
310 | fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n"); | |
311 | fprintf(stderr,"Corporation and others. All Rights Reserved.\n"); | |
312 | exit(0); | |
313 | } | |
314 | ||
315 | TOUCHFILE = options[7].doesOccur; | |
316 | ||
317 | if(!options[6].doesOccur) | |
318 | { | |
319 | fprintf(stderr, "%s : option -p (package name) is required.\n", | |
320 | argv[0]); | |
321 | exit(1); | |
322 | } | |
323 | else | |
324 | { | |
325 | pkgName =options[6].value; | |
326 | if(!strcmp(pkgName, "ICUDATA")) | |
327 | { | |
328 | pkgName = U_ICUDATA_NAME; | |
329 | } | |
330 | if(pkgName[0] == 0) | |
331 | { | |
332 | pkgName = NULL; | |
333 | ||
334 | if(TOUCHFILE) | |
335 | { | |
336 | fprintf(stderr, "%s: Don't use touchfile option with an empty packagename.\n", | |
337 | argv[0]); | |
338 | exit(1); | |
339 | } | |
340 | } | |
341 | } | |
342 | ||
343 | /* get the options values */ | |
344 | haveCopyright = options[2].doesOccur; | |
345 | destdir = options[4].value; | |
346 | VERBOSE = options[5].doesOccur; | |
347 | ||
348 | if (destdir != NULL && *destdir != 0) { | |
349 | uprv_strcpy(outFileName, destdir); | |
350 | destdirlen = uprv_strlen(destdir); | |
351 | outBasename = outFileName + destdirlen; | |
352 | if (*(outBasename - 1) != U_FILE_SEP_CHAR) { | |
353 | *outBasename++ = U_FILE_SEP_CHAR; | |
354 | ++destdirlen; | |
355 | } | |
356 | } else { | |
357 | destdirlen = 0; | |
358 | outBasename = outFileName; | |
359 | } | |
360 | ||
361 | #if DEBUG | |
362 | { | |
363 | int i; | |
364 | printf("makeconv: processing %d files...\n", argc - 1); | |
365 | for(i=1; i<argc; ++i) { | |
366 | printf("%s ", argv[i]); | |
367 | } | |
368 | printf("\n"); | |
369 | fflush(stdout); | |
370 | } | |
371 | #endif | |
372 | ||
373 | for (++argv; --argc; ++argv) | |
374 | { | |
375 | err = U_ZERO_ERROR; | |
376 | arg = getLongPathname(*argv); | |
377 | ||
378 | /*produces the right destination path for display*/ | |
379 | if (destdirlen != 0) | |
380 | { | |
381 | const char *basename; | |
382 | ||
383 | /* find the last file sepator */ | |
384 | basename = uprv_strrchr(arg, U_FILE_SEP_CHAR); | |
385 | if (basename == NULL) { | |
386 | basename = arg; | |
387 | } else { | |
388 | ++basename; | |
389 | } | |
390 | ||
391 | uprv_strcpy(outBasename, basename); | |
392 | } | |
393 | else | |
394 | { | |
395 | uprv_strcpy(outFileName, arg); | |
396 | } | |
397 | ||
398 | /*removes the extension if any is found*/ | |
399 | dot = uprv_strrchr(outBasename, '.'); | |
400 | if (dot) | |
401 | { | |
402 | *dot = '\0'; | |
403 | } | |
404 | ||
405 | /* the basename without extension is the converter name */ | |
406 | uprv_strcpy(cnvName, outBasename); | |
407 | ||
408 | if(TOUCHFILE) | |
409 | { | |
410 | uprv_strcpy(touchFileName, outBasename); | |
411 | uprv_strcat(touchFileName, ".cnv"); | |
412 | } | |
413 | ||
414 | if(pkgName != NULL) | |
415 | { | |
416 | /* changes both baename and filename */ | |
417 | uprv_strcpy(outBasename, pkgName); | |
418 | uprv_strcat(outBasename, "_"); | |
419 | uprv_strcat(outBasename, cnvName); | |
420 | } | |
421 | ||
422 | ||
423 | /*Adds the target extension*/ | |
424 | uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); | |
425 | ||
426 | #if DEBUG | |
427 | printf("makeconv: processing %s ...\n", arg); | |
428 | fflush(stdout); | |
429 | #endif | |
430 | mySharedData = createConverterFromTableFile(arg, &err); | |
431 | ||
432 | if (U_FAILURE(err) || (mySharedData == NULL)) | |
433 | { | |
434 | /* if an error is found, print out an error msg and keep going */ | |
435 | fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, | |
436 | u_errorName(err)); | |
437 | err = U_ZERO_ERROR; | |
438 | } | |
439 | else | |
440 | { | |
441 | /* Make the static data name equal to the file name */ | |
442 | if( /*VERBOSE && */ uprv_stricmp(cnvName,mySharedData->staticData->name)) | |
443 | { | |
444 | fprintf(stderr, "Warning: %s%s claims to be '%s'\n", | |
445 | cnvName, | |
446 | CONVERTER_FILE_EXTENSION, | |
447 | mySharedData->staticData->name); | |
448 | } | |
449 | ||
450 | uprv_strcpy((char*)mySharedData->staticData->name, cnvName); | |
451 | ||
452 | if(pkgName == NULL) | |
453 | { | |
454 | uprv_strcpy(cnvNameWithPkg, cnvName); | |
455 | } | |
456 | else | |
457 | { | |
458 | uprv_strcpy(cnvNameWithPkg, pkgName); | |
459 | uprv_strcat(cnvNameWithPkg, "_"); | |
460 | uprv_strcat(cnvNameWithPkg, cnvName); | |
461 | } | |
462 | ||
463 | writeConverterData(mySharedData, cnvNameWithPkg, destdir, &err); | |
464 | ((NewConverter *)mySharedData->table)->close((NewConverter *)mySharedData->table); | |
465 | if(TOUCHFILE) | |
466 | { | |
467 | FileStream *q; | |
468 | char msg[1024]; | |
469 | ||
470 | sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n", | |
471 | cnvName, pkgName); | |
472 | ||
473 | q = T_FileStream_open(touchFileName, "w"); | |
474 | if(q == NULL) | |
475 | { | |
476 | fprintf(stderr, "Error writing touchfile \"%s\"\n", touchFileName); | |
477 | err = U_FILE_ACCESS_ERROR; | |
478 | } | |
479 | ||
480 | else | |
481 | { | |
482 | T_FileStream_write(q, msg, uprv_strlen(msg)); | |
483 | T_FileStream_close(q); | |
484 | } | |
485 | } | |
486 | ||
487 | /* write the information data */ | |
488 | uprv_free((UConverterStaticData *)mySharedData->staticData); | |
489 | uprv_free(mySharedData); | |
490 | ||
491 | if(U_FAILURE(err)) | |
492 | { | |
493 | /* if an error is found, print out an error msg and keep going*/ | |
494 | fprintf(stderr, "Error writing \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, | |
495 | u_errorName(err)); | |
496 | } | |
497 | else | |
498 | { | |
499 | puts(outFileName); | |
500 | } | |
501 | } | |
502 | fflush(stdout); | |
503 | fflush(stderr); | |
504 | } | |
505 | ||
506 | return err; | |
507 | } | |
508 | ||
509 | static void | |
510 | getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { | |
511 | if( (name[0]=='i' || name[0]=='I') && | |
512 | (name[1]=='b' || name[1]=='B') && | |
513 | (name[2]=='m' || name[2]=='M') | |
514 | ) { | |
515 | name+=3; | |
516 | if(*name=='-') { | |
517 | ++name; | |
518 | } | |
519 | *pPlatform=UCNV_IBM; | |
520 | *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); | |
521 | } else { | |
522 | *pPlatform=UCNV_UNKNOWN; | |
523 | *pCCSID=0; | |
524 | } | |
525 | } | |
526 | ||
527 | /*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/ | |
528 | void readHeaderFromFile(UConverterSharedData* mySharedData, | |
529 | FileStream* convFile, | |
530 | const char* converterName, | |
531 | UErrorCode *pErrorCode) | |
532 | { | |
533 | char line[200]; | |
534 | char *s, *end, *key, *value; | |
535 | UConverterStaticData *staticData; | |
536 | char c; | |
537 | ||
538 | if(U_FAILURE(*pErrorCode)) { | |
539 | return; | |
540 | } | |
541 | ||
542 | staticData=(UConverterStaticData *)mySharedData->staticData; | |
543 | staticData->conversionType=UCNV_UNSUPPORTED_CONVERTER; | |
544 | staticData->platform=UCNV_IBM; | |
545 | staticData->subCharLen=0; | |
546 | ||
547 | while(T_FileStream_readLine(convFile, line, sizeof(line))) { | |
548 | /* remove comments and trailing CR and LF and remove whitespace from the end */ | |
549 | for(end=line; (c=*end)!=0; ++end) { | |
550 | if(c=='#' || c=='\r' || c=='\n') { | |
551 | break; | |
552 | } | |
553 | } | |
554 | while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) { | |
555 | --end; | |
556 | } | |
557 | *end=0; | |
558 | ||
559 | /* skip leading white space and ignore empty lines */ | |
560 | s=(char *)skipWhitespace(line); | |
561 | if(*s==0) { | |
562 | continue; | |
563 | } | |
564 | ||
565 | /* stop at the beginning of the mapping section */ | |
566 | if(uprv_memcmp(s, "CHARMAP", 7)==0) { | |
567 | break; | |
568 | } | |
569 | ||
570 | /* get the key name, bracketed in <> */ | |
571 | if(*s!='<') { | |
572 | fprintf(stderr, "error: no header field <key> in line \"%s\"\n", line); | |
573 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
574 | return; | |
575 | } | |
576 | key=++s; | |
577 | while(*s!='>') { | |
578 | if(*s==0) { | |
579 | fprintf(stderr, "error: incomplete header field <key> in line \"%s\"\n", line); | |
580 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
581 | return; | |
582 | } | |
583 | ++s; | |
584 | } | |
585 | *s=0; | |
586 | ||
587 | /* get the value string, possibly quoted */ | |
588 | s=(char *)skipWhitespace(s+1); | |
589 | if(*s!='"') { | |
590 | value=s; | |
591 | } else { | |
592 | /* remove the quotes */ | |
593 | value=s+1; | |
594 | if(end>value && *(end-1)=='"') { | |
595 | *--end=0; | |
596 | } | |
597 | } | |
598 | ||
599 | /* collect the information from the header field, ignore unknown keys */ | |
600 | if(uprv_strcmp(key, "code_set_name")==0) { | |
601 | if(*value!=0) { | |
602 | uprv_strcpy((char*)staticData->name, value); | |
603 | getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); | |
604 | } | |
605 | } else if(uprv_strcmp(key, "uconv_class")==0) { | |
606 | const UConverterStaticData *prototype; | |
607 | ||
608 | if(uprv_strcmp(value, "DBCS")==0) { | |
609 | staticData->conversionType=UCNV_DBCS; | |
610 | } else if(uprv_strcmp(value, "SBCS")==0) { | |
611 | staticData->conversionType = UCNV_SBCS; | |
612 | } else if(uprv_strcmp(value, "MBCS")==0) { | |
613 | staticData->conversionType = UCNV_MBCS; | |
614 | } else if(uprv_strcmp(value, "EBCDIC_STATEFUL")==0) { | |
615 | staticData->conversionType = UCNV_EBCDIC_STATEFUL; | |
616 | } else { | |
617 | fprintf(stderr, "error: unknown <uconv_class> %s\n", value); | |
618 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
619 | return; | |
620 | } | |
621 | ||
622 | /* Now that we know the type, copy any 'default' values from the table. */ | |
623 | prototype=ucnv_converterStaticData[staticData->conversionType]; | |
624 | if(prototype!=NULL) { | |
625 | if(staticData->name[0]==0) { | |
626 | uprv_strcpy((char*)staticData->name, prototype->name); | |
627 | } | |
628 | ||
629 | if(staticData->codepage==0) { | |
630 | staticData->codepage = prototype->codepage; | |
631 | } | |
632 | ||
633 | if(staticData->platform==0) { | |
634 | staticData->platform = prototype->platform; | |
635 | } | |
636 | ||
637 | if(staticData->minBytesPerChar==0) { | |
638 | staticData->minBytesPerChar = prototype->minBytesPerChar; | |
639 | } | |
640 | ||
641 | if(staticData->maxBytesPerChar==0) { | |
642 | staticData->maxBytesPerChar = prototype->maxBytesPerChar; | |
643 | } | |
644 | ||
645 | if(staticData->subCharLen==0) { | |
646 | staticData->subCharLen=prototype->subCharLen; | |
647 | if(prototype->subCharLen>0) { | |
648 | uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); | |
649 | } | |
650 | } | |
651 | } | |
652 | } else if(uprv_strcmp(key, "mb_cur_max")==0) { | |
653 | if('1'<=*value && *value<='4' && value[1]==0) { | |
654 | staticData->maxBytesPerChar=(int8_t)(*value-'0'); | |
655 | } else { | |
656 | fprintf(stderr, "error: illegal <mb_cur_max> %s\n", value); | |
657 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
658 | return; | |
659 | } | |
660 | } else if(uprv_strcmp(key, "mb_cur_min")==0) { | |
661 | if('1'<=*value && *value<='4' && value[1]==0) { | |
662 | staticData->minBytesPerChar=(int8_t)(*value-'0'); | |
663 | } else { | |
664 | fprintf(stderr, "error: illegal <mb_cur_min> %s\n", value); | |
665 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
666 | return; | |
667 | } | |
668 | } else if(uprv_strcmp(key, "subchar")==0) { | |
669 | uint32_t bytes; | |
670 | int32_t length; | |
671 | ||
672 | length=parseCodepageBytes(value, &bytes, (const char **)&end); | |
673 | if(length>0 && *end==0) { | |
674 | staticData->subCharLen=(int8_t)length; | |
675 | do { | |
676 | staticData->subChar[--length]=(uint8_t)bytes; | |
677 | bytes>>=8; | |
678 | } while(length>0); | |
679 | } else { | |
680 | fprintf(stderr, "error: illegal <subchar> %s\n", value); | |
681 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
682 | return; | |
683 | } | |
684 | } else if(uprv_strcmp(key, "subchar1")==0) { | |
685 | uint32_t bytes; | |
686 | ||
687 | if(1==parseCodepageBytes(value, &bytes, (const char **)&end) && *end==0) { | |
688 | staticData->subChar1=(uint8_t)bytes; | |
689 | } else { | |
690 | fprintf(stderr, "error: illegal <subchar1> %s\n", value); | |
691 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
692 | return; | |
693 | } | |
694 | } else if(uprv_strcmp(key, "icu:state")==0) { | |
695 | /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */ | |
696 | switch(staticData->conversionType) { | |
697 | case UCNV_SBCS: | |
698 | case UCNV_DBCS: | |
699 | case UCNV_EBCDIC_STATEFUL: | |
700 | staticData->conversionType = UCNV_MBCS; | |
701 | break; | |
702 | case UCNV_MBCS: | |
703 | break; | |
704 | default: | |
705 | fprintf(stderr, "error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n"); | |
706 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
707 | return; | |
708 | } | |
709 | ||
710 | if(staticData->maxBytesPerChar==0) { | |
711 | fprintf(stderr, "error: <icu:state> before the <mb_cur_max> line\n"); | |
712 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
713 | return; | |
714 | } | |
715 | if(mySharedData->table==NULL) { | |
716 | mySharedData->table=(UConverterTable *)MBCSOpen(staticData->maxBytesPerChar); | |
717 | if(mySharedData->table==NULL) { | |
718 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
719 | return; | |
720 | } | |
721 | } | |
722 | if(!MBCSAddState((NewConverter *)mySharedData->table, value)) { | |
723 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
724 | return; | |
725 | } | |
726 | } | |
727 | } | |
728 | ||
729 | if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { | |
730 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
731 | } else if(staticData->conversionType==UCNV_MBCS && mySharedData->table==NULL) { | |
732 | fprintf(stderr, "error: missing state table information (<icu:state>) for MBCS\n"); | |
733 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
734 | } else if(staticData->subChar1!=0 && | |
735 | !staticData->conversionType==UCNV_MBCS && | |
736 | !staticData->conversionType==UCNV_EBCDIC_STATEFUL | |
737 | ) { | |
738 | fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); | |
739 | *pErrorCode=U_INVALID_TABLE_FORMAT; | |
740 | } | |
741 | } | |
742 | ||
743 | void loadTableFromFile(FileStream* convFile, UConverterSharedData* sharedData, UErrorCode* err) | |
744 | { | |
745 | char storageLine[200]; | |
746 | char* line = NULL; | |
747 | UConverterStaticData *staticData=(UConverterStaticData *)sharedData->staticData; | |
748 | NewConverter *cnvData = (NewConverter *)sharedData->table; | |
749 | UChar32 unicodeValue, codepageValue; | |
750 | uint8_t mbcsBytes[8]; | |
751 | int32_t mbcsLength; | |
752 | char codepointBytes[20]; | |
753 | UBool isOK = TRUE; | |
754 | uint8_t precisionMask = 0, unicodeMask = 0; | |
755 | char endOfLine; | |
756 | ||
757 | if(cnvData->startMappings!=NULL) | |
758 | { | |
759 | if(!cnvData->startMappings(cnvData)) { | |
760 | *err = U_INVALID_TABLE_FORMAT; | |
761 | return; | |
762 | } | |
763 | } | |
764 | ||
765 | if(cnvData->isValid!=NULL) | |
766 | { | |
767 | const uint8_t *p = staticData->subChar; | |
768 | codepageValue = 0; | |
769 | switch(staticData->subCharLen) { | |
770 | case 4: codepageValue = (codepageValue << 8) | *p++; | |
771 | case 3: codepageValue = (codepageValue << 8) | *p++; | |
772 | case 2: codepageValue = (codepageValue << 8) | *p++; | |
773 | case 1: codepageValue = (codepageValue << 8) | *p; | |
774 | default: break; /* must never occur */ | |
775 | } | |
776 | if(!cnvData->isValid(cnvData, staticData->subChar, staticData->subCharLen, codepageValue)) { | |
777 | fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); | |
778 | *err = U_INVALID_TABLE_FORMAT; | |
779 | isOK = FALSE; | |
780 | } | |
781 | } | |
782 | ||
783 | staticData->hasFromUnicodeFallback = staticData->hasToUnicodeFallback = FALSE; | |
784 | ||
785 | while (T_FileStream_readLine(convFile, storageLine, sizeof(storageLine))) | |
786 | { | |
787 | removeComments(storageLine); | |
788 | line = storageLine; | |
789 | if (line[nextTokenOffset(line, NLTC_SEPARATORS)] != '\0') | |
790 | { | |
791 | /* get the Unicode code point */ | |
792 | line = getToken(codepointBytes, line, UNICODE_CODEPOINT_SEPARATORS); | |
793 | if (uprv_strcmp(codepointBytes, "END") == 0) | |
794 | { | |
795 | break; | |
796 | } | |
797 | unicodeValue = (UChar32)T_CString_stringToInteger(codepointBytes, 16); | |
798 | ||
799 | /* get the codepage bytes */ | |
800 | codepageValue = 0; | |
801 | mbcsLength = 0; | |
802 | do | |
803 | { | |
804 | line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS); | |
805 | mbcsBytes[mbcsLength] = (uint8_t)T_CString_stringToInteger(codepointBytes, 16); | |
806 | codepageValue = codepageValue << 8 | mbcsBytes[mbcsLength++]; | |
807 | ||
808 | /* End of line could be \0 or | (if fallback) */ | |
809 | endOfLine= line[nextTokenOffset(line, CODEPOINT_SEPARATORS)]; | |
810 | } while((endOfLine != '\0') && (endOfLine != FALLBACK_SEPARATOR)); | |
811 | ||
812 | if(unicodeValue>=0x10000) { | |
813 | unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ | |
814 | } else if(UTF_IS_SURROGATE(unicodeValue)) { | |
815 | unicodeMask|=UCNV_HAS_SURROGATES; /* there are single surrogates */ | |
816 | } | |
817 | ||
818 | if((uint32_t)unicodeValue > 0x10ffff) | |
819 | { | |
820 | fprintf(stderr, "error: Unicode code point > U+10ffff in '%s'\n", storageLine); | |
821 | isOK = FALSE; | |
822 | } | |
823 | else if(endOfLine == FALLBACK_SEPARATOR) | |
824 | { | |
825 | /* we know that there is a fallback separator */ | |
826 | precisionMask |= 1; | |
827 | line = uprv_strchr(line, FALLBACK_SEPARATOR) + 1; | |
828 | switch(*line) | |
829 | { | |
830 | case '0': | |
831 | /* set roundtrip mappings */ | |
832 | isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0) && | |
833 | cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 0); | |
834 | break; | |
835 | case '1': | |
836 | /* set only a fallback mapping from Unicode to codepage */ | |
837 | staticData->hasFromUnicodeFallback = TRUE; | |
838 | isOK &= cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1); | |
839 | break; | |
840 | case '2': | |
841 | /* skip subchar mappings */ | |
842 | break; | |
843 | case '3': | |
844 | /* set only a fallback mapping from codepage to Unicode */ | |
845 | staticData->hasToUnicodeFallback = TRUE; | |
846 | isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, 1); | |
847 | break; | |
848 | default: | |
849 | fprintf(stderr, "error: illegal fallback indicator '%s' in '%s'\n", line - 1, storageLine); | |
850 | *err = U_INVALID_TABLE_FORMAT; | |
851 | break; | |
852 | } | |
853 | } | |
854 | else | |
855 | { | |
856 | precisionMask |= 2; | |
857 | /* set the mappings */ | |
858 | isOK &= cnvData->addToUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1) && | |
859 | cnvData->addFromUnicode(cnvData, mbcsBytes, mbcsLength, unicodeValue, codepageValue, -1); | |
860 | } | |
861 | } | |
862 | } | |
863 | ||
864 | if(unicodeMask == 3) | |
865 | { | |
866 | fprintf(stderr, "warning: contains mappings to both supplementary code points and single surrogates\n"); | |
867 | } | |
868 | staticData->unicodeMask = unicodeMask; | |
869 | ||
870 | if(cnvData->finishMappings!=NULL) | |
871 | { | |
872 | cnvData->finishMappings(cnvData, staticData); | |
873 | } | |
874 | ||
875 | if(!isOK) | |
876 | { | |
877 | *err = U_INVALID_TABLE_FORMAT; | |
878 | } | |
879 | else if(precisionMask == 3) | |
880 | { | |
881 | fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); | |
882 | *err = U_INVALID_TABLE_FORMAT; | |
883 | } | |
884 | } | |
885 | ||
886 | /*creates a UConverterStaticData, fills in necessary links to it the appropriate function pointers*/ | |
887 | UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err) | |
888 | { | |
889 | FileStream* convFile = NULL; | |
890 | UConverterSharedData* mySharedData = NULL; | |
891 | UConverterStaticData* myStaticData = NULL; | |
892 | ||
893 | if (U_FAILURE(*err)) return NULL; | |
894 | ||
895 | convFile = T_FileStream_open(converterName, "r"); | |
896 | if (convFile == NULL) | |
897 | { | |
898 | *err = U_FILE_ACCESS_ERROR; | |
899 | return NULL; | |
900 | } | |
901 | ||
902 | ||
903 | mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData)); | |
904 | if (mySharedData == NULL) | |
905 | { | |
906 | *err = U_MEMORY_ALLOCATION_ERROR; | |
907 | T_FileStream_close(convFile); | |
908 | return NULL; | |
909 | } | |
910 | ||
911 | uprv_memset(mySharedData, 0, sizeof(UConverterSharedData)); | |
912 | ||
913 | mySharedData->structSize = sizeof(UConverterSharedData); | |
914 | ||
915 | myStaticData = (UConverterStaticData*) uprv_malloc(sizeof(UConverterStaticData)); | |
916 | if (myStaticData == NULL) | |
917 | { | |
918 | *err = U_MEMORY_ALLOCATION_ERROR; | |
919 | T_FileStream_close(convFile); | |
920 | return NULL; | |
921 | } | |
922 | uprv_memset(myStaticData, 0, sizeof(UConverterStaticData)); | |
923 | mySharedData->staticData = myStaticData; | |
924 | myStaticData->structSize = sizeof(UConverterStaticData); | |
925 | /* mySharedData->staticDataOwned = FALSE; */ /* not owned if in udata */ | |
926 | mySharedData->sharedDataCached = FALSE; | |
927 | ||
928 | mySharedData->dataMemory = NULL; /* for init */ | |
929 | ||
930 | readHeaderFromFile(mySharedData, convFile, converterName, err); | |
931 | ||
932 | if (U_FAILURE(*err)) return NULL; | |
933 | ||
934 | switch (myStaticData->conversionType) | |
935 | { | |
936 | case UCNV_SBCS: | |
937 | { | |
938 | /* SBCS: use MBCS data structure with a default state table */ | |
939 | if(mySharedData->staticData->maxBytesPerChar!=1) { | |
940 | fprintf(stderr, "error: SBCS codepage with max bytes/char!=1\n"); | |
941 | *err = U_INVALID_TABLE_FORMAT; | |
942 | break; | |
943 | } | |
944 | myStaticData->conversionType = UCNV_MBCS; | |
945 | if(mySharedData->table == NULL) { | |
946 | NewConverter *sharedDataTable = MBCSOpen(1); | |
947 | if(sharedDataTable != NULL) { | |
948 | if(!MBCSAddState(sharedDataTable, "0-ff")) { | |
949 | *err = U_INVALID_TABLE_FORMAT; | |
950 | sharedDataTable->close(sharedDataTable); | |
951 | } else { | |
952 | mySharedData->table = (UConverterTable *)sharedDataTable; | |
953 | } | |
954 | } else { | |
955 | *err = U_MEMORY_ALLOCATION_ERROR; | |
956 | } | |
957 | } | |
958 | break; | |
959 | } | |
960 | case UCNV_MBCS: | |
961 | { | |
962 | /* MBCSOpen() was called by readHeaderFromFile() */ | |
963 | break; | |
964 | } | |
965 | case UCNV_EBCDIC_STATEFUL: | |
966 | { | |
967 | /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */ | |
968 | if(mySharedData->staticData->maxBytesPerChar!=2) { | |
969 | fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n"); | |
970 | *err = U_INVALID_TABLE_FORMAT; | |
971 | break; | |
972 | } | |
973 | myStaticData->conversionType = UCNV_MBCS; | |
974 | if(mySharedData->table == NULL) { | |
975 | NewConverter *sharedDataTable = MBCSOpen(2); | |
976 | if(sharedDataTable != NULL) { | |
977 | if( !MBCSAddState(sharedDataTable, "0-ff, e:1.s, f:0.s") || | |
978 | !MBCSAddState(sharedDataTable, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4") || | |
979 | !MBCSAddState(sharedDataTable, "0-40:1.i, 41-fe:1., ff:1.i") || | |
980 | !MBCSAddState(sharedDataTable, "0-ff:1.i, 40:1.") || | |
981 | !MBCSAddState(sharedDataTable, "0-ff:1.i") | |
982 | ) { | |
983 | *err = U_INVALID_TABLE_FORMAT; | |
984 | sharedDataTable->close(sharedDataTable); | |
985 | } else { | |
986 | mySharedData->table = (UConverterTable *)sharedDataTable; | |
987 | } | |
988 | } else { | |
989 | *err = U_MEMORY_ALLOCATION_ERROR; | |
990 | } | |
991 | } | |
992 | break; | |
993 | } | |
994 | case UCNV_DBCS: | |
995 | { | |
996 | /* DBCS: use MBCS data structure with a default state table */ | |
997 | if(mySharedData->staticData->maxBytesPerChar!=2) { | |
998 | fprintf(stderr, "error: DBCS codepage with max bytes/char!=2\n"); | |
999 | *err = U_INVALID_TABLE_FORMAT; | |
1000 | break; | |
1001 | } | |
1002 | myStaticData->conversionType = UCNV_MBCS; | |
1003 | if(mySharedData->table == NULL) { | |
1004 | NewConverter *sharedDataTable = MBCSOpen(2); | |
1005 | if(sharedDataTable != NULL) { | |
1006 | if( !MBCSAddState(sharedDataTable, "0-3f:3, 40:2, 41-fe:1, ff:3") || | |
1007 | !MBCSAddState(sharedDataTable, "41-fe") || | |
1008 | !MBCSAddState(sharedDataTable, "40") || | |
1009 | !MBCSAddState(sharedDataTable, "") | |
1010 | ) { | |
1011 | *err = U_INVALID_TABLE_FORMAT; | |
1012 | sharedDataTable->close(sharedDataTable); | |
1013 | } else { | |
1014 | mySharedData->table = (UConverterTable *)sharedDataTable; | |
1015 | } | |
1016 | } else { | |
1017 | *err = U_MEMORY_ALLOCATION_ERROR; | |
1018 | } | |
1019 | } | |
1020 | break; | |
1021 | } | |
1022 | ||
1023 | default : | |
1024 | fprintf(stderr, "error: <uconv_class> omitted\n"); | |
1025 | *err = U_INVALID_TABLE_FORMAT; | |
1026 | mySharedData->table = NULL; | |
1027 | break; | |
1028 | }; | |
1029 | ||
1030 | if(U_SUCCESS(*err) && mySharedData->table != NULL) | |
1031 | { | |
1032 | loadTableFromFile(convFile, mySharedData, err); | |
1033 | } | |
1034 | ||
1035 | T_FileStream_close(convFile); | |
1036 | ||
1037 | return mySharedData; | |
1038 | } | |
1039 | ||
1040 | /* | |
1041 | * Hey, Emacs, please set the following: | |
1042 | * | |
1043 | * Local Variables: | |
1044 | * indent-tabs-mode: nil | |
1045 | * End: | |
1046 | * | |
1047 | */ |