]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencmn/gencmn.c
ICU-6.2.13.tar.gz
[apple/icu.git] / icuSources / tools / gencmn / gencmn.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: gencmn.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov01
14 * created by: Markus W. Scherer
15 *
16 * This program reads a list of data files and combines them
17 * into one common, memory-mappable file.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "filestrm.h"
27 #include "toolutil.h"
28 #include "unicode/uclean.h"
29 #include "unewdata.h"
30 #include "uoptions.h"
31
32 #define STRING_STORE_SIZE 100000
33 #define MAX_FILE_COUNT 2000
34
35 #define COMMON_DATA_NAME U_ICUDATA_NAME
36 #define DATA_TYPE "dat"
37
38 /* ICU package data file format (.dat files) ------------------------------- ***
39
40 Description of the data format after the usual ICU data file header
41 (UDataInfo etc.).
42
43 Format version 1
44
45 A .dat package file contains a simple Table of Contents of item names,
46 followed by the items themselves:
47
48 1. ToC table
49
50 uint32_t count; - number of items
51 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
52 uint32_t nameOffset; - offset of the item name
53 uint32_t dataOffset; - offset of the item data
54 both are byte offsets from the beginning of the data
55
56 2. item name strings
57
58 All item names are stored as char * strings in one block between the ToC table
59 and the data items.
60
61 3. data items
62
63 The data items are stored following the item names block.
64 Each data item is 16-aligned.
65 The data items are stored in the sorted order of their names.
66
67 Therefore, the top of the name strings block is the offset of the first item,
68 the length of the last item is the difference between its offset and
69 the .dat file length, and the length of all previous items is the difference
70 between its offset and the next one.
71
72 ----------------------------------------------------------------------------- */
73
74 /* UDataInfo cf. udata.h */
75 static const UDataInfo dataInfo={
76 sizeof(UDataInfo),
77 0,
78
79 U_IS_BIG_ENDIAN,
80 U_CHARSET_FAMILY,
81 sizeof(UChar),
82 0,
83
84 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
85 {1, 0, 0, 0}, /* formatVersion */
86 {3, 0, 0, 0} /* dataVersion */
87 };
88
89 static uint32_t maxSize;
90
91 static char stringStore[STRING_STORE_SIZE];
92 static uint32_t stringTop=0, basenameTotal=0;
93
94 typedef struct {
95 char *pathname, *basename;
96 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
97 } File;
98
99 static File files[MAX_FILE_COUNT];
100 static uint32_t fileCount=0;
101 static UBool embed = FALSE;
102
103 /* prototypes --------------------------------------------------------------- */
104
105 static void
106 addFile(const char *filename, UBool sourceTOC, UBool verbose);
107
108 static char *
109 allocString(uint32_t length);
110
111 static int
112 compareFiles(const void *file1, const void *file2);
113
114 static char *
115 pathToFullPath(const char *path);
116
117 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
118 static void
119 fixDirToTreePath(char *s);
120 /* -------------------------------------------------------------------------- */
121
122 static UOption options[]={
123 /*0*/ UOPTION_HELP_H,
124 /*1*/ UOPTION_HELP_QUESTION_MARK,
125 /*2*/ UOPTION_VERBOSE,
126 /*3*/ UOPTION_COPYRIGHT,
127 /*4*/ UOPTION_DESTDIR,
128 /*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG),
129 /*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG),
130 /*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG),
131 /*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG),
132 /*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG),
133 /*10*/UOPTION_SOURCEDIR,
134 /*11*/UOPTION_DEF( "embed", 'E', UOPT_NO_ARG)
135 };
136
137 static char *symPrefix = NULL;
138
139 extern int
140 main(int argc, char* argv[]) {
141 static char buffer[4096];
142 char line[512];
143 FileStream *in, *file;
144 char *s;
145 UErrorCode errorCode=U_ZERO_ERROR;
146 uint32_t i, fileOffset, basenameOffset, length, nread;
147 UBool sourceTOC, verbose;
148 const char *entrypointName = NULL;
149
150 U_MAIN_INIT_ARGS(argc, argv);
151
152 /* preset then read command line options */
153 options[4].value=u_getDataDirectory();
154 options[6].value=COMMON_DATA_NAME;
155 options[7].value=DATA_TYPE;
156 options[10].value=".";
157 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
158
159 /* error handling, printing usage message */
160 if(argc<0) {
161 fprintf(stderr,
162 "error in command line argument \"%s\"\n",
163 argv[-argc]);
164 } else if(argc<2) {
165 argc=-1;
166 }
167
168 if(options[11].doesOccur) {
169 embed = TRUE;
170 }
171
172 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
173 FILE *where = argc < 0 ? stderr : stdout;
174
175 /*
176 * Broken into chucks because the C89 standard says the minimum
177 * required supported string length is 509 bytes.
178 */
179 fprintf(where,
180 "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
181 if (options[0].doesOccur || options[1].doesOccur) {
182 fprintf(where, "\n"
183 "Read the list file (default: standard input) and create a common data\n"
184 "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
185 fprintf(where, "\n"
186 "Options:\n"
187 "\t-h, -?, --help this usage text\n"
188 "\t-v, --verbose verbose output\n"
189 "\t-c, --copyright include the ICU copyright notice\n"
190 "\t-C, --comment comment include a comment string\n"
191 "\t-d, --destdir dir destination directory\n");
192 fprintf(where,
193 "\t-n, --name filename output filename, without .type extension\n"
194 "\t (default: " COMMON_DATA_NAME ")\n"
195 "\t-t, --type filetype type of the destination file\n"
196 "\t (default: \"" DATA_TYPE "\")\n"
197 "\t-S, --source tocfile write a .c source file with the table of\n"
198 "\t contents\n"
199 "\t-e, --entrypoint name override the c entrypoint name\n"
200 "\t (default: \"<name>_<type>\")\n");
201 }
202 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
203 }
204
205 sourceTOC=options[8].doesOccur;
206
207 verbose = options[2].doesOccur;
208
209 maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);
210
211 if(argc==2) {
212 in=T_FileStream_stdin();
213 } else {
214 in=T_FileStream_open(argv[2], "r");
215 if(in==NULL) {
216 fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
217 exit(U_FILE_ACCESS_ERROR);
218 }
219 }
220
221 if (verbose) {
222 if(sourceTOC) {
223 printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
224 } else {
225 printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
226 }
227 }
228
229 /* read the list of files and get their lengths */
230 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
231 /* remove trailing newline characters */
232 s=line;
233 while(*s!=0) {
234 if(*s=='\r' || *s=='\n') {
235 *s=0;
236 break;
237 }
238 ++s;
239 }
240
241 /* check for comment */
242
243 if (*line == '#') {
244 continue;
245 }
246
247 /* add the file */
248 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
249 {
250 char *t;
251 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
252 *t = U_FILE_SEP_CHAR;
253 }
254 }
255 #endif
256 addFile(getLongPathname(line), sourceTOC, verbose);
257 }
258
259 if(in!=T_FileStream_stdin()) {
260 T_FileStream_close(in);
261 }
262
263 if(fileCount==0) {
264 fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
265 return 0;
266 }
267
268 /* sort the files by basename */
269 qsort(files, fileCount, sizeof(File), compareFiles);
270
271 if(!sourceTOC) {
272 UNewDataMemory *out;
273
274 /* determine the offsets of all basenames and files in this common one */
275 basenameOffset=4+8*fileCount;
276 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
277 for(i=0; i<fileCount; ++i) {
278 files[i].fileOffset=fileOffset;
279 fileOffset+=(files[i].fileSize+15)&~0xf;
280 files[i].basenameOffset=basenameOffset;
281 basenameOffset+=files[i].basenameLength;
282 }
283
284 /* create the output file */
285 out=udata_create(options[4].value, options[7].value, options[6].value,
286 &dataInfo,
287 options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
288 &errorCode);
289 if(U_FAILURE(errorCode)) {
290 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
291 options[4].value, options[6].value, options[7].value,
292 u_errorName(errorCode));
293 exit(errorCode);
294 }
295
296 /* write the table of contents */
297 udata_write32(out, fileCount);
298 for(i=0; i<fileCount; ++i) {
299 udata_write32(out, files[i].basenameOffset);
300 udata_write32(out, files[i].fileOffset);
301 }
302
303 /* write the basenames */
304 for(i=0; i<fileCount; ++i) {
305 udata_writeString(out, files[i].basename, files[i].basenameLength);
306 }
307 length=4+8*fileCount+basenameTotal;
308
309 /* copy the files */
310 for(i=0; i<fileCount; ++i) {
311 /* pad to 16-align the next file */
312 length&=0xf;
313 if(length!=0) {
314 udata_writePadding(out, 16-length);
315 }
316
317 if (verbose) {
318 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
319 }
320
321 /* copy the next file */
322 file=T_FileStream_open(files[i].pathname, "rb");
323 if(file==NULL) {
324 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
325 exit(U_FILE_ACCESS_ERROR);
326 }
327 for(nread = 0;;) {
328 length=T_FileStream_read(file, buffer, sizeof(buffer));
329 if(length <= 0) {
330 break;
331 }
332 nread += length;
333 udata_writeBlock(out, buffer, length);
334 }
335 T_FileStream_close(file);
336 length=files[i].fileSize;
337
338 if (nread != files[i].fileSize) {
339 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
340 exit(U_FILE_ACCESS_ERROR);
341 }
342 }
343
344 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
345 length&=0xf;
346 if(length!=0) {
347 udata_writePadding(out, 16-length);
348 }
349
350 /* finish */
351 udata_finish(out, &errorCode);
352 if(U_FAILURE(errorCode)) {
353 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
354 exit(errorCode);
355 }
356 } else {
357 /* write a .c source file with the table of contents */
358 char *filename;
359 FileStream *out;
360
361 /* create the output filename */
362 filename=s=buffer;
363 uprv_strcpy(filename, options[4].value);
364 s=filename+uprv_strlen(filename);
365 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
366 *s++=U_FILE_SEP_CHAR;
367 }
368 uprv_strcpy(s, options[6].value);
369 if(*(options[7].value)!=0) {
370 s+=uprv_strlen(s);
371 *s++='_';
372 uprv_strcpy(s, options[7].value);
373 }
374 s+=uprv_strlen(s);
375 uprv_strcpy(s, ".c");
376
377 /* open the output file */
378 out=T_FileStream_open(filename, "w");
379 if(out==NULL) {
380 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
381 exit(U_FILE_ACCESS_ERROR);
382 }
383
384 /* If an entrypoint is specified, use it. */
385 if(options[9].doesOccur) {
386 entrypointName = options[9].value;
387 } else {
388 entrypointName = options[6].value;
389 }
390
391
392 #if 0
393 if(!embed) {
394 symPrefix = (char *) uprv_malloc(uprv_strlen(entrypointName) + 2);
395
396 /* test for NULL */
397 if (symPrefix == NULL) {
398 sprintf(buffer, "U_MEMORY_ALLOCATION_ERROR");
399 exit(U_MEMORY_ALLOCATION_ERROR);
400 }
401
402 uprv_strcpy(symPrefix, entrypointName);
403 uprv_strcat(symPrefix, "_");
404 }
405 #endif
406
407 /* write the source file */
408 sprintf(buffer,
409 "/*\n"
410 " * ICU common data table of contents for %s.%s ,\n"
411 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
412 " */\n\n"
413 "#include \"unicode/utypes.h\"\n"
414 "#include \"unicode/udata.h\"\n"
415 "\n"
416 "/* external symbol declarations for data */\n",
417 options[6].value, options[7].value);
418 T_FileStream_writeLine(out, buffer);
419
420 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
421 T_FileStream_writeLine(out, buffer);
422 for(i=1; i<fileCount; ++i) {
423 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
424 T_FileStream_writeLine(out, buffer);
425 }
426 T_FileStream_writeLine(out, ";\n\n");
427
428 sprintf(
429 buffer,
430 "U_EXPORT struct {\n"
431 " uint16_t headerSize;\n"
432 " uint8_t magic1, magic2;\n"
433 " UDataInfo info;\n"
434 " char padding[%lu];\n"
435 " uint32_t count, reserved;\n"
436 " struct {\n"
437 " const char *name;\n"
438 " const void *data;\n"
439 " } toc[%lu];\n"
440 "} U_EXPORT2 %s_dat = {\n"
441 " 32, 0xda, 0x27, {\n"
442 " %lu, 0,\n"
443 " %u, %u, %u, 0,\n"
444 " {0x54, 0x6f, 0x43, 0x50},\n"
445 " {1, 0, 0, 0},\n"
446 " {0, 0, 0, 0}\n"
447 " },\n"
448 " \"\", %lu, 0, {\n",
449 (unsigned long)32-4-sizeof(UDataInfo),
450 (unsigned long)fileCount,
451 entrypointName,
452 (unsigned long)sizeof(UDataInfo),
453 U_IS_BIG_ENDIAN,
454 U_CHARSET_FAMILY,
455 U_SIZEOF_UCHAR,
456 (unsigned long)fileCount
457 );
458 T_FileStream_writeLine(out, buffer);
459
460 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
461 T_FileStream_writeLine(out, buffer);
462 for(i=1; i<fileCount; ++i) {
463 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
464 T_FileStream_writeLine(out, buffer);
465 }
466
467 T_FileStream_writeLine(out, "\n }\n};\n");
468 T_FileStream_close(out);
469
470 uprv_free(symPrefix);
471 }
472
473 return 0;
474 }
475
476 static void
477 addFile(const char *filename, UBool sourceTOC, UBool verbose) {
478 char *s;
479 uint32_t length;
480 char *fullPath = NULL;
481
482 if(fileCount==MAX_FILE_COUNT) {
483 fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
484 exit(U_BUFFER_OVERFLOW_ERROR);
485 }
486
487 if(!sourceTOC) {
488 FileStream *file;
489
490 fullPath = pathToFullPath(filename);
491
492 /* store the pathname */
493 if(!embed) {
494 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
495 s=allocString(length);
496 uprv_strcpy(s, options[6].value);
497 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
498 uprv_strcat(s, filename);
499 } else {
500 /* compatibility mode */
501 const char *base;
502 base = findBasename(filename);
503 length = (uint32_t)(uprv_strlen(base) + 1);
504 s=allocString(length);
505 uprv_memcpy(s, base, length);
506 }
507
508 /* get the basename */
509 fixDirToTreePath(s);
510 files[fileCount].basename=s;
511 files[fileCount].basenameLength=length;
512
513 files[fileCount].pathname=fullPath;
514
515 basenameTotal+=length;
516
517 /* try to open the file */
518 file=T_FileStream_open(fullPath, "rb");
519 if(file==NULL) {
520 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
521 exit(U_FILE_ACCESS_ERROR);
522 }
523
524 /* get the file length */
525 length=T_FileStream_size(file);
526 if(T_FileStream_error(file) || length<=20) {
527 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
528 exit(U_FILE_ACCESS_ERROR);
529 }
530
531 T_FileStream_close(file);
532
533 /* do not add files that are longer than maxSize */
534 if(maxSize && length>maxSize) {
535 if (verbose) {
536 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
537 }
538 return;
539 }
540 files[fileCount].fileSize=length;
541 } else {
542 char *t;
543
544 if(embed) {
545 filename = findBasename(filename);
546 }
547 /* get and store the basename */
548 if(!embed) {
549 /* need to include the package name */
550 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
551 s=allocString(length);
552 uprv_strcpy(s, options[6].value);
553 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
554 uprv_strcat(s, filename);
555 } else {
556 length = (uint32_t)(uprv_strlen(filename) + 1);
557 s=allocString(length);
558 uprv_memcpy(s, filename, length);
559 }
560 fixDirToTreePath(s);
561 files[fileCount].basename=s;
562
563
564 /* turn the basename into an entry point name and store in the pathname field */
565 t=files[fileCount].pathname=allocString(length);
566 while(--length>0) {
567 if(*s=='.' || *s=='-' || *s=='/') {
568 *t='_';
569 } else {
570 *t=*s;
571 }
572 ++s;
573 ++t;
574 }
575 *t=0;
576 }
577 ++fileCount;
578 }
579
580 static char *
581 allocString(uint32_t length) {
582 uint32_t top=stringTop+length;
583 char *p;
584
585 if(top>STRING_STORE_SIZE) {
586 fprintf(stderr, "gencmn: out of memory\n");
587 exit(U_MEMORY_ALLOCATION_ERROR);
588 }
589 p=stringStore+stringTop;
590 stringTop=top;
591 return p;
592 }
593
594 static char *
595 pathToFullPath(const char *path) {
596 int32_t length;
597 int32_t newLength;
598 char *fullPath;
599 int32_t n;
600
601 length = (uint32_t)(uprv_strlen(path) + 1);
602 newLength = (length + 1 + (int32_t)uprv_strlen(options[10].value));
603 fullPath = uprv_malloc(newLength);
604 if(options[10].doesOccur) {
605 uprv_strcpy(fullPath, options[10].value);
606 uprv_strcat(fullPath, U_FILE_SEP_STRING);
607 } else {
608 fullPath[0] = 0;
609 }
610 n = (int32_t)uprv_strlen(fullPath);
611 uprv_strcat(fullPath, path);
612
613 if(!embed) {
614 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
615 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
616 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
617 for(;fullPath[n];n++) {
618 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
619 fullPath[n] = U_FILE_SEP_CHAR;
620 }
621 }
622 #endif
623 #endif
624 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
625 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
626 for(;fullPath[n];n++) {
627 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
628 fullPath[n] = U_FILE_SEP_CHAR;
629 }
630 }
631 #endif
632 }
633 return fullPath;
634 }
635
636 static int
637 compareFiles(const void *file1, const void *file2) {
638 /* sort by basename */
639 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
640 }
641
/*
 * Replace, in place, every file separator in s (and every alternate file
 * separator, where that is a distinct character) with the tree separator.
 * Compiles to an empty function where the separators are all the same.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *sep;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    sep=uprv_strchr(s,U_FILE_SEP_CHAR);
    while(sep!=NULL) {
        *sep = U_TREE_ENTRY_SEP_CHAR;
        /* continue from the character that was just replaced */
        sep=uprv_strchr(sep,U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    sep=uprv_strchr(s,U_FILE_ALT_SEP_CHAR);
    while(sep!=NULL) {
        *sep = U_TREE_ENTRY_SEP_CHAR;
        sep=uprv_strchr(sep,U_FILE_ALT_SEP_CHAR);
    }
#endif
}
659 /*
660 * Hey, Emacs, please set the following:
661 *
662 * Local Variables:
663 * indent-tabs-mode: nil
664 * End:
665 *
666 */