]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gencmn/gencmn.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / tools / gencmn / gencmn.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: gencmn.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov01
14 * created by: Markus W. Scherer
15 *
16 * This program reads a list of data files and combines them
17 * into one common, memory-mappable file.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "filestrm.h"
27 #include "toolutil.h"
28 #include "unicode/uclean.h"
29 #include "unewdata.h"
30 #include "uoptions.h"
31 #include "putilimp.h"
32
33 #define STRING_STORE_SIZE 100000
34 #define MAX_FILE_COUNT 2000
35
36 #define COMMON_DATA_NAME U_ICUDATA_NAME
37 #define DATA_TYPE "dat"
38
/* ICU package data file format (.dat files) ------------------------------- ***

Description of the data format after the usual ICU data file header
(UDataInfo etc.).

Format version 1

A .dat package file contains a simple Table of Contents of item names,
followed by the items themselves:

1. ToC table

uint32_t count; - number of items
UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
    uint32_t nameOffset; - offset of the item name
    uint32_t dataOffset; - offset of the item data
    both are byte offsets from the beginning of the data

2. item name strings

All item names are stored as char * strings in one block between the ToC table
and the data items.

3. data items

The data items are stored following the item names block.
Each data item is 16-aligned.
The data items are stored in the sorted order of their names.

Therefore, the top of the name strings block is the offset of the first item.
The length of the last item is the difference between its offset and
the .dat file length, and the length of every other item is the difference
between its own offset and the offset of the item that follows it.

----------------------------------------------------------------------------- */
74
75 /* UDataInfo cf. udata.h */
76 static const UDataInfo dataInfo={
77 sizeof(UDataInfo),
78 0,
79
80 U_IS_BIG_ENDIAN,
81 U_CHARSET_FAMILY,
82 sizeof(UChar),
83 0,
84
85 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
86 {1, 0, 0, 0}, /* formatVersion */
87 {3, 0, 0, 0} /* dataVersion */
88 };
89
90 static uint32_t maxSize;
91
92 static char stringStore[STRING_STORE_SIZE];
93 static uint32_t stringTop=0, basenameTotal=0;
94
95 typedef struct {
96 char *pathname, *basename;
97 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
98 } File;
99
100 static File files[MAX_FILE_COUNT];
101 static uint32_t fileCount=0;
102
103 /* prototypes --------------------------------------------------------------- */
104
105 static void
106 addFile(const char *filename, UBool sourceTOC, UBool verbose);
107
108 static char *
109 allocString(uint32_t length);
110
111 static int
112 compareFiles(const void *file1, const void *file2);
113
114 static char *
115 pathToFullPath(const char *path);
116
117 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
118 static void
119 fixDirToTreePath(char *s);
120 /* -------------------------------------------------------------------------- */
121
122 static UOption options[]={
123 /*0*/ UOPTION_HELP_H,
124 /*1*/ UOPTION_HELP_QUESTION_MARK,
125 /*2*/ UOPTION_VERBOSE,
126 /*3*/ UOPTION_COPYRIGHT,
127 /*4*/ UOPTION_DESTDIR,
128 /*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG),
129 /*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG),
130 /*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG),
131 /*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG),
132 /*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG),
133 /*10*/UOPTION_SOURCEDIR,
134 };
135
136 static char *symPrefix = NULL;
137
138 extern int
139 main(int argc, char* argv[]) {
140 static char buffer[4096];
141 char line[512];
142 FileStream *in, *file;
143 char *s;
144 UErrorCode errorCode=U_ZERO_ERROR;
145 uint32_t i, fileOffset, basenameOffset, length, nread;
146 UBool sourceTOC, verbose;
147 const char *entrypointName = NULL;
148
149 U_MAIN_INIT_ARGS(argc, argv);
150
151 /* preset then read command line options */
152 options[4].value=u_getDataDirectory();
153 options[6].value=COMMON_DATA_NAME;
154 options[7].value=DATA_TYPE;
155 options[10].value=".";
156 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
157
158 /* error handling, printing usage message */
159 if(argc<0) {
160 fprintf(stderr,
161 "error in command line argument \"%s\"\n",
162 argv[-argc]);
163 } else if(argc<2) {
164 argc=-1;
165 }
166
167 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
168 FILE *where = argc < 0 ? stderr : stdout;
169
170 /*
171 * Broken into chucks because the C89 standard says the minimum
172 * required supported string length is 509 bytes.
173 */
174 fprintf(where,
175 "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
176 if (options[0].doesOccur || options[1].doesOccur) {
177 fprintf(where, "\n"
178 "Read the list file (default: standard input) and create a common data\n"
179 "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
180 fprintf(where, "\n"
181 "Options:\n"
182 "\t-h, -?, --help this usage text\n"
183 "\t-v, --verbose verbose output\n"
184 "\t-c, --copyright include the ICU copyright notice\n"
185 "\t-C, --comment comment include a comment string\n"
186 "\t-d, --destdir dir destination directory\n");
187 fprintf(where,
188 "\t-n, --name filename output filename, without .type extension\n"
189 "\t (default: " COMMON_DATA_NAME ")\n"
190 "\t-t, --type filetype type of the destination file\n"
191 "\t (default: \"" DATA_TYPE "\")\n"
192 "\t-S, --source tocfile write a .c source file with the table of\n"
193 "\t contents\n"
194 "\t-e, --entrypoint name override the c entrypoint name\n"
195 "\t (default: \"<name>_<type>\")\n");
196 }
197 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
198 }
199
200 sourceTOC=options[8].doesOccur;
201
202 verbose = options[2].doesOccur;
203
204 maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);
205
206 if(argc==2) {
207 in=T_FileStream_stdin();
208 } else {
209 in=T_FileStream_open(argv[2], "r");
210 if(in==NULL) {
211 fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
212 exit(U_FILE_ACCESS_ERROR);
213 }
214 }
215
216 if (verbose) {
217 if(sourceTOC) {
218 printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
219 } else {
220 printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
221 }
222 }
223
224 /* read the list of files and get their lengths */
225 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
226 /* remove trailing newline characters */
227 s=line;
228 while(*s!=0) {
229 if(*s=='\r' || *s=='\n') {
230 *s=0;
231 break;
232 }
233 ++s;
234 }
235
236 /* check for comment */
237
238 if (*line == '#') {
239 continue;
240 }
241
242 /* add the file */
243 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
244 {
245 char *t;
246 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
247 *t = U_FILE_SEP_CHAR;
248 }
249 }
250 #endif
251 addFile(getLongPathname(line), sourceTOC, verbose);
252 }
253
254 if(in!=T_FileStream_stdin()) {
255 T_FileStream_close(in);
256 }
257
258 if(fileCount==0) {
259 fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
260 return 0;
261 }
262
263 /* sort the files by basename */
264 qsort(files, fileCount, sizeof(File), compareFiles);
265
266 if(!sourceTOC) {
267 UNewDataMemory *out;
268
269 /* determine the offsets of all basenames and files in this common one */
270 basenameOffset=4+8*fileCount;
271 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
272 for(i=0; i<fileCount; ++i) {
273 files[i].fileOffset=fileOffset;
274 fileOffset+=(files[i].fileSize+15)&~0xf;
275 files[i].basenameOffset=basenameOffset;
276 basenameOffset+=files[i].basenameLength;
277 }
278
279 /* create the output file */
280 out=udata_create(options[4].value, options[7].value, options[6].value,
281 &dataInfo,
282 options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
283 &errorCode);
284 if(U_FAILURE(errorCode)) {
285 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
286 options[4].value, options[6].value, options[7].value,
287 u_errorName(errorCode));
288 exit(errorCode);
289 }
290
291 /* write the table of contents */
292 udata_write32(out, fileCount);
293 for(i=0; i<fileCount; ++i) {
294 udata_write32(out, files[i].basenameOffset);
295 udata_write32(out, files[i].fileOffset);
296 }
297
298 /* write the basenames */
299 for(i=0; i<fileCount; ++i) {
300 udata_writeString(out, files[i].basename, files[i].basenameLength);
301 }
302 length=4+8*fileCount+basenameTotal;
303
304 /* copy the files */
305 for(i=0; i<fileCount; ++i) {
306 /* pad to 16-align the next file */
307 length&=0xf;
308 if(length!=0) {
309 udata_writePadding(out, 16-length);
310 }
311
312 if (verbose) {
313 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
314 }
315
316 /* copy the next file */
317 file=T_FileStream_open(files[i].pathname, "rb");
318 if(file==NULL) {
319 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
320 exit(U_FILE_ACCESS_ERROR);
321 }
322 for(nread = 0;;) {
323 length=T_FileStream_read(file, buffer, sizeof(buffer));
324 if(length <= 0) {
325 break;
326 }
327 nread += length;
328 udata_writeBlock(out, buffer, length);
329 }
330 T_FileStream_close(file);
331 length=files[i].fileSize;
332
333 if (nread != files[i].fileSize) {
334 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
335 exit(U_FILE_ACCESS_ERROR);
336 }
337 }
338
339 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
340 length&=0xf;
341 if(length!=0) {
342 udata_writePadding(out, 16-length);
343 }
344
345 /* finish */
346 udata_finish(out, &errorCode);
347 if(U_FAILURE(errorCode)) {
348 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
349 exit(errorCode);
350 }
351 } else {
352 /* write a .c source file with the table of contents */
353 char *filename;
354 FileStream *out;
355
356 /* create the output filename */
357 filename=s=buffer;
358 uprv_strcpy(filename, options[4].value);
359 s=filename+uprv_strlen(filename);
360 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
361 *s++=U_FILE_SEP_CHAR;
362 }
363 uprv_strcpy(s, options[6].value);
364 if(*(options[7].value)!=0) {
365 s+=uprv_strlen(s);
366 *s++='_';
367 uprv_strcpy(s, options[7].value);
368 }
369 s+=uprv_strlen(s);
370 uprv_strcpy(s, ".c");
371
372 /* open the output file */
373 out=T_FileStream_open(filename, "w");
374 if(out==NULL) {
375 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
376 exit(U_FILE_ACCESS_ERROR);
377 }
378
379 /* If an entrypoint is specified, use it. */
380 if(options[9].doesOccur) {
381 entrypointName = options[9].value;
382 } else {
383 entrypointName = options[6].value;
384 }
385
386
387 /* write the source file */
388 sprintf(buffer,
389 "/*\n"
390 " * ICU common data table of contents for %s.%s ,\n"
391 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
392 " */\n\n"
393 "#include \"unicode/utypes.h\"\n"
394 "#include \"unicode/udata.h\"\n"
395 "\n"
396 "/* external symbol declarations for data */\n",
397 options[6].value, options[7].value);
398 T_FileStream_writeLine(out, buffer);
399
400 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
401 T_FileStream_writeLine(out, buffer);
402 for(i=1; i<fileCount; ++i) {
403 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
404 T_FileStream_writeLine(out, buffer);
405 }
406 T_FileStream_writeLine(out, ";\n\n");
407
408 sprintf(
409 buffer,
410 "U_EXPORT struct {\n"
411 " uint16_t headerSize;\n"
412 " uint8_t magic1, magic2;\n"
413 " UDataInfo info;\n"
414 " char padding[%lu];\n"
415 " uint32_t count, reserved;\n"
416 " struct {\n"
417 " const char *name;\n"
418 " const void *data;\n"
419 " } toc[%lu];\n"
420 "} U_EXPORT2 %s_dat = {\n"
421 " 32, 0xda, 0x27, {\n"
422 " %lu, 0,\n"
423 " %u, %u, %u, 0,\n"
424 " {0x54, 0x6f, 0x43, 0x50},\n"
425 " {1, 0, 0, 0},\n"
426 " {0, 0, 0, 0}\n"
427 " },\n"
428 " \"\", %lu, 0, {\n",
429 (unsigned long)32-4-sizeof(UDataInfo),
430 (unsigned long)fileCount,
431 entrypointName,
432 (unsigned long)sizeof(UDataInfo),
433 U_IS_BIG_ENDIAN,
434 U_CHARSET_FAMILY,
435 U_SIZEOF_UCHAR,
436 (unsigned long)fileCount
437 );
438 T_FileStream_writeLine(out, buffer);
439
440 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
441 T_FileStream_writeLine(out, buffer);
442 for(i=1; i<fileCount; ++i) {
443 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
444 T_FileStream_writeLine(out, buffer);
445 }
446
447 T_FileStream_writeLine(out, "\n }\n};\n");
448 T_FileStream_close(out);
449
450 uprv_free(symPrefix);
451 }
452
453 return 0;
454 }
455
456 static void
457 addFile(const char *filename, UBool sourceTOC, UBool verbose) {
458 char *s;
459 uint32_t length;
460 char *fullPath = NULL;
461
462 if(fileCount==MAX_FILE_COUNT) {
463 fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
464 exit(U_BUFFER_OVERFLOW_ERROR);
465 }
466
467 if(!sourceTOC) {
468 FileStream *file;
469
470 if(uprv_pathIsAbsolute(filename)) {
471 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
472 exit(U_ILLEGAL_ARGUMENT_ERROR);
473 }
474 fullPath = pathToFullPath(filename);
475
476 /* store the pathname */
477 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
478 s=allocString(length);
479 uprv_strcpy(s, options[6].value);
480 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
481 uprv_strcat(s, filename);
482
483 /* get the basename */
484 fixDirToTreePath(s);
485 files[fileCount].basename=s;
486 files[fileCount].basenameLength=length;
487
488 files[fileCount].pathname=fullPath;
489
490 basenameTotal+=length;
491
492 /* try to open the file */
493 file=T_FileStream_open(fullPath, "rb");
494 if(file==NULL) {
495 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
496 exit(U_FILE_ACCESS_ERROR);
497 }
498
499 /* get the file length */
500 length=T_FileStream_size(file);
501 if(T_FileStream_error(file) || length<=20) {
502 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
503 exit(U_FILE_ACCESS_ERROR);
504 }
505
506 T_FileStream_close(file);
507
508 /* do not add files that are longer than maxSize */
509 if(maxSize && length>maxSize) {
510 if (verbose) {
511 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
512 }
513 return;
514 }
515 files[fileCount].fileSize=length;
516 } else {
517 char *t;
518
519 /* get and store the basename */
520 /* need to include the package name */
521 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
522 s=allocString(length);
523 uprv_strcpy(s, options[6].value);
524 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
525 uprv_strcat(s, filename);
526 fixDirToTreePath(s);
527 files[fileCount].basename=s;
528
529
530 /* turn the basename into an entry point name and store in the pathname field */
531 t=files[fileCount].pathname=allocString(length);
532 while(--length>0) {
533 if(*s=='.' || *s=='-' || *s=='/') {
534 *t='_';
535 } else {
536 *t=*s;
537 }
538 ++s;
539 ++t;
540 }
541 *t=0;
542 }
543 ++fileCount;
544 }
545
546 static char *
547 allocString(uint32_t length) {
548 uint32_t top=stringTop+length;
549 char *p;
550
551 if(top>STRING_STORE_SIZE) {
552 fprintf(stderr, "gencmn: out of memory\n");
553 exit(U_MEMORY_ALLOCATION_ERROR);
554 }
555 p=stringStore+stringTop;
556 stringTop=top;
557 return p;
558 }
559
560 static char *
561 pathToFullPath(const char *path) {
562 int32_t length;
563 int32_t newLength;
564 char *fullPath;
565 int32_t n;
566
567 length = (uint32_t)(uprv_strlen(path) + 1);
568 newLength = (length + 1 + (int32_t)uprv_strlen(options[10].value));
569 fullPath = uprv_malloc(newLength);
570 if(options[10].doesOccur) {
571 uprv_strcpy(fullPath, options[10].value);
572 uprv_strcat(fullPath, U_FILE_SEP_STRING);
573 } else {
574 fullPath[0] = 0;
575 }
576 n = (int32_t)uprv_strlen(fullPath);
577 uprv_strcat(fullPath, path);
578
579 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
580 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
581 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
582 for(;fullPath[n];n++) {
583 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
584 fullPath[n] = U_FILE_SEP_CHAR;
585 }
586 }
587 #endif
588 #endif
589 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
590 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
591 for(;fullPath[n];n++) {
592 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
593 fullPath[n] = U_FILE_SEP_CHAR;
594 }
595 }
596 #endif
597 return fullPath;
598 }
599
600 static int
601 compareFiles(const void *file1, const void *file2) {
602 /* sort by basename */
603 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
604 }
605
/**
 * Rewrites, in place, every file separator in s to the tree separator
 * U_TREE_ENTRY_SEP_CHAR. Which replacement passes are compiled depends on
 * whether the platform separator macros differ from the tree separator; if
 * none differs the function does nothing.
 *
 * @param s NUL-terminated string to modify
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *sep;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* primary file separator -> tree separator */
    sep=s;
    while((sep=uprv_strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
        *sep = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* alternate file separator -> tree separator */
    sep=s;
    while((sep=uprv_strchr(sep, U_FILE_ALT_SEP_CHAR))!=NULL) {
        *sep = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
}
623 /*
624 * Hey, Emacs, please set the following:
625 *
626 * Local Variables:
627 * indent-tabs-mode: nil
628 * End:
629 *
630 */