]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/gencmn/gencmn.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / tools / gencmn / gencmn.c
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
73c04bcf 4* Copyright (C) 1999-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: gencmn.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 1999nov01
14* created by: Markus W. Scherer
15*
16* This program reads a list of data files and combines them
17* into one common, memory-mappable file.
18*/
19
20#include <stdio.h>
21#include <stdlib.h>
22#include "unicode/utypes.h"
23#include "unicode/putil.h"
24#include "cmemory.h"
25#include "cstring.h"
26#include "filestrm.h"
27#include "toolutil.h"
374ca955 28#include "unicode/uclean.h"
b75a7d8f
A
29#include "unewdata.h"
30#include "uoptions.h"
73c04bcf 31#include "putilimp.h"
b75a7d8f
A
32
33#define STRING_STORE_SIZE 100000
34#define MAX_FILE_COUNT 2000
35
36#define COMMON_DATA_NAME U_ICUDATA_NAME
37#define DATA_TYPE "dat"
38
374ca955
A
39/* ICU package data file format (.dat files) ------------------------------- ***
40
41Description of the data format after the usual ICU data file header
42(UDataInfo etc.).
43
44Format version 1
45
46A .dat package file contains a simple Table of Contents of item names,
47followed by the items themselves:
48
491. ToC table
50
51uint32_t count; - number of items
52UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
53 uint32_t nameOffset; - offset of the item name
54 uint32_t dataOffset; - offset of the item data
55both are byte offsets from the beginning of the data
56
572. item name strings
58
59All item names are stored as char * strings in one block between the ToC table
60and the data items.
61
623. data items
63
64The data items are stored following the item names block.
65Each data item is 16-aligned.
66The data items are stored in the sorted order of their names.
67
68Therefore, the top of the name strings block is the offset of the first item,
69the length of the last item is the difference between its offset and
70the .dat file length, and the length of all previous items is the difference
71between its offset and the next one.
72
73----------------------------------------------------------------------------- */
74
b75a7d8f
A
75/* UDataInfo cf. udata.h */
76static const UDataInfo dataInfo={
77 sizeof(UDataInfo),
78 0,
79
80 U_IS_BIG_ENDIAN,
81 U_CHARSET_FAMILY,
82 sizeof(UChar),
83 0,
84
85 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
86 {1, 0, 0, 0}, /* formatVersion */
87 {3, 0, 0, 0} /* dataVersion */
88};
89
90static uint32_t maxSize;
91
92static char stringStore[STRING_STORE_SIZE];
93static uint32_t stringTop=0, basenameTotal=0;
94
95typedef struct {
96 char *pathname, *basename;
97 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
98} File;
99
100static File files[MAX_FILE_COUNT];
101static uint32_t fileCount=0;
102
103/* prototypes --------------------------------------------------------------- */
104
105static void
106addFile(const char *filename, UBool sourceTOC, UBool verbose);
107
108static char *
109allocString(uint32_t length);
110
111static int
112compareFiles(const void *file1, const void *file2);
113
374ca955
A
114static char *
115pathToFullPath(const char *path);
116
117/* map non-tree separator (such as '\') to tree separator ('/') inplace. */
118static void
119fixDirToTreePath(char *s);
b75a7d8f
A
120/* -------------------------------------------------------------------------- */
121
122static UOption options[]={
123/*0*/ UOPTION_HELP_H,
124/*1*/ UOPTION_HELP_QUESTION_MARK,
125/*2*/ UOPTION_VERBOSE,
126/*3*/ UOPTION_COPYRIGHT,
127/*4*/ UOPTION_DESTDIR,
128/*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG),
129/*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG),
130/*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG),
131/*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG),
374ca955
A
132/*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG),
133/*10*/UOPTION_SOURCEDIR,
b75a7d8f
A
134};
135
136static char *symPrefix = NULL;
137
138extern int
139main(int argc, char* argv[]) {
140 static char buffer[4096];
141 char line[512];
142 FileStream *in, *file;
143 char *s;
144 UErrorCode errorCode=U_ZERO_ERROR;
145 uint32_t i, fileOffset, basenameOffset, length, nread;
146 UBool sourceTOC, verbose;
147 const char *entrypointName = NULL;
148
149 U_MAIN_INIT_ARGS(argc, argv);
150
151 /* preset then read command line options */
152 options[4].value=u_getDataDirectory();
153 options[6].value=COMMON_DATA_NAME;
154 options[7].value=DATA_TYPE;
374ca955 155 options[10].value=".";
b75a7d8f
A
156 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
157
158 /* error handling, printing usage message */
159 if(argc<0) {
160 fprintf(stderr,
161 "error in command line argument \"%s\"\n",
162 argv[-argc]);
163 } else if(argc<2) {
164 argc=-1;
165 }
374ca955 166
b75a7d8f
A
167 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
168 FILE *where = argc < 0 ? stderr : stdout;
169
170 /*
171 * Broken into chucks because the C89 standard says the minimum
172 * required supported string length is 509 bytes.
173 */
174 fprintf(where,
374ca955 175 "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc < 0 ? 'u' : 'U', *argv);
b75a7d8f
A
176 if (options[0].doesOccur || options[1].doesOccur) {
177 fprintf(where, "\n"
374ca955
A
178 "Read the list file (default: standard input) and create a common data\n"
179 "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
b75a7d8f
A
180 fprintf(where, "\n"
181 "Options:\n"
182 "\t-h, -?, --help this usage text\n"
183 "\t-v, --verbose verbose output\n"
184 "\t-c, --copyright include the ICU copyright notice\n"
185 "\t-C, --comment comment include a comment string\n"
186 "\t-d, --destdir dir destination directory\n");
187 fprintf(where,
188 "\t-n, --name filename output filename, without .type extension\n"
189 "\t (default: " COMMON_DATA_NAME ")\n"
190 "\t-t, --type filetype type of the destination file\n"
191 "\t (default: \"" DATA_TYPE "\")\n"
192 "\t-S, --source tocfile write a .c source file with the table of\n"
193 "\t contents\n"
194 "\t-e, --entrypoint name override the c entrypoint name\n"
195 "\t (default: \"<name>_<type>\")\n");
196 }
197 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
198 }
199
200 sourceTOC=options[8].doesOccur;
201
202 verbose = options[2].doesOccur;
203
204 maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);
205
206 if(argc==2) {
207 in=T_FileStream_stdin();
208 } else {
209 in=T_FileStream_open(argv[2], "r");
210 if(in==NULL) {
211 fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
212 exit(U_FILE_ACCESS_ERROR);
213 }
214 }
215
216 if (verbose) {
217 if(sourceTOC) {
218 printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
219 } else {
220 printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
221 }
222 }
223
224 /* read the list of files and get their lengths */
225 while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
226 /* remove trailing newline characters */
227 s=line;
228 while(*s!=0) {
229 if(*s=='\r' || *s=='\n') {
230 *s=0;
231 break;
232 }
233 ++s;
234 }
235
236 /* check for comment */
237
238 if (*line == '#') {
239 continue;
240 }
241
242 /* add the file */
374ca955
A
243#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
244 {
245 char *t;
246 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
247 *t = U_FILE_SEP_CHAR;
248 }
249 }
250#endif
b75a7d8f
A
251 addFile(getLongPathname(line), sourceTOC, verbose);
252 }
253
254 if(in!=T_FileStream_stdin()) {
255 T_FileStream_close(in);
256 }
257
258 if(fileCount==0) {
259 fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
260 return 0;
261 }
262
263 /* sort the files by basename */
264 qsort(files, fileCount, sizeof(File), compareFiles);
265
266 if(!sourceTOC) {
267 UNewDataMemory *out;
268
269 /* determine the offsets of all basenames and files in this common one */
270 basenameOffset=4+8*fileCount;
271 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
272 for(i=0; i<fileCount; ++i) {
273 files[i].fileOffset=fileOffset;
274 fileOffset+=(files[i].fileSize+15)&~0xf;
275 files[i].basenameOffset=basenameOffset;
276 basenameOffset+=files[i].basenameLength;
277 }
278
279 /* create the output file */
280 out=udata_create(options[4].value, options[7].value, options[6].value,
281 &dataInfo,
282 options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
283 &errorCode);
284 if(U_FAILURE(errorCode)) {
285 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
286 options[4].value, options[6].value, options[7].value,
287 u_errorName(errorCode));
288 exit(errorCode);
289 }
290
291 /* write the table of contents */
292 udata_write32(out, fileCount);
293 for(i=0; i<fileCount; ++i) {
294 udata_write32(out, files[i].basenameOffset);
295 udata_write32(out, files[i].fileOffset);
296 }
297
298 /* write the basenames */
299 for(i=0; i<fileCount; ++i) {
300 udata_writeString(out, files[i].basename, files[i].basenameLength);
301 }
302 length=4+8*fileCount+basenameTotal;
303
304 /* copy the files */
305 for(i=0; i<fileCount; ++i) {
306 /* pad to 16-align the next file */
307 length&=0xf;
308 if(length!=0) {
309 udata_writePadding(out, 16-length);
310 }
311
312 if (verbose) {
313 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
314 }
315
316 /* copy the next file */
317 file=T_FileStream_open(files[i].pathname, "rb");
318 if(file==NULL) {
319 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
320 exit(U_FILE_ACCESS_ERROR);
321 }
322 for(nread = 0;;) {
323 length=T_FileStream_read(file, buffer, sizeof(buffer));
324 if(length <= 0) {
325 break;
326 }
327 nread += length;
328 udata_writeBlock(out, buffer, length);
329 }
330 T_FileStream_close(file);
331 length=files[i].fileSize;
332
333 if (nread != files[i].fileSize) {
374ca955 334 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
b75a7d8f
A
335 exit(U_FILE_ACCESS_ERROR);
336 }
337 }
338
374ca955
A
339 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
340 length&=0xf;
341 if(length!=0) {
342 udata_writePadding(out, 16-length);
343 }
344
b75a7d8f
A
345 /* finish */
346 udata_finish(out, &errorCode);
347 if(U_FAILURE(errorCode)) {
348 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
349 exit(errorCode);
350 }
351 } else {
352 /* write a .c source file with the table of contents */
353 char *filename;
354 FileStream *out;
355
356 /* create the output filename */
357 filename=s=buffer;
358 uprv_strcpy(filename, options[4].value);
359 s=filename+uprv_strlen(filename);
360 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
361 *s++=U_FILE_SEP_CHAR;
362 }
363 uprv_strcpy(s, options[6].value);
364 if(*(options[7].value)!=0) {
365 s+=uprv_strlen(s);
366 *s++='_';
367 uprv_strcpy(s, options[7].value);
368 }
369 s+=uprv_strlen(s);
370 uprv_strcpy(s, ".c");
371
372 /* open the output file */
373 out=T_FileStream_open(filename, "w");
374 if(out==NULL) {
375 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
376 exit(U_FILE_ACCESS_ERROR);
377 }
378
379 /* If an entrypoint is specified, use it. */
380 if(options[9].doesOccur) {
381 entrypointName = options[9].value;
382 } else {
383 entrypointName = options[6].value;
384 }
385
386
b75a7d8f
A
387 /* write the source file */
388 sprintf(buffer,
389 "/*\n"
390 " * ICU common data table of contents for %s.%s ,\n"
391 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
392 " */\n\n"
393 "#include \"unicode/utypes.h\"\n"
394 "#include \"unicode/udata.h\"\n"
395 "\n"
396 "/* external symbol declarations for data */\n",
397 options[6].value, options[7].value);
398 T_FileStream_writeLine(out, buffer);
399
400 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
401 T_FileStream_writeLine(out, buffer);
402 for(i=1; i<fileCount; ++i) {
403 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
404 T_FileStream_writeLine(out, buffer);
405 }
406 T_FileStream_writeLine(out, ";\n\n");
407
408 sprintf(
409 buffer,
374ca955 410 "U_EXPORT struct {\n"
b75a7d8f
A
411 " uint16_t headerSize;\n"
412 " uint8_t magic1, magic2;\n"
413 " UDataInfo info;\n"
414 " char padding[%lu];\n"
415 " uint32_t count, reserved;\n"
416 " struct {\n"
417 " const char *name;\n"
418 " const void *data;\n"
419 " } toc[%lu];\n"
420 "} U_EXPORT2 %s_dat = {\n"
421 " 32, 0xda, 0x27, {\n"
422 " %lu, 0,\n"
423 " %u, %u, %u, 0,\n"
424 " {0x54, 0x6f, 0x43, 0x50},\n"
425 " {1, 0, 0, 0},\n"
426 " {0, 0, 0, 0}\n"
427 " },\n"
428 " \"\", %lu, 0, {\n",
429 (unsigned long)32-4-sizeof(UDataInfo),
430 (unsigned long)fileCount,
431 entrypointName,
432 (unsigned long)sizeof(UDataInfo),
433 U_IS_BIG_ENDIAN,
434 U_CHARSET_FAMILY,
435 U_SIZEOF_UCHAR,
436 (unsigned long)fileCount
437 );
438 T_FileStream_writeLine(out, buffer);
439
440 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
441 T_FileStream_writeLine(out, buffer);
442 for(i=1; i<fileCount; ++i) {
443 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
444 T_FileStream_writeLine(out, buffer);
445 }
446
447 T_FileStream_writeLine(out, "\n }\n};\n");
448 T_FileStream_close(out);
449
450 uprv_free(symPrefix);
451 }
452
453 return 0;
454}
455
456static void
457addFile(const char *filename, UBool sourceTOC, UBool verbose) {
458 char *s;
459 uint32_t length;
374ca955 460 char *fullPath = NULL;
b75a7d8f
A
461
462 if(fileCount==MAX_FILE_COUNT) {
463 fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
464 exit(U_BUFFER_OVERFLOW_ERROR);
465 }
466
467 if(!sourceTOC) {
468 FileStream *file;
469
73c04bcf
A
470 if(uprv_pathIsAbsolute(filename)) {
471 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
472 exit(U_ILLEGAL_ARGUMENT_ERROR);
473 }
374ca955
A
474 fullPath = pathToFullPath(filename);
475
b75a7d8f 476 /* store the pathname */
73c04bcf
A
477 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
478 s=allocString(length);
479 uprv_strcpy(s, options[6].value);
480 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
481 uprv_strcat(s, filename);
b75a7d8f
A
482
483 /* get the basename */
374ca955 484 fixDirToTreePath(s);
b75a7d8f 485 files[fileCount].basename=s;
b75a7d8f 486 files[fileCount].basenameLength=length;
374ca955
A
487
488 files[fileCount].pathname=fullPath;
489
b75a7d8f
A
490 basenameTotal+=length;
491
492 /* try to open the file */
374ca955 493 file=T_FileStream_open(fullPath, "rb");
b75a7d8f 494 if(file==NULL) {
374ca955 495 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
b75a7d8f
A
496 exit(U_FILE_ACCESS_ERROR);
497 }
498
499 /* get the file length */
500 length=T_FileStream_size(file);
501 if(T_FileStream_error(file) || length<=20) {
374ca955 502 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
b75a7d8f
A
503 exit(U_FILE_ACCESS_ERROR);
504 }
374ca955 505
b75a7d8f
A
506 T_FileStream_close(file);
507
508 /* do not add files that are longer than maxSize */
509 if(maxSize && length>maxSize) {
510 if (verbose) {
374ca955 511 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
b75a7d8f
A
512 }
513 return;
514 }
515 files[fileCount].fileSize=length;
516 } else {
517 char *t;
518
519 /* get and store the basename */
73c04bcf
A
520 /* need to include the package name */
521 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(options[6].value) + 1);
522 s=allocString(length);
523 uprv_strcpy(s, options[6].value);
524 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
525 uprv_strcat(s, filename);
374ca955 526 fixDirToTreePath(s);
b75a7d8f
A
527 files[fileCount].basename=s;
528
374ca955 529
b75a7d8f
A
530 /* turn the basename into an entry point name and store in the pathname field */
531 t=files[fileCount].pathname=allocString(length);
532 while(--length>0) {
374ca955 533 if(*s=='.' || *s=='-' || *s=='/') {
b75a7d8f
A
534 *t='_';
535 } else {
536 *t=*s;
537 }
538 ++s;
539 ++t;
540 }
541 *t=0;
542 }
b75a7d8f
A
543 ++fileCount;
544}
545
546static char *
547allocString(uint32_t length) {
548 uint32_t top=stringTop+length;
549 char *p;
550
551 if(top>STRING_STORE_SIZE) {
552 fprintf(stderr, "gencmn: out of memory\n");
553 exit(U_MEMORY_ALLOCATION_ERROR);
554 }
555 p=stringStore+stringTop;
556 stringTop=top;
557 return p;
558}
559
374ca955
A
560static char *
561pathToFullPath(const char *path) {
562 int32_t length;
563 int32_t newLength;
564 char *fullPath;
565 int32_t n;
566
567 length = (uint32_t)(uprv_strlen(path) + 1);
568 newLength = (length + 1 + (int32_t)uprv_strlen(options[10].value));
569 fullPath = uprv_malloc(newLength);
570 if(options[10].doesOccur) {
571 uprv_strcpy(fullPath, options[10].value);
572 uprv_strcat(fullPath, U_FILE_SEP_STRING);
573 } else {
574 fullPath[0] = 0;
575 }
576 n = (int32_t)uprv_strlen(fullPath);
577 uprv_strcat(fullPath, path);
578
374ca955
A
579#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
580#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
73c04bcf
A
581 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
582 for(;fullPath[n];n++) {
583 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
584 fullPath[n] = U_FILE_SEP_CHAR;
374ca955 585 }
73c04bcf 586 }
374ca955
A
587#endif
588#endif
589#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
73c04bcf
A
590 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
591 for(;fullPath[n];n++) {
592 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
593 fullPath[n] = U_FILE_SEP_CHAR;
374ca955 594 }
374ca955 595 }
73c04bcf 596#endif
374ca955
A
597 return fullPath;
598}
599
b75a7d8f
A
600static int
601compareFiles(const void *file1, const void *file2) {
602 /* sort by basename */
603 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
604}
605
374ca955
A
/*
 * Rewrite s in place so that every platform file separator, and every
 * alternate file separator where that is a distinct character, becomes the
 * tree entry separator. Compiles to an empty function on platforms where all
 * three separators are the same character.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *hit;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* each search resumes at the character that was just replaced */
    hit = uprv_strchr(s, U_FILE_SEP_CHAR);
    while(hit != NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit = uprv_strchr(hit, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    hit = uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(hit != NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit = uprv_strchr(hit, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
b75a7d8f
A
623/*
624 * Hey, Emacs, please set the following:
625 *
626 * Local Variables:
627 * indent-tabs-mode: nil
628 * End:
629 *
630 */