2 *******************************************************************************
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999nov01
14 * created by: Markus W. Scherer
16 * This program reads a list of data files and combines them
17 * into one common, memory-mappable file.
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
28 #include "unicode/uclean.h"
32 #define STRING_STORE_SIZE 100000
33 #define MAX_FILE_COUNT 2000
35 #define COMMON_DATA_NAME U_ICUDATA_NAME
36 #define DATA_TYPE "dat"
38 /* ICU package data file format (.dat files) ------------------------------- ***
40 Description of the data format after the usual ICU data file header
45 A .dat package file contains a simple Table of Contents of item names,
46 followed by the items themselves:
50 uint32_t count; - number of items
51 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
52 uint32_t nameOffset; - offset of the item name
53 uint32_t dataOffset; - offset of the item data
54 both are byte offsets from the beginning of the data
58 All item names are stored as char * strings in one block between the ToC table
63 The data items are stored following the item names block.
64 Each data item is 16-aligned.
65 The data items are stored in the sorted order of their names.
67 Therefore, the top of the name strings block is the offset of the first item,
68 the length of the last item is the difference between its offset and
69 the .dat file length, and the length of all previous items is the difference
70 between its offset and the next one.
72 ----------------------------------------------------------------------------- */
74 /* UDataInfo cf. udata.h */
75 static const UDataInfo dataInfo
={
84 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
85 {1, 0, 0, 0}, /* formatVersion */
86 {3, 0, 0, 0} /* dataVersion */
89 static uint32_t maxSize
;
91 static char stringStore
[STRING_STORE_SIZE
];
92 static uint32_t stringTop
=0, basenameTotal
=0;
95 char *pathname
, *basename
;
96 uint32_t basenameLength
, basenameOffset
, fileSize
, fileOffset
;
99 static File files
[MAX_FILE_COUNT
];
100 static uint32_t fileCount
=0;
101 static UBool embed
= FALSE
;
103 /* prototypes --------------------------------------------------------------- */
106 addFile(const char *filename
, UBool sourceTOC
, UBool verbose
);
109 allocString(uint32_t length
);
112 compareFiles(const void *file1
, const void *file2
);
115 pathToFullPath(const char *path
);
117 /* map non-tree separators (such as '\') to the tree separator ('/') in place. */
119 fixDirToTreePath(char *s
);
120 /* -------------------------------------------------------------------------- */
122 static UOption options
[]={
123 /*0*/ UOPTION_HELP_H
,
124 /*1*/ UOPTION_HELP_QUESTION_MARK
,
125 /*2*/ UOPTION_VERBOSE
,
126 /*3*/ UOPTION_COPYRIGHT
,
127 /*4*/ UOPTION_DESTDIR
,
128 /*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG
),
129 /*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG
),
130 /*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG
),
131 /*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG
),
132 /*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG
),
133 /*10*/UOPTION_SOURCEDIR
,
134 /*11*/UOPTION_DEF( "embed", 'E', UOPT_NO_ARG
)
137 static char *symPrefix
= NULL
;
140 main(int argc
, char* argv
[]) {
141 static char buffer
[4096];
143 FileStream
*in
, *file
;
145 UErrorCode errorCode
=U_ZERO_ERROR
;
146 uint32_t i
, fileOffset
, basenameOffset
, length
, nread
;
147 UBool sourceTOC
, verbose
;
148 const char *entrypointName
= NULL
;
150 U_MAIN_INIT_ARGS(argc
, argv
);
152 /* preset then read command line options */
153 options
[4].value
=u_getDataDirectory();
154 options
[6].value
=COMMON_DATA_NAME
;
155 options
[7].value
=DATA_TYPE
;
156 options
[10].value
=".";
157 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
159 /* error handling, printing usage message */
162 "error in command line argument \"%s\"\n",
168 if(options
[11].doesOccur
) {
172 if(argc
<0 || options
[0].doesOccur
|| options
[1].doesOccur
) {
173 FILE *where
= argc
< 0 ? stderr
: stdout
;
176 * Broken into chunks because the C89 standard says the minimum
177 * required supported string length is 509 bytes.
180 "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc
< 0 ? 'u' : 'U', *argv
);
181 if (options
[0].doesOccur
|| options
[1].doesOccur
) {
183 "Read the list file (default: standard input) and create a common data\n"
184 "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
187 "\t-h, -?, --help this usage text\n"
188 "\t-v, --verbose verbose output\n"
189 "\t-c, --copyright include the ICU copyright notice\n"
190 "\t-C, --comment comment include a comment string\n"
191 "\t-d, --destdir dir destination directory\n");
193 "\t-n, --name filename output filename, without .type extension\n"
194 "\t (default: " COMMON_DATA_NAME
")\n"
195 "\t-t, --type filetype type of the destination file\n"
196 "\t (default: \"" DATA_TYPE
"\")\n"
197 "\t-S, --source tocfile write a .c source file with the table of\n"
199 "\t-e, --entrypoint name override the c entrypoint name\n"
200 "\t (default: \"<name>_<type>\")\n");
202 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
205 sourceTOC
=options
[8].doesOccur
;
207 verbose
= options
[2].doesOccur
;
209 maxSize
=(uint32_t)uprv_strtoul(argv
[1], NULL
, 0);
212 in
=T_FileStream_stdin();
214 in
=T_FileStream_open(argv
[2], "r");
216 fprintf(stderr
, "gencmn: unable to open input file %s\n", argv
[2]);
217 exit(U_FILE_ACCESS_ERROR
);
223 printf("generating %s_%s.c (table of contents source file)\n", options
[6].value
, options
[7].value
);
225 printf("generating %s.%s (common data file with table of contents)\n", options
[6].value
, options
[7].value
);
229 /* read the list of files and get their lengths */
230 while(T_FileStream_readLine(in
, line
, sizeof(line
))!=NULL
) {
231 /* remove trailing newline characters */
234 if(*s
=='\r' || *s
=='\n') {
241 /* check for comment */
248 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
251 while((t
= uprv_strchr(line
,U_FILE_ALT_SEP_CHAR
))) {
252 *t
= U_FILE_SEP_CHAR
;
256 addFile(getLongPathname(line
), sourceTOC
, verbose
);
259 if(in
!=T_FileStream_stdin()) {
260 T_FileStream_close(in
);
264 fprintf(stderr
, "gencmn: no files listed in %s\n", argc
==2 ? "<stdin>" : argv
[2]);
268 /* sort the files by basename */
269 qsort(files
, fileCount
, sizeof(File
), compareFiles
);
274 /* determine the offsets of all basenames and files in this common one */
275 basenameOffset
=4+8*fileCount
;
276 fileOffset
=(basenameOffset
+(basenameTotal
+15))&~0xf;
277 for(i
=0; i
<fileCount
; ++i
) {
278 files
[i
].fileOffset
=fileOffset
;
279 fileOffset
+=(files
[i
].fileSize
+15)&~0xf;
280 files
[i
].basenameOffset
=basenameOffset
;
281 basenameOffset
+=files
[i
].basenameLength
;
284 /* create the output file */
285 out
=udata_create(options
[4].value
, options
[7].value
, options
[6].value
,
287 options
[3].doesOccur
? U_COPYRIGHT_STRING
: options
[5].value
,
289 if(U_FAILURE(errorCode
)) {
290 fprintf(stderr
, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
291 options
[4].value
, options
[6].value
, options
[7].value
,
292 u_errorName(errorCode
));
296 /* write the table of contents */
297 udata_write32(out
, fileCount
);
298 for(i
=0; i
<fileCount
; ++i
) {
299 udata_write32(out
, files
[i
].basenameOffset
);
300 udata_write32(out
, files
[i
].fileOffset
);
303 /* write the basenames */
304 for(i
=0; i
<fileCount
; ++i
) {
305 udata_writeString(out
, files
[i
].basename
, files
[i
].basenameLength
);
307 length
=4+8*fileCount
+basenameTotal
;
310 for(i
=0; i
<fileCount
; ++i
) {
311 /* pad to 16-align the next file */
314 udata_writePadding(out
, 16-length
);
318 printf("adding %s (%ld byte%s)\n", files
[i
].pathname
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
321 /* copy the next file */
322 file
=T_FileStream_open(files
[i
].pathname
, "rb");
324 fprintf(stderr
, "gencmn: unable to open listed file %s\n", files
[i
].pathname
);
325 exit(U_FILE_ACCESS_ERROR
);
328 length
=T_FileStream_read(file
, buffer
, sizeof(buffer
));
333 udata_writeBlock(out
, buffer
, length
);
335 T_FileStream_close(file
);
336 length
=files
[i
].fileSize
;
338 if (nread
!= files
[i
].fileSize
) {
339 fprintf(stderr
, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files
[i
].pathname
, (long)nread
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
340 exit(U_FILE_ACCESS_ERROR
);
344 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
347 udata_writePadding(out
, 16-length
);
351 udata_finish(out
, &errorCode
);
352 if(U_FAILURE(errorCode
)) {
353 fprintf(stderr
, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode
));
357 /* write a .c source file with the table of contents */
361 /* create the output filename */
363 uprv_strcpy(filename
, options
[4].value
);
364 s
=filename
+uprv_strlen(filename
);
365 if(s
>filename
&& *(s
-1)!=U_FILE_SEP_CHAR
) {
366 *s
++=U_FILE_SEP_CHAR
;
368 uprv_strcpy(s
, options
[6].value
);
369 if(*(options
[7].value
)!=0) {
372 uprv_strcpy(s
, options
[7].value
);
375 uprv_strcpy(s
, ".c");
377 /* open the output file */
378 out
=T_FileStream_open(filename
, "w");
380 fprintf(stderr
, "gencmn: unable to open .c output file %s\n", filename
);
381 exit(U_FILE_ACCESS_ERROR
);
384 /* If an entrypoint is specified, use it. */
385 if(options
[9].doesOccur
) {
386 entrypointName
= options
[9].value
;
388 entrypointName
= options
[6].value
;
394 symPrefix
= (char *) uprv_malloc(uprv_strlen(entrypointName
) + 2);
397 if (symPrefix
== NULL
) {
398 sprintf(buffer
, "U_MEMORY_ALLOCATION_ERROR");
399 exit(U_MEMORY_ALLOCATION_ERROR
);
402 uprv_strcpy(symPrefix
, entrypointName
);
403 uprv_strcat(symPrefix
, "_");
407 /* write the source file */
410 " * ICU common data table of contents for %s.%s ,\n"
411 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
413 "#include \"unicode/utypes.h\"\n"
414 "#include \"unicode/udata.h\"\n"
416 "/* external symbol declarations for data */\n",
417 options
[6].value
, options
[7].value
);
418 T_FileStream_writeLine(out
, buffer
);
420 sprintf(buffer
, "extern const char\n %s%s[]", symPrefix
?symPrefix
:"", files
[0].pathname
);
421 T_FileStream_writeLine(out
, buffer
);
422 for(i
=1; i
<fileCount
; ++i
) {
423 sprintf(buffer
, ",\n %s%s[]", symPrefix
?symPrefix
:"", files
[i
].pathname
);
424 T_FileStream_writeLine(out
, buffer
);
426 T_FileStream_writeLine(out
, ";\n\n");
430 "U_EXPORT struct {\n"
431 " uint16_t headerSize;\n"
432 " uint8_t magic1, magic2;\n"
434 " char padding[%lu];\n"
435 " uint32_t count, reserved;\n"
437 " const char *name;\n"
438 " const void *data;\n"
440 "} U_EXPORT2 %s_dat = {\n"
441 " 32, 0xda, 0x27, {\n"
444 " {0x54, 0x6f, 0x43, 0x50},\n"
448 " \"\", %lu, 0, {\n",
449 (unsigned long)32-4-sizeof(UDataInfo
),
450 (unsigned long)fileCount
,
452 (unsigned long)sizeof(UDataInfo
),
456 (unsigned long)fileCount
458 T_FileStream_writeLine(out
, buffer
);
460 sprintf(buffer
, " { \"%s\", %s%s }", files
[0].basename
, symPrefix
?symPrefix
:"", files
[0].pathname
);
461 T_FileStream_writeLine(out
, buffer
);
462 for(i
=1; i
<fileCount
; ++i
) {
463 sprintf(buffer
, ",\n { \"%s\", %s%s }", files
[i
].basename
, symPrefix
?symPrefix
:"", files
[i
].pathname
);
464 T_FileStream_writeLine(out
, buffer
);
467 T_FileStream_writeLine(out
, "\n }\n};\n");
468 T_FileStream_close(out
);
470 uprv_free(symPrefix
);
477 addFile(const char *filename
, UBool sourceTOC
, UBool verbose
) {
480 char *fullPath
= NULL
;
482 if(fileCount
==MAX_FILE_COUNT
) {
483 fprintf(stderr
, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT
);
484 exit(U_BUFFER_OVERFLOW_ERROR
);
490 fullPath
= pathToFullPath(filename
);
492 /* store the pathname */
494 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(options
[6].value
) + 1);
495 s
=allocString(length
);
496 uprv_strcpy(s
, options
[6].value
);
497 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
498 uprv_strcat(s
, filename
);
500 /* compatibility mode */
502 base
= findBasename(filename
);
503 length
= (uint32_t)(uprv_strlen(base
) + 1);
504 s
=allocString(length
);
505 uprv_memcpy(s
, base
, length
);
508 /* get the basename */
510 files
[fileCount
].basename
=s
;
511 files
[fileCount
].basenameLength
=length
;
513 files
[fileCount
].pathname
=fullPath
;
515 basenameTotal
+=length
;
517 /* try to open the file */
518 file
=T_FileStream_open(fullPath
, "rb");
520 fprintf(stderr
, "gencmn: unable to open listed file %s\n", fullPath
);
521 exit(U_FILE_ACCESS_ERROR
);
524 /* get the file length */
525 length
=T_FileStream_size(file
);
526 if(T_FileStream_error(file
) || length
<=20) {
527 fprintf(stderr
, "gencmn: unable to get length of listed file %s\n", fullPath
);
528 exit(U_FILE_ACCESS_ERROR
);
531 T_FileStream_close(file
);
533 /* do not add files that are longer than maxSize */
534 if(maxSize
&& length
>maxSize
) {
536 printf("%s ignored (size %ld > %ld)\n", fullPath
, (long)length
, (long)maxSize
);
540 files
[fileCount
].fileSize
=length
;
545 filename
= findBasename(filename
);
547 /* get and store the basename */
549 /* need to include the package name */
550 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(options
[6].value
) + 1);
551 s
=allocString(length
);
552 uprv_strcpy(s
, options
[6].value
);
553 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
554 uprv_strcat(s
, filename
);
556 length
= (uint32_t)(uprv_strlen(filename
) + 1);
557 s
=allocString(length
);
558 uprv_memcpy(s
, filename
, length
);
561 files
[fileCount
].basename
=s
;
564 /* turn the basename into an entry point name and store in the pathname field */
565 t
=files
[fileCount
].pathname
=allocString(length
);
567 if(*s
=='.' || *s
=='-' || *s
=='/') {
581 allocString(uint32_t length
) {
582 uint32_t top
=stringTop
+length
;
585 if(top
>STRING_STORE_SIZE
) {
586 fprintf(stderr
, "gencmn: out of memory\n");
587 exit(U_MEMORY_ALLOCATION_ERROR
);
589 p
=stringStore
+stringTop
;
595 pathToFullPath(const char *path
) {
601 length
= (uint32_t)(uprv_strlen(path
) + 1);
602 newLength
= (length
+ 1 + (int32_t)uprv_strlen(options
[10].value
));
603 fullPath
= uprv_malloc(newLength
);
604 if(options
[10].doesOccur
) {
605 uprv_strcpy(fullPath
, options
[10].value
);
606 uprv_strcat(fullPath
, U_FILE_SEP_STRING
);
610 n
= (int32_t)uprv_strlen(fullPath
);
611 uprv_strcat(fullPath
, path
);
614 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
615 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
616 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
617 for(;fullPath
[n
];n
++) {
618 if(fullPath
[n
] == U_FILE_ALT_SEP_CHAR
) {
619 fullPath
[n
] = U_FILE_SEP_CHAR
;
624 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
625 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
626 for(;fullPath
[n
];n
++) {
627 if(fullPath
[n
] == U_TREE_ENTRY_SEP_CHAR
) {
628 fullPath
[n
] = U_FILE_SEP_CHAR
;
637 compareFiles(const void *file1
, const void *file2
) {
638 /* sort by basename */
639 return uprv_strcmp(((File
*)file1
)->basename
, ((File
*)file2
)->basename
);
/*
 * Rewrite `s` in place so that every file separator character
 * (U_FILE_SEP_CHAR and, where it is distinct, U_FILE_ALT_SEP_CHAR)
 * becomes the tree separator U_TREE_ENTRY_SEP_CHAR.
 * On platforms where all three characters are equal this compiles to a no-op.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* each hit is overwritten, so searching again from the same spot finds the next one */
    cursor=uprv_strchr(s, U_FILE_SEP_CHAR);
    while(cursor!=NULL) {
        *cursor=U_TREE_ENTRY_SEP_CHAR;
        cursor=uprv_strchr(cursor, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    cursor=uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(cursor!=NULL) {
        *cursor=U_TREE_ENTRY_SEP_CHAR;
        cursor=uprv_strchr(cursor, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
660 * Hey, Emacs, please set the following:
663 * indent-tabs-mode: nil