2 *******************************************************************************
4 * Copyright (C) 1999-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999nov01
14 * created by: Markus W. Scherer
16 * This program reads a list of data files and combines them
17 * into one common, memory-mappable file.
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
28 #include "unicode/uclean.h"
33 #define STRING_STORE_SIZE 100000
34 #define MAX_FILE_COUNT 2000
36 #define COMMON_DATA_NAME U_ICUDATA_NAME
37 #define DATA_TYPE "dat"
39 /* ICU package data file format (.dat files) ------------------------------- ***
41 Description of the data format after the usual ICU data file header
46 A .dat package file contains a simple Table of Contents of item names,
47 followed by the items themselves:
51 uint32_t count; - number of items
52 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
53 uint32_t nameOffset; - offset of the item name
54 uint32_t dataOffset; - offset of the item data
55 both are byte offsets from the beginning of the data
59 All item names are stored as char * strings in one block between the ToC table
64 The data items are stored following the item names block.
65 Each data item is 16-aligned.
66 The data items are stored in the sorted order of their names.
68 Therefore, the top of the name strings block is the offset of the first item,
69 the length of the last item is the difference between its offset and
70 the .dat file length, and the length of each earlier item is the difference
71 between its offset and the offset of the next item.
73 ----------------------------------------------------------------------------- */
75 /* UDataInfo cf. udata.h */
/*
 * Header descriptor for the file format produced by this tool.
 * The format tag bytes 0x43 0x6d 0x6e 0x44 are the ASCII letters "CmnD".
 * NOTE(review): the leading initializer fields and the closing of the
 * initializer are not visible in this view.
 */
76 static const UDataInfo dataInfo
={
85 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
86 {1, 0, 0, 0}, /* formatVersion */
87 {3, 0, 0, 0} /* dataVersion */
/* size limit parsed from argv[1] in main(); when non-zero, addFile() ignores larger files */
90 static uint32_t maxSize
;
/* fixed-size character arena; allocString() hands out regions starting at stringStore+stringTop */
92 static char stringStore
[STRING_STORE_SIZE
];
/*
 * stringTop: offset of the first unused byte in stringStore.
 * basenameTotal: running sum of the stored item-name lengths (added to in
 * addFile()); main() uses it to size the names block of the output.
 */
93 static uint32_t stringTop
=0, basenameTotal
=0;
/*
 * Fields of the per-item record type File (the opening and closing lines of
 * the type definition are not visible in this view).
 * pathname: path used to open the item, or an entry point name in the
 *           section of addFile() that overwrites it.
 * basename: item name as written to the table of contents.
 */
96 char *pathname
, *basename
;
/* length of basename, its offset in the output, the item's size, and its data offset */
97 uint32_t basenameLength
, basenameOffset
, fileSize
, fileOffset
;
/* table of registered items; addFile() refuses to go past MAX_FILE_COUNT entries */
100 static File files
[MAX_FILE_COUNT
];
/* number of entries of files[] in use */
101 static uint32_t fileCount
=0;
103 /* prototypes --------------------------------------------------------------- */
/* register one listed item in files[] (return-type lines of these prototypes are not visible in this view) */
106 addFile(const char *filename
, UBool sourceTOC
, UBool verbose
);
/* reserve length bytes from the static string store */
109 allocString(uint32_t length
);
/* qsort() comparator: orders File records by basename */
112 compareFiles(const void *file1
, const void *file2
);
/* build the on-disk path for a listed item in a newly allocated buffer */
115 pathToFullPath(const char *path
);
117 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
119 fixDirToTreePath(char *s
);
120 /* -------------------------------------------------------------------------- */
/*
 * Command-line option table handed to u_parseArgs() in main().
 * The rest of the file addresses entries by numeric index (for example
 * options[4] for the destination directory, options[6] for the package name,
 * options[7] for the file type, options[10] for the source directory), so the
 * order of the entries must stay in step with the index comments below.
 */
122 static UOption options
[]={
123 /*0*/ UOPTION_HELP_H
,
124 /*1*/ UOPTION_HELP_QUESTION_MARK
,
125 /*2*/ UOPTION_VERBOSE
,
126 /*3*/ UOPTION_COPYRIGHT
,
127 /*4*/ UOPTION_DESTDIR
,
128 /*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG
),
129 /*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG
),
130 /*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG
),
131 /*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG
),
132 /*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG
),
133 /*10*/UOPTION_SOURCEDIR
,
/*
 * Optional prefix put in front of each symbol name in the generated .c table
 * of contents; main() substitutes an empty string while this is NULL and
 * releases it with uprv_free() at the end.
 * NOTE(review): no assignment to this variable is visible in this view.
 */
136 static char *symPrefix
= NULL
;
/*
 * Program entry point (the return-type line is not visible in this view).
 *
 * Visible flow:
 *   1. preset defaults in the options table, then parse the command line;
 *   2. print the usage text on a parse error or when help is requested;
 *   3. read the list of item names line by line and register each one
 *      through addFile();
 *   4. sort the registered items by stored name and compute name/data offsets;
 *   5. write the binary package: item count, (name offset, data offset)
 *      pairs, the names, then each file's contents with 16-byte padding;
 *   6. write a .c source file that holds the table of contents.
 * NOTE(review): steps 5 and 6 are presumably alternatives selected by
 * sourceTOC, but the selecting conditions, several local declarations and all
 * closing braces are missing from this view - confirm against the full file.
 */
139 main(int argc
, char* argv
[]) {
140 static char buffer
[4096];
142 FileStream
*in
, *file
;
144 UErrorCode errorCode
=U_ZERO_ERROR
;
145 uint32_t i
, fileOffset
, basenameOffset
, length
, nread
;
146 UBool sourceTOC
, verbose
;
147 const char *entrypointName
= NULL
;
149 U_MAIN_INIT_ARGS(argc
, argv
);
151 /* preset then read command line options */
/* defaults: [4] destination directory, [6] package name, [7] file type, [10] source directory */
152 options
[4].value
=u_getDataDirectory();
153 options
[6].value
=COMMON_DATA_NAME
;
154 options
[7].value
=DATA_TYPE
;
155 options
[10].value
=".";
156 argc
=u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]), options
);
158 /* error handling, printing usage message */
161 "error in command line argument \"%s\"\n",
/* a negative argc marks a parse error: usage then goes to stderr, otherwise to stdout */
167 if(argc
<0 || options
[0].doesOccur
|| options
[1].doesOccur
) {
168 FILE *where
= argc
< 0 ? stderr
: stdout
;
171 * Broken into chucks because the C89 standard says the minimum
172 * required supported string length is 509 bytes.
175 "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] maxsize listfile\n", argc
< 0 ? 'u' : 'U', *argv
);
/* the long option descriptions are printed only when help was requested explicitly */
176 if (options
[0].doesOccur
|| options
[1].doesOccur
) {
178 "Read the list file (default: standard input) and create a common data\n"
179 "file from specified files. Omit any files larger than maxsize, if maxsize > 0.\n");
182 "\t-h, -?, --help this usage text\n"
183 "\t-v, --verbose verbose output\n"
184 "\t-c, --copyright include the ICU copyright notice\n"
185 "\t-C, --comment comment include a comment string\n"
186 "\t-d, --destdir dir destination directory\n");
188 "\t-n, --name filename output filename, without .type extension\n"
189 "\t (default: " COMMON_DATA_NAME
")\n"
190 "\t-t, --type filetype type of the destination file\n"
191 "\t (default: \"" DATA_TYPE
"\")\n"
192 "\t-S, --source tocfile write a .c source file with the table of\n"
194 "\t-e, --entrypoint name override the c entrypoint name\n"
195 "\t (default: \"<name>_<type>\")\n");
197 return argc
<0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
200 sourceTOC
=options
[8].doesOccur
;
202 verbose
= options
[2].doesOccur
;
/* argv[1] is the maxsize argument; uprv_strtoul presumably mirrors strtoul(), where base 0 auto-detects the radix - confirm */
204 maxSize
=(uint32_t)uprv_strtoul(argv
[1], NULL
, 0);
/* the list of items comes from standard input or from the file named by argv[2] */
207 in
=T_FileStream_stdin();
209 in
=T_FileStream_open(argv
[2], "r");
211 fprintf(stderr
, "gencmn: unable to open input file %s\n", argv
[2]);
212 exit(U_FILE_ACCESS_ERROR
);
218 printf("generating %s_%s.c (table of contents source file)\n", options
[6].value
, options
[7].value
);
220 printf("generating %s.%s (common data file with table of contents)\n", options
[6].value
, options
[7].value
);
224 /* read the list of files and get their lengths */
225 while(T_FileStream_readLine(in
, line
, sizeof(line
))!=NULL
) {
226 /* remove trailing newline characters */
229 if(*s
=='\r' || *s
=='\n') {
236 /* check for comment */
243 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
/* normalize every alternate separator in the line to U_FILE_SEP_CHAR */
246 while((t
= uprv_strchr(line
,U_FILE_ALT_SEP_CHAR
))) {
247 *t
= U_FILE_SEP_CHAR
;
251 addFile(getLongPathname(line
), sourceTOC
, verbose
);
/* only close the stream if it is not standard input */
254 if(in
!=T_FileStream_stdin()) {
255 T_FileStream_close(in
);
259 fprintf(stderr
, "gencmn: no files listed in %s\n", argc
==2 ? "<stdin>" : argv
[2]);
263 /* sort the files by basename */
264 qsort(files
, fileCount
, sizeof(File
), compareFiles
);
269 /* determine the offsets of all basenames and files in this common one */
/* the names start right after the 4-byte count and one 8-byte entry per file */
270 basenameOffset
=4+8*fileCount
;
/* the first file starts at the end of the names block, rounded up to a multiple of 16 */
271 fileOffset
=(basenameOffset
+(basenameTotal
+15))&~0xf;
272 for(i
=0; i
<fileCount
; ++i
) {
273 files
[i
].fileOffset
=fileOffset
;
/* each file occupies its size rounded up to a multiple of 16 */
274 fileOffset
+=(files
[i
].fileSize
+15)&~0xf;
275 files
[i
].basenameOffset
=basenameOffset
;
276 basenameOffset
+=files
[i
].basenameLength
;
279 /* create the output file */
280 out
=udata_create(options
[4].value
, options
[7].value
, options
[6].value
,
/* the comment stored in the output is the ICU copyright string when -c is given, else the -C value */
282 options
[3].doesOccur
? U_COPYRIGHT_STRING
: options
[5].value
,
284 if(U_FAILURE(errorCode
)) {
285 fprintf(stderr
, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
286 options
[4].value
, options
[6].value
, options
[7].value
,
287 u_errorName(errorCode
));
291 /* write the table of contents */
292 udata_write32(out
, fileCount
);
293 for(i
=0; i
<fileCount
; ++i
) {
294 udata_write32(out
, files
[i
].basenameOffset
);
295 udata_write32(out
, files
[i
].fileOffset
);
298 /* write the basenames */
299 for(i
=0; i
<fileCount
; ++i
) {
300 udata_writeString(out
, files
[i
].basename
, files
[i
].basenameLength
);
/* bytes accounted for so far: count + entries + names; used for the 16-byte padding below */
302 length
=4+8*fileCount
+basenameTotal
;
305 for(i
=0; i
<fileCount
; ++i
) {
306 /* pad to 16-align the next file */
309 udata_writePadding(out
, 16-length
);
313 printf("adding %s (%ld byte%s)\n", files
[i
].pathname
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
316 /* copy the next file */
317 file
=T_FileStream_open(files
[i
].pathname
, "rb");
319 fprintf(stderr
, "gencmn: unable to open listed file %s\n", files
[i
].pathname
);
320 exit(U_FILE_ACCESS_ERROR
);
/* the file is copied through the static buffer, at most sizeof(buffer) bytes per read */
323 length
=T_FileStream_read(file
, buffer
, sizeof(buffer
));
328 udata_writeBlock(out
, buffer
, length
);
330 T_FileStream_close(file
);
331 length
=files
[i
].fileSize
;
/* the byte count actually read must match the size recorded by addFile() */
333 if (nread
!= files
[i
].fileSize
) {
334 fprintf(stderr
, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files
[i
].pathname
, (long)nread
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
335 exit(U_FILE_ACCESS_ERROR
);
339 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
342 udata_writePadding(out
, 16-length
);
346 udata_finish(out
, &errorCode
);
347 if(U_FAILURE(errorCode
)) {
348 fprintf(stderr
, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode
));
352 /* write a .c source file with the table of contents */
356 /* create the output filename */
/* NOTE(review): filename is filled with uprv_strcpy calls; its declaration and size are not visible here - confirm the buffer is large enough */
358 uprv_strcpy(filename
, options
[4].value
);
359 s
=filename
+uprv_strlen(filename
);
/* append a separator only if the directory is non-empty and does not already end with one */
360 if(s
>filename
&& *(s
-1)!=U_FILE_SEP_CHAR
) {
361 *s
++=U_FILE_SEP_CHAR
;
363 uprv_strcpy(s
, options
[6].value
);
364 if(*(options
[7].value
)!=0) {
367 uprv_strcpy(s
, options
[7].value
);
370 uprv_strcpy(s
, ".c");
372 /* open the output file */
373 out
=T_FileStream_open(filename
, "w");
375 fprintf(stderr
, "gencmn: unable to open .c output file %s\n", filename
);
376 exit(U_FILE_ACCESS_ERROR
);
379 /* If an entrypoint is specified, use it. */
380 if(options
[9].doesOccur
) {
381 entrypointName
= options
[9].value
;
383 entrypointName
= options
[6].value
;
387 /* write the source file */
390 " * ICU common data table of contents for %s.%s ,\n"
391 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
393 "#include \"unicode/utypes.h\"\n"
394 "#include \"unicode/udata.h\"\n"
396 "/* external symbol declarations for data */\n",
397 options
[6].value
, options
[7].value
);
398 T_FileStream_writeLine(out
, buffer
);
/* NOTE(review): sprintf into the 4096-byte static buffer is unbounded; the name lengths are not checked here */
400 sprintf(buffer
, "extern const char\n %s%s[]", symPrefix
?symPrefix
:"", files
[0].pathname
);
401 T_FileStream_writeLine(out
, buffer
);
/* entry 0 was written above without a leading comma; the remaining entries each start with one */
402 for(i
=1; i
<fileCount
; ++i
) {
403 sprintf(buffer
, ",\n %s%s[]", symPrefix
?symPrefix
:"", files
[i
].pathname
);
404 T_FileStream_writeLine(out
, buffer
);
406 T_FileStream_writeLine(out
, ";\n\n");
410 "U_EXPORT struct {\n"
411 " uint16_t headerSize;\n"
412 " uint8_t magic1, magic2;\n"
414 " char padding[%lu];\n"
415 " uint32_t count, reserved;\n"
417 " const char *name;\n"
418 " const void *data;\n"
420 "} U_EXPORT2 %s_dat = {\n"
421 " 32, 0xda, 0x27, {\n"
424 " {0x54, 0x6f, 0x43, 0x50},\n"
428 " \"\", %lu, 0, {\n",
/* NOTE(review): the cast binds only to the literal 32, so the expression takes the wider of unsigned long and size_t; where size_t is wider this no longer matches %lu - confirm */
429 (unsigned long)32-4-sizeof(UDataInfo
),
430 (unsigned long)fileCount
,
432 (unsigned long)sizeof(UDataInfo
),
436 (unsigned long)fileCount
438 T_FileStream_writeLine(out
, buffer
);
/* one { "name", symbol } initializer per item, written with the same comma scheme as the declarations above */
440 sprintf(buffer
, " { \"%s\", %s%s }", files
[0].basename
, symPrefix
?symPrefix
:"", files
[0].pathname
);
441 T_FileStream_writeLine(out
, buffer
);
442 for(i
=1; i
<fileCount
; ++i
) {
443 sprintf(buffer
, ",\n { \"%s\", %s%s }", files
[i
].basename
, symPrefix
?symPrefix
:"", files
[i
].pathname
);
444 T_FileStream_writeLine(out
, buffer
);
447 T_FileStream_writeLine(out
, "\n }\n};\n");
448 T_FileStream_close(out
);
450 uprv_free(symPrefix
);
/*
 * Register one listed item in files[].
 *
 * filename:  item path as read from the list file; must be relative.
 * sourceTOC: set from the "source" option in main().
 * verbose:   set from the "verbose" option in main().
 *
 * Visible behaviour: exits if files[] is already full or if filename is an
 * absolute path; builds the on-disk path with pathToFullPath(); stores the
 * item name as package name + U_TREE_ENTRY_SEP_STRING + filename in the
 * string store; opens the file to obtain its size; exits if the size cannot
 * be read or is <=20; reports files larger than a non-zero maxSize as ignored.
 * A later section stores the item name again and derives an entry point name
 * that is kept in the pathname field.
 * NOTE(review): the two sections are presumably the branches of a test on
 * sourceTOC; the branching lines, local declarations, returns and closing
 * braces are not visible in this view - confirm against the full file.
 */
457 addFile(const char *filename
, UBool sourceTOC
, UBool verbose
) {
460 char *fullPath
= NULL
;
462 if(fileCount
==MAX_FILE_COUNT
) {
463 fprintf(stderr
, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT
);
464 exit(U_BUFFER_OVERFLOW_ERROR
);
470 if(uprv_pathIsAbsolute(filename
)) {
471 fprintf(stderr
, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR
, filename
);
472 exit(U_ILLEGAL_ARGUMENT_ERROR
);
474 fullPath
= pathToFullPath(filename
);
476 /* store the pathname */
/* length = filename + 1 + package name + 1; presumably one byte for the separator and one for the terminating NUL - confirm */
477 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(options
[6].value
) + 1);
478 s
=allocString(length
);
479 uprv_strcpy(s
, options
[6].value
);
480 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
481 uprv_strcat(s
, filename
);
483 /* get the basename */
485 files
[fileCount
].basename
=s
;
486 files
[fileCount
].basenameLength
=length
;
488 files
[fileCount
].pathname
=fullPath
;
/* NOTE(review): this total is increased before the maxSize check below; if the ignored-file path returns without undoing it, the names-block size used in main() would be too large - confirm */
490 basenameTotal
+=length
;
492 /* try to open the file */
493 file
=T_FileStream_open(fullPath
, "rb");
495 fprintf(stderr
, "gencmn: unable to open listed file %s\n", fullPath
);
496 exit(U_FILE_ACCESS_ERROR
);
499 /* get the file length */
500 length
=T_FileStream_size(file
);
/* NOTE(review): the <=20 limit presumably rejects files too small to hold an ICU data header - confirm */
501 if(T_FileStream_error(file
) || length
<=20) {
502 fprintf(stderr
, "gencmn: unable to get length of listed file %s\n", fullPath
);
503 exit(U_FILE_ACCESS_ERROR
);
506 T_FileStream_close(file
);
508 /* do not add files that are longer than maxSize */
/* a maxSize of 0 disables the limit */
509 if(maxSize
&& length
>maxSize
) {
511 printf("%s ignored (size %ld > %ld)\n", fullPath
, (long)length
, (long)maxSize
);
515 files
[fileCount
].fileSize
=length
;
519 /* get and store the basename */
520 /* need to include the package name */
521 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(options
[6].value
) + 1);
522 s
=allocString(length
);
523 uprv_strcpy(s
, options
[6].value
);
524 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
525 uprv_strcat(s
, filename
);
527 files
[fileCount
].basename
=s
;
530 /* turn the basename into an entry point name and store in the pathname field */
/* the entry point name gets its own region of the string store, the same length as the item name */
531 t
=files
[fileCount
].pathname
=allocString(length
);
/* NOTE(review): '.', '-' and '/' are treated specially here, presumably replaced by a character valid in a C identifier; the replacement line is not visible */
533 if(*s
=='.' || *s
=='-' || *s
=='/') {
/*
 * Reserve length bytes from the static stringStore.
 *
 * Prints "out of memory" and exits with U_MEMORY_ALLOCATION_ERROR when the
 * request would pass STRING_STORE_SIZE. The reserved region starts at
 * stringStore+stringTop.
 * NOTE(review): the declaration of p, the update of stringTop and the return
 * statement are not visible in this view.
 */
547 allocString(uint32_t length
) {
/* offset just past the requested region */
548 uint32_t top
=stringTop
+length
;
551 if(top
>STRING_STORE_SIZE
) {
552 fprintf(stderr
, "gencmn: out of memory\n");
553 exit(U_MEMORY_ALLOCATION_ERROR
);
555 p
=stringStore
+stringTop
;
/*
 * Build the on-disk path for a listed item in a buffer obtained from
 * uprv_malloc(): when the source directory option (options[10]) was given,
 * the path is prefixed with that directory and U_FILE_SEP_STRING; then the
 * separators of the appended part are converted to U_FILE_SEP_CHAR.
 *
 * path: item path as listed.
 * NOTE(review): the local declarations, the branch taken when the option was
 * not given, and the return statement are not visible in this view. The
 * buffer is presumably returned to the caller, which then owns it - confirm.
 */
561 pathToFullPath(const char *path
) {
/* room for the path and its NUL, plus one separator and the source directory */
567 length
= (uint32_t)(uprv_strlen(path
) + 1);
568 newLength
= (length
+ 1 + (int32_t)uprv_strlen(options
[10].value
));
/* NOTE(review): the allocation result is used by the next visible statement without a NULL check */
569 fullPath
= uprv_malloc(newLength
);
570 if(options
[10].doesOccur
) {
571 uprv_strcpy(fullPath
, options
[10].value
);
572 uprv_strcat(fullPath
, U_FILE_SEP_STRING
);
/* n marks where path begins, so the loops below leave the directory prefix untouched */
576 n
= (int32_t)uprv_strlen(fullPath
);
577 uprv_strcat(fullPath
, path
);
579 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
580 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
581 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
582 for(;fullPath
[n
];n
++) {
583 if(fullPath
[n
] == U_FILE_ALT_SEP_CHAR
) {
584 fullPath
[n
] = U_FILE_SEP_CHAR
;
589 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
590 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
591 for(;fullPath
[n
];n
++) {
592 if(fullPath
[n
] == U_TREE_ENTRY_SEP_CHAR
) {
593 fullPath
[n
] = U_FILE_SEP_CHAR
;
/*
 * Comparator passed to qsort() in main(): file1 and file2 point to File
 * records, and the result is that of uprv_strcmp() on their basename fields
 * (uprv_strcmp presumably behaves like strcmp - confirm).
 */
601 compareFiles(const void *file1
, const void *file2
) {
602 /* sort by basename */
603 return uprv_strcmp(((File
*)file1
)->basename
, ((File
*)file2
)->basename
);
/*
 * Rewrite s in place so that every U_FILE_SEP_CHAR, and every
 * U_FILE_ALT_SEP_CHAR where that is a distinct character, becomes
 * U_TREE_ENTRY_SEP_CHAR. The replacement loops are compiled only for
 * platforms on which the respective separator differs from the tree separator.
 * NOTE(review): the declaration of t and the closing lines are not visible
 * in this view.
 */
607 fixDirToTreePath(char *s
)
609 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
612 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
/* the assignment is the loop condition: the loop ends when no further separator is found (assuming uprv_strchr behaves like strchr) */
613 for(t
=s
;t
=uprv_strchr(t
,U_FILE_SEP_CHAR
);) {
614 *t
= U_TREE_ENTRY_SEP_CHAR
;
617 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
/* same scan for the alternate separator */
618 for(t
=s
;t
=uprv_strchr(t
,U_FILE_ALT_SEP_CHAR
);) {
619 *t
= U_TREE_ENTRY_SEP_CHAR
;
624 * Hey, Emacs, please set the following:
627 * indent-tabs-mode: nil