/******************************************************************************
 *   Copyright (C) 2008-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */
6 #include "unicode/utypes.h"
10 #include "unicode/utypes.h"
11 #include "unicode/putil.h"
16 #include "unicode/uclean.h"
19 #include "pkg_gencmn.h"
/* Capacity, in bytes, of the static stringStore pool that allocString() carves strings from. */
#define STRING_STORE_SIZE 200000

/* Package name assigned to `name` in createCommonDataFile(). */
#define COMMON_DATA_NAME U_ICUDATA_NAME
/* Package type string. NOTE(review): presumably the default for a NULL `type` - confirm against caller. */
#define DATA_TYPE "dat"
/* ICU package data file format (.dat files) ------------------------------- ***

Description of the data format after the usual ICU data file header.

A .dat package file contains a simple Table of Contents of item names,
followed by the items themselves:

uint32_t count; - number of items
UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
    uint32_t nameOffset; - offset of the item name
    uint32_t dataOffset; - offset of the item data
both are byte offsets from the beginning of the data

All item names are stored as char * strings in one block between the ToC table
and the data items.

The data items are stored following the item names block.
Each data item is 16-aligned.
The data items are stored in the sorted order of their names.

Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of all previous items is the difference
between its offset and the next one.

----------------------------------------------------------------------------- */
62 /* UDataInfo cf. udata.h */
63 static const UDataInfo dataInfo
={
72 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
73 {1, 0, 0, 0}, /* formatVersion */
74 {3, 0, 0, 0} /* dataVersion */
77 static uint32_t maxSize
;
79 static char stringStore
[STRING_STORE_SIZE
];
80 static uint32_t stringTop
=0, basenameTotal
=0;
83 char *pathname
, *basename
;
84 uint32_t basenameLength
, basenameOffset
, fileSize
, fileOffset
;
87 #define CHUNK_FILE_COUNT 256
88 static File
*files
= NULL
;
89 static uint32_t fileCount
=0;
90 static uint32_t fileMax
= 0;
93 static char *symPrefix
= NULL
;
95 #define LINE_BUFFER_SIZE 512
96 /* prototypes --------------------------------------------------------------- */
99 addFile(const char *filename
, const char *name
, const char *source
, UBool sourceTOC
, UBool verbose
);
102 allocString(uint32_t length
);
105 compareFiles(const void *file1
, const void *file2
);
108 pathToFullPath(const char *path
, const char *source
);
110 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
112 fixDirToTreePath(char *s
);
113 /* -------------------------------------------------------------------------- */
115 U_CAPI
void U_EXPORT2
116 createCommonDataFile(const char *destDir
, const char *name
, const char *entrypointName
, const char *type
, const char *source
, const char *copyRight
,
117 const char *dataFile
, uint32_t max_size
, UBool sourceTOC
, UBool verbose
, char *gencmnFileName
) {
118 static char buffer
[4096];
122 UErrorCode errorCode
=U_ZERO_ERROR
;
123 uint32_t i
, fileOffset
, basenameOffset
, length
, nread
;
124 FileStream
*in
, *file
;
126 line
= (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE
);
128 fprintf(stderr
, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE
);
129 exit(U_MEMORY_ALLOCATION_ERROR
);
136 if (destDir
== NULL
) {
137 destDir
= u_getDataDirectory();
140 name
= COMMON_DATA_NAME
;
145 if (source
== NULL
) {
149 if (dataFile
== NULL
) {
150 in
= T_FileStream_stdin();
152 in
= T_FileStream_open(dataFile
, "r");
154 fprintf(stderr
, "gencmn: unable to open input file %s\n", dataFile
);
155 exit(U_FILE_ACCESS_ERROR
);
161 printf("generating %s_%s.c (table of contents source file)\n", name
, type
);
163 printf("generating %s.%s (common data file with table of contents)\n", name
, type
);
167 /* read the list of files and get their lengths */
168 while((s
!= NULL
&& *s
!= 0) || (s
=T_FileStream_readLine(in
, (line
=linePtr
),
169 LINE_BUFFER_SIZE
))!=NULL
) {
170 /* remove trailing newline characters and parse space separated items */
171 if (s
!= NULL
&& *s
!= 0) {
181 } else if(*s
=='\r' || *s
=='\n') {
188 /* check for comment */
195 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
198 while((t
= uprv_strchr(line
,U_FILE_ALT_SEP_CHAR
))) {
199 *t
= U_FILE_SEP_CHAR
;
203 addFile(getLongPathname(line
), name
, source
, sourceTOC
, verbose
);
210 if(in
!=T_FileStream_stdin()) {
211 T_FileStream_close(in
);
215 fprintf(stderr
, "gencmn: no files listed in %s\n", dataFile
== NULL
? "<stdin>" : dataFile
);
219 /* sort the files by basename */
220 qsort(files
, fileCount
, sizeof(File
), compareFiles
);
225 /* determine the offsets of all basenames and files in this common one */
226 basenameOffset
=4+8*fileCount
;
227 fileOffset
=(basenameOffset
+(basenameTotal
+15))&~0xf;
228 for(i
=0; i
<fileCount
; ++i
) {
229 files
[i
].fileOffset
=fileOffset
;
230 fileOffset
+=(files
[i
].fileSize
+15)&~0xf;
231 files
[i
].basenameOffset
=basenameOffset
;
232 basenameOffset
+=files
[i
].basenameLength
;
235 /* create the output file */
236 out
=udata_create(destDir
, type
, name
,
238 copyRight
== NULL
? U_COPYRIGHT_STRING
: copyRight
,
240 if(U_FAILURE(errorCode
)) {
241 fprintf(stderr
, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
243 u_errorName(errorCode
));
247 /* write the table of contents */
248 udata_write32(out
, fileCount
);
249 for(i
=0; i
<fileCount
; ++i
) {
250 udata_write32(out
, files
[i
].basenameOffset
);
251 udata_write32(out
, files
[i
].fileOffset
);
254 /* write the basenames */
255 for(i
=0; i
<fileCount
; ++i
) {
256 udata_writeString(out
, files
[i
].basename
, files
[i
].basenameLength
);
258 length
=4+8*fileCount
+basenameTotal
;
261 for(i
=0; i
<fileCount
; ++i
) {
262 /* pad to 16-align the next file */
265 udata_writePadding(out
, 16-length
);
269 printf("adding %s (%ld byte%s)\n", files
[i
].pathname
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
272 /* copy the next file */
273 file
=T_FileStream_open(files
[i
].pathname
, "rb");
275 fprintf(stderr
, "gencmn: unable to open listed file %s\n", files
[i
].pathname
);
276 exit(U_FILE_ACCESS_ERROR
);
279 length
=T_FileStream_read(file
, buffer
, sizeof(buffer
));
284 udata_writeBlock(out
, buffer
, length
);
286 T_FileStream_close(file
);
287 length
=files
[i
].fileSize
;
289 if (nread
!= files
[i
].fileSize
) {
290 fprintf(stderr
, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files
[i
].pathname
, (long)nread
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
291 exit(U_FILE_ACCESS_ERROR
);
295 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
298 udata_writePadding(out
, 16-length
);
302 udata_finish(out
, &errorCode
);
303 if(U_FAILURE(errorCode
)) {
304 fprintf(stderr
, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode
));
308 /* write a .c source file with the table of contents */
312 /* create the output filename */
314 uprv_strcpy(filename
, destDir
);
315 s
=filename
+uprv_strlen(filename
);
316 if(s
>filename
&& *(s
-1)!=U_FILE_SEP_CHAR
) {
317 *s
++=U_FILE_SEP_CHAR
;
319 uprv_strcpy(s
, name
);
323 uprv_strcpy(s
, type
);
326 uprv_strcpy(s
, ".c");
328 /* open the output file */
329 out
=T_FileStream_open(filename
, "w");
330 if (gencmnFileName
!= NULL
) {
331 uprv_strcpy(gencmnFileName
, filename
);
334 fprintf(stderr
, "gencmn: unable to open .c output file %s\n", filename
);
335 exit(U_FILE_ACCESS_ERROR
);
338 /* write the source file */
341 " * ICU common data table of contents for %s.%s\n"
342 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
344 "#include \"unicode/utypes.h\"\n"
345 "#include \"unicode/udata.h\"\n"
347 "/* external symbol declarations for data (%d files) */\n",
348 name
, type
, fileCount
);
349 T_FileStream_writeLine(out
, buffer
);
351 sprintf(buffer
, "extern const char\n %s%s[]", symPrefix
?symPrefix
:"", files
[0].pathname
);
352 T_FileStream_writeLine(out
, buffer
);
353 for(i
=1; i
<fileCount
; ++i
) {
354 sprintf(buffer
, ",\n %s%s[]", symPrefix
?symPrefix
:"", files
[i
].pathname
);
355 T_FileStream_writeLine(out
, buffer
);
357 T_FileStream_writeLine(out
, ";\n\n");
361 "U_EXPORT struct {\n"
362 " uint16_t headerSize;\n"
363 " uint8_t magic1, magic2;\n"
365 " char padding[%lu];\n"
366 " uint32_t count, reserved;\n"
368 " const char *name;\n"
369 " const void *data;\n"
371 "} U_EXPORT2 %s_dat = {\n"
372 " 32, 0xda, 0x27, {\n"
375 " {0x54, 0x6f, 0x43, 0x50},\n"
379 " \"\", %lu, 0, {\n",
380 (unsigned long)32-4-sizeof(UDataInfo
),
381 (unsigned long)fileCount
,
383 (unsigned long)sizeof(UDataInfo
),
387 (unsigned long)fileCount
389 T_FileStream_writeLine(out
, buffer
);
391 sprintf(buffer
, " { \"%s\", %s%s }", files
[0].basename
, symPrefix
?symPrefix
:"", files
[0].pathname
);
392 T_FileStream_writeLine(out
, buffer
);
393 for(i
=1; i
<fileCount
; ++i
) {
394 sprintf(buffer
, ",\n { \"%s\", %s%s }", files
[i
].basename
, symPrefix
?symPrefix
:"", files
[i
].pathname
);
395 T_FileStream_writeLine(out
, buffer
);
398 T_FileStream_writeLine(out
, "\n }\n};\n");
399 T_FileStream_close(out
);
401 uprv_free(symPrefix
);
406 addFile(const char *filename
, const char *name
, const char *source
, UBool sourceTOC
, UBool verbose
) {
409 char *fullPath
= NULL
;
411 if(fileCount
==fileMax
) {
412 fileMax
+= CHUNK_FILE_COUNT
;
413 files
= uprv_realloc(files
, fileMax
*sizeof(files
[0])); /* note: never freed. */
415 fprintf(stderr
, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax
*sizeof(files
[0])), fileCount
);
416 exit(U_MEMORY_ALLOCATION_ERROR
);
423 if(uprv_pathIsAbsolute(filename
)) {
424 fprintf(stderr
, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR
, filename
);
425 exit(U_ILLEGAL_ARGUMENT_ERROR
);
427 fullPath
= pathToFullPath(filename
, source
);
428 /* store the pathname */
429 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(name
) + 1);
430 s
=allocString(length
);
431 uprv_strcpy(s
, name
);
432 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
433 uprv_strcat(s
, filename
);
435 /* get the basename */
437 files
[fileCount
].basename
=s
;
438 files
[fileCount
].basenameLength
=length
;
440 files
[fileCount
].pathname
=fullPath
;
442 basenameTotal
+=length
;
444 /* try to open the file */
445 file
=T_FileStream_open(fullPath
, "rb");
447 fprintf(stderr
, "gencmn: unable to open listed file %s\n", fullPath
);
448 exit(U_FILE_ACCESS_ERROR
);
451 /* get the file length */
452 length
=T_FileStream_size(file
);
453 if(T_FileStream_error(file
) || length
<=20) {
454 fprintf(stderr
, "gencmn: unable to get length of listed file %s\n", fullPath
);
455 exit(U_FILE_ACCESS_ERROR
);
458 T_FileStream_close(file
);
460 /* do not add files that are longer than maxSize */
461 if(maxSize
&& length
>maxSize
) {
463 printf("%s ignored (size %ld > %ld)\n", fullPath
, (long)length
, (long)maxSize
);
467 files
[fileCount
].fileSize
=length
;
470 /* get and store the basename */
471 /* need to include the package name */
472 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(name
) + 1);
473 s
=allocString(length
);
474 uprv_strcpy(s
, name
);
475 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
476 uprv_strcat(s
, filename
);
478 files
[fileCount
].basename
=s
;
479 /* turn the basename into an entry point name and store in the pathname field */
480 t
=files
[fileCount
].pathname
=allocString(length
);
482 if(*s
=='.' || *s
=='-' || *s
=='/') {
496 allocString(uint32_t length
) {
497 uint32_t top
=stringTop
+length
;
500 if(top
>STRING_STORE_SIZE
) {
501 fprintf(stderr
, "gencmn: out of memory\n");
502 exit(U_MEMORY_ALLOCATION_ERROR
);
504 p
=stringStore
+stringTop
;
510 pathToFullPath(const char *path
, const char *source
) {
516 length
= (uint32_t)(uprv_strlen(path
) + 1);
517 newLength
= (length
+ 1 + (int32_t)uprv_strlen(source
));
518 fullPath
= uprv_malloc(newLength
);
520 uprv_strcpy(fullPath
, source
);
521 uprv_strcat(fullPath
, U_FILE_SEP_STRING
);
525 n
= (int32_t)uprv_strlen(fullPath
);
526 fullPath
[n
] = 0; /* Suppress compiler warning for unused variable n */
527 /* when conditional code below is not compiled. */
528 uprv_strcat(fullPath
, path
);
530 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
531 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
532 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
533 for(;fullPath
[n
];n
++) {
534 if(fullPath
[n
] == U_FILE_ALT_SEP_CHAR
) {
535 fullPath
[n
] = U_FILE_SEP_CHAR
;
540 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
541 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
542 for(;fullPath
[n
];n
++) {
543 if(fullPath
[n
] == U_TREE_ENTRY_SEP_CHAR
) {
544 fullPath
[n
] = U_FILE_SEP_CHAR
;
552 compareFiles(const void *file1
, const void *file2
) {
553 /* sort by basename */
554 return uprv_strcmp(((File
*)file1
)->basename
, ((File
*)file2
)->basename
);
/*
 * Map file system separators to the tree separator, in place.
 *
 * Every U_FILE_SEP_CHAR, and every U_FILE_ALT_SEP_CHAR, in s is overwritten
 * with U_TREE_ENTRY_SEP_CHAR. Each replacement is compiled in only when the
 * respective characters differ; if none differ the function does nothing.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *t;
#else
    (void)s; /* nothing to map on this platform */
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* each hit is overwritten, so searching again from t finds the next one */
    for(t=s; (t=uprv_strchr(t,U_FILE_SEP_CHAR))!=NULL;) {
        *t = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    for(t=s; (t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR))!=NULL;) {
        *t = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
}