1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4 * Copyright (C) 2008-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
8 #include "unicode/utypes.h"
12 #include "unicode/utypes.h"
13 #include "unicode/putil.h"
18 #include "unicode/uclean.h"
21 #include "pkg_gencmn.h"
23 #define STRING_STORE_SIZE 200000
25 #define COMMON_DATA_NAME U_ICUDATA_NAME
26 #define DATA_TYPE "dat"
28 /* ICU package data file format (.dat files) ------------------------------- ***
30 Description of the data format after the usual ICU data file header
35 A .dat package file contains a simple Table of Contents of item names,
36 followed by the items themselves:
40 uint32_t count; - number of items
41 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
42 uint32_t nameOffset; - offset of the item name
43 uint32_t dataOffset; - offset of the item data
44 both are byte offsets from the beginning of the data
48 All item names are stored as char * strings in one block between the ToC table
53 The data items are stored following the item names block.
54 Each data item is 16-aligned.
55 The data items are stored in the sorted order of their names.
57 Therefore, the top of the name strings block is the offset of the first item,
58 the length of the last item is the difference between its offset and
59 the .dat file length, and the length of all previous items is the difference
60 between its offset and the next one.
62 ----------------------------------------------------------------------------- */
64 /* UDataInfo cf. udata.h */
/*
 * UDataInfo record of this tool. The members visible in this listing give
 * dataFormat "CmnD", formatVersion 1.0.0.0 and dataVersion 3.0.0.0.
 */
65 static const UDataInfo dataInfo
={
74 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
75 {1, 0, 0, 0}, /* formatVersion */
76 {3, 0, 0, 0} /* dataVersion */
/* Size limit consulted by addFile(): when non-zero, larger files are reported as ignored. */
79 static uint32_t maxSize
;
/* Fixed-size character arena from which allocString() hands out strings. */
81 static char stringStore
[STRING_STORE_SIZE
];
/*
 * stringTop: offset into stringStore at which allocString() places the next string.
 * basenameTotal: sum of the basenameLength values that addFile() has recorded.
 */
82 static uint32_t stringTop
=0, basenameTotal
=0;
/*
 * Fields of one File record (accessed as files[i].<field> below).
 * NOTE(review): the enclosing struct declaration is not visible in this listing.
 */
85 char *pathname
, *basename
;
86 uint32_t basenameLength
, basenameOffset
, fileSize
, fileOffset
;
/* Number of File entries by which addFile() grows files[] each time it is full. */
89 #define CHUNK_FILE_COUNT 256
/* Growable array of File records: fileCount entries in use, fileMax entries allocated. */
90 static File
*files
= NULL
;
91 static uint32_t fileCount
=0;
92 static uint32_t fileMax
= 0;
/* Optional prefix put in front of symbol names in the generated .c file; NULL means no prefix. */
95 static char *symPrefix
= NULL
;
97 #define LINE_BUFFER_SIZE 512
98 /* prototypes --------------------------------------------------------------- */
/* Record one listed file in files[]. */
101 addFile(const char *filename
, const char *name
, const char *source
, UBool sourceTOC
, UBool verbose
);
/* Reserve length bytes in stringStore; exits the program when the store is exhausted. */
104 allocString(uint32_t length
);
/* qsort() comparator that orders two File records by basename. */
108 compareFiles(const void *file1
, const void *file2
);
/* Build a uprv_malloc()-allocated path consisting of source, a file separator and path. */
112 pathToFullPath(const char *path
, const char *source
);
114 /* Map non-tree separators (such as '\') to the tree separator ('/') in place. */
116 fixDirToTreePath(char *s
);
117 /* -------------------------------------------------------------------------- */
/*
 * Create an ICU common data package from a list of item files.
 *
 * The function reads item names line by line (from dataFile, or from stdin
 * when dataFile is NULL), records each one through addFile(), sorts the
 * records by basename and computes the table-of-contents offsets. The visible
 * code then contains two output paths: a binary package written through
 * udata_create()/udata_write*()/udata_finish(), and a ".c" source file with a
 * table of contents written through T_FileStream_writeLine().
 *
 * NOTE(review): this listing omits a number of lines of the function (for
 * example the conditions that guard the error exits and the test that selects
 * between the two output paths), so the exact branch structure must be
 * confirmed against the complete source.
 *
 * destDir         output directory; NULL is replaced by u_getDataDirectory().
 * name            package name; used in output file names and passed to addFile().
 * entrypointName  not referenced in the visible lines.
 * type            package type/suffix used for the output file name.
 * source          source directory passed on to addFile().
 * copyRight       copyright string for udata_create(); NULL selects U_COPYRIGHT_STRING.
 * dataFile        name of the list file; NULL selects stdin.
 * max_size        not referenced in the visible lines (presumably stored in maxSize - TODO confirm).
 * sourceTOC       passed on to addFile().
 * verbose         passed on to addFile().
 * gencmnFileName  when non-NULL, receives a copy of the generated .c file name.
 */
119 U_CAPI
void U_EXPORT2
120 createCommonDataFile(const char *destDir
, const char *name
, const char *entrypointName
, const char *type
, const char *source
, const char *copyRight
,
121 const char *dataFile
, uint32_t max_size
, UBool sourceTOC
, UBool verbose
, char *gencmnFileName
) {
/* Scratch buffer shared by the file-copy loop and the sprintf() calls below. */
122 static char buffer
[4096];
126 UErrorCode errorCode
=U_ZERO_ERROR
;
127 uint32_t i
, fileOffset
, basenameOffset
, length
, nread
;
128 FileStream
*in
, *file
;
/* Allocate the LINE_BUFFER_SIZE-byte buffer used for reading list lines. */
130 line
= (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE
);
132 fprintf(stderr
, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE
);
133 exit(U_MEMORY_ALLOCATION_ERROR
);
/* A NULL destDir falls back to the ICU data directory. */
140 if (destDir
== NULL
) {
141 destDir
= u_getDataDirectory();
144 name
= COMMON_DATA_NAME
;
149 if (source
== NULL
) {
/* The item list is read from stdin when dataFile is NULL; otherwise dataFile is opened for reading. */
153 if (dataFile
== NULL
) {
154 in
= T_FileStream_stdin();
156 in
= T_FileStream_open(dataFile
, "r");
158 fprintf(stderr
, "gencmn: unable to open input file %s\n", dataFile
);
159 exit(U_FILE_ACCESS_ERROR
);
165 printf("generating %s_%s.c (table of contents source file)\n", name
, type
);
167 printf("generating %s.%s (common data file with table of contents)\n", name
, type
);
171 /* read the list of files and get their lengths */
172 while((s
!= NULL
&& *s
!= 0) || (s
=T_FileStream_readLine(in
, (line
=linePtr
),
173 LINE_BUFFER_SIZE
))!=NULL
) {
174 /* remove trailing newline characters and parse space separated items */
175 if (s
!= NULL
&& *s
!= 0) {
185 } else if(*s
=='\r' || *s
=='\n') {
192 /* check for comment */
199 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
202 while((t
= uprv_strchr(line
,U_FILE_ALT_SEP_CHAR
))) {
203 *t
= U_FILE_SEP_CHAR
;
207 addFile(getLongPathname(line
), name
, source
, sourceTOC
, verbose
);
/* Only a stream opened here is closed; stdin is left open. */
212 if(in
!=T_FileStream_stdin()) {
213 T_FileStream_close(in
);
217 fprintf(stderr
, "gencmn: no files listed in %s\n", dataFile
== NULL
? "<stdin>" : dataFile
);
221 /* sort the files by basename */
222 qsort(files
, fileCount
, sizeof(File
), compareFiles
);
227 /* determine the offsets of all basenames and files in this common one */
/*
 * The names start after the 4-byte count and the 8-byte ToC entry per item;
 * the first item starts at the next 16-byte boundary after all names, and
 * every item size is rounded up to a multiple of 16.
 */
228 basenameOffset
=4+8*fileCount
;
229 fileOffset
=(basenameOffset
+(basenameTotal
+15))&~0xf;
230 for(i
=0; i
<fileCount
; ++i
) {
231 files
[i
].fileOffset
=fileOffset
;
232 fileOffset
+=(files
[i
].fileSize
+15)&~0xf;
233 files
[i
].basenameOffset
=basenameOffset
;
234 basenameOffset
+=files
[i
].basenameLength
;
237 /* create the output file */
238 out
=udata_create(destDir
, type
, name
,
240 copyRight
== NULL
? U_COPYRIGHT_STRING
: copyRight
,
242 if(U_FAILURE(errorCode
)) {
243 fprintf(stderr
, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
245 u_errorName(errorCode
));
249 /* write the table of contents */
250 udata_write32(out
, fileCount
);
251 for(i
=0; i
<fileCount
; ++i
) {
252 udata_write32(out
, files
[i
].basenameOffset
);
253 udata_write32(out
, files
[i
].fileOffset
);
256 /* write the basenames */
257 for(i
=0; i
<fileCount
; ++i
) {
258 udata_writeString(out
, files
[i
].basename
, files
[i
].basenameLength
);
/* Size of the count, the ToC entries and the name block written above. */
260 length
=4+8*fileCount
+basenameTotal
;
263 for(i
=0; i
<fileCount
; ++i
) {
264 /* pad to 16-align the next file */
267 udata_writePadding(out
, 16-length
);
271 printf("adding %s (%ld byte%s)\n", files
[i
].pathname
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
274 /* copy the next file */
275 file
=T_FileStream_open(files
[i
].pathname
, "rb");
277 fprintf(stderr
, "gencmn: unable to open listed file %s\n", files
[i
].pathname
);
278 exit(U_FILE_ACCESS_ERROR
);
/* The item is copied through buffer in chunks of at most sizeof(buffer) bytes. */
281 length
=T_FileStream_read(file
, buffer
, sizeof(buffer
));
286 udata_writeBlock(out
, buffer
, length
);
288 T_FileStream_close(file
);
289 length
=files
[i
].fileSize
;
/* A byte count that differs from the size recorded by addFile() is a fatal error. */
291 if (nread
!= files
[i
].fileSize
) {
292 fprintf(stderr
, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files
[i
].pathname
, (long)nread
, (long)files
[i
].fileSize
, files
[i
].fileSize
== 1 ? "" : "s");
293 exit(U_FILE_ACCESS_ERROR
);
297 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
300 udata_writePadding(out
, 16-length
);
304 udata_finish(out
, &errorCode
);
305 if(U_FAILURE(errorCode
)) {
306 fprintf(stderr
, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode
));
310 /* write a .c source file with the table of contents */
314 /* create the output filename */
/* filename is assembled from destDir, a separator when destDir lacks a trailing one, name, type and ".c". */
316 uprv_strcpy(filename
, destDir
);
317 s
=filename
+uprv_strlen(filename
);
318 if(s
>filename
&& *(s
-1)!=U_FILE_SEP_CHAR
) {
319 *s
++=U_FILE_SEP_CHAR
;
321 uprv_strcpy(s
, name
);
325 uprv_strcpy(s
, type
);
328 uprv_strcpy(s
, ".c");
330 /* open the output file */
331 out
=T_FileStream_open(filename
, "w");
/*
 * Hand the generated file name back to the caller when a destination was supplied.
 * The copy is unbounded, so the caller's buffer must be large enough for filename.
 */
332 if (gencmnFileName
!= NULL
) {
333 uprv_strcpy(gencmnFileName
, filename
);
336 fprintf(stderr
, "gencmn: unable to open .c output file %s\n", filename
);
337 exit(U_FILE_ACCESS_ERROR
);
340 /* write the source file */
343 " * ICU common data table of contents for %s.%s\n"
344 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
346 "#include \"unicode/utypes.h\"\n"
347 "#include \"unicode/udata.h\"\n"
349 "/* external symbol declarations for data (%d files) */\n",
350 name
, type
, fileCount
);
351 T_FileStream_writeLine(out
, buffer
);
/* One extern declaration list naming every item's symbol: the first entry here, the remaining ones in the loop. */
353 sprintf(buffer
, "extern const char\n %s%s[]", symPrefix
?symPrefix
:"", files
[0].pathname
);
354 T_FileStream_writeLine(out
, buffer
);
355 for(i
=1; i
<fileCount
; ++i
) {
356 sprintf(buffer
, ",\n %s%s[]", symPrefix
?symPrefix
:"", files
[i
].pathname
);
357 T_FileStream_writeLine(out
, buffer
);
359 T_FileStream_writeLine(out
, ";\n\n");
363 "U_EXPORT struct {\n"
364 " uint16_t headerSize;\n"
365 " uint8_t magic1, magic2;\n"
367 " char padding[%lu];\n"
368 " uint32_t count, reserved;\n"
370 " const char *name;\n"
371 " const void *data;\n"
373 "} U_EXPORT2 %s_dat = {\n"
374 " 32, 0xda, 0x27, {\n"
377 " {0x54, 0x6f, 0x43, 0x50},\n"
381 " \"\", %lu, 0, {\n",
382 static_cast<unsigned long>(32-4-sizeof(UDataInfo
)),
383 static_cast<unsigned long>(fileCount
),
385 static_cast<unsigned long>(sizeof(UDataInfo
)),
389 static_cast<unsigned long>(fileCount
)
391 T_FileStream_writeLine(out
, buffer
);
/* One { "basename", symbol } initializer per item: the first entry here, the remaining ones in the loop. */
393 sprintf(buffer
, " { \"%s\", %s%s }", files
[0].basename
, symPrefix
?symPrefix
:"", files
[0].pathname
);
394 T_FileStream_writeLine(out
, buffer
);
395 for(i
=1; i
<fileCount
; ++i
) {
396 sprintf(buffer
, ",\n { \"%s\", %s%s }", files
[i
].basename
, symPrefix
?symPrefix
:"", files
[i
].pathname
);
397 T_FileStream_writeLine(out
, buffer
);
400 T_FileStream_writeLine(out
, "\n }\n};\n");
401 T_FileStream_close(out
);
403 uprv_free(symPrefix
);
/*
 * Record one listed file in the global files[] array.
 *
 * files[] is enlarged by CHUNK_FILE_COUNT entries whenever it is full.
 * Two ways of filling the new entry are visible:
 *  - one rejects absolute paths, builds the full path with pathToFullPath(),
 *    stores "<name><tree separator><filename>" as the basename, opens the
 *    file to obtain its size, and reports files larger than a non-zero
 *    maxSize as ignored;
 *  - the other stores the same basename and derives an entry point name
 *    from it, which is kept in the pathname field.
 * NOTE(review): the condition choosing between the two paths is not visible
 * in this listing (presumably sourceTOC - TODO confirm).
 * NOTE(review): the allocation failure message prints fileCount, which is
 * declared uint32_t, with "%d".
 *
 * filename   item path as listed; must be relative on the first path.
 * name       package name used as the basename prefix.
 * source     directory handed to pathToFullPath().
 * sourceTOC  not referenced in the visible lines.
 * verbose    not referenced in the visible lines.
 */
408 addFile(const char *filename
, const char *name
, const char *source
, UBool sourceTOC
, UBool verbose
) {
411 char *fullPath
= NULL
;
/* Grow files[] by CHUNK_FILE_COUNT entries once every allocated entry is in use. */
413 if(fileCount
==fileMax
) {
414 fileMax
+= CHUNK_FILE_COUNT
;
415 files
= (File
*)uprv_realloc(files
, fileMax
*sizeof(files
[0])); /* note: never freed. */
417 fprintf(stderr
, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax
*sizeof(files
[0])), fileCount
);
418 exit(U_MEMORY_ALLOCATION_ERROR
);
/* Absolute item paths are a fatal error. */
425 if(uprv_pathIsAbsolute(filename
)) {
426 fprintf(stderr
, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR
, filename
);
427 exit(U_ILLEGAL_ARGUMENT_ERROR
);
429 fullPath
= pathToFullPath(filename
, source
);
430 /* store the pathname */
/* length covers name, one separator, filename and the terminating NUL. */
431 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(name
) + 1);
432 s
=allocString(length
);
433 uprv_strcpy(s
, name
);
434 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
435 uprv_strcat(s
, filename
);
437 /* get the basename */
439 files
[fileCount
].basename
=s
;
440 files
[fileCount
].basenameLength
=length
;
442 files
[fileCount
].pathname
=fullPath
;
444 basenameTotal
+=length
;
446 /* try to open the file */
447 file
=T_FileStream_open(fullPath
, "rb");
449 fprintf(stderr
, "gencmn: unable to open listed file %s\n", fullPath
);
450 exit(U_FILE_ACCESS_ERROR
);
453 /* get the file length */
454 length
=T_FileStream_size(file
);
/* A stream error or a size of 20 bytes or less is treated as a fatal error. */
455 if(T_FileStream_error(file
) || length
<=20) {
456 fprintf(stderr
, "gencmn: unable to get length of listed file %s\n", fullPath
);
457 exit(U_FILE_ACCESS_ERROR
);
460 T_FileStream_close(file
);
462 /* do not add files that are longer than maxSize */
463 if(maxSize
&& length
>maxSize
) {
465 printf("%s ignored (size %ld > %ld)\n", fullPath
, (long)length
, (long)maxSize
);
469 files
[fileCount
].fileSize
=length
;
472 /* get and store the basename */
473 /* need to include the package name */
474 length
= (uint32_t)(uprv_strlen(filename
) + 1 + uprv_strlen(name
) + 1);
475 s
=allocString(length
);
476 uprv_strcpy(s
, name
);
477 uprv_strcat(s
, U_TREE_ENTRY_SEP_STRING
);
478 uprv_strcat(s
, filename
);
480 files
[fileCount
].basename
=s
;
481 /* turn the basename into an entry point name and store in the pathname field */
482 t
=files
[fileCount
].pathname
=allocString(length
);
/* '.', '-' and '/' are singled out here; the action taken for them is not visible in this listing. */
484 if(*s
=='.' || *s
=='-' || *s
=='/') {
/*
 * Reserve length bytes from the static stringStore arena.
 *
 * The new top of the store is computed first; if it would exceed
 * STRING_STORE_SIZE the program prints "gencmn: out of memory" and exits
 * with U_MEMORY_ALLOCATION_ERROR. Otherwise p is set to the current top of
 * the store.
 * NOTE(review): the statements that advance stringTop and return p are not
 * visible in this listing.
 *
 * length  number of bytes to reserve, including any terminating NUL.
 */
498 allocString(uint32_t length
) {
499 uint32_t top
=stringTop
+length
;
/* The store is a fixed array, so running out of room is fatal. */
502 if(top
>STRING_STORE_SIZE
) {
503 fprintf(stderr
, "gencmn: out of memory\n");
504 exit(U_MEMORY_ALLOCATION_ERROR
);
506 p
=stringStore
+stringTop
;
/*
 * Build the full path of a listed item in a uprv_malloc()-allocated buffer:
 * source, followed by U_FILE_SEP_STRING, followed by path. Depending on the
 * platform separator macros, separator characters in the appended path part
 * are then rewritten to U_FILE_SEP_CHAR.
 *
 * addFile() stores the result in files[].pathname.
 * NOTE(review): uprv_strlen(source) is evaluated unconditionally, so source
 * must not be NULL at that point.
 * NOTE(review): no check of the uprv_malloc() result is visible in this listing.
 *
 * path    item path relative to source.
 * source  directory prefix.
 */
512 pathToFullPath(const char *path
, const char *source
) {
/* length counts path plus its NUL; newLength adds source and one more byte for the separator. */
518 length
= (uint32_t)(uprv_strlen(path
) + 1);
519 newLength
= (length
+ 1 + (int32_t)uprv_strlen(source
));
520 fullPath
= (char *)uprv_malloc(newLength
);
522 uprv_strcpy(fullPath
, source
);
523 uprv_strcat(fullPath
, U_FILE_SEP_STRING
);
/* n marks where the appended path begins, so the loops below leave the prefix untouched. */
527 n
= (int32_t)uprv_strlen(fullPath
);
528 fullPath
[n
] = 0; /* Suppress compiler warning for unused variable n */
529 /* when conditional code below is not compiled. */
530 uprv_strcat(fullPath
, path
);
532 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
533 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
534 /* replace the alternate file separator with the file sep char (such as ':' or '\\') */
535 for(;fullPath
[n
];n
++) {
536 if(fullPath
[n
] == U_FILE_ALT_SEP_CHAR
) {
537 fullPath
[n
] = U_FILE_SEP_CHAR
;
542 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
543 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
544 for(;fullPath
[n
];n
++) {
545 if(fullPath
[n
] == U_TREE_ENTRY_SEP_CHAR
) {
546 fullPath
[n
] = U_FILE_SEP_CHAR
;
/*
 * qsort() comparator for the files[] array.
 *
 * Both arguments point to File records; the result is that of uprv_strcmp()
 * applied to their basename strings, so sorting with this comparator orders
 * the records by basename.
 */
555 compareFiles(const void *file1
, const void *file2
) {
556 /* sort by basename */
557 return uprv_strcmp(((File
*)file1
)->basename
, ((File
*)file2
)->basename
);
562 fixDirToTreePath(char *s
)
565 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
568 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
569 for(t
=s
;t
=uprv_strchr(t
,U_FILE_SEP_CHAR
);) {
570 *t
= U_TREE_ENTRY_SEP_CHAR
;
573 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
574 for(t
=s
;t
=uprv_strchr(t
,U_FILE_ALT_SEP_CHAR
);) {
575 *t
= U_TREE_ENTRY_SEP_CHAR
;