]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/pkg_gencmn.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkg_gencmn.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9 3/******************************************************************************
4388f060 4 * Copyright (C) 2008-2012, International Business Machines
729e4ab9
A
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
7 */
8#include "unicode/utypes.h"
9
10#include <stdio.h>
11#include <stdlib.h>
12#include "unicode/utypes.h"
13#include "unicode/putil.h"
14#include "cmemory.h"
15#include "cstring.h"
16#include "filestrm.h"
17#include "toolutil.h"
18#include "unicode/uclean.h"
19#include "unewdata.h"
20#include "putilimp.h"
21#include "pkg_gencmn.h"
22
4388f060 23#define STRING_STORE_SIZE 200000
729e4ab9
A
24
25#define COMMON_DATA_NAME U_ICUDATA_NAME
26#define DATA_TYPE "dat"
27
28/* ICU package data file format (.dat files) ------------------------------- ***
29
30Description of the data format after the usual ICU data file header
31(UDataInfo etc.).
32
33Format version 1
34
35A .dat package file contains a simple Table of Contents of item names,
36followed by the items themselves:
37
381. ToC table
39
40uint32_t count; - number of items
41UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
42 uint32_t nameOffset; - offset of the item name
43 uint32_t dataOffset; - offset of the item data
44both are byte offsets from the beginning of the data
45
462. item name strings
47
48All item names are stored as char * strings in one block between the ToC table
49and the data items.
50
513. data items
52
53The data items are stored following the item names block.
54Each data item is 16-aligned.
55The data items are stored in the sorted order of their names.
56
57Therefore, the top of the name strings block is the offset of the first item,
58the length of the last item is the difference between its offset and
59the .dat file length, and the length of all previous items is the difference
60between its offset and the next one.
61
62----------------------------------------------------------------------------- */
63
64/* UDataInfo cf. udata.h */
65static const UDataInfo dataInfo={
66 sizeof(UDataInfo),
67 0,
68
69 U_IS_BIG_ENDIAN,
70 U_CHARSET_FAMILY,
71 sizeof(UChar),
72 0,
73
74 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
75 {1, 0, 0, 0}, /* formatVersion */
76 {3, 0, 0, 0} /* dataVersion */
77};
78
79static uint32_t maxSize;
80
81static char stringStore[STRING_STORE_SIZE];
82static uint32_t stringTop=0, basenameTotal=0;
83
/* One item of the package being assembled. */
typedef struct {
    char *pathname;             /* path used to open the file, or the entry point symbol in source-ToC mode */
    char *basename;             /* item name as stored in the table of contents */
    uint32_t basenameLength;    /* length of basename including its terminating NUL */
    uint32_t basenameOffset;    /* byte offset of the name inside the package */
    uint32_t fileSize;          /* size of the item data in bytes */
    uint32_t fileOffset;        /* byte offset of the item data inside the package */
} File;
88
89#define CHUNK_FILE_COUNT 256
90static File *files = NULL;
91static uint32_t fileCount=0;
92static uint32_t fileMax = 0;
93
94
95static char *symPrefix = NULL;
96
4388f060 97#define LINE_BUFFER_SIZE 512
729e4ab9
A
98/* prototypes --------------------------------------------------------------- */
99
100static void
101addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
102
103static char *
104allocString(uint32_t length);
105
f3c0d7a5 106U_CDECL_BEGIN
729e4ab9
A
107static int
108compareFiles(const void *file1, const void *file2);
f3c0d7a5 109U_CDECL_END
729e4ab9
A
110
111static char *
112pathToFullPath(const char *path, const char *source);
113
114/* map non-tree separator (such as '\') to tree separator ('/') inplace. */
115static void
116fixDirToTreePath(char *s);
117/* -------------------------------------------------------------------------- */
118
119U_CAPI void U_EXPORT2
120createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
121 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
122 static char buffer[4096];
4388f060
A
123 char *line;
124 char *linePtr;
125 char *s = NULL;
729e4ab9
A
126 UErrorCode errorCode=U_ZERO_ERROR;
127 uint32_t i, fileOffset, basenameOffset, length, nread;
128 FileStream *in, *file;
129
4388f060
A
130 line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
131 if (line == NULL) {
132 fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
133 exit(U_MEMORY_ALLOCATION_ERROR);
134 }
135
136 linePtr = line;
137
729e4ab9
A
138 maxSize = max_size;
139
140 if (destDir == NULL) {
141 destDir = u_getDataDirectory();
142 }
143 if (name == NULL) {
144 name = COMMON_DATA_NAME;
145 }
146 if (type == NULL) {
147 type = DATA_TYPE;
148 }
149 if (source == NULL) {
150 source = ".";
151 }
152
153 if (dataFile == NULL) {
154 in = T_FileStream_stdin();
155 } else {
156 in = T_FileStream_open(dataFile, "r");
157 if(in == NULL) {
158 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
159 exit(U_FILE_ACCESS_ERROR);
160 }
161 }
162
163 if (verbose) {
164 if(sourceTOC) {
165 printf("generating %s_%s.c (table of contents source file)\n", name, type);
166 } else {
167 printf("generating %s.%s (common data file with table of contents)\n", name, type);
168 }
169 }
170
171 /* read the list of files and get their lengths */
4388f060
A
172 while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
173 LINE_BUFFER_SIZE))!=NULL) {
174 /* remove trailing newline characters and parse space separated items */
175 if (s != NULL && *s != 0) {
176 line=s;
177 } else {
178 s=line;
179 }
729e4ab9 180 while(*s!=0) {
4388f060
A
181 if(*s==' ') {
182 *s=0;
183 ++s;
184 break;
185 } else if(*s=='\r' || *s=='\n') {
729e4ab9
A
186 *s=0;
187 break;
188 }
189 ++s;
190 }
191
192 /* check for comment */
193
194 if (*line == '#') {
195 continue;
196 }
197
198 /* add the file */
199#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
200 {
201 char *t;
202 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
203 *t = U_FILE_SEP_CHAR;
204 }
205 }
206#endif
207 addFile(getLongPathname(line), name, source, sourceTOC, verbose);
208 }
209
51004dcb 210 uprv_free(linePtr);
4388f060 211
729e4ab9
A
212 if(in!=T_FileStream_stdin()) {
213 T_FileStream_close(in);
214 }
215
216 if(fileCount==0) {
217 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
218 return;
219 }
220
221 /* sort the files by basename */
222 qsort(files, fileCount, sizeof(File), compareFiles);
223
224 if(!sourceTOC) {
225 UNewDataMemory *out;
226
227 /* determine the offsets of all basenames and files in this common one */
228 basenameOffset=4+8*fileCount;
229 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
230 for(i=0; i<fileCount; ++i) {
231 files[i].fileOffset=fileOffset;
232 fileOffset+=(files[i].fileSize+15)&~0xf;
233 files[i].basenameOffset=basenameOffset;
234 basenameOffset+=files[i].basenameLength;
235 }
236
237 /* create the output file */
238 out=udata_create(destDir, type, name,
239 &dataInfo,
240 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
241 &errorCode);
242 if(U_FAILURE(errorCode)) {
243 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
244 destDir, name, type,
245 u_errorName(errorCode));
246 exit(errorCode);
247 }
248
249 /* write the table of contents */
250 udata_write32(out, fileCount);
251 for(i=0; i<fileCount; ++i) {
252 udata_write32(out, files[i].basenameOffset);
253 udata_write32(out, files[i].fileOffset);
254 }
255
256 /* write the basenames */
257 for(i=0; i<fileCount; ++i) {
258 udata_writeString(out, files[i].basename, files[i].basenameLength);
259 }
260 length=4+8*fileCount+basenameTotal;
261
262 /* copy the files */
263 for(i=0; i<fileCount; ++i) {
264 /* pad to 16-align the next file */
265 length&=0xf;
266 if(length!=0) {
267 udata_writePadding(out, 16-length);
268 }
269
270 if (verbose) {
271 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
272 }
273
274 /* copy the next file */
275 file=T_FileStream_open(files[i].pathname, "rb");
276 if(file==NULL) {
277 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
278 exit(U_FILE_ACCESS_ERROR);
279 }
280 for(nread = 0;;) {
281 length=T_FileStream_read(file, buffer, sizeof(buffer));
282 if(length <= 0) {
283 break;
284 }
285 nread += length;
286 udata_writeBlock(out, buffer, length);
287 }
288 T_FileStream_close(file);
289 length=files[i].fileSize;
290
291 if (nread != files[i].fileSize) {
292 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
293 exit(U_FILE_ACCESS_ERROR);
294 }
295 }
296
297 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
298 length&=0xf;
299 if(length!=0) {
300 udata_writePadding(out, 16-length);
301 }
302
303 /* finish */
304 udata_finish(out, &errorCode);
305 if(U_FAILURE(errorCode)) {
306 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
307 exit(errorCode);
308 }
309 } else {
310 /* write a .c source file with the table of contents */
311 char *filename;
312 FileStream *out;
313
314 /* create the output filename */
315 filename=s=buffer;
316 uprv_strcpy(filename, destDir);
317 s=filename+uprv_strlen(filename);
318 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
319 *s++=U_FILE_SEP_CHAR;
320 }
321 uprv_strcpy(s, name);
322 if(*(type)!=0) {
323 s+=uprv_strlen(s);
324 *s++='_';
325 uprv_strcpy(s, type);
326 }
327 s+=uprv_strlen(s);
328 uprv_strcpy(s, ".c");
329
330 /* open the output file */
331 out=T_FileStream_open(filename, "w");
332 if (gencmnFileName != NULL) {
333 uprv_strcpy(gencmnFileName, filename);
334 }
335 if(out==NULL) {
336 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
337 exit(U_FILE_ACCESS_ERROR);
338 }
339
340 /* write the source file */
341 sprintf(buffer,
342 "/*\n"
4388f060 343 " * ICU common data table of contents for %s.%s\n"
729e4ab9
A
344 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
345 " */\n\n"
346 "#include \"unicode/utypes.h\"\n"
347 "#include \"unicode/udata.h\"\n"
348 "\n"
4388f060
A
349 "/* external symbol declarations for data (%d files) */\n",
350 name, type, fileCount);
729e4ab9
A
351 T_FileStream_writeLine(out, buffer);
352
353 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
354 T_FileStream_writeLine(out, buffer);
355 for(i=1; i<fileCount; ++i) {
356 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
357 T_FileStream_writeLine(out, buffer);
358 }
359 T_FileStream_writeLine(out, ";\n\n");
360
361 sprintf(
362 buffer,
363 "U_EXPORT struct {\n"
364 " uint16_t headerSize;\n"
365 " uint8_t magic1, magic2;\n"
366 " UDataInfo info;\n"
367 " char padding[%lu];\n"
368 " uint32_t count, reserved;\n"
369 " struct {\n"
370 " const char *name;\n"
371 " const void *data;\n"
372 " } toc[%lu];\n"
373 "} U_EXPORT2 %s_dat = {\n"
374 " 32, 0xda, 0x27, {\n"
375 " %lu, 0,\n"
376 " %u, %u, %u, 0,\n"
377 " {0x54, 0x6f, 0x43, 0x50},\n"
378 " {1, 0, 0, 0},\n"
379 " {0, 0, 0, 0}\n"
380 " },\n"
381 " \"\", %lu, 0, {\n",
3d1f044b
A
382 static_cast<unsigned long>(32-4-sizeof(UDataInfo)),
383 static_cast<unsigned long>(fileCount),
729e4ab9 384 entrypointName,
3d1f044b 385 static_cast<unsigned long>(sizeof(UDataInfo)),
729e4ab9
A
386 U_IS_BIG_ENDIAN,
387 U_CHARSET_FAMILY,
388 U_SIZEOF_UCHAR,
3d1f044b 389 static_cast<unsigned long>(fileCount)
729e4ab9
A
390 );
391 T_FileStream_writeLine(out, buffer);
392
393 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
394 T_FileStream_writeLine(out, buffer);
395 for(i=1; i<fileCount; ++i) {
396 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
397 T_FileStream_writeLine(out, buffer);
398 }
399
400 T_FileStream_writeLine(out, "\n }\n};\n");
401 T_FileStream_close(out);
402
403 uprv_free(symPrefix);
404 }
405}
406
407static void
408addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
409 char *s;
410 uint32_t length;
411 char *fullPath = NULL;
412
413 if(fileCount==fileMax) {
414 fileMax += CHUNK_FILE_COUNT;
f3c0d7a5 415 files = (File *)uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
729e4ab9 416 if(files==NULL) {
4388f060 417 fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
729e4ab9
A
418 exit(U_MEMORY_ALLOCATION_ERROR);
419 }
420 }
421
422 if(!sourceTOC) {
423 FileStream *file;
424
425 if(uprv_pathIsAbsolute(filename)) {
426 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
427 exit(U_ILLEGAL_ARGUMENT_ERROR);
428 }
429 fullPath = pathToFullPath(filename, source);
729e4ab9
A
430 /* store the pathname */
431 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
432 s=allocString(length);
433 uprv_strcpy(s, name);
434 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
435 uprv_strcat(s, filename);
436
437 /* get the basename */
438 fixDirToTreePath(s);
439 files[fileCount].basename=s;
440 files[fileCount].basenameLength=length;
441
442 files[fileCount].pathname=fullPath;
443
444 basenameTotal+=length;
445
446 /* try to open the file */
447 file=T_FileStream_open(fullPath, "rb");
448 if(file==NULL) {
449 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
450 exit(U_FILE_ACCESS_ERROR);
451 }
452
453 /* get the file length */
454 length=T_FileStream_size(file);
455 if(T_FileStream_error(file) || length<=20) {
456 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
457 exit(U_FILE_ACCESS_ERROR);
458 }
459
460 T_FileStream_close(file);
461
462 /* do not add files that are longer than maxSize */
463 if(maxSize && length>maxSize) {
464 if (verbose) {
465 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
466 }
467 return;
468 }
469 files[fileCount].fileSize=length;
470 } else {
471 char *t;
729e4ab9
A
472 /* get and store the basename */
473 /* need to include the package name */
474 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
475 s=allocString(length);
476 uprv_strcpy(s, name);
477 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
478 uprv_strcat(s, filename);
479 fixDirToTreePath(s);
480 files[fileCount].basename=s;
729e4ab9
A
481 /* turn the basename into an entry point name and store in the pathname field */
482 t=files[fileCount].pathname=allocString(length);
483 while(--length>0) {
484 if(*s=='.' || *s=='-' || *s=='/') {
485 *t='_';
486 } else {
487 *t=*s;
488 }
489 ++s;
490 ++t;
491 }
492 *t=0;
493 }
494 ++fileCount;
495}
496
497static char *
498allocString(uint32_t length) {
499 uint32_t top=stringTop+length;
500 char *p;
501
502 if(top>STRING_STORE_SIZE) {
503 fprintf(stderr, "gencmn: out of memory\n");
504 exit(U_MEMORY_ALLOCATION_ERROR);
505 }
506 p=stringStore+stringTop;
507 stringTop=top;
508 return p;
509}
510
511static char *
512pathToFullPath(const char *path, const char *source) {
513 int32_t length;
514 int32_t newLength;
515 char *fullPath;
516 int32_t n;
517
518 length = (uint32_t)(uprv_strlen(path) + 1);
519 newLength = (length + 1 + (int32_t)uprv_strlen(source));
f3c0d7a5 520 fullPath = (char *)uprv_malloc(newLength);
729e4ab9
A
521 if(source != NULL) {
522 uprv_strcpy(fullPath, source);
523 uprv_strcat(fullPath, U_FILE_SEP_STRING);
524 } else {
525 fullPath[0] = 0;
526 }
527 n = (int32_t)uprv_strlen(fullPath);
4388f060
A
528 fullPath[n] = 0; /* Suppress compiler warning for unused variable n */
529 /* when conditional code below is not compiled. */
729e4ab9
A
530 uprv_strcat(fullPath, path);
531
532#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
533#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
534 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
535 for(;fullPath[n];n++) {
536 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
537 fullPath[n] = U_FILE_SEP_CHAR;
538 }
539 }
540#endif
541#endif
542#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
543 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
544 for(;fullPath[n];n++) {
545 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
546 fullPath[n] = U_FILE_SEP_CHAR;
547 }
548 }
549#endif
550 return fullPath;
551}
552
f3c0d7a5 553U_CDECL_BEGIN
729e4ab9
A
554static int
555compareFiles(const void *file1, const void *file2) {
556 /* sort by basename */
557 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
558}
f3c0d7a5 559U_CDECL_END
729e4ab9
A
560
/*
 * Replace, in place, every file separator and alternate file separator in s
 * with the tree entry separator. Does nothing on platforms where the
 * separators are all identical.
 */
static void
fixDirToTreePath(char *s)
{
    (void)s;
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *t;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    t=uprv_strchr(s,U_FILE_SEP_CHAR);
    while(t!=NULL) {
        *t = U_TREE_ENTRY_SEP_CHAR;
        t=uprv_strchr(t+1,U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    t=uprv_strchr(s,U_FILE_ALT_SEP_CHAR);
    while(t!=NULL) {
        *t = U_TREE_ENTRY_SEP_CHAR;
        t=uprv_strchr(t+1,U_FILE_ALT_SEP_CHAR);
    }
#endif
}