]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/pkg_gencmn.c
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkg_gencmn.c
CommitLineData
729e4ab9 1/******************************************************************************
4388f060 2 * Copyright (C) 2008-2012, International Business Machines
729e4ab9
A
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#include <stdio.h>
9#include <stdlib.h>
10#include "unicode/utypes.h"
11#include "unicode/putil.h"
12#include "cmemory.h"
13#include "cstring.h"
14#include "filestrm.h"
15#include "toolutil.h"
16#include "unicode/uclean.h"
17#include "unewdata.h"
18#include "putilimp.h"
19#include "pkg_gencmn.h"
20
/* Capacity, in bytes, of the static stringStore pool that holds item names. */
#define STRING_STORE_SIZE 200000

/* Package name used when the caller passes name == NULL. */
#define COMMON_DATA_NAME U_ICUDATA_NAME
/* Package type (file suffix) used when the caller passes type == NULL. */
#define DATA_TYPE "dat"
25
/* ICU package data file format (.dat files) ------------------------------- ***

Description of the data format after the usual ICU data file header
(UDataInfo etc.).

Format version 1

A .dat package file contains a simple Table of Contents of item names,
followed by the items themselves:

1. ToC table

uint32_t count; - number of items
UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
    uint32_t nameOffset; - offset of the item name
    uint32_t dataOffset; - offset of the item data
both are byte offsets from the beginning of the data

2. item name strings

All item names are stored as char * strings in one block between the ToC table
and the data items.

3. data items

The data items are stored following the item names block.
Each data item is 16-aligned.
The data items are stored in the sorted order of their names.

Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of each previous item is the difference
between its offset and the offset of the next one.

----------------------------------------------------------------------------- */
61
62/* UDataInfo cf. udata.h */
63static const UDataInfo dataInfo={
64 sizeof(UDataInfo),
65 0,
66
67 U_IS_BIG_ENDIAN,
68 U_CHARSET_FAMILY,
69 sizeof(UChar),
70 0,
71
72 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
73 {1, 0, 0, 0}, /* formatVersion */
74 {3, 0, 0, 0} /* dataVersion */
75};
76
77static uint32_t maxSize;
78
79static char stringStore[STRING_STORE_SIZE];
80static uint32_t stringTop=0, basenameTotal=0;
81
82typedef struct {
83 char *pathname, *basename;
84 uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
85} File;
86
87#define CHUNK_FILE_COUNT 256
88static File *files = NULL;
89static uint32_t fileCount=0;
90static uint32_t fileMax = 0;
91
92
93static char *symPrefix = NULL;
94
4388f060 95#define LINE_BUFFER_SIZE 512
729e4ab9
A
96/* prototypes --------------------------------------------------------------- */
97
98static void
99addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
100
101static char *
102allocString(uint32_t length);
103
104static int
105compareFiles(const void *file1, const void *file2);
106
107static char *
108pathToFullPath(const char *path, const char *source);
109
110/* map non-tree separator (such as '\') to tree separator ('/') inplace. */
111static void
112fixDirToTreePath(char *s);
113/* -------------------------------------------------------------------------- */
114
115U_CAPI void U_EXPORT2
116createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
117 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
118 static char buffer[4096];
4388f060
A
119 char *line;
120 char *linePtr;
121 char *s = NULL;
729e4ab9
A
122 UErrorCode errorCode=U_ZERO_ERROR;
123 uint32_t i, fileOffset, basenameOffset, length, nread;
124 FileStream *in, *file;
125
4388f060
A
126 line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
127 if (line == NULL) {
128 fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
129 exit(U_MEMORY_ALLOCATION_ERROR);
130 }
131
132 linePtr = line;
133
729e4ab9
A
134 maxSize = max_size;
135
136 if (destDir == NULL) {
137 destDir = u_getDataDirectory();
138 }
139 if (name == NULL) {
140 name = COMMON_DATA_NAME;
141 }
142 if (type == NULL) {
143 type = DATA_TYPE;
144 }
145 if (source == NULL) {
146 source = ".";
147 }
148
149 if (dataFile == NULL) {
150 in = T_FileStream_stdin();
151 } else {
152 in = T_FileStream_open(dataFile, "r");
153 if(in == NULL) {
154 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
155 exit(U_FILE_ACCESS_ERROR);
156 }
157 }
158
159 if (verbose) {
160 if(sourceTOC) {
161 printf("generating %s_%s.c (table of contents source file)\n", name, type);
162 } else {
163 printf("generating %s.%s (common data file with table of contents)\n", name, type);
164 }
165 }
166
167 /* read the list of files and get their lengths */
4388f060
A
168 while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
169 LINE_BUFFER_SIZE))!=NULL) {
170 /* remove trailing newline characters and parse space separated items */
171 if (s != NULL && *s != 0) {
172 line=s;
173 } else {
174 s=line;
175 }
729e4ab9 176 while(*s!=0) {
4388f060
A
177 if(*s==' ') {
178 *s=0;
179 ++s;
180 break;
181 } else if(*s=='\r' || *s=='\n') {
729e4ab9
A
182 *s=0;
183 break;
184 }
185 ++s;
186 }
187
188 /* check for comment */
189
190 if (*line == '#') {
191 continue;
192 }
193
194 /* add the file */
195#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
196 {
197 char *t;
198 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
199 *t = U_FILE_SEP_CHAR;
200 }
201 }
202#endif
203 addFile(getLongPathname(line), name, source, sourceTOC, verbose);
204 }
205
4388f060
A
206 if (linePtr) {
207 uprv_free(linePtr);
208 }
209
729e4ab9
A
210 if(in!=T_FileStream_stdin()) {
211 T_FileStream_close(in);
212 }
213
214 if(fileCount==0) {
215 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
216 return;
217 }
218
219 /* sort the files by basename */
220 qsort(files, fileCount, sizeof(File), compareFiles);
221
222 if(!sourceTOC) {
223 UNewDataMemory *out;
224
225 /* determine the offsets of all basenames and files in this common one */
226 basenameOffset=4+8*fileCount;
227 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
228 for(i=0; i<fileCount; ++i) {
229 files[i].fileOffset=fileOffset;
230 fileOffset+=(files[i].fileSize+15)&~0xf;
231 files[i].basenameOffset=basenameOffset;
232 basenameOffset+=files[i].basenameLength;
233 }
234
235 /* create the output file */
236 out=udata_create(destDir, type, name,
237 &dataInfo,
238 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
239 &errorCode);
240 if(U_FAILURE(errorCode)) {
241 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
242 destDir, name, type,
243 u_errorName(errorCode));
244 exit(errorCode);
245 }
246
247 /* write the table of contents */
248 udata_write32(out, fileCount);
249 for(i=0; i<fileCount; ++i) {
250 udata_write32(out, files[i].basenameOffset);
251 udata_write32(out, files[i].fileOffset);
252 }
253
254 /* write the basenames */
255 for(i=0; i<fileCount; ++i) {
256 udata_writeString(out, files[i].basename, files[i].basenameLength);
257 }
258 length=4+8*fileCount+basenameTotal;
259
260 /* copy the files */
261 for(i=0; i<fileCount; ++i) {
262 /* pad to 16-align the next file */
263 length&=0xf;
264 if(length!=0) {
265 udata_writePadding(out, 16-length);
266 }
267
268 if (verbose) {
269 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
270 }
271
272 /* copy the next file */
273 file=T_FileStream_open(files[i].pathname, "rb");
274 if(file==NULL) {
275 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
276 exit(U_FILE_ACCESS_ERROR);
277 }
278 for(nread = 0;;) {
279 length=T_FileStream_read(file, buffer, sizeof(buffer));
280 if(length <= 0) {
281 break;
282 }
283 nread += length;
284 udata_writeBlock(out, buffer, length);
285 }
286 T_FileStream_close(file);
287 length=files[i].fileSize;
288
289 if (nread != files[i].fileSize) {
290 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
291 exit(U_FILE_ACCESS_ERROR);
292 }
293 }
294
295 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
296 length&=0xf;
297 if(length!=0) {
298 udata_writePadding(out, 16-length);
299 }
300
301 /* finish */
302 udata_finish(out, &errorCode);
303 if(U_FAILURE(errorCode)) {
304 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
305 exit(errorCode);
306 }
307 } else {
308 /* write a .c source file with the table of contents */
309 char *filename;
310 FileStream *out;
311
312 /* create the output filename */
313 filename=s=buffer;
314 uprv_strcpy(filename, destDir);
315 s=filename+uprv_strlen(filename);
316 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
317 *s++=U_FILE_SEP_CHAR;
318 }
319 uprv_strcpy(s, name);
320 if(*(type)!=0) {
321 s+=uprv_strlen(s);
322 *s++='_';
323 uprv_strcpy(s, type);
324 }
325 s+=uprv_strlen(s);
326 uprv_strcpy(s, ".c");
327
328 /* open the output file */
329 out=T_FileStream_open(filename, "w");
330 if (gencmnFileName != NULL) {
331 uprv_strcpy(gencmnFileName, filename);
332 }
333 if(out==NULL) {
334 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
335 exit(U_FILE_ACCESS_ERROR);
336 }
337
338 /* write the source file */
339 sprintf(buffer,
340 "/*\n"
4388f060 341 " * ICU common data table of contents for %s.%s\n"
729e4ab9
A
342 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
343 " */\n\n"
344 "#include \"unicode/utypes.h\"\n"
345 "#include \"unicode/udata.h\"\n"
346 "\n"
4388f060
A
347 "/* external symbol declarations for data (%d files) */\n",
348 name, type, fileCount);
729e4ab9
A
349 T_FileStream_writeLine(out, buffer);
350
351 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
352 T_FileStream_writeLine(out, buffer);
353 for(i=1; i<fileCount; ++i) {
354 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
355 T_FileStream_writeLine(out, buffer);
356 }
357 T_FileStream_writeLine(out, ";\n\n");
358
359 sprintf(
360 buffer,
361 "U_EXPORT struct {\n"
362 " uint16_t headerSize;\n"
363 " uint8_t magic1, magic2;\n"
364 " UDataInfo info;\n"
365 " char padding[%lu];\n"
366 " uint32_t count, reserved;\n"
367 " struct {\n"
368 " const char *name;\n"
369 " const void *data;\n"
370 " } toc[%lu];\n"
371 "} U_EXPORT2 %s_dat = {\n"
372 " 32, 0xda, 0x27, {\n"
373 " %lu, 0,\n"
374 " %u, %u, %u, 0,\n"
375 " {0x54, 0x6f, 0x43, 0x50},\n"
376 " {1, 0, 0, 0},\n"
377 " {0, 0, 0, 0}\n"
378 " },\n"
379 " \"\", %lu, 0, {\n",
380 (unsigned long)32-4-sizeof(UDataInfo),
381 (unsigned long)fileCount,
382 entrypointName,
383 (unsigned long)sizeof(UDataInfo),
384 U_IS_BIG_ENDIAN,
385 U_CHARSET_FAMILY,
386 U_SIZEOF_UCHAR,
387 (unsigned long)fileCount
388 );
389 T_FileStream_writeLine(out, buffer);
390
391 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
392 T_FileStream_writeLine(out, buffer);
393 for(i=1; i<fileCount; ++i) {
394 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
395 T_FileStream_writeLine(out, buffer);
396 }
397
398 T_FileStream_writeLine(out, "\n }\n};\n");
399 T_FileStream_close(out);
400
401 uprv_free(symPrefix);
402 }
403}
404
405static void
406addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
407 char *s;
408 uint32_t length;
409 char *fullPath = NULL;
410
411 if(fileCount==fileMax) {
412 fileMax += CHUNK_FILE_COUNT;
413 files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
414 if(files==NULL) {
4388f060 415 fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
729e4ab9
A
416 exit(U_MEMORY_ALLOCATION_ERROR);
417 }
418 }
419
420 if(!sourceTOC) {
421 FileStream *file;
422
423 if(uprv_pathIsAbsolute(filename)) {
424 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
425 exit(U_ILLEGAL_ARGUMENT_ERROR);
426 }
427 fullPath = pathToFullPath(filename, source);
729e4ab9
A
428 /* store the pathname */
429 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
430 s=allocString(length);
431 uprv_strcpy(s, name);
432 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
433 uprv_strcat(s, filename);
434
435 /* get the basename */
436 fixDirToTreePath(s);
437 files[fileCount].basename=s;
438 files[fileCount].basenameLength=length;
439
440 files[fileCount].pathname=fullPath;
441
442 basenameTotal+=length;
443
444 /* try to open the file */
445 file=T_FileStream_open(fullPath, "rb");
446 if(file==NULL) {
447 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
448 exit(U_FILE_ACCESS_ERROR);
449 }
450
451 /* get the file length */
452 length=T_FileStream_size(file);
453 if(T_FileStream_error(file) || length<=20) {
454 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
455 exit(U_FILE_ACCESS_ERROR);
456 }
457
458 T_FileStream_close(file);
459
460 /* do not add files that are longer than maxSize */
461 if(maxSize && length>maxSize) {
462 if (verbose) {
463 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
464 }
465 return;
466 }
467 files[fileCount].fileSize=length;
468 } else {
469 char *t;
729e4ab9
A
470 /* get and store the basename */
471 /* need to include the package name */
472 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
473 s=allocString(length);
474 uprv_strcpy(s, name);
475 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
476 uprv_strcat(s, filename);
477 fixDirToTreePath(s);
478 files[fileCount].basename=s;
729e4ab9
A
479 /* turn the basename into an entry point name and store in the pathname field */
480 t=files[fileCount].pathname=allocString(length);
481 while(--length>0) {
482 if(*s=='.' || *s=='-' || *s=='/') {
483 *t='_';
484 } else {
485 *t=*s;
486 }
487 ++s;
488 ++t;
489 }
490 *t=0;
491 }
492 ++fileCount;
493}
494
495static char *
496allocString(uint32_t length) {
497 uint32_t top=stringTop+length;
498 char *p;
499
500 if(top>STRING_STORE_SIZE) {
501 fprintf(stderr, "gencmn: out of memory\n");
502 exit(U_MEMORY_ALLOCATION_ERROR);
503 }
504 p=stringStore+stringTop;
505 stringTop=top;
506 return p;
507}
508
509static char *
510pathToFullPath(const char *path, const char *source) {
511 int32_t length;
512 int32_t newLength;
513 char *fullPath;
514 int32_t n;
515
516 length = (uint32_t)(uprv_strlen(path) + 1);
517 newLength = (length + 1 + (int32_t)uprv_strlen(source));
518 fullPath = uprv_malloc(newLength);
519 if(source != NULL) {
520 uprv_strcpy(fullPath, source);
521 uprv_strcat(fullPath, U_FILE_SEP_STRING);
522 } else {
523 fullPath[0] = 0;
524 }
525 n = (int32_t)uprv_strlen(fullPath);
4388f060
A
526 fullPath[n] = 0; /* Suppress compiler warning for unused variable n */
527 /* when conditional code below is not compiled. */
729e4ab9
A
528 uprv_strcat(fullPath, path);
529
530#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
531#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
532 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
533 for(;fullPath[n];n++) {
534 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
535 fullPath[n] = U_FILE_SEP_CHAR;
536 }
537 }
538#endif
539#endif
540#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
541 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
542 for(;fullPath[n];n++) {
543 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
544 fullPath[n] = U_FILE_SEP_CHAR;
545 }
546 }
547#endif
548 return fullPath;
549}
550
551static int
552compareFiles(const void *file1, const void *file2) {
553 /* sort by basename */
554 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
555}
556
/*
 * Replace, in place, every file separator (and every alternate file
 * separator) in s with U_TREE_ENTRY_SEP_CHAR. Compiles to a no-op on
 * platforms where these characters are already the same.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;

    for(cursor=s; *cursor!=0; ++cursor) {
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
        }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor==U_FILE_ALT_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
        }
#endif
    }
#else
    (void)s;    /* nothing to convert on this platform */
#endif
}