]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/package.cpp
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / package.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: package.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005aug25
14 * created by: Markus W. Scherer
15 *
16 * Read, modify, and write ICU .dat data package files.
17 * This is an integral part of the icupkg tool, moved to the toolutil library
18 * because parts of tool implementations tend to be later shared by
19 * other tools.
20 * Subsumes functionality and implementation code from
21 * gencmn, decmn, and icuswap tools.
22 */
23
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34 #include "cmemory.h"
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40
/* Growth step: how many entries the files array is increased by each time. */
static const int32_t kItemsChunk=256;
42
43 // general definitions ----------------------------------------------------- ***
44
45 /* UDataInfo cf. udata.h */
46 static const UDataInfo dataInfo={
47 (uint16_t)sizeof(UDataInfo),
48 0,
49
50 U_IS_BIG_ENDIAN,
51 U_CHARSET_FAMILY,
52 (uint8_t)sizeof(UChar),
53 0,
54
55 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
56 {1, 0, 0, 0}, /* formatVersion */
57 {3, 0, 0, 0} /* dataVersion */
58 };
59
60 U_CDECL_BEGIN
61 static void U_CALLCONV
62 printPackageError(void *context, const char *fmt, va_list args) {
63 vfprintf((FILE *)context, fmt, args);
64 }
65 U_CDECL_END
66
/* Return x with its two bytes exchanged (16-bit endianness swap). */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t lowByte=(uint16_t)(x&0xff);
    const uint16_t highByte=(uint16_t)(x>>8);
    return (uint16_t)((lowByte<<8)|highByte);
}
71
72 // platform types ---------------------------------------------------------- ***
73
// Platform type letters, indexed by the type enum below.
// Index TYPE_LE has no supported letter and maps to '?'.
static const char *types="lb?e";

// Type enum values equal 2*charset+isBigEndian, see makeTypeEnum().
enum {
    TYPE_L,
    TYPE_B,
    TYPE_LE,
    TYPE_E,
    TYPE_COUNT
};
77
78 static inline int32_t
79 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
80 return 2*(int32_t)charset+isBigEndian;
81 }
82
83 static inline int32_t
84 makeTypeEnum(char type) {
85 return
86 type == 'l' ? TYPE_L :
87 type == 'b' ? TYPE_B :
88 type == 'e' ? TYPE_E :
89 -1;
90 }
91
92 static inline char
93 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
94 return types[makeTypeEnum(charset, isBigEndian)];
95 }
96
97 static inline char
98 makeTypeLetter(int32_t typeEnum) {
99 return types[typeEnum];
100 }
101
102 static void
103 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
104 int32_t typeEnum=makeTypeEnum(type);
105 charset=(uint8_t)(typeEnum>>1);
106 isBigEndian=(UBool)(typeEnum&1);
107 }
108
109 U_CFUNC const UDataInfo *
110 getDataInfo(const uint8_t *data, int32_t length,
111 int32_t &infoLength, int32_t &headerLength,
112 UErrorCode *pErrorCode) {
113 const DataHeader *pHeader;
114 const UDataInfo *pInfo;
115
116 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
117 return NULL;
118 }
119 if( data==NULL ||
120 (length>=0 && length<(int32_t)sizeof(DataHeader))
121 ) {
122 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
123 return NULL;
124 }
125
126 pHeader=(const DataHeader *)data;
127 pInfo=&pHeader->info;
128 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
129 pHeader->dataHeader.magic1!=0xda ||
130 pHeader->dataHeader.magic2!=0x27 ||
131 pInfo->sizeofUChar!=2
132 ) {
133 *pErrorCode=U_UNSUPPORTED_ERROR;
134 return NULL;
135 }
136
137 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
138 headerLength=pHeader->dataHeader.headerSize;
139 infoLength=pInfo->size;
140 } else {
141 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
142 infoLength=readSwapUInt16(pInfo->size);
143 }
144
145 if( headerLength<(int32_t)sizeof(DataHeader) ||
146 infoLength<(int32_t)sizeof(UDataInfo) ||
147 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
148 (length>=0 && length<headerLength)
149 ) {
150 *pErrorCode=U_UNSUPPORTED_ERROR;
151 return NULL;
152 }
153
154 return pInfo;
155 }
156
157 static int32_t
158 getTypeEnumForInputData(const uint8_t *data, int32_t length,
159 UErrorCode *pErrorCode) {
160 const UDataInfo *pInfo;
161 int32_t infoLength, headerLength;
162
163 /* getDataInfo() checks for illegal arguments */
164 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
165 if(pInfo==NULL) {
166 return -1;
167 }
168
169 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
170 }
171
172 // file handling ----------------------------------------------------------- ***
173
174 static void
175 extractPackageName(const char *filename,
176 char pkg[], int32_t capacity) {
177 const char *basename;
178 int32_t len;
179
180 basename=findBasename(filename);
181 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
182
183 if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
184 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
185 basename);
186 exit(U_ILLEGAL_ARGUMENT_ERROR);
187 }
188
189 if(len>=capacity) {
190 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
191 basename, (long)capacity);
192 exit(U_ILLEGAL_ARGUMENT_ERROR);
193 }
194
195 memcpy(pkg, basename, len);
196 pkg[len]=0;
197 }
198
/*
 * Return the length of the open file f in bytes and rewind it to the start.
 *
 * Returns -1 if seeking or ftell() fails, or if the length does not fit
 * into an int32_t (instead of silently truncating it).
 * The one caller in this file treats any value <=0 as an error.
 */
static int32_t
getFileLength(FILE *f) {
    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    const long end=ftell(f);
    // always try to rewind, even if ftell() failed
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    if(end<0 || end>0x7fffffffL) {
        return -1;
    }
    return (int32_t)end;
}
208
/*
 * Rewrite s in place so that every tree separator and every alternate
 * file separator becomes the normal file separator.
 * Compiles to nothing on platforms where all three characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
226
/*
 * Rewrite s in place so that every file separator (normal or alternate)
 * becomes the tree separator.
 * Compiles to nothing on platforms where all three characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
244
245 /*
246 * Prepend the path (if any) to the name and run the name through treeToName().
247 */
248 static void
249 makeFullFilename(const char *path, const char *name,
250 char *filename, int32_t capacity) {
251 char *s;
252
253 // prepend the path unless NULL or empty
254 if(path!=NULL && path[0]!=0) {
255 if((int32_t)(strlen(path)+1)>=capacity) {
256 fprintf(stderr, "pathname too long: \"%s\"\n", path);
257 exit(U_BUFFER_OVERFLOW_ERROR);
258 }
259 strcpy(filename, path);
260
261 // make sure the path ends with a file separator
262 s=strchr(filename, 0);
263 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
264 *s++=U_FILE_SEP_CHAR;
265 }
266 } else {
267 s=filename;
268 }
269
270 // turn the name into a filename, turn tree separators into file separators
271 if((int32_t)((s-filename)+strlen(name))>=capacity) {
272 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
273 exit(U_BUFFER_OVERFLOW_ERROR);
274 }
275 strcpy(s, name);
276 treeToPath(s);
277 }
278
279 static void
280 makeFullFilenameAndDirs(const char *path, const char *name,
281 char *filename, int32_t capacity) {
282 char *sep;
283 UErrorCode errorCode;
284
285 makeFullFilename(path, name, filename, capacity);
286
287 // make tree directories
288 errorCode=U_ZERO_ERROR;
289 sep=strchr(filename, 0)-strlen(name);
290 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
291 if(sep!=filename) {
292 *sep=0; // truncate temporarily
293 uprv_mkdir(filename, &errorCode);
294 if(U_FAILURE(errorCode)) {
295 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
296 exit(U_FILE_ACCESS_ERROR);
297 }
298 }
299 *sep++=U_FILE_SEP_CHAR; // restore file separator character
300 }
301 }
302
303 static uint8_t *
304 readFile(const char *path, const char *name, int32_t &length, char &type) {
305 char filename[1024];
306 FILE *file;
307 UErrorCode errorCode;
308 int32_t fileLength, typeEnum;
309
310 makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
311
312 /* open the input file, get its length, allocate memory for it, read the file */
313 file=fopen(filename, "rb");
314 if(file==NULL) {
315 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
316 exit(U_FILE_ACCESS_ERROR);
317 }
318
319 /* get the file length */
320 fileLength=getFileLength(file);
321 if(ferror(file) || fileLength<=0) {
322 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
323 fclose(file);
324 exit(U_FILE_ACCESS_ERROR);
325 }
326
327 /* allocate the buffer, pad to multiple of 16 */
328 length=(fileLength+0xf)&~0xf;
329 icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length));
330 if(data.isNull()) {
331 fclose(file);
332 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
333 exit(U_MEMORY_ALLOCATION_ERROR);
334 }
335
336 /* read the file */
337 if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) {
338 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
339 fclose(file);
340 exit(U_FILE_ACCESS_ERROR);
341 }
342
343 /* pad the file to a multiple of 16 using the usual padding byte */
344 if(fileLength<length) {
345 memset(data.getAlias()+fileLength, 0xaa, length-fileLength);
346 }
347
348 fclose(file);
349
350 // minimum check for ICU-format data
351 errorCode=U_ZERO_ERROR;
352 typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode);
353 if(typeEnum<0 || U_FAILURE(errorCode)) {
354 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
355 #if !UCONFIG_NO_LEGACY_CONVERSION
356 exit(U_INVALID_FORMAT_ERROR);
357 #else
358 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
359 exit(0);
360 #endif
361 }
362 type=makeTypeLetter(typeEnum);
363
364 return data.orphan();
365 }
366
367 // .dat package file representation ---------------------------------------- ***
368
369 U_CDECL_BEGIN
370
371 static int32_t U_CALLCONV
372 compareItems(const void * /*context*/, const void *left, const void *right) {
373 U_NAMESPACE_USE
374
375 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
376 }
377
378 U_CDECL_END
379
380 U_NAMESPACE_BEGIN
381
382 Package::Package()
383 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) {
384 inPkgName[0]=0;
385 pkgPrefix[0]=0;
386 inData=NULL;
387 inLength=0;
388 inCharset=U_CHARSET_FAMILY;
389 inIsBigEndian=U_IS_BIG_ENDIAN;
390
391 itemCount=0;
392 itemMax=0;
393 items=NULL;
394
395 inStringTop=outStringTop=0;
396
397 matchMode=0;
398 findPrefix=findSuffix=NULL;
399 findPrefixLength=findSuffixLength=0;
400 findNextIndex=-1;
401
402 // create a header for an empty package
403 DataHeader *pHeader;
404 pHeader=(DataHeader *)header;
405 pHeader->dataHeader.magic1=0xda;
406 pHeader->dataHeader.magic2=0x27;
407 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
408 headerLength=(int32_t)(4+sizeof(dataInfo));
409 if(headerLength&0xf) {
410 /* NUL-pad the header to a multiple of 16 */
411 int32_t length=(headerLength+0xf)&~0xf;
412 memset(header+headerLength, 0, length-headerLength);
413 headerLength=length;
414 }
415 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
416 }
417
418 Package::~Package() {
419 int32_t idx;
420
421 uprv_free(inData);
422
423 for(idx=0; idx<itemCount; ++idx) {
424 if(items[idx].isDataOwned) {
425 uprv_free(items[idx].data);
426 }
427 }
428
429 uprv_free((void*)items);
430 }
431
432 void
433 Package::setPrefix(const char *p) {
434 if(strlen(p)>=sizeof(pkgPrefix)) {
435 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
436 exit(U_ILLEGAL_ARGUMENT_ERROR);
437 }
438 strcpy(pkgPrefix, p);
439 }
440
441 void
442 Package::readPackage(const char *filename) {
443 UDataSwapper *ds;
444 const UDataInfo *pInfo;
445 UErrorCode errorCode;
446
447 const uint8_t *inBytes;
448
449 int32_t length, offset, i;
450 int32_t itemLength, typeEnum;
451 char type;
452
453 const UDataOffsetTOCEntry *inEntries;
454
455 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
456
457 /* read the file */
458 inData=readFile(NULL, filename, inLength, type);
459 length=inLength;
460
461 /*
462 * swap the header - even if the swapping itself is a no-op
463 * because it tells us the header length
464 */
465 errorCode=U_ZERO_ERROR;
466 makeTypeProps(type, inCharset, inIsBigEndian);
467 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
468 if(U_FAILURE(errorCode)) {
469 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
470 filename, u_errorName(errorCode));
471 exit(errorCode);
472 }
473
474 ds->printError=printPackageError;
475 ds->printErrorContext=stderr;
476
477 headerLength=sizeof(header);
478 if(length<headerLength) {
479 headerLength=length;
480 }
481 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
482 if(U_FAILURE(errorCode)) {
483 exit(errorCode);
484 }
485
486 /* check data format and format version */
487 pInfo=(const UDataInfo *)((const char *)inData+4);
488 if(!(
489 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
490 pInfo->dataFormat[1]==0x6d &&
491 pInfo->dataFormat[2]==0x6e &&
492 pInfo->dataFormat[3]==0x44 &&
493 pInfo->formatVersion[0]==1
494 )) {
495 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
496 pInfo->dataFormat[0], pInfo->dataFormat[1],
497 pInfo->dataFormat[2], pInfo->dataFormat[3],
498 pInfo->formatVersion[0]);
499 exit(U_UNSUPPORTED_ERROR);
500 }
501 inIsBigEndian=(UBool)pInfo->isBigEndian;
502 inCharset=pInfo->charsetFamily;
503
504 inBytes=(const uint8_t *)inData+headerLength;
505 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
506
507 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
508 length-=headerLength;
509 if(length<4) {
510 /* itemCount does not fit */
511 offset=0x7fffffff;
512 } else {
513 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
514 setItemCapacity(itemCount); /* resize so there's space */
515 if(itemCount==0) {
516 offset=4;
517 } else if(length<(4+8*itemCount)) {
518 /* ToC table does not fit */
519 offset=0x7fffffff;
520 } else {
521 /* offset of the last item plus at least 20 bytes for its header */
522 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
523 }
524 }
525 if(length<offset) {
526 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
527 (long)length);
528 exit(U_INDEX_OUTOFBOUNDS_ERROR);
529 }
530 /* do not modify the package length variable until the last item's length is set */
531
532 if(itemCount<=0) {
533 if(doAutoPrefix) {
534 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
535 exit(U_INVALID_FORMAT_ERROR);
536 }
537 } else {
538 char prefix[MAX_PKG_NAME_LENGTH+4];
539 char *s, *inItemStrings;
540
541 if(itemCount>itemMax) {
542 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
543 exit(U_BUFFER_OVERFLOW_ERROR);
544 }
545
546 /* swap the item name strings */
547 int32_t stringsOffset=4+8*itemCount;
548 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
549
550 // don't include padding bytes at the end of the item names
551 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
552 --itemLength;
553 }
554
555 if((inStringTop+itemLength)>STRING_STORE_SIZE) {
556 fprintf(stderr, "icupkg: total length of item name strings too long\n");
557 exit(U_BUFFER_OVERFLOW_ERROR);
558 }
559
560 inItemStrings=inStrings+inStringTop;
561 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
562 if(U_FAILURE(errorCode)) {
563 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
564 exit(U_INVALID_FORMAT_ERROR);
565 }
566 inStringTop+=itemLength;
567
568 // reset the Item entries
569 memset(items, 0, itemCount*sizeof(Item));
570
571 /*
572 * Get the common prefix of the items.
573 * New-style ICU .dat packages use tree separators ('/') between package names,
574 * tree names, and item names,
575 * while old-style ICU .dat packages (before multi-tree support)
576 * use an underscore ('_') between package and item names.
577 */
578 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
579 s=inItemStrings+offset; // name of the first entry
580 int32_t prefixLength;
581 if(doAutoPrefix) {
582 // Use the first entry's prefix. Must be a new-style package.
583 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
584 if(prefixLimit==NULL) {
585 fprintf(stderr,
586 "icupkg: --auto_toc_prefix[_with_type] but "
587 "the first entry \"%s\" does not contain a '%c'\n",
588 s, U_TREE_ENTRY_SEP_CHAR);
589 exit(U_INVALID_FORMAT_ERROR);
590 }
591 prefixLength=(int32_t)(prefixLimit-s);
592 if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) {
593 fprintf(stderr,
594 "icupkg: --auto_toc_prefix[_with_type] but "
595 "the prefix of the first entry \"%s\" is empty or too long\n",
596 s);
597 exit(U_INVALID_FORMAT_ERROR);
598 }
599 if(prefixEndsWithType && s[prefixLength-1]!=type) {
600 fprintf(stderr,
601 "icupkg: --auto_toc_prefix_with_type but "
602 "the prefix of the first entry \"%s\" does not end with '%c'\n",
603 s, type);
604 exit(U_INVALID_FORMAT_ERROR);
605 }
606 memcpy(pkgPrefix, s, prefixLength);
607 pkgPrefix[prefixLength]=0;
608 memcpy(prefix, s, ++prefixLength); // include the /
609 } else {
610 // Use the package basename as prefix.
611 int32_t inPkgNameLength=strlen(inPkgName);
612 memcpy(prefix, inPkgName, inPkgNameLength);
613 prefixLength=inPkgNameLength;
614
615 if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
616 0==memcmp(s, inPkgName, inPkgNameLength) &&
617 s[inPkgNameLength]=='_'
618 ) {
619 // old-style .dat package
620 prefix[prefixLength++]='_';
621 } else {
622 // new-style .dat package
623 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
624 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
625 // then the test in the loop below will fail
626 }
627 }
628 prefix[prefixLength]=0;
629
630 /* read the ToC table */
631 for(i=0; i<itemCount; ++i) {
632 // skip the package part of the item name, error if it does not match the actual package name
633 // or if nothing follows the package name
634 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
635 s=inItemStrings+offset;
636 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
637 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
638 s, prefix);
639 exit(U_INVALID_FORMAT_ERROR);
640 }
641 items[i].name=s+prefixLength;
642
643 // set the item's data
644 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
645 if(i>0) {
646 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
647
648 // set the previous item's platform type
649 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
650 if(typeEnum<0 || U_FAILURE(errorCode)) {
651 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
652 exit(U_INVALID_FORMAT_ERROR);
653 }
654 items[i-1].type=makeTypeLetter(typeEnum);
655 }
656 items[i].isDataOwned=FALSE;
657 }
658 // set the last item's length
659 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
660
661 // set the last item's platform type
662 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
663 if(typeEnum<0 || U_FAILURE(errorCode)) {
664 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
665 exit(U_INVALID_FORMAT_ERROR);
666 }
667 items[itemCount-1].type=makeTypeLetter(typeEnum);
668
669 if(type!=U_ICUDATA_TYPE_LETTER[0]) {
670 // sort the item names for the local charset
671 sortItems();
672 }
673 }
674
675 udata_closeSwapper(ds);
676 }
677
678 char
679 Package::getInType() {
680 return makeTypeLetter(inCharset, inIsBigEndian);
681 }
682
683 void
684 Package::writePackage(const char *filename, char outType, const char *comment) {
685 char prefix[MAX_PKG_NAME_LENGTH+4];
686 UDataOffsetTOCEntry entry;
687 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
688 FILE *file;
689 Item *pItem;
690 char *name;
691 UErrorCode errorCode;
692 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
693 uint8_t outCharset;
694 UBool outIsBigEndian;
695
696 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
697
698 // if there is an explicit comment, then use it, else use what's in the current header
699 if(comment!=NULL) {
700 /* get the header size minus the current comment */
701 DataHeader *pHeader;
702 int32_t length;
703
704 pHeader=(DataHeader *)header;
705 headerLength=4+pHeader->info.size;
706 length=(int32_t)strlen(comment);
707 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
708 fprintf(stderr, "icupkg: comment too long\n");
709 exit(U_BUFFER_OVERFLOW_ERROR);
710 }
711 memcpy(header+headerLength, comment, length+1);
712 headerLength+=length;
713 if(headerLength&0xf) {
714 /* NUL-pad the header to a multiple of 16 */
715 length=(headerLength+0xf)&~0xf;
716 memset(header+headerLength, 0, length-headerLength);
717 headerLength=length;
718 }
719 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
720 }
721
722 makeTypeProps(outType, outCharset, outIsBigEndian);
723
724 // open (TYPE_COUNT-2) swappers
725 // one is a no-op for local type==outType
726 // one type (TYPE_LE) is bogus
727 errorCode=U_ZERO_ERROR;
728 i=makeTypeEnum(outType);
729 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
730 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
731 ds[TYPE_LE]=NULL;
732 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
733 if(U_FAILURE(errorCode)) {
734 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
735 exit(errorCode);
736 }
737 for(i=0; i<TYPE_COUNT; ++i) {
738 if(ds[i]!=NULL) {
739 ds[i]->printError=printPackageError;
740 ds[i]->printErrorContext=stderr;
741 }
742 }
743
744 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
745
746 // create the file and write its contents
747 file=fopen(filename, "wb");
748 if(file==NULL) {
749 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
750 exit(U_FILE_ACCESS_ERROR);
751 }
752
753 // swap and write the header
754 if(dsLocalToOut!=NULL) {
755 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
756 if(U_FAILURE(errorCode)) {
757 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
758 exit(errorCode);
759 }
760 }
761 length=(int32_t)fwrite(header, 1, headerLength, file);
762 if(length!=headerLength) {
763 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
764 exit(U_FILE_ACCESS_ERROR);
765 }
766
767 // prepare and swap the package name with a tree separator
768 // for prepending to item names
769 if(pkgPrefix[0]==0) {
770 prefixLength=(int32_t)strlen(prefix);
771 } else {
772 prefixLength=(int32_t)strlen(pkgPrefix);
773 memcpy(prefix, pkgPrefix, prefixLength);
774 if(prefixEndsWithType) {
775 prefix[prefixLength-1]=outType;
776 }
777 }
778 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
779 prefix[prefixLength]=0;
780 if(dsLocalToOut!=NULL) {
781 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
782 if(U_FAILURE(errorCode)) {
783 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
784 exit(errorCode);
785 }
786
787 // swap and sort the item names (sorting needs to be done in the output charset)
788 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
789 if(U_FAILURE(errorCode)) {
790 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
791 exit(errorCode);
792 }
793 sortItems();
794 }
795
796 // create the output item names in sorted order, with the package name prepended to each
797 for(i=0; i<itemCount; ++i) {
798 length=(int32_t)strlen(items[i].name);
799 name=allocString(FALSE, length+prefixLength);
800 memcpy(name, prefix, prefixLength);
801 memcpy(name+prefixLength, items[i].name, length+1);
802 items[i].name=name;
803 }
804
805 // calculate offsets for item names and items, pad to 16-align items
806 // align only the first item; each item's length is a multiple of 16
807 basenameOffset=4+8*itemCount;
808 offset=basenameOffset+outStringTop;
809 if((length=(offset&15))!=0) {
810 length=16-length;
811 memset(allocString(FALSE, length-1), 0xaa, length);
812 offset+=length;
813 }
814
815 // write the table of contents
816 // first the itemCount
817 outInt32=itemCount;
818 if(dsLocalToOut!=NULL) {
819 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
820 if(U_FAILURE(errorCode)) {
821 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
822 exit(errorCode);
823 }
824 }
825 length=(int32_t)fwrite(&outInt32, 1, 4, file);
826 if(length!=4) {
827 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
828 exit(U_FILE_ACCESS_ERROR);
829 }
830
831 // then write the item entries (and collect the maxItemLength)
832 maxItemLength=0;
833 for(i=0; i<itemCount; ++i) {
834 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
835 entry.dataOffset=(uint32_t)offset;
836 if(dsLocalToOut!=NULL) {
837 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
838 if(U_FAILURE(errorCode)) {
839 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
840 exit(errorCode);
841 }
842 }
843 length=(int32_t)fwrite(&entry, 1, 8, file);
844 if(length!=8) {
845 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
846 exit(U_FILE_ACCESS_ERROR);
847 }
848
849 length=items[i].length;
850 if(length>maxItemLength) {
851 maxItemLength=length;
852 }
853 offset+=length;
854 }
855
856 // write the item names
857 length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
858 if(length!=outStringTop) {
859 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
860 exit(U_FILE_ACCESS_ERROR);
861 }
862
863 // write the items
864 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
865 int32_t type=makeTypeEnum(pItem->type);
866 if(ds[type]!=NULL) {
867 // swap each item from its platform properties to the desired ones
868 udata_swap(
869 ds[type],
870 pItem->data, pItem->length, pItem->data,
871 &errorCode);
872 if(U_FAILURE(errorCode)) {
873 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
874 exit(errorCode);
875 }
876 }
877 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
878 if(length!=pItem->length) {
879 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
880 exit(U_FILE_ACCESS_ERROR);
881 }
882 }
883
884 if(ferror(file)) {
885 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
886 exit(U_FILE_ACCESS_ERROR);
887 }
888
889 fclose(file);
890 for(i=0; i<TYPE_COUNT; ++i) {
891 udata_closeSwapper(ds[i]);
892 }
893 }
894
895 int32_t
896 Package::findItem(const char *name, int32_t length) const {
897 int32_t i, start, limit;
898 int result;
899
900 /* do a binary search for the string */
901 start=0;
902 limit=itemCount;
903 while(start<limit) {
904 i=(start+limit)/2;
905 if(length>=0) {
906 result=strncmp(name, items[i].name, length);
907 } else {
908 result=strcmp(name, items[i].name);
909 }
910
911 if(result==0) {
912 /* found */
913 if(length>=0) {
914 /*
915 * if we compared just prefixes, then we may need to back up
916 * to the first item with this prefix
917 */
918 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
919 --i;
920 }
921 }
922 return i;
923 } else if(result<0) {
924 limit=i;
925 } else /* result>0 */ {
926 start=i+1;
927 }
928 }
929
930 return ~start; /* not found, return binary-not of the insertion point */
931 }
932
933 void
934 Package::findItems(const char *pattern) {
935 const char *wild;
936
937 if(pattern==NULL || *pattern==0) {
938 findNextIndex=-1;
939 return;
940 }
941
942 findPrefix=pattern;
943 findSuffix=NULL;
944 findSuffixLength=0;
945
946 wild=strchr(pattern, '*');
947 if(wild==NULL) {
948 // no wildcard
949 findPrefixLength=(int32_t)strlen(pattern);
950 } else {
951 // one wildcard
952 findPrefixLength=(int32_t)(wild-pattern);
953 findSuffix=wild+1;
954 findSuffixLength=(int32_t)strlen(findSuffix);
955 if(NULL!=strchr(findSuffix, '*')) {
956 // two or more wildcards
957 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
958 exit(U_PARSE_ERROR);
959 }
960 }
961
962 if(findPrefixLength==0) {
963 findNextIndex=0;
964 } else {
965 findNextIndex=findItem(findPrefix, findPrefixLength);
966 }
967 }
968
969 int32_t
970 Package::findNextItem() {
971 const char *name, *middle, *treeSep;
972 int32_t idx, nameLength, middleLength;
973
974 if(findNextIndex<0) {
975 return -1;
976 }
977
978 while(findNextIndex<itemCount) {
979 idx=findNextIndex++;
980 name=items[idx].name;
981 nameLength=(int32_t)strlen(name);
982 if(nameLength<(findPrefixLength+findSuffixLength)) {
983 // item name too short for prefix & suffix
984 continue;
985 }
986 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
987 // left the range of names with this prefix
988 break;
989 }
990 middle=name+findPrefixLength;
991 middleLength=nameLength-findPrefixLength-findSuffixLength;
992 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
993 // suffix does not match
994 continue;
995 }
996 // prefix & suffix match
997
998 if(matchMode&MATCH_NOSLASH) {
999 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
1000 if(treeSep!=NULL && (treeSep-middle)<middleLength) {
1001 // the middle (matching the * wildcard) contains a tree separator /
1002 continue;
1003 }
1004 }
1005
1006 // found a matching item
1007 return idx;
1008 }
1009
1010 // no more items
1011 findNextIndex=-1;
1012 return -1;
1013 }
1014
1015 void
1016 Package::setMatchMode(uint32_t mode) {
1017 matchMode=mode;
1018 }
1019
1020 void
1021 Package::addItem(const char *name) {
1022 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
1023 }
1024
1025 void
1026 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
1027 int32_t idx;
1028
1029 idx=findItem(name);
1030 if(idx<0) {
1031 // new item, make space at the insertion point
1032 ensureItemCapacity();
1033 // move the following items down
1034 idx=~idx;
1035 if(idx<itemCount) {
1036 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
1037 }
1038 ++itemCount;
1039
1040 // reset this Item entry
1041 memset(items+idx, 0, sizeof(Item));
1042
1043 // copy the item's name
1044 items[idx].name=allocString(TRUE, strlen(name));
1045 strcpy(items[idx].name, name);
1046 pathToTree(items[idx].name);
1047 } else {
1048 // same-name item found, replace it
1049 if(items[idx].isDataOwned) {
1050 uprv_free(items[idx].data);
1051 }
1052
1053 // keep the item's name since it is the same
1054 }
1055
1056 // set the item's data
1057 items[idx].data=data;
1058 items[idx].length=length;
1059 items[idx].isDataOwned=isDataOwned;
1060 items[idx].type=type;
1061 }
1062
1063 void
1064 Package::addFile(const char *filesPath, const char *name) {
1065 uint8_t *data;
1066 int32_t length;
1067 char type;
1068
1069 data=readFile(filesPath, name, length, type);
1070 // readFile() exits the tool if it fails
1071 addItem(name, data, length, TRUE, type);
1072 }
1073
1074 void
1075 Package::addItems(const Package &listPkg) {
1076 const Item *pItem;
1077 int32_t i;
1078
1079 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1080 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1081 }
1082 }
1083
1084 void
1085 Package::removeItem(int32_t idx) {
1086 if(idx>=0) {
1087 // remove the item
1088 if(items[idx].isDataOwned) {
1089 uprv_free(items[idx].data);
1090 }
1091
1092 // move the following items up
1093 if((idx+1)<itemCount) {
1094 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1095 }
1096 --itemCount;
1097
1098 if(idx<=findNextIndex) {
1099 --findNextIndex;
1100 }
1101 }
1102 }
1103
1104 void
1105 Package::removeItems(const char *pattern) {
1106 int32_t idx;
1107
1108 findItems(pattern);
1109 while((idx=findNextItem())>=0) {
1110 removeItem(idx);
1111 }
1112 }
1113
1114 void
1115 Package::removeItems(const Package &listPkg) {
1116 const Item *pItem;
1117 int32_t i;
1118
1119 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1120 removeItems(pItem->name);
1121 }
1122 }
1123
1124 void
1125 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1126 char filename[1024];
1127 UDataSwapper *ds;
1128 FILE *file;
1129 Item *pItem;
1130 int32_t fileLength;
1131 uint8_t itemCharset, outCharset;
1132 UBool itemIsBigEndian, outIsBigEndian;
1133
1134 if(idx<0 || itemCount<=idx) {
1135 return;
1136 }
1137 pItem=items+idx;
1138
1139 // swap the data to the outType
1140 // outType==0: don't swap
1141 if(outType!=0 && pItem->type!=outType) {
1142 // open the swapper
1143 UErrorCode errorCode=U_ZERO_ERROR;
1144 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1145 makeTypeProps(outType, outCharset, outIsBigEndian);
1146 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1147 if(U_FAILURE(errorCode)) {
1148 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1149 (long)idx, u_errorName(errorCode));
1150 exit(errorCode);
1151 }
1152
1153 ds->printError=printPackageError;
1154 ds->printErrorContext=stderr;
1155
1156 // swap the item from its platform properties to the desired ones
1157 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1158 if(U_FAILURE(errorCode)) {
1159 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1160 exit(errorCode);
1161 }
1162 udata_closeSwapper(ds);
1163 pItem->type=outType;
1164 }
1165
1166 // create the file and write its contents
1167 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1168 file=fopen(filename, "wb");
1169 if(file==NULL) {
1170 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1171 exit(U_FILE_ACCESS_ERROR);
1172 }
1173 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1174
1175 if(ferror(file) || fileLength!=pItem->length) {
1176 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1177 exit(U_FILE_ACCESS_ERROR);
1178 }
1179 fclose(file);
1180 }
1181
1182 void
1183 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1184 extractItem(filesPath, items[idx].name, idx, outType);
1185 }
1186
1187 void
1188 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1189 int32_t idx;
1190
1191 findItems(pattern);
1192 while((idx=findNextItem())>=0) {
1193 extractItem(filesPath, idx, outType);
1194 }
1195 }
1196
1197 void
1198 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1199 const Item *pItem;
1200 int32_t i;
1201
1202 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1203 extractItems(filesPath, pItem->name, outType);
1204 }
1205 }
1206
1207 int32_t
1208 Package::getItemCount() const {
1209 return itemCount;
1210 }
1211
1212 const Item *
1213 Package::getItem(int32_t idx) const {
1214 if (0 <= idx && idx < itemCount) {
1215 return &items[idx];
1216 }
1217 return NULL;
1218 }
1219
1220 void
1221 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1222 // check dependency: make sure the target item is in the package
1223 Package *me=(Package *)context;
1224 if(me->findItem(targetName)<0) {
1225 me->isMissingItems=TRUE;
1226 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1227 }
1228 }
1229
1230 UBool
1231 Package::checkDependencies() {
1232 isMissingItems=FALSE;
1233 enumDependencies(this, checkDependency);
1234 return (UBool)!isMissingItems;
1235 }
1236
1237 void
1238 Package::enumDependencies(void *context, CheckDependency check) {
1239 int32_t i;
1240
1241 for(i=0; i<itemCount; ++i) {
1242 enumDependencies(items+i, context, check);
1243 }
1244 }
1245
1246 char *
1247 Package::allocString(UBool in, int32_t length) {
1248 char *p;
1249 int32_t top;
1250
1251 if(in) {
1252 top=inStringTop;
1253 p=inStrings+top;
1254 } else {
1255 top=outStringTop;
1256 p=outStrings+top;
1257 }
1258 top+=length+1;
1259
1260 if(top>STRING_STORE_SIZE) {
1261 fprintf(stderr, "icupkg: string storage overflow\n");
1262 exit(U_BUFFER_OVERFLOW_ERROR);
1263 }
1264 if(in) {
1265 inStringTop=top;
1266 } else {
1267 outStringTop=top;
1268 }
1269 return p;
1270 }
1271
1272 void
1273 Package::sortItems() {
1274 UErrorCode errorCode=U_ZERO_ERROR;
1275 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1276 if(U_FAILURE(errorCode)) {
1277 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1278 exit(errorCode);
1279 }
1280 }
1281
1282 void Package::setItemCapacity(int32_t max)
1283 {
1284 if(max<=itemMax) {
1285 return;
1286 }
1287 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
1288 Item *oldItems = items;
1289 if(newItems == NULL) {
1290 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
1291 (unsigned long)max*sizeof(items[0]), max);
1292 exit(U_MEMORY_ALLOCATION_ERROR);
1293 }
1294 if(items && itemCount>0) {
1295 uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
1296 }
1297 itemMax = max;
1298 items = newItems;
1299 uprv_free(oldItems);
1300 }
1301
1302 void Package::ensureItemCapacity()
1303 {
1304 if((itemCount+1)>itemMax) {
1305 setItemCapacity(itemCount+kItemsChunk);
1306 }
1307 }
1308
1309 U_NAMESPACE_END