]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/package.cpp
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / package.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: package.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005aug25
14 * created by: Markus W. Scherer
15 *
16 * Read, modify, and write ICU .dat data package files.
17 * This is an integral part of the icupkg tool, moved to the toolutil library
18 * because parts of tool implementations tend to be later shared by
19 * other tools.
20 * Subsumes functionality and implementation code from
21 * gencmn, decmn, and icuswap tools.
22 */
23
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34 #include "cmemory.h"
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40
/*
 * Growth increment, in Item slots, for the items array.
 * NOTE(review): the code that uses this constant is not in this part of the
 * file (presumably ensureItemCapacity()) - confirm.
 */
static const int32_t kItemsChunk = 256;

// general definitions ----------------------------------------------------- ***

/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
46
/*
 * UDataInfo (cf. udata.h) for a "CmnD" common-data package with the
 * properties of the local platform.
 * The Package constructor copies it into the header of an empty package.
 * The initializers are positional; the field names below follow udata.h.
 */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),    /* size */
    0,                              /* reserved */

    U_IS_BIG_ENDIAN,                /* isBigEndian */
    U_CHARSET_FAMILY,               /* charsetFamily */
    (uint8_t)sizeof(UChar),         /* sizeofUChar */
    0,                              /* reserved */

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
61
62 U_CDECL_BEGIN
63 static void U_CALLCONV
64 printPackageError(void *context, const char *fmt, va_list args) {
65 vfprintf((FILE *)context, fmt, args);
66 }
67 U_CDECL_END
68
/* Return x with its two bytes exchanged. */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t lowToHigh=(uint16_t)(x<<8);
    const uint16_t highToLow=(uint16_t)(x>>8);
    return (uint16_t)(lowToHigh|highToLow);
}
73
74 // platform types ---------------------------------------------------------- ***
75
/*
 * Platform type letters, indexed by the TYPE_* values below.
 * The index is 2*charsetFamily+isBigEndian (see makeTypeEnum()):
 *   'l' little-endian ASCII, 'b' big-endian ASCII, 'e' big-endian EBCDIC.
 * Index TYPE_LE (little-endian EBCDIC) has no letter of its own; '?' is a placeholder.
 */
static const char *types="lb?e";

/* Indexes into types[] and into per-type arrays; TYPE_COUNT is the number of slots. */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
79
80 static inline int32_t
81 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
82 return 2*(int32_t)charset+isBigEndian;
83 }
84
85 static inline int32_t
86 makeTypeEnum(char type) {
87 return
88 type == 'l' ? TYPE_L :
89 type == 'b' ? TYPE_B :
90 type == 'e' ? TYPE_E :
91 -1;
92 }
93
94 static inline char
95 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
96 return types[makeTypeEnum(charset, isBigEndian)];
97 }
98
99 static inline char
100 makeTypeLetter(int32_t typeEnum) {
101 return types[typeEnum];
102 }
103
104 static void
105 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
106 int32_t typeEnum=makeTypeEnum(type);
107 charset=(uint8_t)(typeEnum>>1);
108 isBigEndian=(UBool)(typeEnum&1);
109 }
110
111 U_CFUNC const UDataInfo *
112 getDataInfo(const uint8_t *data, int32_t length,
113 int32_t &infoLength, int32_t &headerLength,
114 UErrorCode *pErrorCode) {
115 const DataHeader *pHeader;
116 const UDataInfo *pInfo;
117
118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
119 return NULL;
120 }
121 if( data==NULL ||
122 (length>=0 && length<(int32_t)sizeof(DataHeader))
123 ) {
124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
125 return NULL;
126 }
127
128 pHeader=(const DataHeader *)data;
129 pInfo=&pHeader->info;
130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
131 pHeader->dataHeader.magic1!=0xda ||
132 pHeader->dataHeader.magic2!=0x27 ||
133 pInfo->sizeofUChar!=2
134 ) {
135 *pErrorCode=U_UNSUPPORTED_ERROR;
136 return NULL;
137 }
138
139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
140 headerLength=pHeader->dataHeader.headerSize;
141 infoLength=pInfo->size;
142 } else {
143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
144 infoLength=readSwapUInt16(pInfo->size);
145 }
146
147 if( headerLength<(int32_t)sizeof(DataHeader) ||
148 infoLength<(int32_t)sizeof(UDataInfo) ||
149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
150 (length>=0 && length<headerLength)
151 ) {
152 *pErrorCode=U_UNSUPPORTED_ERROR;
153 return NULL;
154 }
155
156 return pInfo;
157 }
158
159 static int32_t
160 getTypeEnumForInputData(const uint8_t *data, int32_t length,
161 UErrorCode *pErrorCode) {
162 const UDataInfo *pInfo;
163 int32_t infoLength, headerLength;
164
165 /* getDataInfo() checks for illegal arguments */
166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
167 if(pInfo==NULL) {
168 return -1;
169 }
170
171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
172 }
173
174 // file handling ----------------------------------------------------------- ***
175
176 static void
177 extractPackageName(const char *filename,
178 char pkg[], int32_t capacity) {
179 const char *basename;
180 int32_t len;
181
182 basename=findBasename(filename);
183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
184
185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
187 basename);
188 exit(U_ILLEGAL_ARGUMENT_ERROR);
189 }
190
191 if(len>=capacity) {
192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
193 basename, (long)capacity);
194 exit(U_ILLEGAL_ARGUMENT_ERROR);
195 }
196
197 memcpy(pkg, basename, len);
198 pkg[len]=0;
199 }
200
/*
 * Return the length of the open file f in bytes and rewind it to the start.
 * Returns -1 if seeking or telling fails, or if the length does not fit
 * into an int32_t (instead of silently truncating it).
 */
static int32_t
getFileLength(FILE *f) {
    long end;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    end=ftell(f);
    if(fseek(f, 0, SEEK_SET)!=0 || end<0 || end>0x7fffffffL) {
        return -1;
    }
    return (int32_t)end;
}
210
/*
 * Rewrite s in place so that every tree separator and every alternate
 * file separator becomes the normal file separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
228
/*
 * Rewrite s in place so that every file separator (normal or alternate)
 * becomes the tree separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
246
247 /*
248 * Prepend the path (if any) to the name and run the name through treeToName().
249 */
250 static void
251 makeFullFilename(const char *path, const char *name,
252 char *filename, int32_t capacity) {
253 char *s;
254
255 // prepend the path unless NULL or empty
256 if(path!=NULL && path[0]!=0) {
257 if((int32_t)(strlen(path)+1)>=capacity) {
258 fprintf(stderr, "pathname too long: \"%s\"\n", path);
259 exit(U_BUFFER_OVERFLOW_ERROR);
260 }
261 strcpy(filename, path);
262
263 // make sure the path ends with a file separator
264 s=strchr(filename, 0);
265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
266 *s++=U_FILE_SEP_CHAR;
267 }
268 } else {
269 s=filename;
270 }
271
272 // turn the name into a filename, turn tree separators into file separators
273 if((int32_t)((s-filename)+strlen(name))>=capacity) {
274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
275 exit(U_BUFFER_OVERFLOW_ERROR);
276 }
277 strcpy(s, name);
278 treeToPath(s);
279 }
280
281 static void
282 makeFullFilenameAndDirs(const char *path, const char *name,
283 char *filename, int32_t capacity) {
284 char *sep;
285 UErrorCode errorCode;
286
287 makeFullFilename(path, name, filename, capacity);
288
289 // make tree directories
290 errorCode=U_ZERO_ERROR;
291 sep=strchr(filename, 0)-strlen(name);
292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
293 if(sep!=filename) {
294 *sep=0; // truncate temporarily
295 uprv_mkdir(filename, &errorCode);
296 if(U_FAILURE(errorCode)) {
297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
298 exit(U_FILE_ACCESS_ERROR);
299 }
300 }
301 *sep++=U_FILE_SEP_CHAR; // restore file separator character
302 }
303 }
304
305 static uint8_t *
306 readFile(const char *path, const char *name, int32_t &length, char &type) {
307 char filename[1024];
308 FILE *file;
309 uint8_t *data;
310 UErrorCode errorCode;
311 int32_t fileLength, typeEnum;
312
313 makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
314
315 /* open the input file, get its length, allocate memory for it, read the file */
316 file=fopen(filename, "rb");
317 if(file==NULL) {
318 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
319 exit(U_FILE_ACCESS_ERROR);
320 }
321
322 /* get the file length */
323 fileLength=getFileLength(file);
324 if(ferror(file) || fileLength<=0) {
325 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
326 fclose(file);
327 exit(U_FILE_ACCESS_ERROR);
328 }
329
330 /* allocate the buffer, pad to multiple of 16 */
331 length=(fileLength+0xf)&~0xf;
332 data=(uint8_t *)uprv_malloc(length);
333 if(data==NULL) {
334 fclose(file);
335 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
336 exit(U_MEMORY_ALLOCATION_ERROR);
337 }
338
339 /* read the file */
340 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
341 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
342 fclose(file);
343 free(data);
344 exit(U_FILE_ACCESS_ERROR);
345 }
346
347 /* pad the file to a multiple of 16 using the usual padding byte */
348 if(fileLength<length) {
349 memset(data+fileLength, 0xaa, length-fileLength);
350 }
351
352 fclose(file);
353
354 // minimum check for ICU-format data
355 errorCode=U_ZERO_ERROR;
356 typeEnum=getTypeEnumForInputData(data, length, &errorCode);
357 if(typeEnum<0 || U_FAILURE(errorCode)) {
358 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
359 free(data);
360 #if !UCONFIG_NO_LEGACY_CONVERSION
361 exit(U_INVALID_FORMAT_ERROR);
362 #else
363 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
364 exit(0);
365 #endif
366 }
367 type=makeTypeLetter(typeEnum);
368
369 return data;
370 }
371
372 // .dat package file representation ---------------------------------------- ***
373
374 U_CDECL_BEGIN
375
376 static int32_t U_CALLCONV
377 compareItems(const void * /*context*/, const void *left, const void *right) {
378 U_NAMESPACE_USE
379
380 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
381 }
382
383 U_CDECL_END
384
385 U_NAMESPACE_BEGIN
386
387 Package::Package()
388 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) {
389 inPkgName[0]=0;
390 pkgPrefix[0]=0;
391 inData=NULL;
392 inLength=0;
393 inCharset=U_CHARSET_FAMILY;
394 inIsBigEndian=U_IS_BIG_ENDIAN;
395
396 itemCount=0;
397 itemMax=0;
398 items=NULL;
399
400 inStringTop=outStringTop=0;
401
402 matchMode=0;
403 findPrefix=findSuffix=NULL;
404 findPrefixLength=findSuffixLength=0;
405 findNextIndex=-1;
406
407 // create a header for an empty package
408 DataHeader *pHeader;
409 pHeader=(DataHeader *)header;
410 pHeader->dataHeader.magic1=0xda;
411 pHeader->dataHeader.magic2=0x27;
412 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
413 headerLength=(int32_t)(4+sizeof(dataInfo));
414 if(headerLength&0xf) {
415 /* NUL-pad the header to a multiple of 16 */
416 int32_t length=(headerLength+0xf)&~0xf;
417 memset(header+headerLength, 0, length-headerLength);
418 headerLength=length;
419 }
420 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
421 }
422
423 Package::~Package() {
424 int32_t idx;
425
426 free(inData);
427
428 for(idx=0; idx<itemCount; ++idx) {
429 if(items[idx].isDataOwned) {
430 free(items[idx].data);
431 }
432 }
433
434 uprv_free((void*)items);
435 }
436
437 void
438 Package::setPrefix(const char *p) {
439 if(strlen(p)>=sizeof(pkgPrefix)) {
440 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
441 exit(U_ILLEGAL_ARGUMENT_ERROR);
442 }
443 strcpy(pkgPrefix, p);
444 }
445
/*
 * Read the .dat package file "filename" into this Package.
 *
 * The file is loaded into inData, its header is swapped into the local
 * "header" buffer, the item name strings are swapped into inStrings, and
 * items[] is filled from the table of contents. Item data is not copied:
 * each items[i].data points into inData and isDataOwned is FALSE.
 * An item's length is the distance to the next item's data; the last item
 * extends to the end of the (padded) file.
 * If the package's type differs from the local platform type, the items are
 * re-sorted for the local charset.
 *
 * Prints a message and exits the tool on any error.
 */
void
Package::readPackage(const char *filename) {
    UDataSwapper *ds;
    const UDataInfo *pInfo;
    UErrorCode errorCode;

    const uint8_t *inBytes;

    int32_t length, offset, i;
    int32_t itemLength, typeEnum;
    char type;

    const UDataOffsetTOCEntry *inEntries;

    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));

    /* read the file */
    inData=readFile(NULL, filename, inLength, type);
    length=inLength;

    /*
     * swap the header - even if the swapping itself is a no-op
     * because it tells us the header length
     */
    errorCode=U_ZERO_ERROR;
    makeTypeProps(type, inCharset, inIsBigEndian);
    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
                filename, u_errorName(errorCode));
        exit(errorCode);
    }

    ds->printError=printPackageError;
    ds->printErrorContext=stderr;

    /* offer at most sizeof(header) bytes, or the whole file if it is shorter */
    headerLength=sizeof(header);
    if(length<headerLength) {
        headerLength=length;
    }
    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    if(U_FAILURE(errorCode)) {
        exit(errorCode);
    }

    /* check data format and format version */
    /* these fields are single bytes, so they are read from the unswapped input; +4 skips the bytes before the UDataInfo */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
        pInfo->dataFormat[1]==0x6d &&
        pInfo->dataFormat[2]==0x6e &&
        pInfo->dataFormat[3]==0x44 &&
        pInfo->formatVersion[0]==1
    )) {
        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
                pInfo->dataFormat[0], pInfo->dataFormat[1],
                pInfo->dataFormat[2], pInfo->dataFormat[3],
                pInfo->formatVersion[0]);
        exit(U_UNSUPPORTED_ERROR);
    }
    inIsBigEndian=(UBool)pInfo->isBigEndian;
    inCharset=pInfo->charsetFamily;

    /* after the header: a 4-byte item count, then the 8-byte ToC entries */
    inBytes=(const uint8_t *)inData+headerLength;
    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);

    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    /* offset becomes the minimum number of bytes required after the header; 0x7fffffff forces the error below */
    length-=headerLength;
    if(length<4) {
        /* itemCount does not fit */
        offset=0x7fffffff;
    } else {
        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
        /*
         * NOTE(review): the capacity is requested before itemCount has been
         * checked against the file length; setItemCapacity() is not shown here -
         * confirm that it copes with negative or very large counts.
         */
        setItemCapacity(itemCount); /* resize so there's space */
        if(itemCount==0) {
            offset=4;
        } else if(length<(4+8*itemCount)) {
            /* ToC table does not fit */
            offset=0x7fffffff;
        } else {
            /* offset of the last item plus at least 20 bytes for its header */
            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
        }
    }
    if(length<offset) {
        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
                        (long)length);
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
    /* do not modify the package length variable until the last item's length is set */

    if(itemCount<=0) {
        if(doAutoPrefix) {
            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
    } else {
        char prefix[MAX_PKG_NAME_LENGTH+4];
        char *s, *inItemStrings;

        if(itemCount>itemMax) {
            fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        /* swap the item name strings */
        /* the strings start right after the ToC and end where the first item's data begins */
        int32_t stringsOffset=4+8*itemCount;
        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;

        // don't include padding bytes at the end of the item names
        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
            --itemLength;
        }

        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
            fprintf(stderr, "icupkg: total length of item name strings too long\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }

        inItemStrings=inStrings+inStringTop;
        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
            exit(U_INVALID_FORMAT_ERROR);
        }
        inStringTop+=itemLength;

        // reset the Item entries
        memset(items, 0, itemCount*sizeof(Item));

        /*
         * Get the common prefix of the items.
         * New-style ICU .dat packages use tree separators ('/') between package names,
         * tree names, and item names,
         * while old-style ICU .dat packages (before multi-tree support)
         * use an underscore ('_') between package and item names.
         */
        /*
         * NOTE(review): the nameOffset values read from the file are not
         * range-checked against the swapped string block before they are used.
         */
        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
        s=inItemStrings+offset; // name of the first entry
        int32_t prefixLength;
        if(doAutoPrefix) {
            // Use the first entry's prefix. Must be a new-style package.
            const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
            if(prefixLimit==NULL) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix[_with_type] but "
                        "the first entry \"%s\" does not contain a '%c'\n",
                        s, U_TREE_ENTRY_SEP_CHAR);
                exit(U_INVALID_FORMAT_ERROR);
            }
            prefixLength=(int32_t)(prefixLimit-s);
            if(prefixLength==0 || prefixLength>=LENGTHOF(pkgPrefix)) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix[_with_type] but "
                        "the prefix of the first entry \"%s\" is empty or too long\n",
                        s);
                exit(U_INVALID_FORMAT_ERROR);
            }
            if(prefixEndsWithType && s[prefixLength-1]!=type) {
                fprintf(stderr,
                        "icupkg: --auto_toc_prefix_with_type but "
                        "the prefix of the first entry \"%s\" does not end with '%c'\n",
                        s, type);
                exit(U_INVALID_FORMAT_ERROR);
            }
            memcpy(pkgPrefix, s, prefixLength);
            pkgPrefix[prefixLength]=0;
            memcpy(prefix, s, ++prefixLength);  // include the /
        } else {
            // Use the package basename as prefix.
            int32_t inPkgNameLength=strlen(inPkgName);
            memcpy(prefix, inPkgName, inPkgNameLength);
            prefixLength=inPkgNameLength;

            if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
                0==memcmp(s, inPkgName, inPkgNameLength) &&
                s[inPkgNameLength]=='_'
            ) {
                // old-style .dat package
                prefix[prefixLength++]='_';
            } else {
                // new-style .dat package
                prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
                // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
                // then the test in the loop below will fail
            }
        }
        prefix[prefixLength]=0;

        /* read the ToC table */
        for(i=0; i<itemCount; ++i) {
            // skip the package part of the item name, error if it does not match the actual package name
            // or if nothing follows the package name
            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
            s=inItemStrings+offset;
            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
                        s, prefix);
                exit(U_INVALID_FORMAT_ERROR);
            }
            items[i].name=s+prefixLength;

            // set the item's data
            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
            if(i>0) {
                // the previous item ends where this one begins
                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);

                // set the previous item's platform type
                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
                if(typeEnum<0 || U_FAILURE(errorCode)) {
                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
                    exit(U_INVALID_FORMAT_ERROR);
                }
                items[i-1].type=makeTypeLetter(typeEnum);
            }
            items[i].isDataOwned=FALSE;
        }
        // set the last item's length
        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);

        // set the last item's platform type
        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
        if(typeEnum<0 || U_FAILURE(errorCode)) {
            // i==itemCount after the loop, so items[i-1] is the last item
            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
            exit(U_INVALID_FORMAT_ERROR);
        }
        items[itemCount-1].type=makeTypeLetter(typeEnum);

        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
            // sort the item names for the local charset
            sortItems();
        }
    }

    udata_closeSwapper(ds);
}
682
683 char
684 Package::getInType() {
685 return makeTypeLetter(inCharset, inIsBigEndian);
686 }
687
688 void
689 Package::writePackage(const char *filename, char outType, const char *comment) {
690 char prefix[MAX_PKG_NAME_LENGTH+4];
691 UDataOffsetTOCEntry entry;
692 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
693 FILE *file;
694 Item *pItem;
695 char *name;
696 UErrorCode errorCode;
697 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
698 uint8_t outCharset;
699 UBool outIsBigEndian;
700
701 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
702
703 // if there is an explicit comment, then use it, else use what's in the current header
704 if(comment!=NULL) {
705 /* get the header size minus the current comment */
706 DataHeader *pHeader;
707 int32_t length;
708
709 pHeader=(DataHeader *)header;
710 headerLength=4+pHeader->info.size;
711 length=(int32_t)strlen(comment);
712 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
713 fprintf(stderr, "icupkg: comment too long\n");
714 exit(U_BUFFER_OVERFLOW_ERROR);
715 }
716 memcpy(header+headerLength, comment, length+1);
717 headerLength+=length;
718 if(headerLength&0xf) {
719 /* NUL-pad the header to a multiple of 16 */
720 length=(headerLength+0xf)&~0xf;
721 memset(header+headerLength, 0, length-headerLength);
722 headerLength=length;
723 }
724 pHeader->dataHeader.headerSize=(uint16_t)headerLength;
725 }
726
727 makeTypeProps(outType, outCharset, outIsBigEndian);
728
729 // open (TYPE_COUNT-2) swappers
730 // one is a no-op for local type==outType
731 // one type (TYPE_LE) is bogus
732 errorCode=U_ZERO_ERROR;
733 i=makeTypeEnum(outType);
734 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
735 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
736 ds[TYPE_LE]=NULL;
737 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
738 if(U_FAILURE(errorCode)) {
739 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
740 exit(errorCode);
741 }
742 for(i=0; i<TYPE_COUNT; ++i) {
743 if(ds[i]!=NULL) {
744 ds[i]->printError=printPackageError;
745 ds[i]->printErrorContext=stderr;
746 }
747 }
748
749 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
750
751 // create the file and write its contents
752 file=fopen(filename, "wb");
753 if(file==NULL) {
754 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
755 exit(U_FILE_ACCESS_ERROR);
756 }
757
758 // swap and write the header
759 if(dsLocalToOut!=NULL) {
760 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
761 if(U_FAILURE(errorCode)) {
762 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
763 exit(errorCode);
764 }
765 }
766 length=(int32_t)fwrite(header, 1, headerLength, file);
767 if(length!=headerLength) {
768 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
769 exit(U_FILE_ACCESS_ERROR);
770 }
771
772 // prepare and swap the package name with a tree separator
773 // for prepending to item names
774 if(pkgPrefix[0]==0) {
775 prefixLength=(int32_t)strlen(prefix);
776 } else {
777 prefixLength=(int32_t)strlen(pkgPrefix);
778 memcpy(prefix, pkgPrefix, prefixLength);
779 if(prefixEndsWithType) {
780 prefix[prefixLength-1]=outType;
781 }
782 }
783 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
784 prefix[prefixLength]=0;
785 if(dsLocalToOut!=NULL) {
786 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
787 if(U_FAILURE(errorCode)) {
788 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
789 exit(errorCode);
790 }
791
792 // swap and sort the item names (sorting needs to be done in the output charset)
793 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
794 if(U_FAILURE(errorCode)) {
795 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
796 exit(errorCode);
797 }
798 sortItems();
799 }
800
801 // create the output item names in sorted order, with the package name prepended to each
802 for(i=0; i<itemCount; ++i) {
803 length=(int32_t)strlen(items[i].name);
804 name=allocString(FALSE, length+prefixLength);
805 memcpy(name, prefix, prefixLength);
806 memcpy(name+prefixLength, items[i].name, length+1);
807 items[i].name=name;
808 }
809
810 // calculate offsets for item names and items, pad to 16-align items
811 // align only the first item; each item's length is a multiple of 16
812 basenameOffset=4+8*itemCount;
813 offset=basenameOffset+outStringTop;
814 if((length=(offset&15))!=0) {
815 length=16-length;
816 memset(allocString(FALSE, length-1), 0xaa, length);
817 offset+=length;
818 }
819
820 // write the table of contents
821 // first the itemCount
822 outInt32=itemCount;
823 if(dsLocalToOut!=NULL) {
824 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
825 if(U_FAILURE(errorCode)) {
826 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
827 exit(errorCode);
828 }
829 }
830 length=(int32_t)fwrite(&outInt32, 1, 4, file);
831 if(length!=4) {
832 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
833 exit(U_FILE_ACCESS_ERROR);
834 }
835
836 // then write the item entries (and collect the maxItemLength)
837 maxItemLength=0;
838 for(i=0; i<itemCount; ++i) {
839 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
840 entry.dataOffset=(uint32_t)offset;
841 if(dsLocalToOut!=NULL) {
842 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
843 if(U_FAILURE(errorCode)) {
844 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
845 exit(errorCode);
846 }
847 }
848 length=(int32_t)fwrite(&entry, 1, 8, file);
849 if(length!=8) {
850 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
851 exit(U_FILE_ACCESS_ERROR);
852 }
853
854 length=items[i].length;
855 if(length>maxItemLength) {
856 maxItemLength=length;
857 }
858 offset+=length;
859 }
860
861 // write the item names
862 length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
863 if(length!=outStringTop) {
864 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
865 exit(U_FILE_ACCESS_ERROR);
866 }
867
868 // write the items
869 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
870 int32_t type=makeTypeEnum(pItem->type);
871 if(ds[type]!=NULL) {
872 // swap each item from its platform properties to the desired ones
873 udata_swap(
874 ds[type],
875 pItem->data, pItem->length, pItem->data,
876 &errorCode);
877 if(U_FAILURE(errorCode)) {
878 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
879 exit(errorCode);
880 }
881 }
882 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
883 if(length!=pItem->length) {
884 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
885 exit(U_FILE_ACCESS_ERROR);
886 }
887 }
888
889 if(ferror(file)) {
890 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
891 exit(U_FILE_ACCESS_ERROR);
892 }
893
894 fclose(file);
895 for(i=0; i<TYPE_COUNT; ++i) {
896 udata_closeSwapper(ds[i]);
897 }
898 }
899
900 int32_t
901 Package::findItem(const char *name, int32_t length) const {
902 int32_t i, start, limit;
903 int result;
904
905 /* do a binary search for the string */
906 start=0;
907 limit=itemCount;
908 while(start<limit) {
909 i=(start+limit)/2;
910 if(length>=0) {
911 result=strncmp(name, items[i].name, length);
912 } else {
913 result=strcmp(name, items[i].name);
914 }
915
916 if(result==0) {
917 /* found */
918 if(length>=0) {
919 /*
920 * if we compared just prefixes, then we may need to back up
921 * to the first item with this prefix
922 */
923 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
924 --i;
925 }
926 }
927 return i;
928 } else if(result<0) {
929 limit=i;
930 } else /* result>0 */ {
931 start=i+1;
932 }
933 }
934
935 return ~start; /* not found, return binary-not of the insertion point */
936 }
937
938 void
939 Package::findItems(const char *pattern) {
940 const char *wild;
941
942 if(pattern==NULL || *pattern==0) {
943 findNextIndex=-1;
944 return;
945 }
946
947 findPrefix=pattern;
948 findSuffix=NULL;
949 findSuffixLength=0;
950
951 wild=strchr(pattern, '*');
952 if(wild==NULL) {
953 // no wildcard
954 findPrefixLength=(int32_t)strlen(pattern);
955 } else {
956 // one wildcard
957 findPrefixLength=(int32_t)(wild-pattern);
958 findSuffix=wild+1;
959 findSuffixLength=(int32_t)strlen(findSuffix);
960 if(NULL!=strchr(findSuffix, '*')) {
961 // two or more wildcards
962 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
963 exit(U_PARSE_ERROR);
964 }
965 }
966
967 if(findPrefixLength==0) {
968 findNextIndex=0;
969 } else {
970 findNextIndex=findItem(findPrefix, findPrefixLength);
971 }
972 }
973
974 int32_t
975 Package::findNextItem() {
976 const char *name, *middle, *treeSep;
977 int32_t idx, nameLength, middleLength;
978
979 if(findNextIndex<0) {
980 return -1;
981 }
982
983 while(findNextIndex<itemCount) {
984 idx=findNextIndex++;
985 name=items[idx].name;
986 nameLength=(int32_t)strlen(name);
987 if(nameLength<(findPrefixLength+findSuffixLength)) {
988 // item name too short for prefix & suffix
989 continue;
990 }
991 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
992 // left the range of names with this prefix
993 break;
994 }
995 middle=name+findPrefixLength;
996 middleLength=nameLength-findPrefixLength-findSuffixLength;
997 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
998 // suffix does not match
999 continue;
1000 }
1001 // prefix & suffix match
1002
1003 if(matchMode&MATCH_NOSLASH) {
1004 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
1005 if(treeSep!=NULL && (treeSep-middle)<middleLength) {
1006 // the middle (matching the * wildcard) contains a tree separator /
1007 continue;
1008 }
1009 }
1010
1011 // found a matching item
1012 return idx;
1013 }
1014
1015 // no more items
1016 findNextIndex=-1;
1017 return -1;
1018 }
1019
1020 void
1021 Package::setMatchMode(uint32_t mode) {
1022 matchMode=mode;
1023 }
1024
1025 void
1026 Package::addItem(const char *name) {
1027 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
1028 }
1029
1030 void
1031 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
1032 int32_t idx;
1033
1034 idx=findItem(name);
1035 if(idx<0) {
1036 // new item, make space at the insertion point
1037 ensureItemCapacity();
1038 // move the following items down
1039 idx=~idx;
1040 if(idx<itemCount) {
1041 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
1042 }
1043 ++itemCount;
1044
1045 // reset this Item entry
1046 memset(items+idx, 0, sizeof(Item));
1047
1048 // copy the item's name
1049 items[idx].name=allocString(TRUE, strlen(name));
1050 strcpy(items[idx].name, name);
1051 pathToTree(items[idx].name);
1052 } else {
1053 // same-name item found, replace it
1054 if(items[idx].isDataOwned) {
1055 free(items[idx].data);
1056 }
1057
1058 // keep the item's name since it is the same
1059 }
1060
1061 // set the item's data
1062 items[idx].data=data;
1063 items[idx].length=length;
1064 items[idx].isDataOwned=isDataOwned;
1065 items[idx].type=type;
1066 }
1067
1068 void
1069 Package::addFile(const char *filesPath, const char *name) {
1070 uint8_t *data;
1071 int32_t length;
1072 char type;
1073
1074 data=readFile(filesPath, name, length, type);
1075 // readFile() exits the tool if it fails
1076 addItem(name, data, length, TRUE, type);
1077 }
1078
1079 void
1080 Package::addItems(const Package &listPkg) {
1081 const Item *pItem;
1082 int32_t i;
1083
1084 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1085 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1086 }
1087 }
1088
1089 void
1090 Package::removeItem(int32_t idx) {
1091 if(idx>=0) {
1092 // remove the item
1093 if(items[idx].isDataOwned) {
1094 free(items[idx].data);
1095 }
1096
1097 // move the following items up
1098 if((idx+1)<itemCount) {
1099 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1100 }
1101 --itemCount;
1102
1103 if(idx<=findNextIndex) {
1104 --findNextIndex;
1105 }
1106 }
1107 }
1108
1109 void
1110 Package::removeItems(const char *pattern) {
1111 int32_t idx;
1112
1113 findItems(pattern);
1114 while((idx=findNextItem())>=0) {
1115 removeItem(idx);
1116 }
1117 }
1118
1119 void
1120 Package::removeItems(const Package &listPkg) {
1121 const Item *pItem;
1122 int32_t i;
1123
1124 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1125 removeItems(pItem->name);
1126 }
1127 }
1128
1129 void
1130 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1131 char filename[1024];
1132 UDataSwapper *ds;
1133 FILE *file;
1134 Item *pItem;
1135 int32_t fileLength;
1136 uint8_t itemCharset, outCharset;
1137 UBool itemIsBigEndian, outIsBigEndian;
1138
1139 if(idx<0 || itemCount<=idx) {
1140 return;
1141 }
1142 pItem=items+idx;
1143
1144 // swap the data to the outType
1145 // outType==0: don't swap
1146 if(outType!=0 && pItem->type!=outType) {
1147 // open the swapper
1148 UErrorCode errorCode=U_ZERO_ERROR;
1149 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1150 makeTypeProps(outType, outCharset, outIsBigEndian);
1151 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1152 if(U_FAILURE(errorCode)) {
1153 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1154 (long)idx, u_errorName(errorCode));
1155 exit(errorCode);
1156 }
1157
1158 ds->printError=printPackageError;
1159 ds->printErrorContext=stderr;
1160
1161 // swap the item from its platform properties to the desired ones
1162 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1163 if(U_FAILURE(errorCode)) {
1164 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1165 exit(errorCode);
1166 }
1167 udata_closeSwapper(ds);
1168 pItem->type=outType;
1169 }
1170
1171 // create the file and write its contents
1172 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1173 file=fopen(filename, "wb");
1174 if(file==NULL) {
1175 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1176 exit(U_FILE_ACCESS_ERROR);
1177 }
1178 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1179
1180 if(ferror(file) || fileLength!=pItem->length) {
1181 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1182 exit(U_FILE_ACCESS_ERROR);
1183 }
1184 fclose(file);
1185 }
1186
1187 void
1188 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1189 extractItem(filesPath, items[idx].name, idx, outType);
1190 }
1191
1192 void
1193 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1194 int32_t idx;
1195
1196 findItems(pattern);
1197 while((idx=findNextItem())>=0) {
1198 extractItem(filesPath, idx, outType);
1199 }
1200 }
1201
1202 void
1203 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1204 const Item *pItem;
1205 int32_t i;
1206
1207 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1208 extractItems(filesPath, pItem->name, outType);
1209 }
1210 }
1211
1212 int32_t
1213 Package::getItemCount() const {
1214 return itemCount;
1215 }
1216
1217 const Item *
1218 Package::getItem(int32_t idx) const {
1219 if (0 <= idx && idx < itemCount) {
1220 return &items[idx];
1221 }
1222 return NULL;
1223 }
1224
1225 void
1226 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1227 // check dependency: make sure the target item is in the package
1228 Package *me=(Package *)context;
1229 if(me->findItem(targetName)<0) {
1230 me->isMissingItems=TRUE;
1231 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1232 }
1233 }
1234
1235 UBool
1236 Package::checkDependencies() {
1237 isMissingItems=FALSE;
1238 enumDependencies(this, checkDependency);
1239 return (UBool)!isMissingItems;
1240 }
1241
1242 void
1243 Package::enumDependencies(void *context, CheckDependency check) {
1244 int32_t i;
1245
1246 for(i=0; i<itemCount; ++i) {
1247 enumDependencies(items+i, context, check);
1248 }
1249 }
1250
1251 char *
1252 Package::allocString(UBool in, int32_t length) {
1253 char *p;
1254 int32_t top;
1255
1256 if(in) {
1257 top=inStringTop;
1258 p=inStrings+top;
1259 } else {
1260 top=outStringTop;
1261 p=outStrings+top;
1262 }
1263 top+=length+1;
1264
1265 if(top>STRING_STORE_SIZE) {
1266 fprintf(stderr, "icupkg: string storage overflow\n");
1267 exit(U_BUFFER_OVERFLOW_ERROR);
1268 }
1269 if(in) {
1270 inStringTop=top;
1271 } else {
1272 outStringTop=top;
1273 }
1274 return p;
1275 }
1276
1277 void
1278 Package::sortItems() {
1279 UErrorCode errorCode=U_ZERO_ERROR;
1280 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1281 if(U_FAILURE(errorCode)) {
1282 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1283 exit(errorCode);
1284 }
1285 }
1286
1287 void Package::setItemCapacity(int32_t max)
1288 {
1289 if(max<=itemMax) {
1290 return;
1291 }
1292 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
1293 Item *oldItems = items;
1294 if(newItems == NULL) {
1295 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
1296 (unsigned long)max*sizeof(items[0]), max);
1297 exit(U_MEMORY_ALLOCATION_ERROR);
1298 }
1299 if(items && itemCount>0) {
1300 uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
1301 }
1302 itemMax = max;
1303 items = newItems;
1304 uprv_free(oldItems);
1305 }
1306
1307 void Package::ensureItemCapacity()
1308 {
1309 if((itemCount+1)>itemMax) {
1310 setItemCapacity(itemCount+kItemsChunk);
1311 }
1312 }
1313
1314 U_NAMESPACE_END