1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: package.cpp
12 * tab size: 8 (not used)
15 * created on: 2005aug25
16 * created by: Markus W. Scherer
18 * Read, modify, and write ICU .dat data package files.
19 * This is an integral part of the icupkg tool, moved to the toolutil library
20 * because parts of tool implementations tend to be later shared by
22 * Subsumes functionality and implementation code from
23 * gencmn, decmn, and icuswap tools.
26 #include "unicode/utypes.h"
27 #include "unicode/putil.h"
28 #include "unicode/udata.h"
/*
 * Growth increment for the items array: ensureItemCapacity() asks for
 * itemCount+kItemsChunk slots whenever one more item would not fit.
 */
static const int32_t kItemsChunk = 256;
45 // general definitions ----------------------------------------------------- ***
47 /* UDataInfo cf. udata.h */
48 static const UDataInfo dataInfo
={
49 (uint16_t)sizeof(UDataInfo
),
54 (uint8_t)sizeof(UChar
),
57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */
58 {1, 0, 0, 0}, /* formatVersion */
59 {3, 0, 0, 0} /* dataVersion */
63 static void U_CALLCONV
64 printPackageError(void *context
, const char *fmt
, va_list args
) {
65 vfprintf((FILE *)context
, fmt
, args
);
/*
 * Return x with its two bytes exchanged.
 * Used by getDataInfo() to read 16-bit header fields from data whose
 * endianness differs from this platform's.
 */
static uint16_t
readSwapUInt16(uint16_t x) {
    const unsigned int lowByte=x&0xffu;
    const unsigned int highByte=(x>>8)&0xffu;
    return static_cast<uint16_t>((lowByte<<8)|highByte);
}
74 // platform types ---------------------------------------------------------- ***
/*
 * Platform type letters, indexed by the TYPE_ enum values below.
 * A type enum value is 2*charsetFamily+isBigEndian (see makeTypeEnum()):
 * writePackage() opens the TYPE_L swapper for little-endian ASCII input,
 * TYPE_B for big-endian ASCII and TYPE_E for big-endian EBCDIC.
 * TYPE_LE has no valid letter ('?'); writePackage() calls it bogus.
 */
static const char *types="lb?e";

/* TYPE_COUNT is the number of type enum values, used to size per-type arrays. */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
81 makeTypeEnum(uint8_t charset
, UBool isBigEndian
) {
82 return 2*(int32_t)charset
+isBigEndian
;
86 makeTypeEnum(char type
) {
88 type
== 'l' ? TYPE_L
:
89 type
== 'b' ? TYPE_B
:
90 type
== 'e' ? TYPE_E
:
95 makeTypeLetter(uint8_t charset
, UBool isBigEndian
) {
96 return types
[makeTypeEnum(charset
, isBigEndian
)];
100 makeTypeLetter(int32_t typeEnum
) {
101 return types
[typeEnum
];
105 makeTypeProps(char type
, uint8_t &charset
, UBool
&isBigEndian
) {
106 int32_t typeEnum
=makeTypeEnum(type
);
107 charset
=(uint8_t)(typeEnum
>>1);
108 isBigEndian
=(UBool
)(typeEnum
&1);
111 U_CFUNC
const UDataInfo
*
112 getDataInfo(const uint8_t *data
, int32_t length
,
113 int32_t &infoLength
, int32_t &headerLength
,
114 UErrorCode
*pErrorCode
) {
115 const DataHeader
*pHeader
;
116 const UDataInfo
*pInfo
;
118 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
122 (length
>=0 && length
<(int32_t)sizeof(DataHeader
))
124 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
128 pHeader
=(const DataHeader
*)data
;
129 pInfo
=&pHeader
->info
;
130 if( (length
>=0 && length
<(int32_t)sizeof(DataHeader
)) ||
131 pHeader
->dataHeader
.magic1
!=0xda ||
132 pHeader
->dataHeader
.magic2
!=0x27 ||
133 pInfo
->sizeofUChar
!=2
135 *pErrorCode
=U_UNSUPPORTED_ERROR
;
139 if(pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
) {
140 headerLength
=pHeader
->dataHeader
.headerSize
;
141 infoLength
=pInfo
->size
;
143 headerLength
=readSwapUInt16(pHeader
->dataHeader
.headerSize
);
144 infoLength
=readSwapUInt16(pInfo
->size
);
147 if( headerLength
<(int32_t)sizeof(DataHeader
) ||
148 infoLength
<(int32_t)sizeof(UDataInfo
) ||
149 headerLength
<(int32_t)(sizeof(pHeader
->dataHeader
)+infoLength
) ||
150 (length
>=0 && length
<headerLength
)
152 *pErrorCode
=U_UNSUPPORTED_ERROR
;
160 getTypeEnumForInputData(const uint8_t *data
, int32_t length
,
161 UErrorCode
*pErrorCode
) {
162 const UDataInfo
*pInfo
;
163 int32_t infoLength
, headerLength
;
165 /* getDataInfo() checks for illegal arguments */
166 pInfo
=getDataInfo(data
, length
, infoLength
, headerLength
, pErrorCode
);
171 return makeTypeEnum(pInfo
->charsetFamily
, (UBool
)pInfo
->isBigEndian
);
174 // file handling ----------------------------------------------------------- ***
177 extractPackageName(const char *filename
,
178 char pkg
[], int32_t capacity
) {
179 const char *basename
;
182 basename
=findBasename(filename
);
183 len
=(int32_t)strlen(basename
)-4; /* -4: subtract the length of ".dat" */
185 if(len
<=0 || 0!=strcmp(basename
+len
, ".dat")) {
186 fprintf(stderr
, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
188 exit(U_ILLEGAL_ARGUMENT_ERROR
);
192 fprintf(stderr
, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
193 basename
, (long)capacity
);
194 exit(U_ILLEGAL_ARGUMENT_ERROR
);
197 memcpy(pkg
, basename
, len
);
/*
 * Return the length of an open file in bytes and rewind it to the beginning.
 *
 * Returns -1 if seeking or telling fails, or if the length does not fit
 * into an int32_t (instead of returning a silently truncated value).
 * readFile() treats any result <=0 as an unusable input file.
 */
static int32_t
getFileLength(FILE *f) {
    static const long kMaxLength=0x7fffffffL;  // largest value an int32_t can hold

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    const long size=ftell(f);

    // always try to rewind, even if ftell() failed
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    if(size<0 || size>kMaxLength) {
        return -1;
    }
    return (int32_t)size;
}
/*
 * Turn tree separators and alternate file separators into normal file separators.
 * The string is modified in place.
 * Compiles to nothing on platforms where all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    char *p=s;
    while(*p!=0) {
        const char c=*p;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *p=U_FILE_SEP_CHAR;
        }
        ++p;
    }
}
#endif
/*
 * Turn file separators (normal and alternate) into tree separators.
 * The string is modified in place.
 * Compiles to nothing on platforms where all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    char *p=s;
    while(*p!=0) {
        const char c=*p;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *p=U_TREE_ENTRY_SEP_CHAR;
        }
        ++p;
    }
}
#endif
248 * Prepend the path (if any) to the name and run the name through treeToName().
251 makeFullFilename(const char *path
, const char *name
,
252 char *filename
, int32_t capacity
) {
255 // prepend the path unless NULL or empty
256 if(path
!=NULL
&& path
[0]!=0) {
257 if((int32_t)(strlen(path
)+1)>=capacity
) {
258 fprintf(stderr
, "pathname too long: \"%s\"\n", path
);
259 exit(U_BUFFER_OVERFLOW_ERROR
);
261 strcpy(filename
, path
);
263 // make sure the path ends with a file separator
264 s
=strchr(filename
, 0);
265 if(*(s
-1)!=U_FILE_SEP_CHAR
&& *(s
-1)!=U_FILE_ALT_SEP_CHAR
) {
266 *s
++=U_FILE_SEP_CHAR
;
272 // turn the name into a filename, turn tree separators into file separators
273 if((int32_t)((s
-filename
)+strlen(name
))>=capacity
) {
274 fprintf(stderr
, "path/filename too long: \"%s%s\"\n", filename
, name
);
275 exit(U_BUFFER_OVERFLOW_ERROR
);
282 makeFullFilenameAndDirs(const char *path
, const char *name
,
283 char *filename
, int32_t capacity
) {
285 UErrorCode errorCode
;
287 makeFullFilename(path
, name
, filename
, capacity
);
289 // make tree directories
290 errorCode
=U_ZERO_ERROR
;
291 sep
=strchr(filename
, 0)-strlen(name
);
292 while((sep
=strchr(sep
, U_FILE_SEP_CHAR
))!=NULL
) {
294 *sep
=0; // truncate temporarily
295 uprv_mkdir(filename
, &errorCode
);
296 if(U_FAILURE(errorCode
)) {
297 fprintf(stderr
, "icupkg: unable to create tree directory \"%s\"\n", filename
);
298 exit(U_FILE_ACCESS_ERROR
);
301 *sep
++=U_FILE_SEP_CHAR
; // restore file separator character
306 readFile(const char *path
, const char *name
, int32_t &length
, char &type
) {
309 UErrorCode errorCode
;
310 int32_t fileLength
, typeEnum
;
312 makeFullFilename(path
, name
, filename
, (int32_t)sizeof(filename
));
314 /* open the input file, get its length, allocate memory for it, read the file */
315 file
=fopen(filename
, "rb");
317 fprintf(stderr
, "icupkg: unable to open input file \"%s\"\n", filename
);
318 exit(U_FILE_ACCESS_ERROR
);
321 /* get the file length */
322 fileLength
=getFileLength(file
);
323 if(ferror(file
) || fileLength
<=0) {
324 fprintf(stderr
, "icupkg: empty input file \"%s\"\n", filename
);
326 exit(U_FILE_ACCESS_ERROR
);
329 /* allocate the buffer, pad to multiple of 16 */
330 length
=(fileLength
+0xf)&~0xf;
331 icu::LocalMemory
<uint8_t> data((uint8_t *)uprv_malloc(length
));
334 fprintf(stderr
, "icupkg: malloc error allocating %d bytes.\n", (int)length
);
335 exit(U_MEMORY_ALLOCATION_ERROR
);
339 if(fileLength
!=(int32_t)fread(data
.getAlias(), 1, fileLength
, file
)) {
340 fprintf(stderr
, "icupkg: error reading \"%s\"\n", filename
);
342 exit(U_FILE_ACCESS_ERROR
);
345 /* pad the file to a multiple of 16 using the usual padding byte */
346 if(fileLength
<length
) {
347 memset(data
.getAlias()+fileLength
, 0xaa, length
-fileLength
);
352 // minimum check for ICU-format data
353 errorCode
=U_ZERO_ERROR
;
354 typeEnum
=getTypeEnumForInputData(data
.getAlias(), length
, &errorCode
);
355 if(typeEnum
<0 || U_FAILURE(errorCode
)) {
356 fprintf(stderr
, "icupkg: not an ICU data file: \"%s\"\n", filename
);
357 #if !UCONFIG_NO_LEGACY_CONVERSION
358 exit(U_INVALID_FORMAT_ERROR
);
360 fprintf(stderr
, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
364 type
=makeTypeLetter(typeEnum
);
366 return data
.orphan();
369 // .dat package file representation ---------------------------------------- ***
373 static int32_t U_CALLCONV
374 compareItems(const void * /*context*/, const void *left
, const void *right
) {
377 return (int32_t)strcmp(((Item
*)left
)->name
, ((Item
*)right
)->name
);
385 : doAutoPrefix(FALSE
), prefixEndsWithType(FALSE
) {
390 inCharset
=U_CHARSET_FAMILY
;
391 inIsBigEndian
=U_IS_BIG_ENDIAN
;
397 inStringTop
=outStringTop
=0;
400 findPrefix
=findSuffix
=NULL
;
401 findPrefixLength
=findSuffixLength
=0;
404 // create a header for an empty package
406 pHeader
=(DataHeader
*)header
;
407 pHeader
->dataHeader
.magic1
=0xda;
408 pHeader
->dataHeader
.magic2
=0x27;
409 memcpy(&pHeader
->info
, &dataInfo
, sizeof(dataInfo
));
410 headerLength
=(int32_t)(4+sizeof(dataInfo
));
411 if(headerLength
&0xf) {
412 /* NUL-pad the header to a multiple of 16 */
413 int32_t length
=(headerLength
+0xf)&~0xf;
414 memset(header
+headerLength
, 0, length
-headerLength
);
417 pHeader
->dataHeader
.headerSize
=(uint16_t)headerLength
;
420 Package::~Package() {
425 for(idx
=0; idx
<itemCount
; ++idx
) {
426 if(items
[idx
].isDataOwned
) {
427 uprv_free(items
[idx
].data
);
431 uprv_free((void*)items
);
435 Package::setPrefix(const char *p
) {
436 if(strlen(p
)>=sizeof(pkgPrefix
)) {
437 fprintf(stderr
, "icupkg: --toc_prefix %s too long\n", p
);
438 exit(U_ILLEGAL_ARGUMENT_ERROR
);
440 strcpy(pkgPrefix
, p
);
444 Package::readPackage(const char *filename
) {
446 const UDataInfo
*pInfo
;
447 UErrorCode errorCode
;
449 const uint8_t *inBytes
;
451 int32_t length
, offset
, i
;
452 int32_t itemLength
, typeEnum
;
455 const UDataOffsetTOCEntry
*inEntries
;
457 extractPackageName(filename
, inPkgName
, (int32_t)sizeof(inPkgName
));
460 inData
=readFile(NULL
, filename
, inLength
, type
);
464 * swap the header - even if the swapping itself is a no-op
465 * because it tells us the header length
467 errorCode
=U_ZERO_ERROR
;
468 makeTypeProps(type
, inCharset
, inIsBigEndian
);
469 ds
=udata_openSwapper(inIsBigEndian
, inCharset
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, &errorCode
);
470 if(U_FAILURE(errorCode
)) {
471 fprintf(stderr
, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
472 filename
, u_errorName(errorCode
));
476 ds
->printError
=printPackageError
;
477 ds
->printErrorContext
=stderr
;
479 headerLength
=sizeof(header
);
480 if(length
<headerLength
) {
483 headerLength
=udata_swapDataHeader(ds
, inData
, headerLength
, header
, &errorCode
);
484 if(U_FAILURE(errorCode
)) {
488 /* check data format and format version */
489 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
491 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
492 pInfo
->dataFormat
[1]==0x6d &&
493 pInfo
->dataFormat
[2]==0x6e &&
494 pInfo
->dataFormat
[3]==0x44 &&
495 pInfo
->formatVersion
[0]==1
497 fprintf(stderr
, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
498 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
499 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
500 pInfo
->formatVersion
[0]);
501 exit(U_UNSUPPORTED_ERROR
);
503 inIsBigEndian
=(UBool
)pInfo
->isBigEndian
;
504 inCharset
=pInfo
->charsetFamily
;
506 inBytes
=(const uint8_t *)inData
+headerLength
;
507 inEntries
=(const UDataOffsetTOCEntry
*)(inBytes
+4);
509 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
510 length
-=headerLength
;
512 /* itemCount does not fit */
515 itemCount
=udata_readInt32(ds
, *(const int32_t *)inBytes
);
516 setItemCapacity(itemCount
); /* resize so there's space */
519 } else if(length
<(4+8*itemCount
)) {
520 /* ToC table does not fit */
523 /* offset of the last item plus at least 20 bytes for its header */
524 offset
=20+(int32_t)ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
528 fprintf(stderr
, "icupkg: too few bytes (%ld after header) for a .dat package\n",
530 exit(U_INDEX_OUTOFBOUNDS_ERROR
);
532 /* do not modify the package length variable until the last item's length is set */
536 fprintf(stderr
, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
537 exit(U_INVALID_FORMAT_ERROR
);
540 char prefix
[MAX_PKG_NAME_LENGTH
+4];
541 char *s
, *inItemStrings
;
543 if(itemCount
>itemMax
) {
544 fprintf(stderr
, "icupkg: too many items, maximum is %d\n", itemMax
);
545 exit(U_BUFFER_OVERFLOW_ERROR
);
548 /* swap the item name strings */
549 int32_t stringsOffset
=4+8*itemCount
;
550 itemLength
=(int32_t)(ds
->readUInt32(inEntries
[0].dataOffset
))-stringsOffset
;
552 // don't include padding bytes at the end of the item names
553 while(itemLength
>0 && inBytes
[stringsOffset
+itemLength
-1]!=0) {
557 if((inStringTop
+itemLength
)>STRING_STORE_SIZE
) {
558 fprintf(stderr
, "icupkg: total length of item name strings too long\n");
559 exit(U_BUFFER_OVERFLOW_ERROR
);
562 inItemStrings
=inStrings
+inStringTop
;
563 ds
->swapInvChars(ds
, inBytes
+stringsOffset
, itemLength
, inItemStrings
, &errorCode
);
564 if(U_FAILURE(errorCode
)) {
565 fprintf(stderr
, "icupkg failed to swap the input .dat package item name strings\n");
566 exit(U_INVALID_FORMAT_ERROR
);
568 inStringTop
+=itemLength
;
570 // reset the Item entries
571 memset(items
, 0, itemCount
*sizeof(Item
));
574 * Get the common prefix of the items.
575 * New-style ICU .dat packages use tree separators ('/') between package names,
576 * tree names, and item names,
577 * while old-style ICU .dat packages (before multi-tree support)
578 * use an underscore ('_') between package and item names.
580 offset
=(int32_t)ds
->readUInt32(inEntries
[0].nameOffset
)-stringsOffset
;
581 s
=inItemStrings
+offset
; // name of the first entry
582 int32_t prefixLength
;
584 // Use the first entry's prefix. Must be a new-style package.
585 const char *prefixLimit
=strchr(s
, U_TREE_ENTRY_SEP_CHAR
);
586 if(prefixLimit
==NULL
) {
588 "icupkg: --auto_toc_prefix[_with_type] but "
589 "the first entry \"%s\" does not contain a '%c'\n",
590 s
, U_TREE_ENTRY_SEP_CHAR
);
591 exit(U_INVALID_FORMAT_ERROR
);
593 prefixLength
=(int32_t)(prefixLimit
-s
);
594 if(prefixLength
==0 || prefixLength
>=UPRV_LENGTHOF(pkgPrefix
)) {
596 "icupkg: --auto_toc_prefix[_with_type] but "
597 "the prefix of the first entry \"%s\" is empty or too long\n",
599 exit(U_INVALID_FORMAT_ERROR
);
601 if(prefixEndsWithType
&& s
[prefixLength
-1]!=type
) {
603 "icupkg: --auto_toc_prefix_with_type but "
604 "the prefix of the first entry \"%s\" does not end with '%c'\n",
606 exit(U_INVALID_FORMAT_ERROR
);
608 memcpy(pkgPrefix
, s
, prefixLength
);
609 pkgPrefix
[prefixLength
]=0;
610 memcpy(prefix
, s
, ++prefixLength
); // include the /
612 // Use the package basename as prefix.
613 int32_t inPkgNameLength
= static_cast<int32_t>(strlen(inPkgName
));
614 memcpy(prefix
, inPkgName
, inPkgNameLength
);
615 prefixLength
=inPkgNameLength
;
617 if( (int32_t)strlen(s
)>=(inPkgNameLength
+2) &&
618 0==memcmp(s
, inPkgName
, inPkgNameLength
) &&
619 s
[inPkgNameLength
]=='_'
621 // old-style .dat package
622 prefix
[prefixLength
++]='_';
624 // new-style .dat package
625 prefix
[prefixLength
++]=U_TREE_ENTRY_SEP_CHAR
;
626 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
627 // then the test in the loop below will fail
630 prefix
[prefixLength
]=0;
632 /* read the ToC table */
633 for(i
=0; i
<itemCount
; ++i
) {
634 // skip the package part of the item name, error if it does not match the actual package name
635 // or if nothing follows the package name
636 offset
=(int32_t)ds
->readUInt32(inEntries
[i
].nameOffset
)-stringsOffset
;
637 s
=inItemStrings
+offset
;
638 if(0!=strncmp(s
, prefix
, prefixLength
) || s
[prefixLength
]==0) {
639 fprintf(stderr
, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
641 exit(U_INVALID_FORMAT_ERROR
);
643 items
[i
].name
=s
+prefixLength
;
645 // set the item's data
646 items
[i
].data
=(uint8_t *)inBytes
+ds
->readUInt32(inEntries
[i
].dataOffset
);
648 items
[i
-1].length
=(int32_t)(items
[i
].data
-items
[i
-1].data
);
650 // set the previous item's platform type
651 typeEnum
=getTypeEnumForInputData(items
[i
-1].data
, items
[i
-1].length
, &errorCode
);
652 if(typeEnum
<0 || U_FAILURE(errorCode
)) {
653 fprintf(stderr
, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items
[i
-1].name
, filename
);
654 exit(U_INVALID_FORMAT_ERROR
);
656 items
[i
-1].type
=makeTypeLetter(typeEnum
);
658 items
[i
].isDataOwned
=FALSE
;
660 // set the last item's length
661 items
[itemCount
-1].length
=length
-ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
663 // set the last item's platform type
664 typeEnum
=getTypeEnumForInputData(items
[itemCount
-1].data
, items
[itemCount
-1].length
, &errorCode
);
665 if(typeEnum
<0 || U_FAILURE(errorCode
)) {
666 fprintf(stderr
, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items
[itemCount
-1].name
, filename
);
667 exit(U_INVALID_FORMAT_ERROR
);
669 items
[itemCount
-1].type
=makeTypeLetter(typeEnum
);
671 if(type
!=U_ICUDATA_TYPE_LETTER
[0]) {
672 // sort the item names for the local charset
677 udata_closeSwapper(ds
);
681 Package::getInType() {
682 return makeTypeLetter(inCharset
, inIsBigEndian
);
686 Package::writePackage(const char *filename
, char outType
, const char *comment
) {
687 char prefix
[MAX_PKG_NAME_LENGTH
+4];
688 UDataOffsetTOCEntry entry
;
689 UDataSwapper
*dsLocalToOut
, *ds
[TYPE_COUNT
];
693 UErrorCode errorCode
;
694 int32_t i
, length
, prefixLength
, maxItemLength
, basenameOffset
, offset
, outInt32
;
696 UBool outIsBigEndian
;
698 extractPackageName(filename
, prefix
, MAX_PKG_NAME_LENGTH
);
700 // if there is an explicit comment, then use it, else use what's in the current header
702 /* get the header size minus the current comment */
706 pHeader
=(DataHeader
*)header
;
707 headerLength
=4+pHeader
->info
.size
;
708 length
=(int32_t)strlen(comment
);
709 if((int32_t)(headerLength
+length
)>=(int32_t)sizeof(header
)) {
710 fprintf(stderr
, "icupkg: comment too long\n");
711 exit(U_BUFFER_OVERFLOW_ERROR
);
713 memcpy(header
+headerLength
, comment
, length
+1);
714 headerLength
+=length
;
715 if(headerLength
&0xf) {
716 /* NUL-pad the header to a multiple of 16 */
717 length
=(headerLength
+0xf)&~0xf;
718 memset(header
+headerLength
, 0, length
-headerLength
);
721 pHeader
->dataHeader
.headerSize
=(uint16_t)headerLength
;
724 makeTypeProps(outType
, outCharset
, outIsBigEndian
);
726 // open (TYPE_COUNT-2) swappers
727 // one is a no-op for local type==outType
728 // one type (TYPE_LE) is bogus
729 errorCode
=U_ZERO_ERROR
;
730 i
=makeTypeEnum(outType
);
731 ds
[TYPE_B
]= i
==TYPE_B
? NULL
: udata_openSwapper(TRUE
, U_ASCII_FAMILY
, outIsBigEndian
, outCharset
, &errorCode
);
732 ds
[TYPE_L
]= i
==TYPE_L
? NULL
: udata_openSwapper(FALSE
, U_ASCII_FAMILY
, outIsBigEndian
, outCharset
, &errorCode
);
734 ds
[TYPE_E
]= i
==TYPE_E
? NULL
: udata_openSwapper(TRUE
, U_EBCDIC_FAMILY
, outIsBigEndian
, outCharset
, &errorCode
);
735 if(U_FAILURE(errorCode
)) {
736 fprintf(stderr
, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode
));
739 for(i
=0; i
<TYPE_COUNT
; ++i
) {
741 ds
[i
]->printError
=printPackageError
;
742 ds
[i
]->printErrorContext
=stderr
;
746 dsLocalToOut
=ds
[makeTypeEnum(U_CHARSET_FAMILY
, U_IS_BIG_ENDIAN
)];
748 // create the file and write its contents
749 file
=fopen(filename
, "wb");
751 fprintf(stderr
, "icupkg: unable to create file \"%s\"\n", filename
);
752 exit(U_FILE_ACCESS_ERROR
);
755 // swap and write the header
756 if(dsLocalToOut
!=NULL
) {
757 udata_swapDataHeader(dsLocalToOut
, header
, headerLength
, header
, &errorCode
);
758 if(U_FAILURE(errorCode
)) {
759 fprintf(stderr
, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode
));
763 length
=(int32_t)fwrite(header
, 1, headerLength
, file
);
764 if(length
!=headerLength
) {
765 fprintf(stderr
, "icupkg: unable to write complete header to file \"%s\"\n", filename
);
766 exit(U_FILE_ACCESS_ERROR
);
769 // prepare and swap the package name with a tree separator
770 // for prepending to item names
771 if(pkgPrefix
[0]==0) {
772 prefixLength
=(int32_t)strlen(prefix
);
774 prefixLength
=(int32_t)strlen(pkgPrefix
);
775 memcpy(prefix
, pkgPrefix
, prefixLength
);
776 if(prefixEndsWithType
) {
777 prefix
[prefixLength
-1]=outType
;
780 prefix
[prefixLength
++]=U_TREE_ENTRY_SEP_CHAR
;
781 prefix
[prefixLength
]=0;
782 if(dsLocalToOut
!=NULL
) {
783 dsLocalToOut
->swapInvChars(dsLocalToOut
, prefix
, prefixLength
, prefix
, &errorCode
);
784 if(U_FAILURE(errorCode
)) {
785 fprintf(stderr
, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode
));
789 // swap and sort the item names (sorting needs to be done in the output charset)
790 dsLocalToOut
->swapInvChars(dsLocalToOut
, inStrings
, inStringTop
, inStrings
, &errorCode
);
791 if(U_FAILURE(errorCode
)) {
792 fprintf(stderr
, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode
));
798 // create the output item names in sorted order, with the package name prepended to each
799 for(i
=0; i
<itemCount
; ++i
) {
800 length
=(int32_t)strlen(items
[i
].name
);
801 name
=allocString(FALSE
, length
+prefixLength
);
802 memcpy(name
, prefix
, prefixLength
);
803 memcpy(name
+prefixLength
, items
[i
].name
, length
+1);
807 // calculate offsets for item names and items, pad to 16-align items
808 // align only the first item; each item's length is a multiple of 16
809 basenameOffset
=4+8*itemCount
;
810 offset
=basenameOffset
+outStringTop
;
811 if((length
=(offset
&15))!=0) {
813 memset(allocString(FALSE
, length
-1), 0xaa, length
);
817 // write the table of contents
818 // first the itemCount
820 if(dsLocalToOut
!=NULL
) {
821 dsLocalToOut
->swapArray32(dsLocalToOut
, &outInt32
, 4, &outInt32
, &errorCode
);
822 if(U_FAILURE(errorCode
)) {
823 fprintf(stderr
, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode
));
827 length
=(int32_t)fwrite(&outInt32
, 1, 4, file
);
829 fprintf(stderr
, "icupkg: unable to write complete item count to file \"%s\"\n", filename
);
830 exit(U_FILE_ACCESS_ERROR
);
833 // then write the item entries (and collect the maxItemLength)
835 for(i
=0; i
<itemCount
; ++i
) {
836 entry
.nameOffset
=(uint32_t)(basenameOffset
+(items
[i
].name
-outStrings
));
837 entry
.dataOffset
=(uint32_t)offset
;
838 if(dsLocalToOut
!=NULL
) {
839 dsLocalToOut
->swapArray32(dsLocalToOut
, &entry
, 8, &entry
, &errorCode
);
840 if(U_FAILURE(errorCode
)) {
841 fprintf(stderr
, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i
, u_errorName(errorCode
));
845 length
=(int32_t)fwrite(&entry
, 1, 8, file
);
847 fprintf(stderr
, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i
, filename
);
848 exit(U_FILE_ACCESS_ERROR
);
851 length
=items
[i
].length
;
852 if(length
>maxItemLength
) {
853 maxItemLength
=length
;
858 // write the item names
859 length
=(int32_t)fwrite(outStrings
, 1, outStringTop
, file
);
860 if(length
!=outStringTop
) {
861 fprintf(stderr
, "icupkg: unable to write complete item names to file \"%s\"\n", filename
);
862 exit(U_FILE_ACCESS_ERROR
);
866 for(pItem
=items
, i
=0; i
<itemCount
; ++pItem
, ++i
) {
867 int32_t type
=makeTypeEnum(pItem
->type
);
869 // swap each item from its platform properties to the desired ones
872 pItem
->data
, pItem
->length
, pItem
->data
,
874 if(U_FAILURE(errorCode
)) {
875 fprintf(stderr
, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i
, u_errorName(errorCode
));
879 length
=(int32_t)fwrite(pItem
->data
, 1, pItem
->length
, file
);
880 if(length
!=pItem
->length
) {
881 fprintf(stderr
, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i
, filename
);
882 exit(U_FILE_ACCESS_ERROR
);
887 fprintf(stderr
, "icupkg: unable to write complete file \"%s\"\n", filename
);
888 exit(U_FILE_ACCESS_ERROR
);
892 for(i
=0; i
<TYPE_COUNT
; ++i
) {
893 udata_closeSwapper(ds
[i
]);
898 Package::findItem(const char *name
, int32_t length
) const {
899 int32_t i
, start
, limit
;
902 /* do a binary search for the string */
908 result
=strncmp(name
, items
[i
].name
, length
);
910 result
=strcmp(name
, items
[i
].name
);
917 * if we compared just prefixes, then we may need to back up
918 * to the first item with this prefix
920 while(i
>0 && 0==strncmp(name
, items
[i
-1].name
, length
)) {
925 } else if(result
<0) {
927 } else /* result>0 */ {
932 return ~start
; /* not found, return binary-not of the insertion point */
936 Package::findItems(const char *pattern
) {
939 if(pattern
==NULL
|| *pattern
==0) {
948 wild
=strchr(pattern
, '*');
951 findPrefixLength
=(int32_t)strlen(pattern
);
954 findPrefixLength
=(int32_t)(wild
-pattern
);
956 findSuffixLength
=(int32_t)strlen(findSuffix
);
957 if(NULL
!=strchr(findSuffix
, '*')) {
958 // two or more wildcards
959 fprintf(stderr
, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern
);
964 if(findPrefixLength
==0) {
967 findNextIndex
=findItem(findPrefix
, findPrefixLength
);
972 Package::findNextItem() {
973 const char *name
, *middle
, *treeSep
;
974 int32_t idx
, nameLength
, middleLength
;
976 if(findNextIndex
<0) {
980 while(findNextIndex
<itemCount
) {
982 name
=items
[idx
].name
;
983 nameLength
=(int32_t)strlen(name
);
984 if(nameLength
<(findPrefixLength
+findSuffixLength
)) {
985 // item name too short for prefix & suffix
988 if(findPrefixLength
>0 && 0!=memcmp(findPrefix
, name
, findPrefixLength
)) {
989 // left the range of names with this prefix
992 middle
=name
+findPrefixLength
;
993 middleLength
=nameLength
-findPrefixLength
-findSuffixLength
;
994 if(findSuffixLength
>0 && 0!=memcmp(findSuffix
, name
+(nameLength
-findSuffixLength
), findSuffixLength
)) {
995 // suffix does not match
998 // prefix & suffix match
1000 if(matchMode
&MATCH_NOSLASH
) {
1001 treeSep
=strchr(middle
, U_TREE_ENTRY_SEP_CHAR
);
1002 if(treeSep
!=NULL
&& (treeSep
-middle
)<middleLength
) {
1003 // the middle (matching the * wildcard) contains a tree separator /
1008 // found a matching item
1018 Package::setMatchMode(uint32_t mode
) {
1023 Package::addItem(const char *name
) {
1024 addItem(name
, NULL
, 0, FALSE
, U_ICUDATA_TYPE_LETTER
[0]);
1028 Package::addItem(const char *name
, uint8_t *data
, int32_t length
, UBool isDataOwned
, char type
) {
1033 // new item, make space at the insertion point
1034 ensureItemCapacity();
1035 // move the following items down
1038 memmove(items
+idx
+1, items
+idx
, (itemCount
-idx
)*sizeof(Item
));
1042 // reset this Item entry
1043 memset(items
+idx
, 0, sizeof(Item
));
1045 // copy the item's name
1046 items
[idx
].name
=allocString(TRUE
, static_cast<int32_t>(strlen(name
)));
1047 strcpy(items
[idx
].name
, name
);
1048 pathToTree(items
[idx
].name
);
1050 // same-name item found, replace it
1051 if(items
[idx
].isDataOwned
) {
1052 uprv_free(items
[idx
].data
);
1055 // keep the item's name since it is the same
1058 // set the item's data
1059 items
[idx
].data
=data
;
1060 items
[idx
].length
=length
;
1061 items
[idx
].isDataOwned
=isDataOwned
;
1062 items
[idx
].type
=type
;
1066 Package::addFile(const char *filesPath
, const char *name
) {
1071 data
=readFile(filesPath
, name
, length
, type
);
1072 // readFile() exits the tool if it fails
1073 addItem(name
, data
, length
, TRUE
, type
);
1077 Package::addItems(const Package
&listPkg
) {
1081 for(pItem
=listPkg
.items
, i
=0; i
<listPkg
.itemCount
; ++pItem
, ++i
) {
1082 addItem(pItem
->name
, pItem
->data
, pItem
->length
, FALSE
, pItem
->type
);
1087 Package::removeItem(int32_t idx
) {
1090 if(items
[idx
].isDataOwned
) {
1091 uprv_free(items
[idx
].data
);
1094 // move the following items up
1095 if((idx
+1)<itemCount
) {
1096 memmove(items
+idx
, items
+idx
+1, (itemCount
-(idx
+1))*sizeof(Item
));
1100 if(idx
<=findNextIndex
) {
1107 Package::removeItems(const char *pattern
) {
1111 while((idx
=findNextItem())>=0) {
1117 Package::removeItems(const Package
&listPkg
) {
1121 for(pItem
=listPkg
.items
, i
=0; i
<listPkg
.itemCount
; ++pItem
, ++i
) {
1122 removeItems(pItem
->name
);
1127 Package::extractItem(const char *filesPath
, const char *outName
, int32_t idx
, char outType
) {
1128 char filename
[1024];
1133 uint8_t itemCharset
, outCharset
;
1134 UBool itemIsBigEndian
, outIsBigEndian
;
1136 if(idx
<0 || itemCount
<=idx
) {
1141 // swap the data to the outType
1142 // outType==0: don't swap
1143 if(outType
!=0 && pItem
->type
!=outType
) {
1145 UErrorCode errorCode
=U_ZERO_ERROR
;
1146 makeTypeProps(pItem
->type
, itemCharset
, itemIsBigEndian
);
1147 makeTypeProps(outType
, outCharset
, outIsBigEndian
);
1148 ds
=udata_openSwapper(itemIsBigEndian
, itemCharset
, outIsBigEndian
, outCharset
, &errorCode
);
1149 if(U_FAILURE(errorCode
)) {
1150 fprintf(stderr
, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1151 (long)idx
, u_errorName(errorCode
));
1155 ds
->printError
=printPackageError
;
1156 ds
->printErrorContext
=stderr
;
1158 // swap the item from its platform properties to the desired ones
1159 udata_swap(ds
, pItem
->data
, pItem
->length
, pItem
->data
, &errorCode
);
1160 if(U_FAILURE(errorCode
)) {
1161 fprintf(stderr
, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx
, u_errorName(errorCode
));
1164 udata_closeSwapper(ds
);
1165 pItem
->type
=outType
;
1168 // create the file and write its contents
1169 makeFullFilenameAndDirs(filesPath
, outName
, filename
, (int32_t)sizeof(filename
));
1170 file
=fopen(filename
, "wb");
1172 fprintf(stderr
, "icupkg: unable to create file \"%s\"\n", filename
);
1173 exit(U_FILE_ACCESS_ERROR
);
1175 fileLength
=(int32_t)fwrite(pItem
->data
, 1, pItem
->length
, file
);
1177 if(ferror(file
) || fileLength
!=pItem
->length
) {
1178 fprintf(stderr
, "icupkg: unable to write complete file \"%s\"\n", filename
);
1179 exit(U_FILE_ACCESS_ERROR
);
1185 Package::extractItem(const char *filesPath
, int32_t idx
, char outType
) {
1186 extractItem(filesPath
, items
[idx
].name
, idx
, outType
);
1190 Package::extractItems(const char *filesPath
, const char *pattern
, char outType
) {
1194 while((idx
=findNextItem())>=0) {
1195 extractItem(filesPath
, idx
, outType
);
1200 Package::extractItems(const char *filesPath
, const Package
&listPkg
, char outType
) {
1204 for(pItem
=listPkg
.items
, i
=0; i
<listPkg
.itemCount
; ++pItem
, ++i
) {
1205 extractItems(filesPath
, pItem
->name
, outType
);
1210 Package::getItemCount() const {
1215 Package::getItem(int32_t idx
) const {
1216 if (0 <= idx
&& idx
< itemCount
) {
1223 Package::checkDependency(void *context
, const char *itemName
, const char *targetName
) {
1224 // check dependency: make sure the target item is in the package
1225 Package
*me
=(Package
*)context
;
1226 if(me
->findItem(targetName
)<0) {
1227 me
->isMissingItems
=TRUE
;
1228 fprintf(stderr
, "Item %s depends on missing item %s\n", itemName
, targetName
);
1233 Package::checkDependencies() {
1234 isMissingItems
=FALSE
;
1235 enumDependencies(this, checkDependency
);
1236 return (UBool
)!isMissingItems
;
1240 Package::enumDependencies(void *context
, CheckDependency check
) {
1243 for(i
=0; i
<itemCount
; ++i
) {
1244 enumDependencies(items
+i
, context
, check
);
1249 Package::allocString(UBool in
, int32_t length
) {
1262 if(top
>STRING_STORE_SIZE
) {
1263 fprintf(stderr
, "icupkg: string storage overflow\n");
1264 exit(U_BUFFER_OVERFLOW_ERROR
);
1275 Package::sortItems() {
1276 UErrorCode errorCode
=U_ZERO_ERROR
;
1277 uprv_sortArray(items
, itemCount
, (int32_t)sizeof(Item
), compareItems
, NULL
, FALSE
, &errorCode
);
1278 if(U_FAILURE(errorCode
)) {
1279 fprintf(stderr
, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode
));
1284 void Package::setItemCapacity(int32_t max
)
1289 Item
*newItems
= (Item
*)uprv_malloc(max
* sizeof(items
[0]));
1290 Item
*oldItems
= items
;
1291 if(newItems
== NULL
) {
1292 fprintf(stderr
, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
1293 (unsigned long)(max
*sizeof(items
[0])), max
);
1294 exit(U_MEMORY_ALLOCATION_ERROR
);
1296 if(items
&& itemCount
>0) {
1297 uprv_memcpy(newItems
, items
, (size_t)itemCount
*sizeof(items
[0]));
1301 uprv_free(oldItems
);
1304 void Package::ensureItemCapacity()
1306 if((itemCount
+1)>itemMax
) {
1307 setItemCapacity(itemCount
+kItemsChunk
);