1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: pkgitems.cpp
12 * tab size: 8 (not used)
15 * created on: 2005sep18
16 * created by: Markus W. Scherer
18 * Companion file to package.cpp. Deals with details of ICU data item formats.
19 * Used for item dependencies.
20 * Contains adapted code from ucnv_bld.c (swapper code from 2003).
23 #include "unicode/utypes.h"
24 #include "unicode/ures.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
40 /* item formats in common */
46 // general definitions ----------------------------------------------------- ***
// Error-printing callback taking (context, fmt, va_list).
// It is installed further down as the printError hook of a UDataSwapper,
// with stderr as the context. The context is cast to FILE * and the
// formatted message is written with vfprintf().
50 static void U_CALLCONV
51 printError(void *context
, const char *fmt
, va_list args
) {
52 vfprintf((FILE *)context
, fmt
, args
);
57 // a data item in native-platform form ------------------------------------- ***
// Default constructor: no item is attached yet.
// Every pointer member starts as NULL and length starts as 0.
63 NativeItem() : pItem(NULL
), pInfo(NULL
), bytes(NULL
), swapped(NULL
), length(0) {}
// Constructs a NativeItem from a package item and the swap function for its
// data format. swapped is initialized to NULL.
// NOTE(review): the constructor body is not visible in this view; presumably
// it passes item and swap on to setItem() -- confirm.
64 NativeItem(const Item
*item
, UDataSwapFn
*swap
) : swapped(NULL
) {
// Const accessor declared to return a const UDataInfo *.
// NOTE(review): body not visible here; presumably returns the pInfo member
// that setItem() assigns -- confirm.
70 const UDataInfo
*getDataInfo() const {
// Const accessor declared to return a const uint8_t *.
// NOTE(review): body not visible here; presumably returns the bytes member
// that setItem() assigns -- confirm.
73 const uint8_t *getBytes() const {
// Const accessor declared to return an int32_t.
// NOTE(review): body not visible here; presumably returns the length member,
// which setItem() sets to the item length minus the item header length -- confirm.
76 int32_t getLength() const {
// Attaches a package item and makes its contents readable in the form of
// the platform this tool runs on.
//
// The item's UDataInfo header is read first. If the item's endianness and
// charset family already match the platform, bytes points directly into the
// item's own data, just past the item header. Otherwise a swapper is opened,
// the whole item is swapped into a newly allocated buffer (swapped), and
// pInfo/bytes are taken from that copy.
//
// swap is the format-specific swap function that is called with the swapper,
// the item's data and length, and the output buffer.
// Failures are reported on stderr; exit() calls are visible for the header
// check and for the allocation failure.
80 void setItem(const Item
*item
, UDataSwapFn
*swap
) {
82 int32_t infoLength
, itemHeaderLength
;
83 UErrorCode errorCode
=U_ZERO_ERROR
;
84 pInfo
=::getDataInfo(pItem
->data
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
85 if(U_FAILURE(errorCode
)) {
86 exit(errorCode
); // should succeed because readFile() checks headers
// payload length: the item without its header
88 length
=pItem
->length
-itemHeaderLength
;
// same endianness and charset family as this platform: use the data in place
90 if(pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&& pInfo
->charsetFamily
==U_CHARSET_FAMILY
) {
91 bytes
=pItem
->data
+itemHeaderLength
;
// otherwise open a swapper from the item's form to the platform's form
93 UDataSwapper
*ds
=udata_openSwapper((UBool
)pInfo
->isBigEndian
, pInfo
->charsetFamily
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, &errorCode
);
94 if(U_FAILURE(errorCode
)) {
95 fprintf(stderr
, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
96 pItem
->name
, u_errorName(errorCode
));
// route the swapper's diagnostics to stderr through printError()
100 ds
->printError
=printError
;
101 ds
->printErrorContext
=stderr
;
// the swapped copy is as large as the whole item, header included
103 swapped
=new uint8_t[pItem
->length
];
105 fprintf(stderr
, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem
->name
);
106 exit(U_MEMORY_ALLOCATION_ERROR
);
// swap the whole item, then re-read the header from the swapped copy so that
// pInfo and bytes both refer to platform-form data
// NOTE(review): no errorCode check is visible after swap() or after the
// header re-read -- confirm that failures are handled.
108 swap(ds
, pItem
->data
, pItem
->length
, swapped
, &errorCode
);
109 pInfo
=::getDataInfo(swapped
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
110 bytes
=swapped
+itemHeaderLength
;
111 udata_closeSwapper(ds
);
// UDataInfo of the attached item, as returned by ::getDataInfo() in setItem();
// after swapping it refers to the header inside the swapped copy.
117 const UDataInfo
*pInfo
;
// Start of the item's contents just past the item header; points either into
// the item's own data or into the swapped copy (see setItem()).
118 const uint8_t *bytes
;
123 // check a dependency ------------------------------------------------------ ***
126 * assemble the target item name from the source item name, an ID
// Assembles a target item name into target[capacity] from three pieces:
//   - the leading part of itemName in front of its basename (treeLength bytes),
//   - the id (idLength bytes),
//   - the suffix, copied together with its terminating NUL.
// If the combined length does not fit into capacity, a message is printed to
// stderr and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
130 makeTargetName(const char *itemName
, const char *id
, int32_t idLength
, const char *suffix
,
131 char *target
, int32_t capacity
,
132 UErrorCode
*pErrorCode
) {
134 int32_t treeLength
, suffixLength
, targetLength
;
136 // get the item basename
137 itemID
=strrchr(itemName
, '/');
144 // build the target string
145 treeLength
=(int32_t)(itemID
-itemName
);
// idLength is recomputed from the NUL-terminated id here; callers in this
// file pass -1 when they do not know the length
// NOTE(review): the condition guarding this assignment is not visible here.
147 idLength
=(int32_t)strlen(id
);
149 suffixLength
=(int32_t)strlen(suffix
);
150 targetLength
=treeLength
+idLength
+suffixLength
;
// >= rather than > because the terminating NUL needs one more byte
151 if(targetLength
>=capacity
) {
152 fprintf(stderr
, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
153 itemName
, (long)targetLength
);
154 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
158 memcpy(target
, itemName
, treeLength
);
159 memcpy(target
+treeLength
, id
, idLength
);
160 memcpy(target
+treeLength
+idLength
, suffix
, suffixLength
+1); // +1 includes the terminating NUL
// Builds the name of the item that itemName depends on (same tree, given id
// and suffix) with makeTargetName() and, if that succeeded, reports the
// dependency through check(context, itemName, target).
// On failure *pErrorCode carries the error set by makeTargetName() and
// check() is not called.
164 checkIDSuffix(const char *itemName
, const char *id
, int32_t idLength
, const char *suffix
,
165 CheckDependency check
, void *context
,
166 UErrorCode
*pErrorCode
) {
168 makeTargetName(itemName
, id
, idLength
, suffix
, target
, (int32_t)sizeof(target
), pErrorCode
);
169 if(U_SUCCESS(*pErrorCode
)) {
170 check(context
, itemName
, target
);
174 /* assemble the target item name from the item's parent item name */
// Reports the parent item of itemName as a dependency.
// The parent name is derived from the item's basename: the part in front of
// the last '_' is kept, or the root bundle is used when the name has no '_'.
// The item's own suffix is reused for the parent. The root item itself does
// not depend on a parent. The dependency is reported via checkIDSuffix().
176 checkParent(const char *itemName
, CheckDependency check
, void *context
,
177 UErrorCode
*pErrorCode
) {
178 const char *itemID
, *parent
, *parentLimit
, *suffix
;
179 int32_t parentLength
;
181 // get the item basename
182 itemID
=strrchr(itemName
, '/');
189 // get the item suffix
190 suffix
=strrchr(itemID
, '.');
192 // empty suffix, point to the end of the string
// (strrchr() with a 0 character finds the terminating NUL)
193 suffix
=strrchr(itemID
, 0);
196 // get the position of the last '_'
// the scan runs backwards from the suffix and stops at itemID if none is found
197 for(parentLimit
=suffix
; parentLimit
>itemID
&& *--parentLimit
!='_';) {}
199 if(parentLimit
!=itemID
) {
200 // get the parent item name by truncating the last part of this item's name
202 parentLength
=(int32_t)(parentLimit
-itemID
);
204 // no '_' in the item name: the parent is the root bundle
// the basename (without suffix) equals the parent name when both the length
// and the bytes match
207 if((suffix
-itemID
)==parentLength
&& 0==memcmp(itemID
, parent
, parentLength
)) {
208 // the item itself is "root", which does not depend on a parent
212 checkIDSuffix(itemName
, parent
, parentLength
, suffix
, check
, context
, pErrorCode
);
215 // get dependencies from resource bundles ---------------------------------- ***
// UChar code unit U+002F ('/'), the path separator searched for in alias strings
217 static const UChar SLASH
=0x2f;
220 * Check for the alias from the string or alias resource res.
// Extracts the locale ID from the string or alias resource res and reports
// the item it names as a dependency of itemName via checkIDSuffix().
// useResSuffix selects whether ".res" or an empty suffix is appended to the
// locale ID.
// Errors are printed to stderr and stored in *pErrorCode:
//   U_INVALID_CHAR_FOUND     the alias contains non-invariant characters
//   U_UNSUPPORTED_ERROR      a string (non-alias) value contains a '/'
//   U_BUFFER_OVERFLOW_ERROR  the locale ID does not fit into the local buffer
223 checkAlias(const char *itemName
,
224 Resource res
, const UChar
*alias
, int32_t length
, UBool useResSuffix
,
225 CheckDependency check
, void *context
, UErrorCode
*pErrorCode
) {
228 if(!uprv_isInvariantUString(alias
, length
)) {
229 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
231 *pErrorCode
=U_INVALID_CHAR_FOUND
;
235 // extract the locale ID from alias strings like
236 // locale_ID/key1/key2/key3
239 // search for the first slash
// after the loop, i is the index of the first '/' or equals length if there is none
240 for(i
=0; i
<length
&& alias
[i
]!=SLASH
; ++i
) {}
242 if(res_getPublicType(res
)==URES_ALIAS
) {
243 // ignore aliases with an initial slash:
244 // /ICUDATA/... and /pkgname/... go to a different package
245 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
247 return; // initial slash ('/')
250 // ignore the intra-bundle path starting from the first slash ('/')
252 } else /* URES_STRING */ {
253 // the whole string should only consist of a locale ID
255 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
257 *pErrorCode
=U_UNSUPPORTED_ERROR
;
262 // convert the Unicode string to char *
// >= rather than > leaves room for a terminating NUL in localeID
264 if(length
>=(int32_t)sizeof(localeID
)) {
265 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
266 itemName
, res
, (long)length
);
267 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
270 u_UCharsToChars(alias
, localeID
, length
);
// -1: the ID length is not passed along here
273 checkIDSuffix(itemName
, localeID
, -1, (useResSuffix
? ".res" : ""), check
, context
, pErrorCode
);
277 * Enumerate one resource item and its children and extract dependencies from
// Examines one resource and, recursively, its children for dependencies.
// The work is selected by the public type of res:
//   - strings: only a depth-1 item with the key "%%ALIAS", or a depth-2 item
//     whose parent key is "%%DEPENDENCY", is passed to checkAlias(); for
//     "%%DEPENDENCY" no ".res" suffix is used. All other strings are ignored.
//   - aliases: passed to checkAlias() with the ".res" suffix.
//   - tables and arrays: every item is visited recursively; if the recursion
//     fails, the position and the item are reported on stderr.
// inKey and parentKey are the keys compared against "%%ALIAS" and
// "%%DEPENDENCY"; depth is the nesting level used in those comparisons.
281 ures_enumDependencies(const char *itemName
,
282 const ResourceData
*pResData
,
283 Resource res
, const char *inKey
, const char *parentKey
, int32_t depth
,
284 CheckDependency check
, void *context
,
286 UErrorCode
*pErrorCode
) {
287 switch(res_getPublicType(res
)) {
290 UBool useResSuffix
= TRUE
;
292 if(depth
==1 && inKey
!=NULL
) {
293 if(0!=strcmp(inKey
, "%%ALIAS")) {
297 // Check for %%DEPENDENCY
298 else if(depth
==2 && parentKey
!=NULL
) {
299 if(0!=strcmp(parentKey
, "%%DEPENDENCY")) {
302 useResSuffix
= FALSE
;
304 // we ignore all other strings
308 const UChar
*alias
=res_getString(pResData
, res
, &length
);
309 checkAlias(itemName
, res
, alias
, length
, useResSuffix
, check
, context
, pErrorCode
);
315 const UChar
*alias
=res_getAlias(pResData
, res
, &length
);
316 checkAlias(itemName
, res
, alias
, length
, TRUE
, check
, context
, pErrorCode
);
// table: visit every item; itemKey receives the item's key
322 int32_t count
=res_countArrayItems(pResData
, res
);
323 for(int32_t i
=0; i
<count
; ++i
) {
325 Resource item
=res_getTableItemByIndex(pResData
, res
, i
, &itemKey
);
326 ures_enumDependencies(
333 if(U_FAILURE(*pErrorCode
)) {
334 fprintf(stderr
, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
335 itemName
, res
, i
, itemKey
, item
);
// array: visit every item
344 int32_t count
=res_countArrayItems(pResData
, res
);
345 for(int32_t i
=0; i
<count
; ++i
) {
346 Resource item
=res_getArrayItem(pResData
, res
, i
);
347 ures_enumDependencies(
354 if(U_FAILURE(*pErrorCode
)) {
355 fprintf(stderr
, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
356 itemName
, res
, i
, item
);
// Entry point for one resource bundle item whose data is already in
// platform form (pInfo, inBytes, length).
// Steps visible here:
//   1. read the bundle with res_read(); on failure print a message and exit
//   2. for format version 1.1 or later, report the parent bundle as a
//      dependency unless the noFallback flag is set
//   3. if the bundle uses a pool bundle, report the pool bundle as a
//      dependency, load it from the package, validate it and connect its keys
//      and strings to resData
//   4. enumerate the dependencies starting at the root resource
368 ures_enumDependencies(const char *itemName
, const UDataInfo
*pInfo
,
369 const uint8_t *inBytes
, int32_t length
,
370 CheckDependency check
, void *context
,
372 UErrorCode
*pErrorCode
) {
373 ResourceData resData
;
375 res_read(&resData
, pInfo
, inBytes
, length
, pErrorCode
);
376 if(U_FAILURE(*pErrorCode
)) {
377 fprintf(stderr
, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
378 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
379 exit(U_UNSUPPORTED_ERROR
);
383 * if the bundle attributes are present and the nofallback flag is not set,
384 * then add the parent bundle as a dependency
386 if(pInfo
->formatVersion
[0]>1 || (pInfo
->formatVersion
[0]==1 && pInfo
->formatVersion
[1]>=1)) {
387 if(!resData
.noFallback
) {
388 /* this bundle participates in locale fallback */
389 checkParent(itemName
, check
, context
, pErrorCode
);
// platform-form copy of the pool bundle; declared before the if so that it
// is still alive when resData, which is pointed into its bytes below, is used
393 icu::NativeItem nativePool
;
395 if(resData
.usesPoolBundle
) {
// the pool bundle is the item named "pool" with suffix ".res" in the same tree
397 makeTargetName(itemName
, "pool", 4, ".res", poolName
, (int32_t)sizeof(poolName
), pErrorCode
);
398 if(U_FAILURE(*pErrorCode
)) {
401 check(context
, itemName
, poolName
);
402 int32_t index
=pkg
->findItem(poolName
);
404 // We cannot work with a bundle if its pool resource is missing.
405 // check() already printed a complaint.
408 // TODO: Cache the native version in the Item itself.
409 nativePool
.setItem(pkg
->getItem(index
), ures_swap
);
410 const UDataInfo
*poolInfo
=nativePool
.getDataInfo();
411 if(poolInfo
->formatVersion
[0]<=1) {
412 fprintf(stderr
, "icupkg: %s is not a pool bundle\n", poolName
);
// the indexes start one int32_t after the start of the pool bundle data
415 const int32_t *poolRoot
=(const int32_t *)nativePool
.getBytes();
416 const int32_t *poolIndexes
=poolRoot
+1;
// the low byte of indexes[URES_INDEX_LENGTH] is used as the number of indexes
417 int32_t poolIndexLength
=poolIndexes
[URES_INDEX_LENGTH
]&0xff;
418 if(!(poolIndexLength
>URES_INDEX_POOL_CHECKSUM
&&
419 (poolIndexes
[URES_INDEX_ATTRIBUTES
]&URES_ATT_IS_POOL_BUNDLE
))
421 fprintf(stderr
, "icupkg: %s is not a pool bundle\n", poolName
);
// the checksum stored in this bundle must equal the pool bundle's own checksum
424 if(resData
.pRoot
[1+URES_INDEX_POOL_CHECKSUM
]==poolIndexes
[URES_INDEX_POOL_CHECKSUM
]) {
425 resData
.poolBundleKeys
=(const char *)(poolIndexes
+poolIndexLength
);
426 resData
.poolBundleStrings
=(const uint16_t *)(poolRoot
+poolIndexes
[URES_INDEX_KEYS_TOP
]);
428 fprintf(stderr
, "icupkg: %s has mismatched checksum for %s\n", poolName
, itemName
);
// start the recursive enumeration at the root resource, with no keys and depth 0
433 ures_enumDependencies(
435 resData
.rootRes
, NULL
, NULL
, 0,
441 // get dependencies from conversion tables --------------------------------- ***
443 /* code adapted from ucnv_swap() */
// Looks for the dependency of a conversion table (.cnv) item: an
// extension-only MBCS table names a base table, and that base table is
// reported through checkIDSuffix() with the ".cnv" suffix.
//
// The data is read in its original form: multi-byte header fields go through
// ds->readUInt32() and the base name through ds->swapInvChars().
// An unsupported .cnv format version terminates the process; too few bytes,
// an unsupported _MBCSHeader version or an overlong base name are reported
// and stored in *pErrorCode.
//
// ds        swapper matching the item's endianness and charset family
// itemName  name of the item being examined
// pInfo     the item's UDataInfo header
// inBytes   item contents after the header; length is their size in bytes
// check     callback that receives each dependency, with context passed through
445 ucnv_enumDependencies(const UDataSwapper
*ds
,
446 const char *itemName
, const UDataInfo
*pInfo
,
447 const uint8_t *inBytes
, int32_t length
,
448 CheckDependency check
, void *context
,
449 UErrorCode
*pErrorCode
) {
450 uint32_t staticDataSize
;
452 const UConverterStaticData
*inStaticData
;
454 const _MBCSHeader
*inMBCSHeader
;
457 /* check format version */
459 pInfo
->formatVersion
[0]==6 &&
460 pInfo
->formatVersion
[1]>=2
462 fprintf(stderr
, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
463 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
464 exit(U_UNSUPPORTED_ERROR
);
467 /* read the initial UConverterStaticData structure after the UDataInfo header */
468 inStaticData
=(const UConverterStaticData
*)inBytes
;
// the data must hold at least the compiled-in structure and also the size
// that the structure itself declares
470 if( length
<(int32_t)sizeof(UConverterStaticData
) ||
471 (uint32_t)length
<(staticDataSize
=ds
->readUInt32(inStaticData
->structSize
))
473 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
475 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
// step over the static data; what follows depends on the conversion type
479 inBytes
+=staticDataSize
;
480 length
-=(int32_t)staticDataSize
;
482 /* check for supported conversionType values */
483 if(inStaticData
->conversionType
==UCNV_MBCS
) {
485 uint32_t mbcsHeaderLength
, mbcsHeaderFlags
, mbcsHeaderOptions
;
488 inMBCSHeader
=(const _MBCSHeader
*)inBytes
;
490 if(length
<(int32_t)sizeof(_MBCSHeader
)) {
491 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
493 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
// header length: fixed for version 4.x, taken from the options word for 5.3+
496 if(inMBCSHeader
->version
[0]==4 && inMBCSHeader
->version
[1]>=1) {
497 mbcsHeaderLength
=MBCS_HEADER_V4_LENGTH
;
498 } else if(inMBCSHeader
->version
[0]==5 && inMBCSHeader
->version
[1]>=3 &&
499 ((mbcsHeaderOptions
=ds
->readUInt32(inMBCSHeader
->options
))&
500 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK
)==0
502 mbcsHeaderLength
=mbcsHeaderOptions
&MBCS_OPT_LENGTH_MASK
;
504 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
505 inMBCSHeader
->version
[0], inMBCSHeader
->version
[1]);
506 *pErrorCode
=U_UNSUPPORTED_ERROR
;
// the flags word carries the output type in its low byte and the extension
// data offset in the bits above it
510 mbcsHeaderFlags
=ds
->readUInt32(inMBCSHeader
->flags
);
511 extOffset
=(int32_t)(mbcsHeaderFlags
>>8);
512 outputType
=(uint8_t)mbcsHeaderFlags
;
514 if(outputType
==MBCS_OUTPUT_EXT_ONLY
) {
516 * extension-only file,
517 * contains a base name instead of normal base table data
520 int32_t baseNameLength
;
522 /* there is extension data after the base data, see ucnv_ext.h */
523 if(length
<(extOffset
+UCNV_EXT_INDEXES_MIN_LENGTH
*4)) {
524 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
526 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
530 /* swap the base name, between the header and the extension data */
// mbcsHeaderLength counts 4-byte units, hence the *4
531 const char *inBaseName
=(const char *)inBytes
+mbcsHeaderLength
*4;
532 baseNameLength
=(int32_t)strlen(inBaseName
);
// >= rather than > leaves room for the terminating NUL in baseName
533 if(baseNameLength
>=(int32_t)sizeof(baseName
)) {
// cast for %ld: baseNameLength is an int32_t, which need not be as wide as long
534 udata_printError(ds
, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
535 itemName
, (long)baseNameLength
);
536 *pErrorCode
=U_UNSUPPORTED_ERROR
;
// +1 converts the terminating NUL along with the name
539 ds
->swapInvChars(ds
, inBaseName
, baseNameLength
+1, baseName
, pErrorCode
);
541 checkIDSuffix(itemName
, baseName
, -1, ".cnv", check
, context
, pErrorCode
);
546 // ICU data formats -------------------------------------------------------- ***
// Table of the 4-byte dataFormat signatures that this file knows how to
// examine for dependencies; getDataFormat() compares a header's dataFormat
// against these entries in order.
548 static const struct {
549 uint8_t dataFormat
[4];
551 { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */
552 { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */
553 { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */
// Looks up a 4-byte dataFormat signature in the dataFormats table with a
// linear search over FMT_COUNT entries, comparing all 4 bytes.
// NOTE(review): the return statements are not visible in this view;
// presumably the index of the matching entry is returned -- confirm.
564 getDataFormat(const uint8_t dataFormat
[4]) {
567 for(i
=0; i
<FMT_COUNT
; ++i
) {
568 if(0==memcmp(dataFormats
[i
].dataFormat
, dataFormat
, 4)) {
575 // enumerate dependencies of a package item -------------------------------- ***
578 Package::enumDependencies(Item
*pItem
, void *context
, CheckDependency check
) {
579 int32_t infoLength
, itemHeaderLength
;
580 UErrorCode errorCode
=U_ZERO_ERROR
;
581 const UDataInfo
*pInfo
=getDataInfo(pItem
->data
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
582 if(U_FAILURE(errorCode
)) {
583 return; // should not occur because readFile() checks headers
586 // find the data format and call the corresponding function, if any
587 int32_t format
=getDataFormat(pInfo
->dataFormat
);
593 * Swap the resource bundle (if necessary) so that we can use
594 * the normal runtime uresdata.c code to read it.
595 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
597 NativeItem
nrb(pItem
, ures_swap
);
598 ures_enumDependencies(pItem
->name
, nrb
.getDataInfo(), nrb
.getBytes(), nrb
.getLength(), check
, context
, this, &errorCode
);
603 // TODO: share/cache swappers
604 UDataSwapper
*ds
=udata_openSwapper(
605 (UBool
)pInfo
->isBigEndian
, pInfo
->charsetFamily
,
606 U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
,
608 if(U_FAILURE(errorCode
)) {
609 fprintf(stderr
, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
610 pItem
->name
, u_errorName(errorCode
));
614 ds
->printError
=printError
;
615 ds
->printErrorContext
=stderr
;
617 const uint8_t *inBytes
=pItem
->data
+itemHeaderLength
;
618 int32_t length
=pItem
->length
-itemHeaderLength
;
620 ucnv_enumDependencies(ds
, pItem
->name
, pInfo
, inBytes
, length
, check
, context
, &errorCode
);
621 udata_closeSwapper(ds
);
628 if(U_FAILURE(errorCode
)) {