1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: pkgitems.cpp
12 * tab size: 8 (not used)
15 * created on: 2005sep18
16 * created by: Markus W. Scherer
18 * Companion file to package.cpp. Deals with details of ICU data item formats.
19 * Used for item dependencies.
20 * Contains adapted code from ucnv_bld.c (swapper code from 2003).
23 #include "unicode/utypes.h"
24 #include "unicode/ures.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
40 /* item formats in common */
46 // general definitions ----------------------------------------------------- ***
// Error-print callback with the U_CALLCONV calling convention.
// It is installed as a UDataSwapper's printError hook elsewhere in this file.
//   context - cast to FILE * and used as the output stream
//             (the visible installers set the context to stderr)
//   fmt     - printf-style format string
//   args    - variable argument list matching fmt
// The message is written with vfprintf(); its return value is not used.
50 static void U_CALLCONV
51 printError(void *context
, const char *fmt
, va_list args
) {
52 vfprintf((FILE *)context
, fmt
, args
);
57 // a data item in native-platform form ------------------------------------- ***
// Default constructor: creates an object with no item attached.
// All four pointer members start as NULL and length starts at 0.
63 NativeItem() : pItem(NULL
), pInfo(NULL
), bytes(NULL
), swapped(NULL
), length(0) {}
// Constructs from a package item and the swap function for its data format.
// Only swapped is set in the initializer list (to NULL).
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it hands item and swap to setItem() - confirm.
64 NativeItem(const Item
*item
, UDataSwapFn
*swap
) : swapped(NULL
) {
// Const accessor returning a const UDataInfo pointer.
// NOTE(review): body not visible here; presumably returns pInfo - confirm.
70 const UDataInfo
*getDataInfo() const {
// Const accessor returning a const uint8_t pointer.
// NOTE(review): body not visible here; presumably returns bytes - confirm.
73 const uint8_t *getBytes() const {
// Const accessor returning an int32_t.
// NOTE(review): body not visible here; presumably returns length - confirm.
76 int32_t getLength() const {
// Attaches a package item and makes its contents readable in this platform's
// endianness and charset family.
//   item - the package item to expose
//          NOTE(review): the statement storing it in pItem is not visible here.
//   swap - format-specific swap function, called when the item's endianness
//          or charset family differs from the platform's
// On return pInfo points at the item's UDataInfo and bytes at the data
// following the item header. Both point into the item's own data when no
// swapping is needed, and into the swapped copy otherwise.
// Failures are reported on stderr; the visible error paths call exit().
80 void setItem(const Item
*item
, UDataSwapFn
*swap
) {
82 int32_t infoLength
, itemHeaderLength
;
83 UErrorCode errorCode
=U_ZERO_ERROR
;
// Locate the UDataInfo inside the item. infoLength and itemHeaderLength are
// passed to the helper, and itemHeaderLength is used below as the offset of
// the data that follows the header.
84 pInfo
=::getDataInfo(pItem
->data
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
85 if(U_FAILURE(errorCode
)) {
86 exit(errorCode
); // should succeed because readFile() checks headers
// Length of the data that follows the item header.
88 length
=pItem
->length
-itemHeaderLength
;
// Same endianness and charset family as this platform:
// point straight into the item's data.
90 if(pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&& pInfo
->charsetFamily
==U_CHARSET_FAMILY
) {
91 bytes
=pItem
->data
+itemHeaderLength
;
// Open a swapper that converts from the item's endianness/charset family
// to the platform's.
93 UDataSwapper
*ds
=udata_openSwapper((UBool
)pInfo
->isBigEndian
, pInfo
->charsetFamily
, U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
, &errorCode
);
94 if(U_FAILURE(errorCode
)) {
95 fprintf(stderr
, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
96 pItem
->name
, u_errorName(errorCode
));
// Route the swapper's diagnostics to stderr through printError.
100 ds
->printError
=printError
;
101 ds
->printErrorContext
=stderr
;
// Buffer for the swapped copy, sized for the whole item including its header.
103 swapped
=new uint8_t[pItem
->length
];
// Out-of-memory report.
// NOTE(review): the condition guarding these two statements is not visible here.
105 fprintf(stderr
, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem
->name
);
106 exit(U_MEMORY_ALLOCATION_ERROR
);
// Swap the whole item into the buffer, then take the header info and the
// data pointer from the swapped copy instead of the original.
108 swap(ds
, pItem
->data
, pItem
->length
, swapped
, &errorCode
);
109 pInfo
=::getDataInfo(swapped
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
110 bytes
=swapped
+itemHeaderLength
;
111 udata_closeSwapper(ds
);
// UDataInfo of the attached item; setItem() re-points it at the swapped copy
// when the item had to be swapped.
117 const UDataInfo
*pInfo
;
// Start of the item's data after the item header, readable on this platform
// (set by setItem()).
118 const uint8_t *bytes
;
123 // check a dependency ------------------------------------------------------ ***
126 * assemble the target item name from the source item name, an ID
// Builds a target item name of the form
//     <leading part of itemName up to its basename> + id + suffix
// in the caller's buffer.
//   itemName   - name of the item that has the dependency
//   id         - ID text placed after the tree prefix
//   idLength   - length of id; the visible callers pass either an explicit
//                length or -1
//                NOTE(review): the condition guarding the strlen(id)
//                recomputation below is not visible here.
//   suffix     - NUL-terminated suffix; copied together with its NUL
//   target     - output buffer
//   capacity   - size of target in bytes
//   pErrorCode - set to U_BUFFER_OVERFLOW_ERROR, after a message on stderr,
//                when the assembled name plus its NUL does not fit
130 makeTargetName(const char *itemName
, const char *id
, int32_t idLength
, const char *suffix
,
131 char *target
, int32_t capacity
,
132 UErrorCode
*pErrorCode
) {
134 int32_t treeLength
, suffixLength
, targetLength
;
136 // get the item basename
137 itemID
=strrchr(itemName
, '/');
// NOTE(review): the handling of the strrchr() result (for example when
// itemName contains no '/') is not visible here.
144 // build the target string
// treeLength counts the bytes of itemName that precede itemID.
145 treeLength
=(int32_t)(itemID
-itemName
);
147 idLength
=(int32_t)strlen(id
);
149 suffixLength
=(int32_t)strlen(suffix
);
150 targetLength
=treeLength
+idLength
+suffixLength
;
// ">=" leaves room for the terminating NUL copied by the last memcpy.
151 if(targetLength
>=capacity
) {
152 fprintf(stderr
, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
153 itemName
, (long)targetLength
);
154 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
// Copy the three pieces back to back: tree prefix, id, then suffix.
158 memcpy(target
, itemName
, treeLength
);
159 memcpy(target
+treeLength
, id
, idLength
);
160 memcpy(target
+treeLength
+idLength
, suffix
, suffixLength
+1); // +1 includes the terminating NUL
// Builds the target item name from itemName, id and suffix with
// makeTargetName(), then reports the dependency through the callback.
//   itemName   - name of the item that has the dependency
//   id         - ID text of the target, forwarded to makeTargetName()
//   idLength   - length of id, forwarded to makeTargetName()
//   suffix     - suffix of the target, forwarded to makeTargetName()
//   check      - callback invoked as check(context, itemName, target)
//   context    - opaque pointer handed back to check
//   pErrorCode - receives makeTargetName()'s error, if any; check is called
//                only when *pErrorCode indicates success afterwards
164 checkIDSuffix(const char *itemName
, const char *id
, int32_t idLength
, const char *suffix
,
165 CheckDependency check
, void *context
,
166 UErrorCode
*pErrorCode
) {
// target is a local buffer whose declaration is not visible here;
// its size is passed as the capacity.
168 makeTargetName(itemName
, id
, idLength
, suffix
, target
, (int32_t)sizeof(target
), pErrorCode
);
169 if(U_SUCCESS(*pErrorCode
)) {
170 check(context
, itemName
, target
);
174 /* assemble the target item name from the item's parent item name */
// Derives the parent item's name from itemName and reports it as a
// dependency through checkIDSuffix().
//   itemName   - name of the item whose parent is wanted
//   check      - dependency callback, forwarded to checkIDSuffix()
//   context    - opaque pointer for check
//   pErrorCode - forwarded to checkIDSuffix()
// Per the comments below, the parent is the basename truncated at its last
// '_', or the root bundle when there is no '_'. An item that is itself
// "root" has no parent.
176 checkParent(const char *itemName
, CheckDependency check
, void *context
,
177 UErrorCode
*pErrorCode
) {
178 const char *itemID
, *parent
, *parentLimit
, *suffix
;
179 int32_t parentLength
;
181 // get the item basename
182 itemID
=strrchr(itemName
, '/');
// NOTE(review): the handling of the strrchr() result is not visible here.
189 // get the item suffix
190 suffix
=strrchr(itemID
, '.');
192 // empty suffix, point to the end of the string
// strrchr() with 0 as the character yields the terminating NUL.
193 suffix
=strrchr(itemID
, 0);
196 // get the position of the last '_'
// Scan backwards from the suffix; stop at a '_' or at the start of the basename.
197 for(parentLimit
=suffix
; parentLimit
>itemID
&& *--parentLimit
!='_';) {}
199 if(parentLimit
!=itemID
) {
200 // get the parent item name by truncating the last part of this item's name
202 parentLength
=(int32_t)(parentLimit
-itemID
);
204 // no '_' in the item name: the parent is the root bundle
// NOTE(review): the assignments to parent (both branches) are not visible here.
// The test below is true when the basename without its suffix equals parent.
207 if((suffix
-itemID
)==parentLength
&& 0==memcmp(itemID
, parent
, parentLength
)) {
208 // the item itself is "root", which does not depend on a parent
// The parent keeps this item's suffix.
212 checkIDSuffix(itemName
, parent
, parentLength
, suffix
, check
, context
, pErrorCode
);
215 // get dependencies from resource bundles ---------------------------------- ***
// The '/' character (0x2f) as a UChar; checkAlias() compares alias
// characters against it to find the first slash.
217 static const UChar SLASH
=0x2f;
220 * Check for the alias from the string or alias resource res.
// Extracts the locale ID from an alias string and reports the bundle it
// names as a dependency.
//   itemName     - name of the item that contains the resource
//   res          - the resource; its public type selects alias vs. string handling
//   alias        - UTF-16 text of the alias
//   length       - number of UChars in alias
//   useResSuffix - TRUE: the target name gets the ".res" suffix;
//                  FALSE: an empty suffix
//   check        - dependency callback, forwarded to checkIDSuffix()
//   context      - opaque pointer for check
//   pErrorCode   - set to U_INVALID_CHAR_FOUND, U_UNSUPPORTED_ERROR or
//                  U_BUFFER_OVERFLOW_ERROR by the visible error paths
223 checkAlias(const char *itemName
,
224 Resource res
, const UChar
*alias
, int32_t length
, UBool useResSuffix
,
225 CheckDependency check
, void *context
, UErrorCode
*pErrorCode
) {
// The alias must consist of invariant characters.
228 if(!uprv_isInvariantUString(alias
, length
)) {
229 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
231 *pErrorCode
=U_INVALID_CHAR_FOUND
;
235 // extract the locale ID from alias strings like
236 // locale_ID/key1/key2/key3
239 // search for the first slash
// After the loop, i is the index of the first '/' or equals length if there is none.
240 for(i
=0; i
<length
&& alias
[i
]!=SLASH
; ++i
) {}
242 if(res_getPublicType(res
)==URES_ALIAS
) {
243 // ignore aliases with an initial slash:
244 // /ICUDATA/... and /pkgname/... go to a different package
245 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
247 return; // initial slash ('/')
250 // ignore the intra-bundle path starting from the first slash ('/')
252 } else /* URES_STRING */ {
253 // the whole string should only consist of a locale ID
255 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
257 *pErrorCode
=U_UNSUPPORTED_ERROR
;
262 // convert the Unicode string to char *
// localeID is a local char buffer whose declaration is not visible here;
// ">=" leaves room for a terminating NUL.
264 if(length
>=(int32_t)sizeof(localeID
)) {
265 fprintf(stderr
, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
266 itemName
, res
, (long)length
);
267 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
270 u_UCharsToChars(alias
, localeID
, length
);
// -1 is passed as the ID length.
// NOTE(review): this relies on localeID being NUL-terminated; the
// terminating store is not visible here.
273 checkIDSuffix(itemName
, localeID
, -1, (useResSuffix
? ".res" : ""), check
, context
, pErrorCode
);
277 * Enumerate one resource item and its children and extract dependencies from
// Examines one resource of a bundle, recursing into the children of tables
// and arrays, and reports dependencies found in strings and aliases through
// checkAlias().
//   itemName   - name of the item being scanned
//   pResData   - resource data of the bundle
//   res        - the resource to examine; its public type drives the switch
//   inKey      - key compared against "%%ALIAS" at depth 1; may be NULL
//   parentKey  - key compared against "%%DEPENDENCY" at depth 2; may be NULL
//   depth      - nesting depth; the per-item overload starts the walk with 0
//   check      - dependency callback
//   context    - opaque pointer for check
//   pErrorCode - error code; a failed recursive call is reported on stderr
// NOTE(review): the case labels of the switch and the arguments of the
// recursive calls are not visible in this excerpt.
281 ures_enumDependencies(const char *itemName
,
282 const ResourceData
*pResData
,
283 Resource res
, const char *inKey
, const char *parentKey
, int32_t depth
,
284 CheckDependency check
, void *context
,
286 UErrorCode
*pErrorCode
) {
287 switch(res_getPublicType(res
)) {
// String resources: by default the alias target gets the ".res" suffix.
290 UBool useResSuffix
= TRUE
;
// A string at depth 1 is examined against the key "%%ALIAS".
292 if(depth
==1 && inKey
!=NULL
) {
293 if(0!=strcmp(inKey
, "%%ALIAS")) {
297 // Check for %%DEPENDENCY
298 else if(depth
==2 && parentKey
!=NULL
) {
299 if(0!=strcmp(parentKey
, "%%DEPENDENCY")) {
// NOTE(review): presumably reached for "%%DEPENDENCY" entries, which name
// their target without the ".res" suffix - confirm.
302 useResSuffix
= FALSE
;
304 // we ignore all other strings
308 // No tracing: build tool
309 const UChar
*alias
=res_getStringNoTrace(pResData
, res
, &length
);
310 checkAlias(itemName
, res
, alias
, length
, useResSuffix
, check
, context
, pErrorCode
);
// Alias resources always use the ".res" suffix (TRUE).
316 const UChar
*alias
=res_getAlias(pResData
, res
, &length
);
317 checkAlias(itemName
, res
, alias
, length
, TRUE
, check
, context
, pErrorCode
);
// Table: visit every item by index, fetching its key as well.
323 int32_t count
=res_countArrayItems(pResData
, res
);
324 for(int32_t i
=0; i
<count
; ++i
) {
326 Resource item
=res_getTableItemByIndex(pResData
, res
, i
, &itemKey
);
327 ures_enumDependencies(
334 if(U_FAILURE(*pErrorCode
)) {
335 fprintf(stderr
, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
336 itemName
, res
, i
, itemKey
, item
);
// Array: visit every element by index.
345 int32_t count
=res_countArrayItems(pResData
, res
);
346 for(int32_t i
=0; i
<count
; ++i
) {
347 Resource item
=res_getArrayItem(pResData
, res
, i
);
348 ures_enumDependencies(
355 if(U_FAILURE(*pErrorCode
)) {
356 fprintf(stderr
, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
357 itemName
, res
, i
, item
);
// Entry point for one resource-bundle item: reads the bundle, reports its
// parent bundle and its pool bundle (when used) as dependencies, then walks
// the resource tree starting at the root resource.
//   itemName   - name of the item being scanned
//   pInfo      - UDataInfo of the bundle, in platform form
//   inBytes    - bundle data after the item header, in platform form
//   length     - number of bytes at inBytes
//   check      - dependency callback
//   context    - opaque pointer for check
//   pErrorCode - error code
// The body also uses pkg (findItem/getItem); its declaration is not visible
// in this excerpt.
369 ures_enumDependencies(const char *itemName
, const UDataInfo
*pInfo
,
370 const uint8_t *inBytes
, int32_t length
,
371 CheckDependency check
, void *context
,
373 UErrorCode
*pErrorCode
) {
374 ResourceData resData
;
// Parse the bundle; a failure is fatal and exits the tool.
376 res_read(&resData
, pInfo
, inBytes
, length
, pErrorCode
);
377 if(U_FAILURE(*pErrorCode
)) {
378 fprintf(stderr
, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
379 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
380 exit(U_UNSUPPORTED_ERROR
);
384 * if the bundle attributes are present and the nofallback flag is not set,
385 * then add the parent bundle as a dependency
387 if(pInfo
->formatVersion
[0]>1 || (pInfo
->formatVersion
[0]==1 && pInfo
->formatVersion
[1]>=1)) {
388 if(!resData
.noFallback
) {
389 /* this bundle participates in locale fallback */
390 checkParent(itemName
, check
, context
, pErrorCode
);
// Platform-form view of the pool bundle; filled in only when this bundle uses one.
394 icu::NativeItem nativePool
;
396 if(resData
.usesPoolBundle
) {
// The pool bundle is named "pool.res" under the same tree prefix as itemName.
// poolName is a local buffer whose declaration is not visible here.
398 makeTargetName(itemName
, "pool", 4, ".res", poolName
, (int32_t)sizeof(poolName
), pErrorCode
);
399 if(U_FAILURE(*pErrorCode
)) {
// Report the pool bundle as a dependency, then look it up in the package.
402 check(context
, itemName
, poolName
);
403 int32_t index
=pkg
->findItem(poolName
);
405 // We cannot work with a bundle if its pool resource is missing.
406 // check() already printed a complaint.
409 // TODO: Cache the native version in the Item itself.
410 nativePool
.setItem(pkg
->getItem(index
), ures_swap
);
411 const UDataInfo
*poolInfo
=nativePool
.getDataInfo();
// A major format version of 1 or lower is reported as "not a pool bundle".
412 if(poolInfo
->formatVersion
[0]<=1) {
413 fprintf(stderr
, "icupkg: %s is not a pool bundle\n", poolName
);
// View the pool data as 32-bit words; the index array starts one word
// after the start of the data.
416 const int32_t *poolRoot
=(const int32_t *)nativePool
.getBytes();
417 const int32_t *poolIndexes
=poolRoot
+1;
// The low byte of the URES_INDEX_LENGTH entry is used as the index count.
418 int32_t poolIndexLength
=poolIndexes
[URES_INDEX_LENGTH
]&0xff;
419 if(!(poolIndexLength
>URES_INDEX_POOL_CHECKSUM
&&
420 (poolIndexes
[URES_INDEX_ATTRIBUTES
]&URES_ATT_IS_POOL_BUNDLE
))
422 fprintf(stderr
, "icupkg: %s is not a pool bundle\n", poolName
);
// The checksum stored in this bundle must equal the pool bundle's checksum
// before the pool's keys and strings are attached to resData.
425 if(resData
.pRoot
[1+URES_INDEX_POOL_CHECKSUM
]==poolIndexes
[URES_INDEX_POOL_CHECKSUM
]) {
426 resData
.poolBundleKeys
=(const char *)(poolIndexes
+poolIndexLength
);
427 resData
.poolBundleStrings
=(const uint16_t *)(poolRoot
+poolIndexes
[URES_INDEX_KEYS_TOP
]);
429 fprintf(stderr
, "icupkg: %s has mismatched checksum for %s\n", poolName
, itemName
);
434 ures_enumDependencies(
436 resData
.rootRes
, NULL
, NULL
, 0,
442 // get dependencies from conversion tables --------------------------------- ***
444 /* code adapted from ucnv_swap() */
// Finds the dependency of a .cnv conversion table: an extension-only MBCS
// table names a base table, which is reported as "<base name>.cnv".
// The data is read in place through the swapper's read functions.
//   ds         - swapper used to read 32-bit fields (readUInt32), to convert
//                the base name (swapInvChars) and to print errors
//   itemName   - name of the item being scanned
//   pInfo      - UDataInfo of the item; its formatVersion is examined
//   inBytes    - item data after the item header
//   length     - number of bytes at inBytes
//   check      - dependency callback, forwarded to checkIDSuffix()
//   context    - opaque pointer for check
//   pErrorCode - set to U_INDEX_OUTOFBOUNDS_ERROR or U_UNSUPPORTED_ERROR by
//                the visible error paths
446 ucnv_enumDependencies(const UDataSwapper
*ds
,
447 const char *itemName
, const UDataInfo
*pInfo
,
448 const uint8_t *inBytes
, int32_t length
,
449 CheckDependency check
, void *context
,
450 UErrorCode
*pErrorCode
) {
451 uint32_t staticDataSize
;
453 const UConverterStaticData
*inStaticData
;
455 const _MBCSHeader
*inMBCSHeader
;
458 /* check format version */
460 pInfo
->formatVersion
[0]==6 &&
461 pInfo
->formatVersion
[1]>=2
463 fprintf(stderr
, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
464 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
465 exit(U_UNSUPPORTED_ERROR
);
468 /* read the initial UConverterStaticData structure after the UDataInfo header */
469 inStaticData
=(const UConverterStaticData
*)inBytes
;
471 if( length
<(int32_t)sizeof(UConverterStaticData
) ||
472 (uint32_t)length
<(staticDataSize
=ds
->readUInt32(inStaticData
->structSize
))
474 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
476 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
// Step past the static data structure.
480 inBytes
+=staticDataSize
;
481 length
-=(int32_t)staticDataSize
;
483 /* check for supported conversionType values */
484 if(inStaticData
->conversionType
==UCNV_MBCS
) {
486 uint32_t mbcsHeaderLength
, mbcsHeaderFlags
, mbcsHeaderOptions
;
489 inMBCSHeader
=(const _MBCSHeader
*)inBytes
;
491 if(length
<(int32_t)sizeof(_MBCSHeader
)) {
492 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
494 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
// Header length: fixed for version 4.1 and later 4.x; for version 5.3 and
// later 5.x it is taken from the options word, provided no
// unknown-incompatible option bit is set.
497 if(inMBCSHeader
->version
[0]==4 && inMBCSHeader
->version
[1]>=1) {
498 mbcsHeaderLength
=MBCS_HEADER_V4_LENGTH
;
499 } else if(inMBCSHeader
->version
[0]==5 && inMBCSHeader
->version
[1]>=3 &&
500 ((mbcsHeaderOptions
=ds
->readUInt32(inMBCSHeader
->options
))&
501 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK
)==0
503 mbcsHeaderLength
=mbcsHeaderOptions
&MBCS_OPT_LENGTH_MASK
;
505 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
506 inMBCSHeader
->version
[0], inMBCSHeader
->version
[1]);
507 *pErrorCode
=U_UNSUPPORTED_ERROR
;
// The flags word carries the output type in its low 8 bits and the
// extension offset in the bits above them.
511 mbcsHeaderFlags
=ds
->readUInt32(inMBCSHeader
->flags
);
512 extOffset
=(int32_t)(mbcsHeaderFlags
>>8);
513 outputType
=(uint8_t)mbcsHeaderFlags
;
515 if(outputType
==MBCS_OUTPUT_EXT_ONLY
) {
517 * extension-only file,
518 * contains a base name instead of normal base table data
521 int32_t baseNameLength
;
523 /* there is extension data after the base data, see ucnv_ext.h */
524 if(length
<(extOffset
+UCNV_EXT_INDEXES_MIN_LENGTH
*4)) {
525 udata_printError(ds
, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
527 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
531 /* swap the base name, between the header and the extension data */
// The header length is multiplied by 4 to get the byte offset of the base name.
532 const char *inBaseName
=(const char *)inBytes
+mbcsHeaderLength
*4;
533 baseNameLength
=(int32_t)strlen(inBaseName
);
// baseName is a local buffer whose declaration is not visible here.
534 if(baseNameLength
>=(int32_t)sizeof(baseName
)) {
// NOTE(review): "%ld" is paired with an int32_t argument below; the other
// "%ld" messages in this file cast their argument to (long).
535 udata_printError(ds
, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
536 itemName
, baseNameLength
);
537 *pErrorCode
=U_UNSUPPORTED_ERROR
;
// Convert the base name, including its NUL, into baseName.
540 ds
->swapInvChars(ds
, inBaseName
, baseNameLength
+1, baseName
, pErrorCode
);
// Report "<tree prefix><baseName>.cnv" as the dependency;
// -1 is passed as the ID length.
542 checkIDSuffix(itemName
, baseName
, -1, ".cnv", check
, context
, pErrorCode
);
547 // ICU data formats -------------------------------------------------------- ***
// Table of 4-byte data format signatures; getDataFormat() compares a given
// signature against each entry's dataFormat field.
// Each entry is spelled out as ASCII byte values, with the text form in the
// trailing comment.
549 static const struct {
550 uint8_t dataFormat
[4];
552 { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */
553 { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */
554 { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */
// Looks up a 4-byte data format signature in the dataFormats table by
// comparing it with memcmp() against each of the FMT_COUNT entries.
//   dataFormat - the 4-byte signature to look up
// NOTE(review): the return statements are not visible here; presumably the
// matching index is returned and a negative value when nothing matches - confirm.
565 getDataFormat(const uint8_t dataFormat
[4]) {
568 for(i
=0; i
<FMT_COUNT
; ++i
) {
569 if(0==memcmp(dataFormats
[i
].dataFormat
, dataFormat
, 4)) {
576 // enumerate dependencies of a package item -------------------------------- ***
579 Package::enumDependencies(Item
*pItem
, void *context
, CheckDependency check
) {
580 int32_t infoLength
, itemHeaderLength
;
581 UErrorCode errorCode
=U_ZERO_ERROR
;
582 const UDataInfo
*pInfo
=getDataInfo(pItem
->data
, pItem
->length
, infoLength
, itemHeaderLength
, &errorCode
);
583 if(U_FAILURE(errorCode
)) {
584 return; // should not occur because readFile() checks headers
587 // find the data format and call the corresponding function, if any
588 int32_t format
=getDataFormat(pInfo
->dataFormat
);
594 * Swap the resource bundle (if necessary) so that we can use
595 * the normal runtime uresdata.c code to read it.
596 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
598 NativeItem
nrb(pItem
, ures_swap
);
599 ures_enumDependencies(pItem
->name
, nrb
.getDataInfo(), nrb
.getBytes(), nrb
.getLength(), check
, context
, this, &errorCode
);
604 // TODO: share/cache swappers
605 UDataSwapper
*ds
=udata_openSwapper(
606 (UBool
)pInfo
->isBigEndian
, pInfo
->charsetFamily
,
607 U_IS_BIG_ENDIAN
, U_CHARSET_FAMILY
,
609 if(U_FAILURE(errorCode
)) {
610 fprintf(stderr
, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
611 pItem
->name
, u_errorName(errorCode
));
615 ds
->printError
=printError
;
616 ds
->printErrorContext
=stderr
;
618 const uint8_t *inBytes
=pItem
->data
+itemHeaderLength
;
619 int32_t length
=pItem
->length
-itemHeaderLength
;
621 ucnv_enumDependencies(ds
, pItem
->name
, pInfo
, inBytes
, length
, check
, context
, &errorCode
);
622 udata_closeSwapper(ds
);
629 if(U_FAILURE(errorCode
)) {