]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkgitems.cpp
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkgitems.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: pkgitems.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005sep18
14 * created by: Markus W. Scherer
15 *
16 * Companion file to package.cpp. Deals with details of ICU data item formats.
17 * Used for item dependencies.
18 * Contains adapted code from ucnv_bld.c (swapper code from 2003).
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/ures.h"
23 #include "unicode/putil.h"
24 #include "unicode/udata.h"
25 #include "cstring.h"
26 #include "uinvchar.h"
27 #include "ucmndata.h"
28 #include "udataswp.h"
29 #include "swapimpl.h"
30 #include "toolutil.h"
31 #include "package.h"
32 #include "pkg_imp.h"
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37
38 /* item formats in common */
39
40 #include "uresdata.h"
41 #include "ucnv_bld.h"
42 #include "ucnv_io.h"
43
44 // general definitions ----------------------------------------------------- ***
45
46 U_CDECL_BEGIN
47
48 static void U_CALLCONV
49 printError(void *context, const char *fmt, va_list args) {
50 vfprintf((FILE *)context, fmt, args);
51 }
52
53 U_CDECL_END
54
55 // a data item in native-platform form ------------------------------------- ***
56
57 U_NAMESPACE_BEGIN
58
59 class NativeItem {
60 public:
61 NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {}
62 NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) {
63 setItem(item, swap);
64 }
65 ~NativeItem() {
66 delete [] swapped;
67 }
68 const UDataInfo *getDataInfo() const {
69 return pInfo;
70 }
71 const uint8_t *getBytes() const {
72 return bytes;
73 }
74 int32_t getLength() const {
75 return length;
76 }
77
78 void setItem(const Item *item, UDataSwapFn *swap) {
79 pItem=item;
80 int32_t infoLength, itemHeaderLength;
81 UErrorCode errorCode=U_ZERO_ERROR;
82 pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
83 if(U_FAILURE(errorCode)) {
84 exit(errorCode); // should succeed because readFile() checks headers
85 }
86 length=pItem->length-itemHeaderLength;
87
88 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) {
89 bytes=pItem->data+itemHeaderLength;
90 } else {
91 UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
92 if(U_FAILURE(errorCode)) {
93 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
94 pItem->name, u_errorName(errorCode));
95 exit(errorCode);
96 }
97
98 ds->printError=printError;
99 ds->printErrorContext=stderr;
100
101 swapped=new uint8_t[pItem->length];
102 if(swapped==NULL) {
103 fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name);
104 exit(U_MEMORY_ALLOCATION_ERROR);
105 }
106 swap(ds, pItem->data, pItem->length, swapped, &errorCode);
107 pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode);
108 bytes=swapped+itemHeaderLength;
109 udata_closeSwapper(ds);
110 }
111 }
112
113 private:
114 const Item *pItem;
115 const UDataInfo *pInfo;
116 const uint8_t *bytes;
117 uint8_t *swapped;
118 int32_t length;
119 };
120
121 // check a dependency ------------------------------------------------------ ***
122
123 /*
124 * assemble the target item name from the source item name, an ID
125 * and a suffix
126 */
127 static void
128 makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix,
129 char *target, int32_t capacity,
130 UErrorCode *pErrorCode) {
131 const char *itemID;
132 int32_t treeLength, suffixLength, targetLength;
133
134 // get the item basename
135 itemID=strrchr(itemName, '/');
136 if(itemID!=NULL) {
137 ++itemID;
138 } else {
139 itemID=itemName;
140 }
141
142 // build the target string
143 treeLength=(int32_t)(itemID-itemName);
144 if(idLength<0) {
145 idLength=(int32_t)strlen(id);
146 }
147 suffixLength=(int32_t)strlen(suffix);
148 targetLength=treeLength+idLength+suffixLength;
149 if(targetLength>=capacity) {
150 fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
151 itemName, (long)targetLength);
152 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
153 return;
154 }
155
156 memcpy(target, itemName, treeLength);
157 memcpy(target+treeLength, id, idLength);
158 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
159 }
160
161 static void
162 checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
163 CheckDependency check, void *context,
164 UErrorCode *pErrorCode) {
165 char target[200];
166 makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode);
167 if(U_SUCCESS(*pErrorCode)) {
168 check(context, itemName, target);
169 }
170 }
171
172 /* assemble the target item name from the item's parent item name */
173 static void
174 checkParent(const char *itemName, CheckDependency check, void *context,
175 UErrorCode *pErrorCode) {
176 const char *itemID, *parent, *parentLimit, *suffix;
177 int32_t parentLength;
178
179 // get the item basename
180 itemID=strrchr(itemName, '/');
181 if(itemID!=NULL) {
182 ++itemID;
183 } else {
184 itemID=itemName;
185 }
186
187 // get the item suffix
188 suffix=strrchr(itemID, '.');
189 if(suffix==NULL) {
190 // empty suffix, point to the end of the string
191 suffix=strrchr(itemID, 0);
192 }
193
194 // get the position of the last '_'
195 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
196
197 if(parentLimit!=itemID) {
198 // get the parent item name by truncating the last part of this item's name */
199 parent=itemID;
200 parentLength=(int32_t)(parentLimit-itemID);
201 } else {
202 // no '_' in the item name: the parent is the root bundle
203 parent="root";
204 parentLength=4;
205 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
206 // the item itself is "root", which does not depend on a parent
207 return;
208 }
209 }
210 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
211 }
212
213 // get dependencies from resource bundles ---------------------------------- ***
214
// UChar value 0x2f ('/' in Unicode); checkAlias() searches alias strings such
// as locale_ID/key1/key2 for it to find the end of the locale ID.
static const UChar SLASH=0x2f;
216
217 /*
218 * Check for the alias from the string or alias resource res.
219 */
220 static void
221 checkAlias(const char *itemName,
222 Resource res, const UChar *alias, int32_t length, UBool useResSuffix,
223 CheckDependency check, void *context, UErrorCode *pErrorCode) {
224 int32_t i;
225
226 if(!uprv_isInvariantUString(alias, length)) {
227 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
228 itemName, res);
229 *pErrorCode=U_INVALID_CHAR_FOUND;
230 return;
231 }
232
233 // extract the locale ID from alias strings like
234 // locale_ID/key1/key2/key3
235 // locale_ID
236
237 // search for the first slash
238 for(i=0; i<length && alias[i]!=SLASH; ++i) {}
239
240 if(res_getPublicType(res)==URES_ALIAS) {
241 // ignore aliases with an initial slash:
242 // /ICUDATA/... and /pkgname/... go to a different package
243 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
244 if(i==0) {
245 return; // initial slash ('/')
246 }
247
248 // ignore the intra-bundle path starting from the first slash ('/')
249 length=i;
250 } else /* URES_STRING */ {
251 // the whole string should only consist of a locale ID
252 if(i!=length) {
253 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
254 itemName, res);
255 *pErrorCode=U_UNSUPPORTED_ERROR;
256 return;
257 }
258 }
259
260 // convert the Unicode string to char *
261 char localeID[32];
262 if(length>=(int32_t)sizeof(localeID)) {
263 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
264 itemName, res, (long)length);
265 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
266 return;
267 }
268 u_UCharsToChars(alias, localeID, length);
269 localeID[length]=0;
270
271 checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
272 }
273
274 /*
275 * Enumerate one resource item and its children and extract dependencies from
276 * aliases.
277 */
278 static void
279 ures_enumDependencies(const char *itemName,
280 const ResourceData *pResData,
281 Resource res, const char *inKey, const char *parentKey, int32_t depth,
282 CheckDependency check, void *context,
283 Package *pkg,
284 UErrorCode *pErrorCode) {
285 switch(res_getPublicType(res)) {
286 case URES_STRING:
287 {
288 UBool useResSuffix = TRUE;
289 // Check for %%ALIAS
290 if(depth==1 && inKey!=NULL) {
291 if(0!=strcmp(inKey, "%%ALIAS")) {
292 break;
293 }
294 }
295 // Check for %%DEPENDENCY
296 else if(depth==2 && parentKey!=NULL) {
297 if(0!=strcmp(parentKey, "%%DEPENDENCY")) {
298 break;
299 }
300 useResSuffix = FALSE;
301 } else {
302 // we ignore all other strings
303 break;
304 }
305 int32_t length;
306 const UChar *alias=res_getString(pResData, res, &length);
307 checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode);
308 }
309 break;
310 case URES_ALIAS:
311 {
312 int32_t length;
313 const UChar *alias=res_getAlias(pResData, res, &length);
314 checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode);
315 }
316 break;
317 case URES_TABLE:
318 {
319 /* recurse */
320 int32_t count=res_countArrayItems(pResData, res);
321 for(int32_t i=0; i<count; ++i) {
322 const char *itemKey;
323 Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey);
324 ures_enumDependencies(
325 itemName, pResData,
326 item, itemKey,
327 inKey, depth+1,
328 check, context,
329 pkg,
330 pErrorCode);
331 if(U_FAILURE(*pErrorCode)) {
332 fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
333 itemName, res, i, itemKey, item);
334 break;
335 }
336 }
337 }
338 break;
339 case URES_ARRAY:
340 {
341 /* recurse */
342 int32_t count=res_countArrayItems(pResData, res);
343 for(int32_t i=0; i<count; ++i) {
344 Resource item=res_getArrayItem(pResData, res, i);
345 ures_enumDependencies(
346 itemName, pResData,
347 item, NULL,
348 inKey, depth+1,
349 check, context,
350 pkg,
351 pErrorCode);
352 if(U_FAILURE(*pErrorCode)) {
353 fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
354 itemName, res, i, item);
355 break;
356 }
357 }
358 }
359 break;
360 default:
361 break;
362 }
363 }
364
365 static void
366 ures_enumDependencies(const char *itemName, const UDataInfo *pInfo,
367 const uint8_t *inBytes, int32_t length,
368 CheckDependency check, void *context,
369 Package *pkg,
370 UErrorCode *pErrorCode) {
371 ResourceData resData;
372
373 res_read(&resData, pInfo, inBytes, length, pErrorCode);
374 if(U_FAILURE(*pErrorCode)) {
375 fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
376 pInfo->formatVersion[0], pInfo->formatVersion[1]);
377 exit(U_UNSUPPORTED_ERROR);
378 }
379
380 /*
381 * if the bundle attributes are present and the nofallback flag is not set,
382 * then add the parent bundle as a dependency
383 */
384 if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) {
385 if(!resData.noFallback) {
386 /* this bundle participates in locale fallback */
387 checkParent(itemName, check, context, pErrorCode);
388 }
389 }
390
391 icu::NativeItem nativePool;
392
393 if(resData.usesPoolBundle) {
394 char poolName[200];
395 makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode);
396 if(U_FAILURE(*pErrorCode)) {
397 return;
398 }
399 check(context, itemName, poolName);
400 int32_t index=pkg->findItem(poolName);
401 if(index<0) {
402 // We cannot work with a bundle if its pool resource is missing.
403 // check() already printed a complaint.
404 return;
405 }
406 // TODO: Cache the native version in the Item itself.
407 nativePool.setItem(pkg->getItem(index), ures_swap);
408 const UDataInfo *poolInfo=nativePool.getDataInfo();
409 if(poolInfo->formatVersion[0]<=1) {
410 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
411 return;
412 }
413 const int32_t *poolIndexes=(const int32_t *)nativePool.getBytes()+1;
414 int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff;
415 if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM &&
416 (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE))
417 ) {
418 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
419 return;
420 }
421 if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
422 resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength);
423 } else {
424 fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName);
425 return;
426 }
427 }
428
429 ures_enumDependencies(
430 itemName, &resData,
431 resData.rootRes, NULL, NULL, 0,
432 check, context,
433 pkg,
434 pErrorCode);
435 }
436
437 // get dependencies from conversion tables --------------------------------- ***
438
439 /* code adapted from ucnv_swap() */
440 static void
441 ucnv_enumDependencies(const UDataSwapper *ds,
442 const char *itemName, const UDataInfo *pInfo,
443 const uint8_t *inBytes, int32_t length,
444 CheckDependency check, void *context,
445 UErrorCode *pErrorCode) {
446 uint32_t staticDataSize;
447
448 const UConverterStaticData *inStaticData;
449
450 const _MBCSHeader *inMBCSHeader;
451 uint8_t outputType;
452
453 /* check format version */
454 if(!(
455 pInfo->formatVersion[0]==6 &&
456 pInfo->formatVersion[1]>=2
457 )) {
458 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
459 pInfo->formatVersion[0], pInfo->formatVersion[1]);
460 exit(U_UNSUPPORTED_ERROR);
461 }
462
463 /* read the initial UConverterStaticData structure after the UDataInfo header */
464 inStaticData=(const UConverterStaticData *)inBytes;
465
466 if( length<(int32_t)sizeof(UConverterStaticData) ||
467 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
468 ) {
469 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
470 length);
471 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
472 return;
473 }
474
475 inBytes+=staticDataSize;
476 length-=(int32_t)staticDataSize;
477
478 /* check for supported conversionType values */
479 if(inStaticData->conversionType==UCNV_MBCS) {
480 /* MBCS data */
481 uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
482 int32_t extOffset;
483
484 inMBCSHeader=(const _MBCSHeader *)inBytes;
485
486 if(length<(int32_t)sizeof(_MBCSHeader)) {
487 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
488 length);
489 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
490 return;
491 }
492 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
493 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
494 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
495 ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
496 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
497 ) {
498 mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
499 } else {
500 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
501 inMBCSHeader->version[0], inMBCSHeader->version[1]);
502 *pErrorCode=U_UNSUPPORTED_ERROR;
503 return;
504 }
505
506 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
507 extOffset=(int32_t)(mbcsHeaderFlags>>8);
508 outputType=(uint8_t)mbcsHeaderFlags;
509
510 if(outputType==MBCS_OUTPUT_EXT_ONLY) {
511 /*
512 * extension-only file,
513 * contains a base name instead of normal base table data
514 */
515 char baseName[32];
516 int32_t baseNameLength;
517
518 /* there is extension data after the base data, see ucnv_ext.h */
519 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
520 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
521 length);
522 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
523 return;
524 }
525
526 /* swap the base name, between the header and the extension data */
527 const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
528 baseNameLength=(int32_t)strlen(inBaseName);
529 if(baseNameLength>=(int32_t)sizeof(baseName)) {
530 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
531 itemName, baseNameLength);
532 *pErrorCode=U_UNSUPPORTED_ERROR;
533 return;
534 }
535 ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
536
537 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
538 }
539 }
540 }
541
542 // ICU data formats -------------------------------------------------------- ***
543
/*
 * Indexes into dataFormats[] below, one per data format that this file
 * recognizes. FMT_COUNT is the number of formats.
 */
enum {
    FMT_RES,
    FMT_CNV,
    FMT_ALIAS,
    FMT_COUNT
};

/*
 * The dataFormat signatures, as stored in UDataInfo.dataFormat, of the
 * recognized formats, in the order of the FMT_ constants.
 */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[FMT_COUNT]={
    { { 0x52, 0x65, 0x73, 0x42 } },     /* FMT_RES:   dataFormat="ResB" */
    { { 0x63, 0x6e, 0x76, 0x74 } },     /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c } }      /* FMT_ALIAS: dataFormat="CvAl" */
};
558
559 static int32_t
560 getDataFormat(const uint8_t dataFormat[4]) {
561 int32_t i;
562
563 for(i=0; i<FMT_COUNT; ++i) {
564 if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) {
565 return i;
566 }
567 }
568 return -1;
569 }
570
571 // enumerate dependencies of a package item -------------------------------- ***
572
573 void
574 Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
575 int32_t infoLength, itemHeaderLength;
576 UErrorCode errorCode=U_ZERO_ERROR;
577 const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
578 if(U_FAILURE(errorCode)) {
579 return; // should not occur because readFile() checks headers
580 }
581
582 // find the data format and call the corresponding function, if any
583 int32_t format=getDataFormat(pInfo->dataFormat);
584 if(format>=0) {
585 switch(format) {
586 case FMT_RES:
587 {
588 /*
589 * Swap the resource bundle (if necessary) so that we can use
590 * the normal runtime uresdata.c code to read it.
591 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
592 */
593 NativeItem nrb(pItem, ures_swap);
594 ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode);
595 break;
596 }
597 case FMT_CNV:
598 {
599 // TODO: share/cache swappers
600 UDataSwapper *ds=udata_openSwapper(
601 (UBool)pInfo->isBigEndian, pInfo->charsetFamily,
602 U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
603 &errorCode);
604 if(U_FAILURE(errorCode)) {
605 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
606 pItem->name, u_errorName(errorCode));
607 exit(errorCode);
608 }
609
610 ds->printError=printError;
611 ds->printErrorContext=stderr;
612
613 const uint8_t *inBytes=pItem->data+itemHeaderLength;
614 int32_t length=pItem->length-itemHeaderLength;
615
616 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
617 udata_closeSwapper(ds);
618 break;
619 }
620 default:
621 break;
622 }
623
624 if(U_FAILURE(errorCode)) {
625 exit(errorCode);
626 }
627 }
628 }
629
630 U_NAMESPACE_END