]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkgitems.cpp
7b86c55fa423bf656b6363bb3416f57b4944caaf
[apple/icu.git] / icuSources / tools / toolutil / pkgitems.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: pkgitems.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005sep18
16 * created by: Markus W. Scherer
17 *
18 * Companion file to package.cpp. Deals with details of ICU data item formats.
19 * Used for item dependencies.
20 * Contains adapted code from ucnv_bld.c (swapper code from 2003).
21 */
22
23 #include "unicode/utypes.h"
24 #include "unicode/ures.h"
25 #include "unicode/putil.h"
26 #include "unicode/udata.h"
27 #include "cstring.h"
28 #include "uinvchar.h"
29 #include "ucmndata.h"
30 #include "udataswp.h"
31 #include "swapimpl.h"
32 #include "toolutil.h"
33 #include "package.h"
34 #include "pkg_imp.h"
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 /* item formats in common */
41
42 #include "uresdata.h"
43 #include "ucnv_bld.h"
44 #include "ucnv_io.h"
45
46 // general definitions ----------------------------------------------------- ***
47
48 U_CDECL_BEGIN
49
50 static void U_CALLCONV
51 printError(void *context, const char *fmt, va_list args) {
52 vfprintf((FILE *)context, fmt, args);
53 }
54
55 U_CDECL_END
56
57 // a data item in native-platform form ------------------------------------- ***
58
59 U_NAMESPACE_BEGIN
60
61 class NativeItem {
62 public:
63 NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {}
64 NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) {
65 setItem(item, swap);
66 }
67 ~NativeItem() {
68 delete [] swapped;
69 }
70 const UDataInfo *getDataInfo() const {
71 return pInfo;
72 }
73 const uint8_t *getBytes() const {
74 return bytes;
75 }
76 int32_t getLength() const {
77 return length;
78 }
79
80 void setItem(const Item *item, UDataSwapFn *swap) {
81 pItem=item;
82 int32_t infoLength, itemHeaderLength;
83 UErrorCode errorCode=U_ZERO_ERROR;
84 pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
85 if(U_FAILURE(errorCode)) {
86 exit(errorCode); // should succeed because readFile() checks headers
87 }
88 length=pItem->length-itemHeaderLength;
89
90 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) {
91 bytes=pItem->data+itemHeaderLength;
92 } else {
93 UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
94 if(U_FAILURE(errorCode)) {
95 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
96 pItem->name, u_errorName(errorCode));
97 exit(errorCode);
98 }
99
100 ds->printError=printError;
101 ds->printErrorContext=stderr;
102
103 swapped=new uint8_t[pItem->length];
104 if(swapped==NULL) {
105 fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name);
106 exit(U_MEMORY_ALLOCATION_ERROR);
107 }
108 swap(ds, pItem->data, pItem->length, swapped, &errorCode);
109 pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode);
110 bytes=swapped+itemHeaderLength;
111 udata_closeSwapper(ds);
112 }
113 }
114
115 private:
116 const Item *pItem;
117 const UDataInfo *pInfo;
118 const uint8_t *bytes;
119 uint8_t *swapped;
120 int32_t length;
121 };
122
123 // check a dependency ------------------------------------------------------ ***
124
125 /*
126 * assemble the target item name from the source item name, an ID
127 * and a suffix
128 */
129 static void
130 makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix,
131 char *target, int32_t capacity,
132 UErrorCode *pErrorCode) {
133 const char *itemID;
134 int32_t treeLength, suffixLength, targetLength;
135
136 // get the item basename
137 itemID=strrchr(itemName, '/');
138 if(itemID!=NULL) {
139 ++itemID;
140 } else {
141 itemID=itemName;
142 }
143
144 // build the target string
145 treeLength=(int32_t)(itemID-itemName);
146 if(idLength<0) {
147 idLength=(int32_t)strlen(id);
148 }
149 suffixLength=(int32_t)strlen(suffix);
150 targetLength=treeLength+idLength+suffixLength;
151 if(targetLength>=capacity) {
152 fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
153 itemName, (long)targetLength);
154 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
155 return;
156 }
157
158 memcpy(target, itemName, treeLength);
159 memcpy(target+treeLength, id, idLength);
160 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
161 }
162
163 static void
164 checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
165 CheckDependency check, void *context,
166 UErrorCode *pErrorCode) {
167 char target[200];
168 makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode);
169 if(U_SUCCESS(*pErrorCode)) {
170 check(context, itemName, target);
171 }
172 }
173
174 /* assemble the target item name from the item's parent item name */
175 static void
176 checkParent(const char *itemName, CheckDependency check, void *context,
177 UErrorCode *pErrorCode) {
178 const char *itemID, *parent, *parentLimit, *suffix;
179 int32_t parentLength;
180
181 // get the item basename
182 itemID=strrchr(itemName, '/');
183 if(itemID!=NULL) {
184 ++itemID;
185 } else {
186 itemID=itemName;
187 }
188
189 // get the item suffix
190 suffix=strrchr(itemID, '.');
191 if(suffix==NULL) {
192 // empty suffix, point to the end of the string
193 suffix=strrchr(itemID, 0);
194 }
195
196 // get the position of the last '_'
197 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
198
199 if(parentLimit!=itemID) {
200 // get the parent item name by truncating the last part of this item's name */
201 parent=itemID;
202 parentLength=(int32_t)(parentLimit-itemID);
203 } else {
204 // no '_' in the item name: the parent is the root bundle
205 parent="root";
206 parentLength=4;
207 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
208 // the item itself is "root", which does not depend on a parent
209 return;
210 }
211 }
212 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
213 }
214
215 // get dependencies from resource bundles ---------------------------------- ***
216
217 static const UChar SLASH=0x2f;
218
219 /*
220 * Check for the alias from the string or alias resource res.
221 */
222 static void
223 checkAlias(const char *itemName,
224 Resource res, const UChar *alias, int32_t length, UBool useResSuffix,
225 CheckDependency check, void *context, UErrorCode *pErrorCode) {
226 int32_t i;
227
228 if(!uprv_isInvariantUString(alias, length)) {
229 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
230 itemName, res);
231 *pErrorCode=U_INVALID_CHAR_FOUND;
232 return;
233 }
234
235 // extract the locale ID from alias strings like
236 // locale_ID/key1/key2/key3
237 // locale_ID
238
239 // search for the first slash
240 for(i=0; i<length && alias[i]!=SLASH; ++i) {}
241
242 if(res_getPublicType(res)==URES_ALIAS) {
243 // ignore aliases with an initial slash:
244 // /ICUDATA/... and /pkgname/... go to a different package
245 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
246 if(i==0) {
247 return; // initial slash ('/')
248 }
249
250 // ignore the intra-bundle path starting from the first slash ('/')
251 length=i;
252 } else /* URES_STRING */ {
253 // the whole string should only consist of a locale ID
254 if(i!=length) {
255 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
256 itemName, res);
257 *pErrorCode=U_UNSUPPORTED_ERROR;
258 return;
259 }
260 }
261
262 // convert the Unicode string to char *
263 char localeID[32];
264 if(length>=(int32_t)sizeof(localeID)) {
265 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
266 itemName, res, (long)length);
267 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
268 return;
269 }
270 u_UCharsToChars(alias, localeID, length);
271 localeID[length]=0;
272
273 checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
274 }
275
276 /*
277 * Enumerate one resource item and its children and extract dependencies from
278 * aliases.
279 */
280 static void
281 ures_enumDependencies(const char *itemName,
282 const ResourceData *pResData,
283 Resource res, const char *inKey, const char *parentKey, int32_t depth,
284 CheckDependency check, void *context,
285 Package *pkg,
286 UErrorCode *pErrorCode) {
287 switch(res_getPublicType(res)) {
288 case URES_STRING:
289 {
290 UBool useResSuffix = TRUE;
291 // Check for %%ALIAS
292 if(depth==1 && inKey!=NULL) {
293 if(0!=strcmp(inKey, "%%ALIAS")) {
294 break;
295 }
296 }
297 // Check for %%DEPENDENCY
298 else if(depth==2 && parentKey!=NULL) {
299 if(0!=strcmp(parentKey, "%%DEPENDENCY")) {
300 break;
301 }
302 useResSuffix = FALSE;
303 } else {
304 // we ignore all other strings
305 break;
306 }
307 int32_t length;
308 // No tracing: build tool
309 const UChar *alias=res_getStringNoTrace(pResData, res, &length);
310 checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode);
311 }
312 break;
313 case URES_ALIAS:
314 {
315 int32_t length;
316 const UChar *alias=res_getAlias(pResData, res, &length);
317 checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode);
318 }
319 break;
320 case URES_TABLE:
321 {
322 /* recurse */
323 int32_t count=res_countArrayItems(pResData, res);
324 for(int32_t i=0; i<count; ++i) {
325 const char *itemKey;
326 Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey);
327 ures_enumDependencies(
328 itemName, pResData,
329 item, itemKey,
330 inKey, depth+1,
331 check, context,
332 pkg,
333 pErrorCode);
334 if(U_FAILURE(*pErrorCode)) {
335 fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
336 itemName, res, i, itemKey, item);
337 break;
338 }
339 }
340 }
341 break;
342 case URES_ARRAY:
343 {
344 /* recurse */
345 int32_t count=res_countArrayItems(pResData, res);
346 for(int32_t i=0; i<count; ++i) {
347 Resource item=res_getArrayItem(pResData, res, i);
348 ures_enumDependencies(
349 itemName, pResData,
350 item, NULL,
351 inKey, depth+1,
352 check, context,
353 pkg,
354 pErrorCode);
355 if(U_FAILURE(*pErrorCode)) {
356 fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
357 itemName, res, i, item);
358 break;
359 }
360 }
361 }
362 break;
363 default:
364 break;
365 }
366 }
367
368 static void
369 ures_enumDependencies(const char *itemName, const UDataInfo *pInfo,
370 const uint8_t *inBytes, int32_t length,
371 CheckDependency check, void *context,
372 Package *pkg,
373 UErrorCode *pErrorCode) {
374 ResourceData resData;
375
376 res_read(&resData, pInfo, inBytes, length, pErrorCode);
377 if(U_FAILURE(*pErrorCode)) {
378 fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
379 pInfo->formatVersion[0], pInfo->formatVersion[1]);
380 exit(U_UNSUPPORTED_ERROR);
381 }
382
383 /*
384 * if the bundle attributes are present and the nofallback flag is not set,
385 * then add the parent bundle as a dependency
386 */
387 if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) {
388 if(!resData.noFallback) {
389 /* this bundle participates in locale fallback */
390 checkParent(itemName, check, context, pErrorCode);
391 }
392 }
393
394 icu::NativeItem nativePool;
395
396 if(resData.usesPoolBundle) {
397 char poolName[200];
398 makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode);
399 if(U_FAILURE(*pErrorCode)) {
400 return;
401 }
402 check(context, itemName, poolName);
403 int32_t index=pkg->findItem(poolName);
404 if(index<0) {
405 // We cannot work with a bundle if its pool resource is missing.
406 // check() already printed a complaint.
407 return;
408 }
409 // TODO: Cache the native version in the Item itself.
410 nativePool.setItem(pkg->getItem(index), ures_swap);
411 const UDataInfo *poolInfo=nativePool.getDataInfo();
412 if(poolInfo->formatVersion[0]<=1) {
413 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
414 return;
415 }
416 const int32_t *poolRoot=(const int32_t *)nativePool.getBytes();
417 const int32_t *poolIndexes=poolRoot+1;
418 int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff;
419 if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM &&
420 (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE))
421 ) {
422 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
423 return;
424 }
425 if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
426 resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength);
427 resData.poolBundleStrings=(const uint16_t *)(poolRoot+poolIndexes[URES_INDEX_KEYS_TOP]);
428 } else {
429 fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName);
430 return;
431 }
432 }
433
434 ures_enumDependencies(
435 itemName, &resData,
436 resData.rootRes, NULL, NULL, 0,
437 check, context,
438 pkg,
439 pErrorCode);
440 }
441
442 // get dependencies from conversion tables --------------------------------- ***
443
444 /* code adapted from ucnv_swap() */
445 static void
446 ucnv_enumDependencies(const UDataSwapper *ds,
447 const char *itemName, const UDataInfo *pInfo,
448 const uint8_t *inBytes, int32_t length,
449 CheckDependency check, void *context,
450 UErrorCode *pErrorCode) {
451 uint32_t staticDataSize;
452
453 const UConverterStaticData *inStaticData;
454
455 const _MBCSHeader *inMBCSHeader;
456 uint8_t outputType;
457
458 /* check format version */
459 if(!(
460 pInfo->formatVersion[0]==6 &&
461 pInfo->formatVersion[1]>=2
462 )) {
463 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
464 pInfo->formatVersion[0], pInfo->formatVersion[1]);
465 exit(U_UNSUPPORTED_ERROR);
466 }
467
468 /* read the initial UConverterStaticData structure after the UDataInfo header */
469 inStaticData=(const UConverterStaticData *)inBytes;
470
471 if( length<(int32_t)sizeof(UConverterStaticData) ||
472 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
473 ) {
474 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
475 length);
476 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
477 return;
478 }
479
480 inBytes+=staticDataSize;
481 length-=(int32_t)staticDataSize;
482
483 /* check for supported conversionType values */
484 if(inStaticData->conversionType==UCNV_MBCS) {
485 /* MBCS data */
486 uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
487 int32_t extOffset;
488
489 inMBCSHeader=(const _MBCSHeader *)inBytes;
490
491 if(length<(int32_t)sizeof(_MBCSHeader)) {
492 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
493 length);
494 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
495 return;
496 }
497 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
498 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
499 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
500 ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
501 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
502 ) {
503 mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
504 } else {
505 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
506 inMBCSHeader->version[0], inMBCSHeader->version[1]);
507 *pErrorCode=U_UNSUPPORTED_ERROR;
508 return;
509 }
510
511 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
512 extOffset=(int32_t)(mbcsHeaderFlags>>8);
513 outputType=(uint8_t)mbcsHeaderFlags;
514
515 if(outputType==MBCS_OUTPUT_EXT_ONLY) {
516 /*
517 * extension-only file,
518 * contains a base name instead of normal base table data
519 */
520 char baseName[32];
521 int32_t baseNameLength;
522
523 /* there is extension data after the base data, see ucnv_ext.h */
524 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
525 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
526 length);
527 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
528 return;
529 }
530
531 /* swap the base name, between the header and the extension data */
532 const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
533 baseNameLength=(int32_t)strlen(inBaseName);
534 if(baseNameLength>=(int32_t)sizeof(baseName)) {
535 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
536 itemName, baseNameLength);
537 *pErrorCode=U_UNSUPPORTED_ERROR;
538 return;
539 }
540 ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
541
542 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
543 }
544 }
545 }
546
547 // ICU data formats -------------------------------------------------------- ***
548
/*
 * Data format signatures of the item types handled in this file.
 * The table entries must stay in the same order as the FMT_ constants below,
 * which serve as indexes into the table.
 * The signatures are spelled as numeric byte values, not character literals.
 */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[]={
    /* FMT_RES:   dataFormat="ResB" */
    { { 0x52, 0x65, 0x73, 0x42 } },
    /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x63, 0x6e, 0x76, 0x74 } },
    /* FMT_ALIAS: dataFormat="CvAl" */
    { { 0x43, 0x76, 0x41, 0x6c } }
};

enum {
    FMT_RES,    /* index of "ResB" in dataFormats[] */
    FMT_CNV,    /* index of "cnvt" in dataFormats[] */
    FMT_ALIAS,  /* index of "CvAl" in dataFormats[] */
    FMT_COUNT   /* number of entries in dataFormats[] */
};
563
564 static int32_t
565 getDataFormat(const uint8_t dataFormat[4]) {
566 int32_t i;
567
568 for(i=0; i<FMT_COUNT; ++i) {
569 if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) {
570 return i;
571 }
572 }
573 return -1;
574 }
575
576 // enumerate dependencies of a package item -------------------------------- ***
577
578 void
579 Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
580 int32_t infoLength, itemHeaderLength;
581 UErrorCode errorCode=U_ZERO_ERROR;
582 const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
583 if(U_FAILURE(errorCode)) {
584 return; // should not occur because readFile() checks headers
585 }
586
587 // find the data format and call the corresponding function, if any
588 int32_t format=getDataFormat(pInfo->dataFormat);
589 if(format>=0) {
590 switch(format) {
591 case FMT_RES:
592 {
593 /*
594 * Swap the resource bundle (if necessary) so that we can use
595 * the normal runtime uresdata.c code to read it.
596 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
597 */
598 NativeItem nrb(pItem, ures_swap);
599 ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode);
600 break;
601 }
602 case FMT_CNV:
603 {
604 // TODO: share/cache swappers
605 UDataSwapper *ds=udata_openSwapper(
606 (UBool)pInfo->isBigEndian, pInfo->charsetFamily,
607 U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
608 &errorCode);
609 if(U_FAILURE(errorCode)) {
610 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
611 pItem->name, u_errorName(errorCode));
612 exit(errorCode);
613 }
614
615 ds->printError=printError;
616 ds->printErrorContext=stderr;
617
618 const uint8_t *inBytes=pItem->data+itemHeaderLength;
619 int32_t length=pItem->length-itemHeaderLength;
620
621 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
622 udata_closeSwapper(ds);
623 break;
624 }
625 default:
626 break;
627 }
628
629 if(U_FAILURE(errorCode)) {
630 exit(errorCode);
631 }
632 }
633 }
634
635 U_NAMESPACE_END