]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkgitems.cpp
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkgitems.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: pkgitems.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2005sep18
14 * created by: Markus W. Scherer
15 *
16 * Companion file to package.cpp. Deals with details of ICU data item formats.
17 * Used for item dependencies.
18 * Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003).
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/ures.h"
23 #include "unicode/putil.h"
24 #include "unicode/udata.h"
25 #include "cstring.h"
26 #include "ucmndata.h"
27 #include "udataswp.h"
28 #include "swapimpl.h"
29 #include "toolutil.h"
30 #include "package.h"
31 #include "pkg_imp.h"
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 /* item formats in common */
38
39 #include "uresdata.h"
40 #include "ucnv_bld.h"
41 #include "ucnv_io.h"
42
43 // general definitions ----------------------------------------------------- ***
44
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it acts as a single operand
 * wherever it is used (for example as the operand of sizeof or of a postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
46
47 U_CDECL_BEGIN
48
49 static void U_CALLCONV
50 printError(void *context, const char *fmt, va_list args) {
51 vfprintf((FILE *)context, fmt, args);
52 }
53
54 U_CDECL_END
55
/*
 * Signature of the callback that receives each dependency that is found:
 *   context     opaque pointer passed through unchanged from the enumeration call
 *   itemName    name of the item that has the dependency
 *   targetName  name of the item that itemName depends on
 */
typedef void CheckDependency(void *context,
                             const char *itemName,
                             const char *targetName);
57
58 // check a dependency ------------------------------------------------------ ***
59
60 /*
61 * assemble the target item name from the source item name, an ID
62 * and a suffix
63 */
64 static void
65 checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
66 CheckDependency check, void *context,
67 UErrorCode *pErrorCode) {
68 char target[200];
69 const char *itemID;
70 int32_t treeLength, suffixLength, targetLength;
71
72 // get the item basename
73 itemID=strrchr(itemName, '/');
74 if(itemID!=NULL) {
75 ++itemID;
76 } else {
77 itemID=itemName;
78 }
79
80 // build the target string
81 treeLength=(int32_t)(itemID-itemName);
82 if(idLength<0) {
83 idLength=(int32_t)strlen(id);
84 }
85 suffixLength=(int32_t)strlen(suffix);
86 targetLength=treeLength+idLength+suffixLength;
87 if(targetLength>=(int32_t)sizeof(target)) {
88 fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n",
89 itemName, (long)targetLength);
90 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
91 return;
92 }
93
94 memcpy(target, itemName, treeLength);
95 memcpy(target+treeLength, id, idLength);
96 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
97
98 check(context, itemName, target);
99 }
100
101 /* assemble the target item name from the item's parent item name */
102 static void
103 checkParent(const char *itemName, CheckDependency check, void *context,
104 UErrorCode *pErrorCode) {
105 const char *itemID, *parent, *parentLimit, *suffix;
106 int32_t parentLength;
107
108 // get the item basename
109 itemID=strrchr(itemName, '/');
110 if(itemID!=NULL) {
111 ++itemID;
112 } else {
113 itemID=itemName;
114 }
115
116 // get the item suffix
117 suffix=strrchr(itemID, '.');
118 if(suffix==NULL) {
119 // empty suffix, point to the end of the string
120 suffix=strrchr(itemID, 0);
121 }
122
123 // get the position of the last '_'
124 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
125
126 if(parentLimit!=itemID) {
127 // get the parent item name by truncating the last part of this item's name */
128 parent=itemID;
129 parentLength=(int32_t)(parentLimit-itemID);
130 } else {
131 // no '_' in the item name: the parent is the root bundle
132 parent="root";
133 parentLength=4;
134 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
135 // the item itself is "root", which does not depend on a parent
136 return;
137 }
138 }
139 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
140 }
141
142 // get dependencies from resource bundles ---------------------------------- ***
143
/*
 * Key of the top-level string resource whose value names the bundle that
 * this bundle is an alias for, and the length of that key (without the NUL).
 */
enum { gAliasKeyLength=7 };
static const char *const gAliasKey="%%ALIAS";
146
147 /*
148 * Enumerate one resource item and its children and extract dependencies from
149 * aliases.
150 * Code adapted from ures_preflightResource() and ures_swapResource().
151 */
152 static void
153 ures_enumDependencies(const UDataSwapper *ds,
154 const char *itemName,
155 const Resource *inBundle, int32_t length,
156 Resource res, const char *inKey, int32_t depth,
157 CheckDependency check, void *context,
158 UErrorCode *pErrorCode) {
159 const Resource *p;
160 int32_t offset;
161
162 if(res==0 || RES_GET_TYPE(res)==URES_INT) {
163 /* empty string or integer, nothing to do */
164 return;
165 }
166
167 /* all other types use an offset to point to their data */
168 offset=(int32_t)RES_GET_OFFSET(res);
169 if(0<=length && length<=offset) {
170 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n",
171 itemName, res, length);
172 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
173 return;
174 }
175 p=inBundle+offset;
176
177 switch(RES_GET_TYPE(res)) {
178 /* strings and aliases have physically the same value layout */
179 case URES_STRING:
180 // we ignore all strings except top-level strings with a %%ALIAS key
181 if(depth!=1) {
182 break;
183 } else {
184 char key[8];
185 int32_t keyLength;
186
187 keyLength=(int32_t)strlen(inKey);
188 if(keyLength!=gAliasKeyLength) {
189 break;
190 }
191 ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode);
192 if(U_FAILURE(*pErrorCode)) {
193 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
194 itemName, res);
195 return;
196 }
197 if(0!=strcmp(key, gAliasKey)) {
198 break;
199 }
200 }
201 // for the top-level %%ALIAS string fall through to URES_ALIAS
202 case URES_ALIAS:
203 {
204 char localeID[32];
205 const uint16_t *p16;
206 int32_t i, stringLength;
207 uint16_t u16, ored16;
208
209 stringLength=udata_readInt32(ds, (int32_t)*p);
210
211 /* top=offset+1+(string length +1)/2 rounded up */
212 offset+=1+((stringLength+1)+1)/2;
213 if(offset>length) {
214 break; // the resource does not fit into the bundle, print error below
215 }
216
217 // extract the locale ID from alias strings like
218 // locale_ID/key1/key2/key3
219 // locale_ID
220 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
221 u16=0x2f; // slash in local endianness
222 } else {
223 u16=0x2f00; // slash in opposite endianness
224 }
225 p16=(const uint16_t *)(p+1); // Unicode string contents
226
227 // search for the first slash
228 for(i=0; i<stringLength && p16[i]!=u16; ++i) {}
229
230 if(RES_GET_TYPE(res)==URES_ALIAS) {
231 // ignore aliases with an initial slash:
232 // /ICUDATA/... and /pkgname/... go to a different package
233 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
234 if(i==0) {
235 break; // initial slash ('/')
236 }
237
238 // ignore the intra-bundle path starting from the first slash ('/')
239 stringLength=i;
240 } else /* URES_STRING */ {
241 // the whole string should only consist of a locale ID
242 if(i!=stringLength) {
243 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
244 itemName, res);
245 *pErrorCode=U_UNSUPPORTED_ERROR;
246 return;
247 }
248 }
249
250 // convert the Unicode string to char * and
251 // check that it has a bundle path but no package
252 if(stringLength>=(int32_t)sizeof(localeID)) {
253 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
254 itemName, res, stringLength);
255 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
256 return;
257 }
258
259 // convert the alias Unicode string to US-ASCII
260 ored16=0;
261 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
262 for(i=0; i<stringLength; ++i) {
263 u16=p16[i];
264 ored16|=u16;
265 localeID[i]=(char)u16;
266 }
267 } else {
268 for(i=0; i<stringLength; ++i) {
269 u16=p16[i];
270 ored16|=u16;
271 localeID[i]=(char)(u16>>8);
272 }
273 ored16=(uint16_t)((ored16<<8)|(ored16>>8));
274 }
275 localeID[stringLength]=0;
276 if(ored16>0x7f) {
277 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n",
278 itemName, res);
279 *pErrorCode=U_INVALID_CHAR_FOUND;
280 return;
281 }
282
283 if(U_CHARSET_FAMILY==U_EBCDIC_FAMILY) {
284 // swap to EBCDIC
285 // our swapper is probably not the right one, but
286 // the function uses it only for printing errors
287 uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode);
288 if(U_FAILURE(*pErrorCode)) {
289 return;
290 }
291 }
292 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY
293 # error Unknown U_CHARSET_FAMILY value!
294 #endif
295
296 checkIDSuffix(itemName, localeID, -1, ".res", check, context, pErrorCode);
297 }
298 break;
299 case URES_TABLE:
300 case URES_TABLE32:
301 {
302 const uint16_t *pKey16;
303 const int32_t *pKey32;
304
305 Resource item;
306 int32_t i, count;
307
308 if(RES_GET_TYPE(res)==URES_TABLE) {
309 /* get table item count */
310 pKey16=(const uint16_t *)p;
311 count=ds->readUInt16(*pKey16++);
312
313 pKey32=NULL;
314
315 /* top=((1+ table item count)/2 rounded up)+(table item count) */
316 offset+=((1+count)+1)/2;
317 } else {
318 /* get table item count */
319 pKey32=(const int32_t *)p;
320 count=udata_readInt32(ds, *pKey32++);
321
322 pKey16=NULL;
323
324 /* top=(1+ table item count)+(table item count) */
325 offset+=1+count;
326 }
327
328 p=inBundle+offset; /* pointer to table resources */
329 offset+=count;
330
331 if(offset>length) {
332 break; // the resource does not fit into the bundle, print error below
333 }
334
335 /* recurse */
336 for(i=0; i<count; ++i) {
337 item=ds->readUInt32(*p++);
338 ures_enumDependencies(
339 ds, itemName, inBundle, length, item,
340 ((const char *)inBundle)+
341 (pKey16!=NULL ?
342 ds->readUInt16(pKey16[i]) :
343 udata_readInt32(ds, pKey32[i])),
344 depth+1,
345 check, context,
346 pErrorCode);
347 if(U_FAILURE(*pErrorCode)) {
348 udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n",
349 itemName, res, i, item);
350 break;
351 }
352 }
353 }
354 break;
355 case URES_ARRAY:
356 {
357 Resource item;
358 int32_t i, count;
359
360 /* top=offset+1+(array length) */
361 count=udata_readInt32(ds, (int32_t)*p++);
362 offset+=1+count;
363
364 if(offset>length) {
365 break; // the resource does not fit into the bundle, print error below
366 }
367
368 /* recurse */
369 for(i=0; i<count; ++i) {
370 item=ds->readUInt32(*p++);
371 ures_enumDependencies(
372 ds, itemName, inBundle, length,
373 item, NULL, depth+1,
374 check, context,
375 pErrorCode);
376 if(U_FAILURE(*pErrorCode)) {
377 udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
378 itemName, res, i, item);
379 break;
380 }
381 }
382 }
383 break;
384 default:
385 break;
386 }
387
388 if(U_FAILURE(*pErrorCode)) {
389 /* nothing to do */
390 } else if(0<=length && length<offset) {
391 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n",
392 itemName, res, length);
393 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
394 }
395 }
396
397 /* code adapted from ures_swap() */
398 static void
399 ures_enumDependencies(const UDataSwapper *ds,
400 const char *itemName, const UDataInfo *pInfo,
401 const uint8_t *inBytes, int32_t length,
402 CheckDependency check, void *context,
403 UErrorCode *pErrorCode) {
404 const Resource *inBundle;
405 Resource rootRes;
406
407 /* the following integers count Resource item offsets (4 bytes each), not bytes */
408 int32_t bundleLength;
409
410 /* check format version */
411 if(pInfo->formatVersion[0]!=1) {
412 fprintf(stderr, "icupkg: .res format version %02x not supported\n",
413 pInfo->formatVersion[0]);
414 exit(U_UNSUPPORTED_ERROR);
415 }
416
417 /* a resource bundle must contain at least one resource item */
418 bundleLength=length/4;
419
420 /* formatVersion 1.1 must have a root item and at least 5 indexes */
421 if( bundleLength<
422 (pInfo->formatVersion[1]==0 ? 1 : 1+5)
423 ) {
424 fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n",
425 length);
426 exit(U_INDEX_OUTOFBOUNDS_ERROR);
427 }
428
429 inBundle=(const Resource *)inBytes;
430 rootRes=ds->readUInt32(*inBundle);
431
432 ures_enumDependencies(
433 ds, itemName, inBundle, bundleLength,
434 rootRes, NULL, 0,
435 check, context,
436 pErrorCode);
437
438 /*
439 * if the bundle attributes are present and the nofallback flag is not set,
440 * then add the parent bundle as a dependency
441 */
442 if(pInfo->formatVersion[1]>=1) {
443 int32_t indexes[URES_INDEX_TOP];
444 const int32_t *inIndexes;
445
446 inIndexes=(const int32_t *)inBundle+1;
447 indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]);
448 if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) {
449 indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]);
450 if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) {
451 /* this bundle participates in locale fallback */
452 checkParent(itemName, check, context, pErrorCode);
453 }
454 }
455 }
456 }
457
458 // get dependencies from conversion tables --------------------------------- ***
459
460 /* code adapted from ucnv_swap() */
461 static void
462 ucnv_enumDependencies(const UDataSwapper *ds,
463 const char *itemName, const UDataInfo *pInfo,
464 const uint8_t *inBytes, int32_t length,
465 CheckDependency check, void *context,
466 UErrorCode *pErrorCode) {
467 uint32_t staticDataSize;
468
469 const UConverterStaticData *inStaticData;
470
471 const _MBCSHeader *inMBCSHeader;
472 uint8_t outputType;
473
474 /* check format version */
475 if(!(
476 pInfo->formatVersion[0]==6 &&
477 pInfo->formatVersion[1]>=2
478 )) {
479 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
480 pInfo->formatVersion[0], pInfo->formatVersion[1]);
481 exit(U_UNSUPPORTED_ERROR);
482 }
483
484 /* read the initial UConverterStaticData structure after the UDataInfo header */
485 inStaticData=(const UConverterStaticData *)inBytes;
486
487 if( length<(int32_t)sizeof(UConverterStaticData) ||
488 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
489 ) {
490 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
491 length);
492 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
493 return;
494 }
495
496 inBytes+=staticDataSize;
497 length-=(int32_t)staticDataSize;
498
499 /* check for supported conversionType values */
500 if(inStaticData->conversionType==UCNV_MBCS) {
501 /* MBCS data */
502 uint32_t mbcsHeaderFlags;
503 int32_t extOffset;
504
505 inMBCSHeader=(const _MBCSHeader *)inBytes;
506
507 if(length<(int32_t)sizeof(_MBCSHeader)) {
508 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
509 length);
510 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
511 return;
512 }
513 if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) {
514 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
515 inMBCSHeader->version[0], inMBCSHeader->version[1]);
516 *pErrorCode=U_UNSUPPORTED_ERROR;
517 return;
518 }
519
520 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
521 extOffset=(int32_t)(mbcsHeaderFlags>>8);
522 outputType=(uint8_t)mbcsHeaderFlags;
523
524 if(outputType==MBCS_OUTPUT_EXT_ONLY) {
525 /*
526 * extension-only file,
527 * contains a base name instead of normal base table data
528 */
529 char baseName[32];
530 int32_t baseNameLength;
531
532 /* there is extension data after the base data, see ucnv_ext.h */
533 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
534 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
535 length);
536 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
537 return;
538 }
539
540 /* swap the base name, between the header and the extension data */
541 baseNameLength=(int32_t)strlen((const char *)(inMBCSHeader+1));
542 if(baseNameLength>=(int32_t)sizeof(baseName)) {
543 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
544 itemName, baseNameLength);
545 *pErrorCode=U_UNSUPPORTED_ERROR;
546 return;
547 }
548 ds->swapInvChars(ds, inMBCSHeader+1, baseNameLength+1, baseName, pErrorCode);
549
550 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
551 }
552 }
553 }
554
555 // ICU data formats -------------------------------------------------------- ***
556
/*
 * dataFormat signatures of the item types that are recognized here.
 * The entries are in the same order as the FMT_ constants below, so that
 * an index into this table is also the FMT_ value.
 */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[]={
    { { 0x52, 0x65, 0x73, 0x42 } },     /* FMT_RES:   dataFormat="ResB" */
    { { 0x63, 0x6e, 0x76, 0x74 } },     /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c } }      /* FMT_ALIAS: dataFormat="CvAl" */
};

enum {
    FMT_RES,
    FMT_CNV,
    FMT_ALIAS,
    FMT_COUNT
};

/*
 * Look up a 4-byte dataFormat signature.
 * Returns the matching FMT_ constant, or -1 if the signature is not in the table.
 */
static int32_t
getDataFormat(const uint8_t dataFormat[4]) {
    int32_t format=0;

    while(format<FMT_COUNT) {
        if(memcmp(dataFormat, dataFormats[format].dataFormat,
                  sizeof(dataFormats[format].dataFormat))==0) {
            return format;
        }
        ++format;
    }
    return -1;
}
583
584 // enumerate dependencies of a package item -------------------------------- ***
585
586 U_NAMESPACE_BEGIN
587
588 void
589 Package::enumDependencies(Item *pItem) {
590 const UDataInfo *pInfo;
591 const uint8_t *inBytes;
592 int32_t format, length, infoLength, itemHeaderLength;
593 UErrorCode errorCode;
594
595 errorCode=U_ZERO_ERROR;
596 pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode);
597 if(U_FAILURE(errorCode)) {
598 return; // should not occur because readFile() checks headers
599 }
600
601 // find the data format and call the corresponding function, if any
602 format=getDataFormat(pInfo->dataFormat);
603 if(format>=0) {
604 UDataSwapper *ds;
605
606 // TODO: share/cache swappers
607 ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
608 if(U_FAILURE(errorCode)) {
609 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
610 pItem->name, u_errorName(errorCode));
611 exit(errorCode);
612 }
613
614 ds->printError=printError;
615 ds->printErrorContext=stderr;
616
617 inBytes=pItem->data+itemHeaderLength;
618 length=pItem->length-itemHeaderLength;
619
620 switch(format) {
621 case FMT_RES:
622 ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, checkDependency, this, &errorCode);
623 break;
624 case FMT_CNV:
625 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, checkDependency, this, &errorCode);
626 break;
627 default:
628 break;
629 }
630
631 udata_closeSwapper(ds);
632
633 if(U_FAILURE(errorCode)) {
634 exit(errorCode);
635 }
636 }
637 }
638 U_NAMESPACE_END