2 *******************************************************************************
4 * Copyright (C) 2003-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: icuswap.cpp
10 * tab size: 8 (not used)
13 * created on: 2003aug08
14 * created by: Markus W. Scherer
16 * This tool takes an ICU data file and "swaps" it, that is, changes its
17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
19 * The modified data file is written to a new file.
20 * Useful as an install-time tool for shipping only one flavor of ICU data
21 * and preparing data files for the target platform.
22 * Will not work with data DLLs (shared libraries).
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
45 #define DEFAULT_PADDING_LENGTH 15
47 static UOption options
[]={
49 UOPTION_HELP_QUESTION_MARK
,
50 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG
)
55 OPT_HELP_QUESTION_MARK
,
63 fseek(f
, 0, SEEK_END
);
64 size
=(int32_t)ftell(f
);
65 fseek(f
, 0, SEEK_SET
);
70 * Swap an ICU .dat package, including swapping of enclosed items.
72 U_CFUNC
int32_t U_CALLCONV
73 udata_swapPackage(const char *inFilename
, const char *outFilename
,
74 const UDataSwapper
*ds
,
75 const void *inData
, int32_t length
, void *outData
,
76 UErrorCode
*pErrorCode
);
79 static void U_CALLCONV
80 printError(void *context
, const char *fmt
, va_list args
) {
81 vfprintf((FILE *)context
, fmt
, args
);
86 printUsage(const char *pname
, UBool ishelp
) {
88 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
89 ishelp
? 'U' : 'u', pname
);
92 "\nOptions: -h, -?, --help print this message and exit\n"
93 " Read the input file, swap its platform properties according\n"
94 " to the -t or --type option, and write the result to the output file.\n"
95 " -tl change to little-endian/ASCII charset family\n"
96 " -tb change to big-endian/ASCII charset family\n"
97 " -te change to big-endian/EBCDIC charset family\n");
104 main(int argc
, char *argv
[]) {
113 const UDataInfo
*pInfo
;
114 UErrorCode errorCode
;
116 UBool outIsBigEndian
;
118 U_MAIN_INIT_ARGS(argc
, argv
);
120 fprintf(stderr
, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
122 /* get the program basename */
123 pname
=strrchr(argv
[0], U_FILE_SEP_CHAR
);
125 pname
=strrchr(argv
[0], '/');
133 argc
=u_parseArgs(argc
, argv
, LENGTHOF(options
), options
);
134 ishelp
=options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
;
135 if(ishelp
|| argc
!=3) {
136 return printUsage(pname
, ishelp
);
139 /* parse the output type option */
140 data
=(char *)options
[OPT_OUT_TYPE
].value
;
141 if(data
[0]==0 || data
[1]!=0) {
142 /* the type must be exactly one letter */
143 return printUsage(pname
, FALSE
);
147 outIsBigEndian
=FALSE
;
148 outCharset
=U_ASCII_FAMILY
;
152 outCharset
=U_ASCII_FAMILY
;
156 outCharset
=U_EBCDIC_FAMILY
;
159 return printUsage(pname
, FALSE
);
165 /* open the input file, get its length, allocate memory for it, read the file */
166 in
=fopen(argv
[1], "rb");
168 fprintf(stderr
, "%s: unable to open input file \"%s\"\n", pname
, argv
[1]);
174 if(length
<DEFAULT_PADDING_LENGTH
) {
175 fprintf(stderr
, "%s: empty input file \"%s\"\n", pname
, argv
[1]);
181 * +15: udata_swapPackage() may need to add a few padding bytes to the
182 * last item if charset swapping is done,
183 * because the last item may be resorted into the middle and then needs
184 * additional padding bytes
186 data
=(char *)malloc(length
+DEFAULT_PADDING_LENGTH
);
188 fprintf(stderr
, "%s: error allocating memory for \"%s\"\n", pname
, argv
[1]);
193 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
194 uprv_memset(data
+length
-DEFAULT_PADDING_LENGTH
, 0xaa, DEFAULT_PADDING_LENGTH
);
196 if(length
!=(int32_t)fread(data
, 1, length
, in
)) {
197 fprintf(stderr
, "%s: error reading \"%s\"\n", pname
, argv
[1]);
205 /* swap the data in-place */
206 errorCode
=U_ZERO_ERROR
;
207 ds
=udata_openSwapperForInputData(data
, length
, outIsBigEndian
, outCharset
, &errorCode
);
208 if(U_FAILURE(errorCode
)) {
209 fprintf(stderr
, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
210 pname
, argv
[1], u_errorName(errorCode
));
215 ds
->printError
=printError
;
216 ds
->printErrorContext
=stderr
;
218 /* speculative cast, protected by the following length check */
219 pInfo
=(const UDataInfo
*)((const char *)data
+4);
222 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
223 pInfo
->dataFormat
[1]==0x6d &&
224 pInfo
->dataFormat
[2]==0x6e &&
225 pInfo
->dataFormat
[3]==0x44
228 * swap the .dat package
229 * udata_swapPackage() needs to rename ToC name entries from the old package
230 * name to the new one.
231 * We pass it the filenames, and udata_swapPackage() will extract the
234 length
=udata_swapPackage(argv
[1], argv
[2], ds
, data
, length
, data
, &errorCode
);
235 udata_closeSwapper(ds
);
236 if(U_FAILURE(errorCode
)) {
237 fprintf(stderr
, "%s: udata_swapPackage(\"%s\") failed - %s\n",
238 pname
, argv
[1], u_errorName(errorCode
));
243 /* swap the data, which is not a .dat package */
244 length
=udata_swap(ds
, data
, length
, data
, &errorCode
);
245 udata_closeSwapper(ds
);
246 if(U_FAILURE(errorCode
)) {
247 fprintf(stderr
, "%s: udata_swap(\"%s\") failed - %s\n",
248 pname
, argv
[1], u_errorName(errorCode
));
254 out
=fopen(argv
[2], "wb");
256 fprintf(stderr
, "%s: unable to open output file \"%s\"\n", pname
, argv
[2]);
261 if(length
!=(int32_t)fwrite(data
, 1, length
, out
)) {
262 fprintf(stderr
, "%s: error writing \"%s\"\n", pname
, argv
[2]);
286 /* swap .dat package files -------------------------------------------------- */
289 extractPackageName(const UDataSwapper
*ds
, const char *filename
,
290 char pkg
[], int32_t capacity
,
291 UErrorCode
*pErrorCode
) {
292 const char *basename
;
295 if(U_FAILURE(*pErrorCode
)) {
299 basename
=findBasename(filename
);
300 len
=(int32_t)uprv_strlen(basename
)-4; /* -4: subtract the length of ".dat" */
302 if(len
<=0 || 0!=uprv_strcmp(basename
+len
, ".dat")) {
303 udata_printError(ds
, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
305 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
310 udata_printError(ds
, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
312 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
316 uprv_memcpy(pkg
, basename
, len
);
322 uint32_t nameOffset
, inOffset
, outOffset
, length
;
326 static int32_t U_CALLCONV
327 compareToCEntries(const void *context
, const void *left
, const void *right
) {
328 const char *chars
=(const char *)context
;
329 return (int32_t)uprv_strcmp(chars
+((const ToCEntry
*)left
)->nameOffset
,
330 chars
+((const ToCEntry
*)right
)->nameOffset
);
334 U_CFUNC
int32_t U_CALLCONV
335 udata_swapPackage(const char *inFilename
, const char *outFilename
,
336 const UDataSwapper
*ds
,
337 const void *inData
, int32_t length
, void *outData
,
338 UErrorCode
*pErrorCode
) {
339 const UDataInfo
*pInfo
;
342 const uint8_t *inBytes
;
345 uint32_t itemCount
, offset
, i
;
348 const UDataOffsetTOCEntry
*inEntries
;
349 UDataOffsetTOCEntry
*outEntries
;
353 char inPkgName
[32], outPkgName
[32];
354 int32_t inPkgNameLength
, outPkgNameLength
;
356 /* udata_swapDataHeader checks the arguments */
357 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
358 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
362 /* check data format and format version */
363 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
365 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
366 pInfo
->dataFormat
[1]==0x6d &&
367 pInfo
->dataFormat
[2]==0x6e &&
368 pInfo
->dataFormat
[3]==0x44 &&
369 pInfo
->formatVersion
[0]==1
371 udata_printError(ds
, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
372 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
373 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
374 pInfo
->formatVersion
[0]);
375 *pErrorCode
=U_UNSUPPORTED_ERROR
;
380 * We need to change the ToC name entries so that they have the correct
381 * package name prefix.
382 * Extract the package names from the in/out filenames.
384 inPkgNameLength
=extractPackageName(
386 inPkgName
, (int32_t)sizeof(inPkgName
),
388 outPkgNameLength
=extractPackageName(
390 outPkgName
, (int32_t)sizeof(outPkgName
),
392 if(U_FAILURE(*pErrorCode
)) {
397 * It is possible to work with inPkgNameLength!=outPkgNameLength,
398 * but then the length of the data file would change more significantly,
399 * which we are not currently prepared for.
401 if(inPkgNameLength
!=outPkgNameLength
) {
402 udata_printError(ds
, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
403 inPkgName
, outPkgName
);
404 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
408 inBytes
=(const uint8_t *)inData
+headerSize
;
409 inEntries
=(const UDataOffsetTOCEntry
*)(inBytes
+4);
413 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
415 /* no items: count only the item count and return */
419 /* read the last item's offset and preflight it */
420 offset
=ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
421 itemLength
=udata_swap(ds
, inBytes
+offset
, -1, NULL
, pErrorCode
);
423 if(U_SUCCESS(*pErrorCode
)) {
424 return headerSize
+offset
+(uint32_t)itemLength
;
429 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
432 /* itemCount does not fit */
434 itemCount
=0; /* make compilers happy */
436 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
439 } else if((uint32_t)length
<(4+8*itemCount
)) {
440 /* ToC table does not fit */
443 /* offset of the last item plus at least 20 bytes for its header */
444 offset
=20+ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
447 if((uint32_t)length
<offset
) {
448 udata_printError(ds
, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
450 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
454 outBytes
=(uint8_t *)outData
+headerSize
;
456 /* swap the item count */
457 ds
->swapArray32(ds
, inBytes
, 4, outBytes
, pErrorCode
);
460 /* no items: just return now */
464 /* swap the item name strings */
465 offset
=4+8*itemCount
;
466 itemLength
=(int32_t)(ds
->readUInt32(inEntries
[0].dataOffset
)-offset
);
467 udata_swapInvStringBlock(ds
, inBytes
+offset
, itemLength
, outBytes
+offset
, pErrorCode
);
468 if(U_FAILURE(*pErrorCode
)) {
469 udata_printError(ds
, "udata_swapPackage() failed to swap the data item name strings\n");
472 /* keep offset and itemLength in case we allocate and copy the strings below */
474 /* swap the package names into the output charset */
475 if(ds
->outCharset
!=U_CHARSET_FAMILY
) {
477 ds2
=udata_openSwapper(TRUE
, U_CHARSET_FAMILY
, TRUE
, ds
->outCharset
, pErrorCode
);
478 ds2
->swapInvChars(ds2
, inPkgName
, inPkgNameLength
, inPkgName
, pErrorCode
);
479 ds2
->swapInvChars(ds2
, outPkgName
, outPkgNameLength
, outPkgName
, pErrorCode
);
480 udata_closeSwapper(ds2
);
481 if(U_FAILURE(*pErrorCode
)) {
482 udata_printError(ds
, "udata_swapPackage() failed to swap the input/output package names\n");
486 /* change the prefix of each ToC entry name from the old to the new package name */
490 for(i
=0; i
<itemCount
; ++i
) {
491 entryName
=(char *)inBytes
+ds
->readUInt32(inEntries
[i
].nameOffset
);
493 if(0==uprv_memcmp(entryName
, inPkgName
, inPkgNameLength
)) {
494 uprv_memcpy(entryName
, outPkgName
, inPkgNameLength
);
496 udata_printError(ds
, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
498 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
505 * Allocate the ToC table and, if necessary, a temporary buffer for
506 * pseudo-in-place swapping.
508 * We cannot swap in-place because:
510 * 1. If the swapping of an item fails mid-way, then in-place swapping
511 * has destroyed its data.
512 * Out-of-place swapping allows us to then copy its original data.
514 * 2. If swapping changes the charset family, then we must resort
515 * not only the ToC table but also the data items themselves.
516 * This requires a permutation and is best done with separate in/out
519 * We swapped the strings above to avoid the malloc below if string swapping fails.
521 if(inData
==outData
) {
522 /* +15: prepare for extra padding of a newly-last item */
523 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
);
525 outBytes
=(uint8_t *)(table
+itemCount
);
527 /* copy the item count and the swapped strings */
528 uprv_memcpy(outBytes
, inBytes
, 4);
529 uprv_memcpy(outBytes
+offset
, inBytes
+offset
, itemLength
);
532 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
));
535 udata_printError(ds
, "udata_swapPackage(): out of memory allocating %d bytes\n",
537 itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
:
538 itemCount
*sizeof(ToCEntry
));
539 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
542 outEntries
=(UDataOffsetTOCEntry
*)(outBytes
+4);
544 /* read the ToC table */
545 for(i
=0; i
<itemCount
; ++i
) {
546 table
[i
].nameOffset
=ds
->readUInt32(inEntries
[i
].nameOffset
);
547 table
[i
].inOffset
=ds
->readUInt32(inEntries
[i
].dataOffset
);
549 table
[i
-1].length
=table
[i
].inOffset
-table
[i
-1].inOffset
;
552 table
[itemCount
-1].length
=(uint32_t)length
-table
[itemCount
-1].inOffset
;
554 if(ds
->inCharset
==ds
->outCharset
) {
555 /* no charset swapping, no resorting: keep item offsets the same */
556 for(i
=0; i
<itemCount
; ++i
) {
557 table
[i
].outOffset
=table
[i
].inOffset
;
560 /* charset swapping: resort items by their swapped names */
563 * Before the actual sorting, we need to make sure that each item
564 * has a length that is a multiple of 16 bytes so that all items
566 * Only the old last item may be missing up to 15 padding bytes.
567 * Add padding bytes for it.
568 * Since the icuswap main() function has already allocated enough
569 * input buffer space and set the last 15 bytes there to 0xaa,
570 * we only need to increase the total data length and the length
571 * of the last item here.
573 if((length
&0xf)!=0) {
574 int32_t delta
=16-(length
&0xf);
576 table
[itemCount
-1].length
+=(uint32_t)delta
;
579 /* Save the offset before we sort the TOC. */
580 offset
=table
[0].inOffset
;
581 /* sort the TOC entries */
582 uprv_sortArray(table
, (int32_t)itemCount
, (int32_t)sizeof(ToCEntry
),
583 compareToCEntries
, outBytes
, FALSE
, pErrorCode
);
586 * Note: Before sorting, the inOffset values were in order.
587 * Now the outOffset values are in order.
590 /* assign outOffset values */
591 for(i
=0; i
<itemCount
; ++i
) {
592 table
[i
].outOffset
=offset
;
593 offset
+=table
[i
].length
;
597 /* write the output ToC table */
598 for(i
=0; i
<itemCount
; ++i
) {
599 ds
->writeUInt32(&outEntries
[i
].nameOffset
, table
[i
].nameOffset
);
600 ds
->writeUInt32(&outEntries
[i
].dataOffset
, table
[i
].outOffset
);
603 /* swap each data item */
604 for(i
=0; i
<itemCount
; ++i
) {
605 /* first copy the item bytes to make sure that unreachable bytes are copied */
606 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
609 udata_swap(ds
, inBytes
+table
[i
].inOffset
, (int32_t)table
[i
].length
,
610 outBytes
+table
[i
].outOffset
, pErrorCode
);
612 if(U_FAILURE(*pErrorCode
)) {
613 if(ds
->outCharset
==U_CHARSET_FAMILY
) {
614 udata_printError(ds
, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
615 " at inOffset 0x%x length 0x%x - %s\n"
616 " the data item will be copied, not swapped\n\n",
617 (char *)outBytes
+table
[i
].nameOffset
,
618 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
620 udata_printError(ds
, "warning: udata_swapPackage() failed to swap an item\n"
621 " at inOffset 0x%x length 0x%x - %s\n"
622 " the data item will be copied, not swapped\n\n",
623 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
625 /* reset the error code, copy the data item, and continue */
626 *pErrorCode
=U_ZERO_ERROR
;
627 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
631 if(inData
==outData
) {
632 /* copy the data from the temporary buffer to the in-place buffer */
633 uprv_memcpy((uint8_t *)outData
+headerSize
, outBytes
, length
);
637 return headerSize
+length
;
642 * Hey, Emacs, please set the following:
645 * indent-tabs-mode: nil