2 *******************************************************************************
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: icuswap.cpp
10 * tab size: 8 (not used)
13 * created on: 2003aug08
14 * created by: Markus W. Scherer
16 * This tool takes an ICU data file and "swaps" it, that is, changes its
17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
19 * The modified data file is written to a new file.
20 * Useful as an install-time tool for shipping only one flavor of ICU data
21 * and preparing data files for the target platform.
22 * Will not work with data DLLs (shared libraries).
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
/*
 * Number of spare bytes that main() adds to its input buffer allocation and
 * that udata_swapPackage() adds to its temporary buffer allocation.
 * main() also rejects input files shorter than this value.
 */
44 #define DEFAULT_PADDING_LENGTH 15
/*
 * Command-line option table that main() passes to u_parseArgs() and reads
 * back through OPT_* indices (doesOccur, value).
 * NOTE(review): only the leading entries of the initializer are visible in
 * this listing.
 */
46 static UOption options
[]={
48 UOPTION_HELP_QUESTION_MARK
,
49 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG
)
54 OPT_HELP_QUESTION_MARK
,
/*
 * Measure the stream: seek to the end, take the position as the size
 * (narrowed to int32_t), then rewind to the start.
 * NOTE(review): the enclosing function header is not visible in this listing,
 * and the return values of fseek()/ftell() are not checked on the lines shown.
 */
62 fseek(f
, 0, SEEK_END
);
63 size
=(int32_t)ftell(f
);
64 fseek(f
, 0, SEEK_SET
);
69 * Swap an ICU .dat package, including swapping of enclosed items.
/*
 * Forward declaration: main() calls udata_swapPackage() before its
 * definition, which appears further down in this file.
 */
71 U_CFUNC
int32_t U_CALLCONV
72 udata_swapPackage(const char *inFilename
, const char *outFilename
,
73 const UDataSwapper
*ds
,
74 const void *inData
, int32_t length
, void *outData
,
75 UErrorCode
*pErrorCode
);
/*
 * Error-printing callback that main() installs on the swapper
 * (ds->printError), with stderr as the context.
 *
 * context: cast to FILE * and used as the output stream.
 * fmt, args: printf-style format string and its argument list, passed
 *            straight to vfprintf().
 */
78 static void U_CALLCONV
79 printError(void *context
, const char *fmt
, va_list args
) {
80 vfprintf((FILE *)context
, fmt
, args
);
/*
 * Print the command-line usage text.
 *
 * pname:  program name inserted into the usage line.
 * ishelp: selects the first letter of the usage line (U when true, u when
 *         false).
 *
 * main() returns the result of this function as its own result.
 * NOTE(review): the output calls, the condition guarding the long option
 * description, and the return statement are not visible in this listing.
 */
85 printUsage(const char *pname
, UBool ishelp
) {
87 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
88 ishelp
? 'U' : 'u', pname
);
91 "\nOptions: -h, -?, --help print this message and exit\n"
92 " Read the input file, swap its platform properties according\n"
93 " to the -t or --type option, and write the result to the output file.\n"
94 " -tl change to little-endian/ASCII charset family\n"
95 " -tb change to big-endian/ASCII charset family\n"
96 " -te change to big-endian/EBCDIC charset family\n");
/*
 * Command-line entry point.
 *
 * Steps visible in this listing:
 *  - print an obsolescence warning to stderr;
 *  - derive the program basename from argv[0];
 *  - parse the options; show usage unless exactly two file arguments remain;
 *  - map the one-letter type value to an output endianness and charset family;
 *  - read argv[1] completely into a heap buffer;
 *  - open a swapper for that data and swap it in place, using
 *    udata_swapPackage() for CmnD data and udata_swap() otherwise;
 *  - write the swapped bytes to argv[2].
 *
 * NOTE(review): many lines of this function are not visible here (local
 * declarations, the selection on the type letter, the failure checks that
 * guard each error message, cleanup and return statements), so this summary
 * covers only what is shown.
 */
103 main(int argc
, char *argv
[]) {
112 const UDataInfo
*pInfo
;
113 UErrorCode errorCode
;
115 UBool outIsBigEndian
;
117 U_MAIN_INIT_ARGS(argc
, argv
);
119 fprintf(stderr
, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
121 /* get the program basename */
122 pname
=strrchr(argv
[0], U_FILE_SEP_CHAR
);
124 pname
=strrchr(argv
[0], '/');
/* u_parseArgs() returns the remaining argument count; program name plus two filenames gives 3 */
132 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
133 ishelp
=options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
;
134 if(ishelp
|| argc
!=3) {
135 return printUsage(pname
, ishelp
);
138 /* parse the output type option */
139 data
=(char *)options
[OPT_OUT_TYPE
].value
;
140 if(data
[0]==0 || data
[1]!=0) {
141 /* the type must be exactly one letter */
142 return printUsage(pname
, FALSE
);
146 outIsBigEndian
=FALSE
;
147 outCharset
=U_ASCII_FAMILY
;
151 outCharset
=U_ASCII_FAMILY
;
155 outCharset
=U_EBCDIC_FAMILY
;
158 return printUsage(pname
, FALSE
);
164 /* open the input file, get its length, allocate memory for it, read the file */
165 in
=fopen(argv
[1], "rb");
167 fprintf(stderr
, "%s: unable to open input file \"%s\"\n", pname
, argv
[1]);
/* inputs shorter than DEFAULT_PADDING_LENGTH bytes are reported as empty */
173 if(length
<DEFAULT_PADDING_LENGTH
) {
174 fprintf(stderr
, "%s: empty input file \"%s\"\n", pname
, argv
[1]);
180 * +15: udata_swapPackage() may need to add a few padding bytes to the
181 * last item if charset swapping is done,
182 * because the last item may be resorted into the middle and then needs
183 * additional padding bytes
185 data
=(char *)malloc(length
+DEFAULT_PADDING_LENGTH
);
187 fprintf(stderr
, "%s: error allocating memory for \"%s\"\n", pname
, argv
[1]);
192 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
/*
 * NOTE(review): this fills bytes [length-15, length), which the fread() of
 * length bytes below then overwrites. The spare bytes [length, length+15)
 * from the allocation above are not written on any visible line, although
 * udata_swapPackage() states that they were preset to 0xaa. Confirm whether
 * the fill should start at data+length.
 */
193 uprv_memset(data
+length
-DEFAULT_PADDING_LENGTH
, 0xaa, DEFAULT_PADDING_LENGTH
);
/* a short read counts as an error: fread() must deliver exactly length bytes */
195 if(length
!=(int32_t)fread(data
, 1, length
, in
)) {
196 fprintf(stderr
, "%s: error reading \"%s\"\n", pname
, argv
[1]);
204 /* swap the data in-place */
205 errorCode
=U_ZERO_ERROR
;
206 ds
=udata_openSwapperForInputData(data
, length
, outIsBigEndian
, outCharset
, &errorCode
);
207 if(U_FAILURE(errorCode
)) {
208 fprintf(stderr
, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
209 pname
, argv
[1], u_errorName(errorCode
));
/* route diagnostics from the swapper to stderr through printError() */
214 ds
->printError
=printError
;
215 ds
->printErrorContext
=stderr
;
217 /* speculative cast, protected by the following length check */
218 pInfo
=(const UDataInfo
*)((const char *)data
+4);
221 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
222 pInfo
->dataFormat
[1]==0x6d &&
223 pInfo
->dataFormat
[2]==0x6e &&
224 pInfo
->dataFormat
[3]==0x44
227 * swap the .dat package
228 * udata_swapPackage() needs to rename ToC name entries from the old package
229 * name to the new one.
230 * We pass it the filenames, and udata_swapPackage() will extract the
/* the same buffer is passed as input and output; the returned value replaces length */
233 length
=udata_swapPackage(argv
[1], argv
[2], ds
, data
, length
, data
, &errorCode
);
234 udata_closeSwapper(ds
);
235 if(U_FAILURE(errorCode
)) {
236 fprintf(stderr
, "%s: udata_swapPackage(\"%s\") failed - %s\n",
237 pname
, argv
[1], u_errorName(errorCode
));
242 /* swap the data, which is not a .dat package */
243 length
=udata_swap(ds
, data
, length
, data
, &errorCode
);
244 udata_closeSwapper(ds
);
245 if(U_FAILURE(errorCode
)) {
246 fprintf(stderr
, "%s: udata_swap(\"%s\") failed - %s\n",
247 pname
, argv
[1], u_errorName(errorCode
));
/* write the swapped bytes, using the length returned by the swap call */
253 out
=fopen(argv
[2], "wb");
255 fprintf(stderr
, "%s: unable to open output file \"%s\"\n", pname
, argv
[2]);
260 if(length
!=(int32_t)fwrite(data
, 1, length
, out
)) {
261 fprintf(stderr
, "%s: error writing \"%s\"\n", pname
, argv
[2]);
285 /* swap .dat package files -------------------------------------------------- */
/*
 * Derive a package name from a data file name.
 *
 * ds:         swapper, used here only for error reporting.
 * filename:   path whose basename must end with .dat and must have at least
 *             one character before that suffix.
 * pkg:        receives the basename without the .dat suffix.
 * capacity:   size of pkg.
 * pErrorCode: does nothing useful if it already indicates a failure; set to
 *             U_ILLEGAL_ARGUMENT_ERROR for an unrecognized file name or a
 *             name that is too long.
 *
 * udata_swapPackage() stores the result as the package name length.
 * NOTE(review): the capacity comparison, the termination of pkg and the
 * return statements are not visible in this listing.
 */
288 extractPackageName(const UDataSwapper
*ds
, const char *filename
,
289 char pkg
[], int32_t capacity
,
290 UErrorCode
*pErrorCode
) {
291 const char *basename
;
294 if(U_FAILURE(*pErrorCode
)) {
298 basename
=findBasename(filename
);
299 len
=(int32_t)uprv_strlen(basename
)-4; /* -4: subtract the length of ".dat" */
/* len<=0 means the basename is no longer than the suffix itself */
301 if(len
<=0 || 0!=uprv_strcmp(basename
+len
, ".dat")) {
302 udata_printError(ds
, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
304 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
309 udata_printError(ds
, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
311 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* copy only the part of the basename that precedes the suffix */
315 uprv_memcpy(pkg
, basename
, len
);
/*
 * Per-item working record filled in by udata_swapPackage():
 * nameOffset - offset of the item name, read from the input ToC;
 * inOffset   - offset of the item data in the input;
 * outOffset  - offset assigned to the item data in the output;
 * length     - item size in bytes, computed from consecutive input offsets.
 * NOTE(review): the enclosing struct declaration is not visible in this
 * listing; presumably these are the members of ToCEntry - confirm.
 */
321 uint32_t nameOffset
, inOffset
, outOffset
, length
;
/*
 * Comparison callback passed to uprv_sortArray() in udata_swapPackage().
 *
 * context:     base address of the character data; each entry name is found
 *              at context + nameOffset.
 * left, right: pointers to the two ToCEntry records to compare.
 *
 * Returns the uprv_strcmp() result for the two entry names, so entries are
 * ordered by name.
 */
325 static int32_t U_CALLCONV
326 compareToCEntries(const void *context
, const void *left
, const void *right
) {
327 const char *chars
=(const char *)context
;
328 return (int32_t)uprv_strcmp(chars
+((const ToCEntry
*)left
)->nameOffset
,
329 chars
+((const ToCEntry
*)right
)->nameOffset
);
/*
 * Swap an ICU .dat package (data format CmnD, format version 1): the data
 * header, the item count, the ToC table, the item name strings and every
 * enclosed item.
 *
 * inFilename, outFilename: used to derive the old and new package names with
 *             extractPackageName(); the two names must have the same length.
 * ds:         swapper that supplies the read/write/swap functions and the
 *             input and output charset families.
 * inData:     start of the input data, including the data header.
 * length:     number of input bytes.
 * outData:    output buffer; when it equals inData, the items are swapped
 *             into a temporary heap buffer and copied back at the end.
 * pErrorCode: in/out error code; the failures visible here set
 *             U_UNSUPPORTED_ERROR, U_ILLEGAL_ARGUMENT_ERROR,
 *             U_INDEX_OUTOFBOUNDS_ERROR, U_INVALID_FORMAT_ERROR or
 *             U_MEMORY_ALLOCATION_ERROR.
 *
 * The last visible statement returns headerSize+length. A preflighting path
 * returns headerSize plus the last item offset plus that item length.
 * An item that fails to swap is reported as a warning and copied unswapped.
 *
 * NOTE(review): many lines of this function are not visible in this listing
 * (several conditions, else branches, early returns and the release of the
 * allocated table), so this description covers only what is shown.
 */
333 U_CFUNC
int32_t U_CALLCONV
334 udata_swapPackage(const char *inFilename
, const char *outFilename
,
335 const UDataSwapper
*ds
,
336 const void *inData
, int32_t length
, void *outData
,
337 UErrorCode
*pErrorCode
) {
338 const UDataInfo
*pInfo
;
341 const uint8_t *inBytes
;
344 uint32_t itemCount
, offset
, i
;
347 const UDataOffsetTOCEntry
*inEntries
;
348 UDataOffsetTOCEntry
*outEntries
;
352 char inPkgName
[32], outPkgName
[32];
353 int32_t inPkgNameLength
, outPkgNameLength
;
355 /* udata_swapDataHeader checks the arguments */
356 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
357 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
361 /* check data format and format version */
362 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
364 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
365 pInfo
->dataFormat
[1]==0x6d &&
366 pInfo
->dataFormat
[2]==0x6e &&
367 pInfo
->dataFormat
[3]==0x44 &&
368 pInfo
->formatVersion
[0]==1
370 udata_printError(ds
, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
371 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
372 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
373 pInfo
->formatVersion
[0]);
374 *pErrorCode
=U_UNSUPPORTED_ERROR
;
379 * We need to change the ToC name entries so that they have the correct
380 * package name prefix.
381 * Extract the package names from the in/out filenames.
383 inPkgNameLength
=extractPackageName(
385 inPkgName
, (int32_t)sizeof(inPkgName
),
387 outPkgNameLength
=extractPackageName(
389 outPkgName
, (int32_t)sizeof(outPkgName
),
391 if(U_FAILURE(*pErrorCode
)) {
396 * It is possible to work with inPkgNameLength!=outPkgNameLength,
397 * but then the length of the data file would change more significantly,
398 * which we are not currently prepared for.
400 if(inPkgNameLength
!=outPkgNameLength
) {
401 udata_printError(ds
, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
402 inPkgName
, outPkgName
);
403 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* the package body starts after the data header: a 4-byte item count followed by the ToC entries */
407 inBytes
=(const uint8_t *)inData
+headerSize
;
408 inEntries
=(const UDataOffsetTOCEntry
*)(inBytes
+4);
412 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
414 /* no items: count only the item count and return */
418 /* read the last item's offset and preflight it */
419 offset
=ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
420 itemLength
=udata_swap(ds
, inBytes
+offset
, -1, NULL
, pErrorCode
);
422 if(U_SUCCESS(*pErrorCode
)) {
423 return headerSize
+offset
+(uint32_t)itemLength
;
428 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
431 /* itemCount does not fit */
433 itemCount
=0; /* make compilers happy */
435 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
/* 4 bytes for the item count plus 8 bytes per ToC entry */
438 } else if((uint32_t)length
<(4+8*itemCount
)) {
439 /* ToC table does not fit */
442 /* offset of the last item plus at least 20 bytes for its header */
443 offset
=20+ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
446 if((uint32_t)length
<offset
) {
447 udata_printError(ds
, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
449 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
453 outBytes
=(uint8_t *)outData
+headerSize
;
455 /* swap the item count */
456 ds
->swapArray32(ds
, inBytes
, 4, outBytes
, pErrorCode
);
459 /* no items: just return now */
463 /* swap the item name strings */
/* the name strings occupy the bytes between the end of the ToC table and the first item */
464 offset
=4+8*itemCount
;
465 itemLength
=(int32_t)(ds
->readUInt32(inEntries
[0].dataOffset
)-offset
);
466 udata_swapInvStringBlock(ds
, inBytes
+offset
, itemLength
, outBytes
+offset
, pErrorCode
);
467 if(U_FAILURE(*pErrorCode
)) {
468 udata_printError(ds
, "udata_swapPackage() failed to swap the data item name strings\n");
471 /* keep offset and itemLength in case we allocate and copy the strings below */
473 /* swap the package names into the output charset */
474 if(ds
->outCharset
!=U_CHARSET_FAMILY
) {
/*
 * NOTE(review): on the visible lines ds2 is used without a NULL or failure
 * check after udata_openSwapper(); the error code is tested only after the
 * swapper has been closed. Confirm that ds2 cannot be NULL here.
 */
476 ds2
=udata_openSwapper(TRUE
, U_CHARSET_FAMILY
, TRUE
, ds
->outCharset
, pErrorCode
);
477 ds2
->swapInvChars(ds2
, inPkgName
, inPkgNameLength
, inPkgName
, pErrorCode
);
478 ds2
->swapInvChars(ds2
, outPkgName
, outPkgNameLength
, outPkgName
, pErrorCode
);
479 udata_closeSwapper(ds2
);
480 if(U_FAILURE(*pErrorCode
)) {
481 udata_printError(ds
, "udata_swapPackage() failed to swap the input/output package names\n");
485 /* change the prefix of each ToC entry name from the old to the new package name */
/*
 * NOTE(review): entryName is derived from inBytes with the const qualifier
 * cast away, so the prefix replacement below writes into the input buffer,
 * not into outBytes. Presumably this relies on in-place use (inData equal to
 * outData) - confirm for callers that pass separate buffers.
 */
489 for(i
=0; i
<itemCount
; ++i
) {
490 entryName
=(char *)inBytes
+ds
->readUInt32(inEntries
[i
].nameOffset
);
492 if(0==uprv_memcmp(entryName
, inPkgName
, inPkgNameLength
)) {
493 uprv_memcpy(entryName
, outPkgName
, inPkgNameLength
);
495 udata_printError(ds
, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
497 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
504 * Allocate the ToC table and, if necessary, a temporary buffer for
505 * pseudo-in-place swapping.
507 * We cannot swap in-place because:
509 * 1. If the swapping of an item fails mid-way, then in-place swapping
510 * has destroyed its data.
511 * Out-of-place swapping allows us to then copy its original data.
513 * 2. If swapping changes the charset family, then we must resort
514 * not only the ToC table but also the data items themselves.
515 * This requires a permutation and is best done with separate in/out
518 * We swapped the strings above to avoid the malloc below if string swapping fails.
520 if(inData
==outData
) {
521 /* +15: prepare for extra padding of a newly-last item */
/* one allocation holds the ToC table followed by the temporary output bytes */
522 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
);
524 outBytes
=(uint8_t *)(table
+itemCount
);
526 /* copy the item count and the swapped strings */
527 uprv_memcpy(outBytes
, inBytes
, 4);
528 uprv_memcpy(outBytes
+offset
, inBytes
+offset
, itemLength
);
531 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
));
/*
 * NOTE(review): the message below uses %d, but the visible argument
 * expression involves sizeof and therefore has type size_t; the two do not
 * match where size_t is wider than int.
 */
534 udata_printError(ds
, "udata_swapPackage(): out of memory allocating %d bytes\n",
536 itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
:
537 itemCount
*sizeof(ToCEntry
));
538 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
541 outEntries
=(UDataOffsetTOCEntry
*)(outBytes
+4);
543 /* read the ToC table */
544 for(i
=0; i
<itemCount
; ++i
) {
545 table
[i
].nameOffset
=ds
->readUInt32(inEntries
[i
].nameOffset
);
546 table
[i
].inOffset
=ds
->readUInt32(inEntries
[i
].dataOffset
);
/* the length of the previous item is the distance between consecutive input offsets */
548 table
[i
-1].length
=table
[i
].inOffset
-table
[i
-1].inOffset
;
/* the last item extends to the end of the input data */
551 table
[itemCount
-1].length
=(uint32_t)length
-table
[itemCount
-1].inOffset
;
553 if(ds
->inCharset
==ds
->outCharset
) {
554 /* no charset swapping, no resorting: keep item offsets the same */
555 for(i
=0; i
<itemCount
; ++i
) {
556 table
[i
].outOffset
=table
[i
].inOffset
;
559 /* charset swapping: resort items by their swapped names */
562 * Before the actual sorting, we need to make sure that each item
563 * has a length that is a multiple of 16 bytes so that all items
565 * Only the old last item may be missing up to 15 padding bytes.
566 * Add padding bytes for it.
567 * Since the icuswap main() function has already allocated enough
568 * input buffer space and set the last 15 bytes there to 0xaa,
569 * we only need to increase the total data length and the length
570 * of the last item here.
572 if((length
&0xf)!=0) {
573 int32_t delta
=16-(length
&0xf);
575 table
[itemCount
-1].length
+=(uint32_t)delta
;
578 /* Save the offset before we sort the TOC. */
579 offset
=table
[0].inOffset
;
580 /* sort the TOC entries */
581 uprv_sortArray(table
, (int32_t)itemCount
, (int32_t)sizeof(ToCEntry
),
582 compareToCEntries
, outBytes
, FALSE
, pErrorCode
);
585 * Note: Before sorting, the inOffset values were in order.
586 * Now the outOffset values are in order.
589 /* assign outOffset values */
/* items are laid out back to back in sorted order, starting at the saved first offset */
590 for(i
=0; i
<itemCount
; ++i
) {
591 table
[i
].outOffset
=offset
;
592 offset
+=table
[i
].length
;
596 /* write the output ToC table */
597 for(i
=0; i
<itemCount
; ++i
) {
598 ds
->writeUInt32(&outEntries
[i
].nameOffset
, table
[i
].nameOffset
);
599 ds
->writeUInt32(&outEntries
[i
].dataOffset
, table
[i
].outOffset
);
602 /* swap each data item */
603 for(i
=0; i
<itemCount
; ++i
) {
604 /* first copy the item bytes to make sure that unreachable bytes are copied */
605 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
608 udata_swap(ds
, inBytes
+table
[i
].inOffset
, (int32_t)table
[i
].length
,
609 outBytes
+table
[i
].outOffset
, pErrorCode
);
611 if(U_FAILURE(*pErrorCode
)) {
/* the item name is printed only when the output charset is the charset family of this platform */
612 if(ds
->outCharset
==U_CHARSET_FAMILY
) {
613 udata_printError(ds
, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
614 " at inOffset 0x%x length 0x%x - %s\n"
615 " the data item will be copied, not swapped\n\n",
616 (char *)outBytes
+table
[i
].nameOffset
,
617 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
619 udata_printError(ds
, "warning: udata_swapPackage() failed to swap an item\n"
620 " at inOffset 0x%x length 0x%x - %s\n"
621 " the data item will be copied, not swapped\n\n",
622 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
624 /* reset the error code, copy the data item, and continue */
625 *pErrorCode
=U_ZERO_ERROR
;
626 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
630 if(inData
==outData
) {
631 /* copy the data from the temporary buffer to the in-place buffer */
632 uprv_memcpy((uint8_t *)outData
+headerSize
, outBytes
, length
);
636 return headerSize
+length
;
641 * Hey, Emacs, please set the following:
644 * indent-tabs-mode: nil