1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: icuswap.cpp
12 * tab size: 8 (not used)
15 * created on: 2003aug08
16 * created by: Markus W. Scherer
18 * This tool takes an ICU data file and "swaps" it, that is, changes its
19 * platform properties between big-/little-endianness and ASCII/EBCDIC charset families.
21 * The modified data file is written to a new file.
22 * Useful as an install-time tool for shipping only one flavor of ICU data
23 * and preparing data files for the target platform.
24 * Will not work with data DLLs (shared libraries).
27 #include "unicode/utypes.h"
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
46 #define DEFAULT_PADDING_LENGTH 15
/*
 * Command-line option table that main() hands to u_parseArgs().
 * The "type" option (short form 't') requires an argument.
 */
48 static UOption options
[]={
50 UOPTION_HELP_QUESTION_MARK
,
51 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG
)
/*
 * NOTE(review): OPT_HELP_QUESTION_MARK is used in main() as an index into
 * options[]; it looks like a member of an index enum whose other lines are
 * not visible in this view.
 */
56 OPT_HELP_QUESTION_MARK
,
/*
 * Measure the stream f: seek to the end, store ftell() truncated to int32_t
 * in size, then seek back to the start.
 * The return values of fseek() and ftell() are not checked here.
 * NOTE(review): the enclosing function's signature and return statement are
 * not visible in this view.
 */
64 fseek(f
, 0, SEEK_END
);
65 size
=(int32_t)ftell(f
);
66 fseek(f
, 0, SEEK_SET
);
71 * Swap an ICU .dat package, including swapping of enclosed items.
/*
 * Forward declaration so that main() can call udata_swapPackage() before its
 * definition further down in this file.
 * inFilename/outFilename: the definition extracts the package names from them.
 * ds: swapper; inData/length: input bytes; outData: output buffer
 * (main() passes the same pointer for inData and outData).
 * pErrorCode: in/out ICU error code.
 */
73 U_CFUNC
int32_t U_CALLCONV
74 udata_swapPackage(const char *inFilename
, const char *outFilename
,
75 const UDataSwapper
*ds
,
76 const void *inData
, int32_t length
, void *outData
,
77 UErrorCode
*pErrorCode
);
/*
 * Error-printing callback: treats context as a FILE * and forwards fmt/args
 * to vfprintf(). main() installs it on the swapper with stderr as context.
 */
80 static void U_CALLCONV
81 printError(void *context
, const char *fmt
, va_list args
) {
82 vfprintf((FILE *)context
, fmt
, args
);
/*
 * Print the usage line for program name pname.
 * The first letter of "Usage" is 'U' when ishelp is set and 'u' otherwise.
 * NOTE(review): the print calls, the condition guarding the long help text
 * and the return statement are not visible in this view.
 */
87 printUsage(const char *pname
, UBool ishelp
) {
89 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
90 ishelp
? 'U' : 'u', pname
);
/* long-form description of the options */
93 "\nOptions: -h, -?, --help print this message and exit\n"
94 " Read the input file, swap its platform properties according\n"
95 " to the -t or --type option, and write the result to the output file.\n"
96 " -tl change to little-endian/ASCII charset family\n"
97 " -tb change to big-endian/ASCII charset family\n"
98 " -te change to big-endian/EBCDIC charset family\n");
/*
 * Tool entry point.
 * Parses the options, expects exactly two remaining arguments (argc==3),
 * reads argv[1] into memory, swaps it in place with udata_swapPackage()
 * (for data whose dataFormat is "CmnD") or with udata_swap() (otherwise),
 * and writes the result to argv[2]. Diagnostics go to stderr.
 */
105 main(int argc
, char *argv
[]) {
114 const UDataInfo
*pInfo
;
115 UErrorCode errorCode
;
117 UBool outIsBigEndian
;
119 U_MAIN_INIT_ARGS(argc
, argv
);
121 fprintf(stderr
, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
123 /* get the program basename */
124 pname
=strrchr(argv
[0], U_FILE_SEP_CHAR
);
126 pname
=strrchr(argv
[0], '/');
/* u_parseArgs() returns the new argument count, which must be 3 below */
134 argc
=u_parseArgs(argc
, argv
, UPRV_LENGTHOF(options
), options
);
135 ishelp
=options
[OPT_HELP_H
].doesOccur
|| options
[OPT_HELP_QUESTION_MARK
].doesOccur
;
136 if(ishelp
|| argc
!=3) {
137 return printUsage(pname
, ishelp
);
140 /* parse the output type option */
141 data
=(char *)options
[OPT_OUT_TYPE
].value
;
142 if(data
[0]==0 || data
[1]!=0) {
143 /* the type must be exactly one letter */
144 return printUsage(pname
, FALSE
);
/*
 * The assignments below select the output endianness and charset family.
 * NOTE(review): the branching on the type letter is not visible in this view.
 */
148 outIsBigEndian
=FALSE
;
149 outCharset
=U_ASCII_FAMILY
;
153 outCharset
=U_ASCII_FAMILY
;
157 outCharset
=U_EBCDIC_FAMILY
;
160 return printUsage(pname
, FALSE
);
166 /* open the input file, get its length, allocate memory for it, read the file */
167 in
=fopen(argv
[1], "rb");
169 fprintf(stderr
, "%s: unable to open input file \"%s\"\n", pname
, argv
[1]);
/* inputs shorter than DEFAULT_PADDING_LENGTH bytes are reported as empty */
175 if(length
<DEFAULT_PADDING_LENGTH
) {
176 fprintf(stderr
, "%s: empty input file \"%s\"\n", pname
, argv
[1]);
182 * +15: udata_swapPackage() may need to add a few padding bytes to the
183 * last item if charset swapping is done,
184 * because the last item may be resorted into the middle and then needs
185 * additional padding bytes
187 data
=(char *)malloc(length
+DEFAULT_PADDING_LENGTH
);
189 fprintf(stderr
, "%s: error allocating memory for \"%s\"\n", pname
, argv
[1]);
/*
 * NOTE(review): the buffer holds length+DEFAULT_PADDING_LENGTH bytes, but the
 * memset below fills [length-15, length), which the fread() of length bytes
 * that follows overwrites. The 15 extra bytes starting at data+length appear
 * to stay uninitialized; the start address probably should be data+length.
 * Confirm against udata_swapPackage(), which relies on this padding.
 */
194 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
195 uprv_memset(data
+length
-DEFAULT_PADDING_LENGTH
, 0xaa, DEFAULT_PADDING_LENGTH
);
197 if(length
!=(int32_t)fread(data
, 1, length
, in
)) {
198 fprintf(stderr
, "%s: error reading \"%s\"\n", pname
, argv
[1]);
206 /* swap the data in-place */
207 errorCode
=U_ZERO_ERROR
;
208 ds
=udata_openSwapperForInputData(data
, length
, outIsBigEndian
, outCharset
, &errorCode
);
209 if(U_FAILURE(errorCode
)) {
210 fprintf(stderr
, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
211 pname
, argv
[1], u_errorName(errorCode
));
/* route the swapper's error messages to stderr via printError() */
216 ds
->printError
=printError
;
217 ds
->printErrorContext
=stderr
;
219 /* speculative cast, protected by the following length check */
220 pInfo
=(const UDataInfo
*)((const char *)data
+4);
223 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
224 pInfo
->dataFormat
[1]==0x6d &&
225 pInfo
->dataFormat
[2]==0x6e &&
226 pInfo
->dataFormat
[3]==0x44
229 * swap the .dat package
230 * udata_swapPackage() needs to rename ToC name entries from the old package
231 * name to the new one.
232 * We pass it the filenames, and udata_swapPackage() will extract the
/* same pointer for input and output: the package is swapped in place */
235 length
=udata_swapPackage(argv
[1], argv
[2], ds
, data
, length
, data
, &errorCode
);
236 udata_closeSwapper(ds
);
237 if(U_FAILURE(errorCode
)) {
238 fprintf(stderr
, "%s: udata_swapPackage(\"%s\") failed - %s\n",
239 pname
, argv
[1], u_errorName(errorCode
));
244 /* swap the data, which is not a .dat package */
245 length
=udata_swap(ds
, data
, length
, data
, &errorCode
);
246 udata_closeSwapper(ds
);
247 if(U_FAILURE(errorCode
)) {
248 fprintf(stderr
, "%s: udata_swap(\"%s\") failed - %s\n",
249 pname
, argv
[1], u_errorName(errorCode
));
/* write the swapped bytes; length is now the value returned by the swap call */
255 out
=fopen(argv
[2], "wb");
257 fprintf(stderr
, "%s: unable to open output file \"%s\"\n", pname
, argv
[2]);
262 if(length
!=(int32_t)fwrite(data
, 1, length
, out
)) {
263 fprintf(stderr
, "%s: error writing \"%s\"\n", pname
, argv
[2]);
287 /* swap .dat package files -------------------------------------------------- */
/*
 * Derive the package name from a package filename.
 * Takes the basename of filename (via findBasename()), requires it to end
 * with ".dat" and to have a non-empty stem, and copies the stem (len bytes)
 * into pkg.
 * Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR and reports through
 * udata_printError(ds, ...) when the name is not a .dat filename or is too
 * long. The caller assigns the result to a package-name length variable.
 */
290 extractPackageName(const UDataSwapper
*ds
, const char *filename
,
291 char pkg
[], int32_t capacity
,
292 UErrorCode
*pErrorCode
) {
293 const char *basename
;
296 if(U_FAILURE(*pErrorCode
)) {
300 basename
=findBasename(filename
);
301 len
=(int32_t)uprv_strlen(basename
)-4; /* -4: subtract the length of ".dat" */
/* len<=0 also guards the basename+len pointer for names shorter than 4 chars */
303 if(len
<=0 || 0!=uprv_strcmp(basename
+len
, ".dat")) {
304 udata_printError(ds
, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
306 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/*
 * NOTE(review): this format has two conversions (%s and %ld); the argument
 * list is not visible in this view - verify that both are supplied.
 */
311 udata_printError(ds
, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
313 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* copies only the stem; NOTE(review): termination of pkg is not visible here */
317 uprv_memcpy(pkg
, basename
, len
);
/*
 * Per-item fields used through table[] in udata_swapPackage():
 * nameOffset (offset of the item name), inOffset/outOffset (item offset in
 * the input/output data) and length (item length in bytes).
 * NOTE(review): this looks like the member list of the ToCEntry struct; its
 * opening and closing lines are not visible in this view.
 */
323 uint32_t nameOffset
, inOffset
, outOffset
, length
;
/*
 * Comparison callback passed to uprv_sortArray() in udata_swapPackage().
 * context is a char base pointer; left and right are ToCEntry pointers.
 * Compares the two NUL-terminated names found at context+nameOffset with
 * uprv_strcmp() and returns that result.
 */
327 static int32_t U_CALLCONV
328 compareToCEntries(const void *context
, const void *left
, const void *right
) {
329 const char *chars
=(const char *)context
;
330 return (int32_t)uprv_strcmp(chars
+((const ToCEntry
*)left
)->nameOffset
,
331 chars
+((const ToCEntry
*)right
)->nameOffset
);
/*
 * Swap an ICU .dat package (dataFormat "CmnD", formatVersion[0]==1),
 * including the items it contains.
 *
 * inFilename/outFilename: used only to extract the input and output package
 *     names, which must have the same length.
 * ds: swapper describing the input and output platform properties.
 * inData/length: input bytes; outData: output buffer. When inData==outData
 *     the items are swapped into a temporary buffer and copied back.
 * pErrorCode: in/out ICU error code.
 *
 * Items that fail to swap are reported as warnings and copied unchanged.
 * The final return value is headerSize+length; the preflighting path returns
 * headerSize plus the last item's offset and length.
 */
335 U_CFUNC
int32_t U_CALLCONV
336 udata_swapPackage(const char *inFilename
, const char *outFilename
,
337 const UDataSwapper
*ds
,
338 const void *inData
, int32_t length
, void *outData
,
339 UErrorCode
*pErrorCode
) {
340 const UDataInfo
*pInfo
;
343 const uint8_t *inBytes
;
346 uint32_t itemCount
, offset
, i
;
349 const UDataOffsetTOCEntry
*inEntries
;
350 UDataOffsetTOCEntry
*outEntries
;
354 char inPkgName
[32], outPkgName
[32];
355 int32_t inPkgNameLength
, outPkgNameLength
;
357 /* udata_swapDataHeader checks the arguments */
358 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
359 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
363 /* check data format and format version */
364 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
366 pInfo
->dataFormat
[0]==0x43 && /* dataFormat="CmnD" */
367 pInfo
->dataFormat
[1]==0x6d &&
368 pInfo
->dataFormat
[2]==0x6e &&
369 pInfo
->dataFormat
[3]==0x44 &&
370 pInfo
->formatVersion
[0]==1
372 udata_printError(ds
, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
373 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
374 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
375 pInfo
->formatVersion
[0]);
376 *pErrorCode
=U_UNSUPPORTED_ERROR
;
381 * We need to change the ToC name entries so that they have the correct
382 * package name prefix.
383 * Extract the package names from the in/out filenames.
385 inPkgNameLength
=extractPackageName(
387 inPkgName
, (int32_t)sizeof(inPkgName
),
389 outPkgNameLength
=extractPackageName(
391 outPkgName
, (int32_t)sizeof(outPkgName
),
393 if(U_FAILURE(*pErrorCode
)) {
398 * It is possible to work with inPkgNameLength!=outPkgNameLength,
399 * but then the length of the data file would change more significantly,
400 * which we are not currently prepared for.
402 if(inPkgNameLength
!=outPkgNameLength
) {
403 udata_printError(ds
, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
404 inPkgName
, outPkgName
);
405 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* the package body starts after the data header: item count, then the ToC */
409 inBytes
=(const uint8_t *)inData
+headerSize
;
410 inEntries
=(const UDataOffsetTOCEntry
*)(inBytes
+4);
414 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
416 /* no items: count only the item count and return */
420 /* read the last item's offset and preflight it */
421 offset
=ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
422 itemLength
=udata_swap(ds
, inBytes
+offset
, -1, NULL
, pErrorCode
);
424 if(U_SUCCESS(*pErrorCode
)) {
425 return headerSize
+offset
+(uint32_t)itemLength
;
430 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
433 /* itemCount does not fit */
435 itemCount
=0; /* make compilers happy */
437 itemCount
=ds
->readUInt32(*(const uint32_t *)inBytes
);
440 } else if((uint32_t)length
<(4+8*itemCount
)) {
441 /* ToC table does not fit */
444 /* offset of the last item plus at least 20 bytes for its header */
445 offset
=20+ds
->readUInt32(inEntries
[itemCount
-1].dataOffset
);
448 if((uint32_t)length
<offset
) {
449 udata_printError(ds
, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
451 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
455 outBytes
=(uint8_t *)outData
+headerSize
;
457 /* swap the item count */
458 ds
->swapArray32(ds
, inBytes
, 4, outBytes
, pErrorCode
);
461 /* no items: just return now */
465 /* swap the item name strings */
/* the name strings occupy the bytes between the ToC and the first item */
466 offset
=4+8*itemCount
;
467 itemLength
=(int32_t)(ds
->readUInt32(inEntries
[0].dataOffset
)-offset
);
468 udata_swapInvStringBlock(ds
, inBytes
+offset
, itemLength
, outBytes
+offset
, pErrorCode
);
469 if(U_FAILURE(*pErrorCode
)) {
470 udata_printError(ds
, "udata_swapPackage() failed to swap the data item name strings\n");
473 /* keep offset and itemLength in case we allocate and copy the strings below */
475 /* swap the package names into the output charset */
476 if(ds
->outCharset
!=U_CHARSET_FAMILY
) {
/*
 * NOTE(review): ds2 is dereferenced right after udata_openSwapper() without
 * a NULL or U_FAILURE check; *pErrorCode is only examined after
 * udata_closeSwapper(). Confirm that udata_openSwapper() cannot return NULL
 * here (its failure behavior is not shown in this file).
 */
478 ds2
=udata_openSwapper(TRUE
, U_CHARSET_FAMILY
, TRUE
, ds
->outCharset
, pErrorCode
);
479 ds2
->swapInvChars(ds2
, inPkgName
, inPkgNameLength
, inPkgName
, pErrorCode
);
480 ds2
->swapInvChars(ds2
, outPkgName
, outPkgNameLength
, outPkgName
, pErrorCode
);
481 udata_closeSwapper(ds2
);
482 if(U_FAILURE(*pErrorCode
)) {
483 udata_printError(ds
, "udata_swapPackage() failed to swap the input/output package names\n");
487 /* change the prefix of each ToC entry name from the old to the new package name */
491 for(i
=0; i
<itemCount
; ++i
) {
492 entryName
=(char *)inBytes
+ds
->readUInt32(inEntries
[i
].nameOffset
);
/* overwrite the prefix in place; both package names have the same length */
494 if(0==uprv_memcmp(entryName
, inPkgName
, inPkgNameLength
)) {
495 uprv_memcpy(entryName
, outPkgName
, inPkgNameLength
);
497 udata_printError(ds
, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
499 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
506 * Allocate the ToC table and, if necessary, a temporary buffer for
507 * pseudo-in-place swapping.
509 * We cannot swap in-place because:
511 * 1. If the swapping of an item fails mid-way, then in-place swapping
512 * has destroyed its data.
513 * Out-of-place swapping allows us to then copy its original data.
515 * 2. If swapping changes the charset family, then we must resort
516 * not only the ToC table but also the data items themselves.
517 * This requires a permutation and is best done with separate in/out
520 * We swapped the strings above to avoid the malloc below if string swapping fails.
522 if(inData
==outData
) {
523 /* +15: prepare for extra padding of a newly-last item */
/* one allocation holds the ToC table followed by the temporary output buffer */
524 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
);
526 outBytes
=(uint8_t *)(table
+itemCount
);
528 /* copy the item count and the swapped strings */
529 uprv_memcpy(outBytes
, inBytes
, 4);
530 uprv_memcpy(outBytes
+offset
, inBytes
+offset
, itemLength
);
533 table
=(ToCEntry
*)uprv_malloc(itemCount
*sizeof(ToCEntry
));
/*
 * NOTE(review): the byte counts passed below involve sizeof, so they have a
 * size_t-based type, but the format uses %d. This mismatches on platforms
 * where size_t is wider than int - confirm and cast or change the format.
 */
536 udata_printError(ds
, "udata_swapPackage(): out of memory allocating %d bytes\n",
538 itemCount
*sizeof(ToCEntry
)+length
+DEFAULT_PADDING_LENGTH
:
539 itemCount
*sizeof(ToCEntry
));
540 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
543 outEntries
=(UDataOffsetTOCEntry
*)(outBytes
+4);
545 /* read the ToC table */
546 for(i
=0; i
<itemCount
; ++i
) {
547 table
[i
].nameOffset
=ds
->readUInt32(inEntries
[i
].nameOffset
);
548 table
[i
].inOffset
=ds
->readUInt32(inEntries
[i
].dataOffset
);
/* an item's length is the distance to the next item's input offset */
550 table
[i
-1].length
=table
[i
].inOffset
-table
[i
-1].inOffset
;
/* the last item extends to the end of the data */
553 table
[itemCount
-1].length
=(uint32_t)length
-table
[itemCount
-1].inOffset
;
555 if(ds
->inCharset
==ds
->outCharset
) {
556 /* no charset swapping, no resorting: keep item offsets the same */
557 for(i
=0; i
<itemCount
; ++i
) {
558 table
[i
].outOffset
=table
[i
].inOffset
;
561 /* charset swapping: resort items by their swapped names */
564 * Before the actual sorting, we need to make sure that each item
565 * has a length that is a multiple of 16 bytes so that all items
567 * Only the old last item may be missing up to 15 padding bytes.
568 * Add padding bytes for it.
569 * Since the icuswap main() function has already allocated enough
570 * input buffer space and set the last 15 bytes there to 0xaa,
571 * we only need to increase the total data length and the length
572 * of the last item here.
574 if((length
&0xf)!=0) {
575 int32_t delta
=16-(length
&0xf);
577 table
[itemCount
-1].length
+=(uint32_t)delta
;
580 /* Save the offset before we sort the TOC. */
581 offset
=table
[0].inOffset
;
582 /* sort the TOC entries */
/* outBytes is the comparator context: names are compared in the output charset */
583 uprv_sortArray(table
, (int32_t)itemCount
, (int32_t)sizeof(ToCEntry
),
584 compareToCEntries
, outBytes
, FALSE
, pErrorCode
);
587 * Note: Before sorting, the inOffset values were in order.
588 * Now the outOffset values are in order.
591 /* assign outOffset values */
592 for(i
=0; i
<itemCount
; ++i
) {
593 table
[i
].outOffset
=offset
;
594 offset
+=table
[i
].length
;
598 /* write the output ToC table */
599 for(i
=0; i
<itemCount
; ++i
) {
600 ds
->writeUInt32(&outEntries
[i
].nameOffset
, table
[i
].nameOffset
);
601 ds
->writeUInt32(&outEntries
[i
].dataOffset
, table
[i
].outOffset
);
604 /* swap each data item */
605 for(i
=0; i
<itemCount
; ++i
) {
606 /* first copy the item bytes to make sure that unreachable bytes are copied */
607 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
610 udata_swap(ds
, inBytes
+table
[i
].inOffset
, (int32_t)table
[i
].length
,
611 outBytes
+table
[i
].outOffset
, pErrorCode
);
613 if(U_FAILURE(*pErrorCode
)) {
/* the item name is printed only when the output charset is this platform's */
614 if(ds
->outCharset
==U_CHARSET_FAMILY
) {
615 udata_printError(ds
, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
616 " at inOffset 0x%x length 0x%x - %s\n"
617 " the data item will be copied, not swapped\n\n",
618 (char *)outBytes
+table
[i
].nameOffset
,
619 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
621 udata_printError(ds
, "warning: udata_swapPackage() failed to swap an item\n"
622 " at inOffset 0x%x length 0x%x - %s\n"
623 " the data item will be copied, not swapped\n\n",
624 table
[i
].inOffset
, table
[i
].length
, u_errorName(*pErrorCode
));
626 /* reset the error code, copy the data item, and continue */
627 *pErrorCode
=U_ZERO_ERROR
;
628 uprv_memcpy(outBytes
+table
[i
].outOffset
, inBytes
+table
[i
].inOffset
, table
[i
].length
);
632 if(inData
==outData
) {
633 /* copy the data from the temporary buffer to the in-place buffer */
634 uprv_memcpy((uint8_t *)outData
+headerSize
, outBytes
, length
);
638 return headerSize
+length
;
643 * Hey, Emacs, please set the following:
646 * indent-tabs-mode: nil