| 1 | /* |
| 2 | ******************************************************************************* |
| 3 | * |
| 4 | * Copyright (C) 2003-2004, International Business Machines |
| 5 | * Corporation and others. All Rights Reserved. |
| 6 | * |
| 7 | ******************************************************************************* |
| 8 | * file name: icuswap.cpp |
| 9 | * encoding: US-ASCII |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2003aug08 |
| 14 | * created by: Markus W. Scherer |
| 15 | * |
| 16 | * This tool takes an ICU data file and "swaps" it, that is, changes its |
| 17 | * platform properties between big-/little-endianness and ASCII/EBCDIC charset |
| 18 | * families. |
| 19 | * The modified data file is written to a new file. |
| 20 | * Useful as an install-time tool for shipping only one flavor of ICU data |
| 21 | * and preparing data files for the target platform. |
| 22 | * Will not work with data DLLs (shared libraries). |
| 23 | */ |
| 24 | |
| 25 | #include "unicode/utypes.h" |
| 26 | #include "unicode/putil.h" |
| 27 | #include "unicode/udata.h" |
| 28 | #include "cmemory.h" |
| 29 | #include "cstring.h" |
| 30 | #include "uinvchar.h" |
| 31 | #include "uarrsort.h" |
| 32 | #include "ucmndata.h" |
| 33 | #include "udataswp.h" |
| 34 | #include "toolutil.h" |
| 35 | #include "uoptions.h" |
| 36 | |
| 37 | /* swapping implementations in common */ |
| 38 | |
| 39 | #include "uresdata.h" |
| 40 | #include "ucnv_io.h" |
| 41 | #include "uprops.h" |
| 42 | #include "ucase.h" |
| 43 | #include "ucol_swp.h" |
| 44 | #include "ucnv_bld.h" |
| 45 | #include "unormimp.h" |
| 46 | #include "sprpimpl.h" |
| 47 | #include "propname.h" |
| 48 | #include "rbbidata.h" |
| 49 | |
| 50 | #include <stdio.h> |
| 51 | #include <stdlib.h> |
| 52 | #include <string.h> |
| 53 | |
| 54 | /* swapping implementations in i18n */ |
| 55 | |
| 56 | /* definitions */ |
| 57 | |
/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof(*(array))))
| 59 | |
| 60 | static UOption options[]={ |
| 61 | UOPTION_HELP_H, |
| 62 | UOPTION_HELP_QUESTION_MARK, |
| 63 | UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) |
| 64 | }; |
| 65 | |
/* Indexes into options[]; keep in sync with the order of that array. */
enum {
    OPT_HELP_H=0,
    OPT_HELP_QUESTION_MARK=1,
    OPT_OUT_TYPE=2
};
| 71 | |
/**
 * Determine the size of an open file and rewind the stream to its beginning.
 *
 * @param f stream to measure; should be opened in binary mode
 * @return the size in bytes, or -1 if f is NULL, if seeking or telling fails,
 *         or if the size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(f==NULL || fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);

    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }

    /* reject ftell() errors and sizes that would be truncated by the cast */
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
| 81 | |
| 82 | /** |
| 83 | * Identifies and then transforms the ICU data piece in-place, or determines |
| 84 | * its length. See UDataSwapFn. |
| 85 | * This function handles .dat data packages as well as single data pieces |
| 86 | * and internally dispatches to per-type swap functions. |
| 87 | * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized. |
| 88 | * |
| 89 | * @see UDataSwapFn |
| 90 | * @see udata_openSwapper |
| 91 | * @see udata_openSwapperForInputData |
| 92 | * @draft ICU 2.8 |
| 93 | */ |
| 94 | static int32_t |
| 95 | udata_swap(const UDataSwapper *ds, |
| 96 | const void *inData, int32_t length, void *outData, |
| 97 | UErrorCode *pErrorCode); |
| 98 | |
| 99 | /** |
| 100 | * Swap an ICU .dat package, including swapping of enclosed items. |
| 101 | */ |
| 102 | U_CFUNC int32_t U_CALLCONV |
| 103 | udata_swapPackage(const UDataSwapper *ds, |
| 104 | const void *inData, int32_t length, void *outData, |
| 105 | UErrorCode *pErrorCode); |
| 106 | |
/*
 * udata_swapPackage() has to rename the ToC name entries from the old
 * package name to the new one.
 * main() stores the input and output filenames here, and
 * udata_swapPackage() extracts the package names from them.
 */
static const char *inFilename;
static const char *outFilename;
| 114 | |
| 115 | U_CDECL_BEGIN |
| 116 | static void U_CALLCONV |
| 117 | printError(void *context, const char *fmt, va_list args) { |
| 118 | vfprintf((FILE *)context, fmt, args); |
| 119 | } |
| 120 | U_CDECL_END |
| 121 | |
| 122 | static int |
| 123 | printUsage(const char *pname, UBool ishelp) { |
| 124 | fprintf(stderr, |
| 125 | "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", |
| 126 | ishelp ? 'U' : 'u', pname); |
| 127 | if(ishelp) { |
| 128 | fprintf(stderr, |
| 129 | "\nOptions: -h, -?, --help print this message and exit\n" |
| 130 | " Read the input file, swap its platform properties according\n" |
| 131 | " to the -t or --type option, and write the result to the output file.\n" |
| 132 | " -tl change to little-endian/ASCII charset family\n" |
| 133 | " -tb change to big-endian/ASCII charset family\n" |
| 134 | " -te change to big-endian/EBCDIC charset family\n"); |
| 135 | } |
| 136 | |
| 137 | return !ishelp; |
| 138 | } |
| 139 | |
| 140 | extern int |
| 141 | main(int argc, char *argv[]) { |
| 142 | FILE *in, *out; |
| 143 | const char *pname; |
| 144 | char *data; |
| 145 | int32_t length; |
| 146 | UBool ishelp; |
| 147 | int rc; |
| 148 | |
| 149 | UDataSwapper *ds; |
| 150 | UErrorCode errorCode; |
| 151 | uint8_t outCharset; |
| 152 | UBool outIsBigEndian; |
| 153 | |
| 154 | U_MAIN_INIT_ARGS(argc, argv); |
| 155 | |
| 156 | /* get the program basename */ |
| 157 | pname=strrchr(argv[0], U_FILE_SEP_CHAR); |
| 158 | if(pname==NULL) { |
| 159 | pname=strrchr(argv[0], '/'); |
| 160 | } |
| 161 | if(pname!=NULL) { |
| 162 | ++pname; |
| 163 | } else { |
| 164 | pname=argv[0]; |
| 165 | } |
| 166 | |
| 167 | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
| 168 | ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; |
| 169 | if(ishelp || argc!=3) { |
| 170 | return printUsage(pname, ishelp); |
| 171 | } |
| 172 | |
| 173 | /* parse the output type option */ |
| 174 | data=(char *)options[OPT_OUT_TYPE].value; |
| 175 | if(data[0]==0 || data[1]!=0) { |
| 176 | /* the type must be exactly one letter */ |
| 177 | return printUsage(pname, FALSE); |
| 178 | } |
| 179 | switch(data[0]) { |
| 180 | case 'l': |
| 181 | outIsBigEndian=FALSE; |
| 182 | outCharset=U_ASCII_FAMILY; |
| 183 | break; |
| 184 | case 'b': |
| 185 | outIsBigEndian=TRUE; |
| 186 | outCharset=U_ASCII_FAMILY; |
| 187 | break; |
| 188 | case 'e': |
| 189 | outIsBigEndian=TRUE; |
| 190 | outCharset=U_EBCDIC_FAMILY; |
| 191 | break; |
| 192 | default: |
| 193 | return printUsage(pname, FALSE); |
| 194 | } |
| 195 | |
| 196 | in=out=NULL; |
| 197 | data=NULL; |
| 198 | |
| 199 | /* udata_swapPackage() needs the filenames */ |
| 200 | inFilename=argv[1]; |
| 201 | outFilename=argv[2]; |
| 202 | |
| 203 | /* open the input file, get its length, allocate memory for it, read the file */ |
| 204 | in=fopen(argv[1], "rb"); |
| 205 | if(in==NULL) { |
| 206 | fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); |
| 207 | rc=2; |
| 208 | goto done; |
| 209 | } |
| 210 | |
| 211 | length=fileSize(in); |
| 212 | if(length<=0) { |
| 213 | fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]); |
| 214 | rc=2; |
| 215 | goto done; |
| 216 | } |
| 217 | |
| 218 | /* |
| 219 | * +15: udata_swapPackage() may need to add a few padding bytes to the |
| 220 | * last item if charset swapping is done, |
| 221 | * because the last item may be resorted into the middle and then needs |
| 222 | * additional padding bytes |
| 223 | */ |
| 224 | data=(char *)malloc(length+15); |
| 225 | if(data==NULL) { |
| 226 | fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]); |
| 227 | rc=2; |
| 228 | goto done; |
| 229 | } |
| 230 | |
| 231 | /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */ |
| 232 | uprv_memset(data+length-15, 0xaa, 15); |
| 233 | |
| 234 | if(length!=(int32_t)fread(data, 1, length, in)) { |
| 235 | fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]); |
| 236 | rc=3; |
| 237 | goto done; |
| 238 | } |
| 239 | |
| 240 | fclose(in); |
| 241 | in=NULL; |
| 242 | |
| 243 | /* swap the data in-place */ |
| 244 | errorCode=U_ZERO_ERROR; |
| 245 | ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode); |
| 246 | if(U_FAILURE(errorCode)) { |
| 247 | fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n", |
| 248 | pname, argv[1], u_errorName(errorCode)); |
| 249 | rc=4; |
| 250 | goto done; |
| 251 | } |
| 252 | |
| 253 | ds->printError=printError; |
| 254 | ds->printErrorContext=stderr; |
| 255 | |
| 256 | length=udata_swap(ds, data, length, data, &errorCode); |
| 257 | udata_closeSwapper(ds); |
| 258 | if(U_FAILURE(errorCode)) { |
| 259 | fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", |
| 260 | pname, argv[1], u_errorName(errorCode)); |
| 261 | rc=4; |
| 262 | goto done; |
| 263 | } |
| 264 | |
| 265 | out=fopen(argv[2], "wb"); |
| 266 | if(out==NULL) { |
| 267 | fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); |
| 268 | rc=5; |
| 269 | goto done; |
| 270 | } |
| 271 | |
| 272 | if(length!=(int32_t)fwrite(data, 1, length, out)) { |
| 273 | fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); |
| 274 | rc=6; |
| 275 | goto done; |
| 276 | } |
| 277 | |
| 278 | fclose(out); |
| 279 | out=NULL; |
| 280 | |
| 281 | /* all done */ |
| 282 | rc=0; |
| 283 | |
| 284 | done: |
| 285 | if(in!=NULL) { |
| 286 | fclose(in); |
| 287 | } |
| 288 | if(out!=NULL) { |
| 289 | fclose(out); |
| 290 | } |
| 291 | if(data!=NULL) { |
| 292 | free(data); |
| 293 | } |
| 294 | return rc; |
| 295 | } |
| 296 | |
| 297 | /* swap the data ------------------------------------------------------------ */ |
| 298 | |
| 299 | static const struct { |
| 300 | uint8_t dataFormat[4]; |
| 301 | UDataSwapFn *swapFn; |
| 302 | } swapFns[]={ |
| 303 | { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ |
| 304 | #if !UCONFIG_NO_LEGACY_CONVERSION |
| 305 | { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ |
| 306 | { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ |
| 307 | #endif |
| 308 | { { 0x43, 0x6d, 0x6e, 0x44 }, udata_swapPackage }, /* dataFormat="CmnD" */ |
| 309 | #if !UCONFIG_NO_IDNA |
| 310 | { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ |
| 311 | #endif |
| 312 | /* insert data formats here, descending by expected frequency of occurrence */ |
| 313 | { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ |
| 314 | |
| 315 | { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, |
| 316 | ucase_swap }, /* dataFormat="cAsE" */ |
| 317 | |
| 318 | #if !UCONFIG_NO_NORMALIZATION |
| 319 | { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ |
| 320 | #endif |
| 321 | #if !UCONFIG_NO_COLLATION |
| 322 | { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ |
| 323 | { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ |
| 324 | #endif |
| 325 | #if !UCONFIG_NO_BREAK_ITERATION |
| 326 | { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ |
| 327 | #endif |
| 328 | { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ |
| 329 | { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */ |
| 330 | }; |
| 331 | |
| 332 | static int32_t |
| 333 | udata_swap(const UDataSwapper *ds, |
| 334 | const void *inData, int32_t length, void *outData, |
| 335 | UErrorCode *pErrorCode) { |
| 336 | char dataFormatChars[4]; |
| 337 | const UDataInfo *pInfo; |
| 338 | int32_t headerSize, i, swappedLength; |
| 339 | |
| 340 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 341 | return 0; |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * Preflight the header first; checks for illegal arguments, too. |
| 346 | * Do not swap the header right away because the format-specific swapper |
| 347 | * will swap it, get the headerSize again, and also use the header |
| 348 | * information. Otherwise we would have to pass some of the information |
| 349 | * and not be able to use the UDataSwapFn signature. |
| 350 | */ |
| 351 | headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); |
| 352 | |
| 353 | /* |
| 354 | * If we wanted udata_swap() to also handle non-loadable data like a UTrie, |
| 355 | * then we could check here for further known magic values and structures. |
| 356 | */ |
| 357 | if(U_FAILURE(*pErrorCode)) { |
| 358 | return 0; /* the data format was not recognized */ |
| 359 | } |
| 360 | |
| 361 | pInfo=(const UDataInfo *)((const char *)inData+4); |
| 362 | |
| 363 | { |
| 364 | /* convert the data format from ASCII to Unicode to the system charset */ |
| 365 | UChar u[4]={ |
| 366 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
| 367 | pInfo->dataFormat[2], pInfo->dataFormat[3] |
| 368 | }; |
| 369 | |
| 370 | if(uprv_isInvariantUString(u, 4)) { |
| 371 | u_UCharsToChars(u, dataFormatChars, 4); |
| 372 | } else { |
| 373 | dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | /* dispatch to the swap function for the dataFormat */ |
| 378 | for(i=0; i<LENGTHOF(swapFns); ++i) { |
| 379 | if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { |
| 380 | swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); |
| 381 | |
| 382 | if(U_FAILURE(*pErrorCode)) { |
| 383 | udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", |
| 384 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
| 385 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
| 386 | dataFormatChars[0], dataFormatChars[1], |
| 387 | dataFormatChars[2], dataFormatChars[3], |
| 388 | u_errorName(*pErrorCode)); |
| 389 | } else if(swappedLength<(length-15)) { |
| 390 | /* swapped less than expected */ |
| 391 | udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", |
| 392 | swappedLength, length, |
| 393 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
| 394 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
| 395 | dataFormatChars[0], dataFormatChars[1], |
| 396 | dataFormatChars[2], dataFormatChars[3], |
| 397 | u_errorName(*pErrorCode)); |
| 398 | } |
| 399 | |
| 400 | return swappedLength; |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | /* the dataFormat was not recognized */ |
| 405 | udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", |
| 406 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
| 407 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
| 408 | dataFormatChars[0], dataFormatChars[1], |
| 409 | dataFormatChars[2], dataFormatChars[3]); |
| 410 | |
| 411 | *pErrorCode=U_UNSUPPORTED_ERROR; |
| 412 | return 0; |
| 413 | } |
| 414 | |
| 415 | /* swap .dat package files -------------------------------------------------- */ |
| 416 | |
| 417 | static int32_t |
| 418 | extractPackageName(const UDataSwapper *ds, const char *filename, |
| 419 | char pkg[], int32_t capacity, |
| 420 | UErrorCode *pErrorCode) { |
| 421 | const char *basename; |
| 422 | int32_t len; |
| 423 | |
| 424 | if(U_FAILURE(*pErrorCode)) { |
| 425 | return 0; |
| 426 | } |
| 427 | |
| 428 | basename=findBasename(filename); |
| 429 | len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ |
| 430 | |
| 431 | if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { |
| 432 | udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", |
| 433 | basename); |
| 434 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 435 | return 0; |
| 436 | } |
| 437 | |
| 438 | if(len>=capacity) { |
| 439 | udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", |
| 440 | (long)capacity); |
| 441 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 442 | return 0; |
| 443 | } |
| 444 | |
| 445 | uprv_memcpy(pkg, basename, len); |
| 446 | pkg[len]=0; |
| 447 | return len; |
| 448 | } |
| 449 | |
/* Working copy of one package ToC entry, used by udata_swapPackage(). */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name, as read from the input ToC */
    uint32_t inOffset;      /* offset of the item data in the input */
    uint32_t outOffset;     /* offset of the item data in the output */
    uint32_t length;        /* number of bytes of the item data */
};
| 453 | |
| 454 | U_CDECL_BEGIN |
| 455 | static int32_t U_CALLCONV |
| 456 | compareToCEntries(const void *context, const void *left, const void *right) { |
| 457 | const char *chars=(const char *)context; |
| 458 | return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, |
| 459 | chars+((const ToCEntry *)right)->nameOffset); |
| 460 | } |
| 461 | U_CDECL_END |
| 462 | |
| 463 | U_CFUNC int32_t U_CALLCONV |
| 464 | udata_swapPackage(const UDataSwapper *ds, |
| 465 | const void *inData, int32_t length, void *outData, |
| 466 | UErrorCode *pErrorCode) { |
| 467 | const UDataInfo *pInfo; |
| 468 | int32_t headerSize; |
| 469 | |
| 470 | const uint8_t *inBytes; |
| 471 | uint8_t *outBytes; |
| 472 | |
| 473 | uint32_t itemCount, offset, i; |
| 474 | int32_t itemLength; |
| 475 | |
| 476 | const UDataOffsetTOCEntry *inEntries; |
| 477 | UDataOffsetTOCEntry *outEntries; |
| 478 | |
| 479 | ToCEntry *table; |
| 480 | |
| 481 | char inPkgName[32], outPkgName[32]; |
| 482 | int32_t inPkgNameLength, outPkgNameLength; |
| 483 | |
| 484 | /* udata_swapDataHeader checks the arguments */ |
| 485 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
| 486 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 487 | return 0; |
| 488 | } |
| 489 | |
| 490 | /* check data format and format version */ |
| 491 | pInfo=(const UDataInfo *)((const char *)inData+4); |
| 492 | if(!( |
| 493 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ |
| 494 | pInfo->dataFormat[1]==0x6d && |
| 495 | pInfo->dataFormat[2]==0x6e && |
| 496 | pInfo->dataFormat[3]==0x44 && |
| 497 | pInfo->formatVersion[0]==1 |
| 498 | )) { |
| 499 | udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", |
| 500 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
| 501 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
| 502 | pInfo->formatVersion[0]); |
| 503 | *pErrorCode=U_UNSUPPORTED_ERROR; |
| 504 | return 0; |
| 505 | } |
| 506 | |
| 507 | /* |
| 508 | * We need to change the ToC name entries so that they have the correct |
| 509 | * package name prefix. |
| 510 | * Extract the package names from the in/out filenames. |
| 511 | */ |
| 512 | inPkgNameLength=extractPackageName( |
| 513 | ds, inFilename, |
| 514 | inPkgName, (int32_t)sizeof(inPkgName), |
| 515 | pErrorCode); |
| 516 | outPkgNameLength=extractPackageName( |
| 517 | ds, outFilename, |
| 518 | outPkgName, (int32_t)sizeof(outPkgName), |
| 519 | pErrorCode); |
| 520 | if(U_FAILURE(*pErrorCode)) { |
| 521 | return 0; |
| 522 | } |
| 523 | |
| 524 | /* |
| 525 | * It is possible to work with inPkgNameLength!=outPkgNameLength, |
| 526 | * but then the length of the data file would change more significantly, |
| 527 | * which we are not currently prepared for. |
| 528 | */ |
| 529 | if(inPkgNameLength!=outPkgNameLength) { |
| 530 | udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", |
| 531 | inPkgName, outPkgName); |
| 532 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 533 | return 0; |
| 534 | } |
| 535 | |
| 536 | inBytes=(const uint8_t *)inData+headerSize; |
| 537 | inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); |
| 538 | |
| 539 | if(length<0) { |
| 540 | /* preflighting */ |
| 541 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); |
| 542 | if(itemCount==0) { |
| 543 | /* no items: count only the item count and return */ |
| 544 | return headerSize+4; |
| 545 | } |
| 546 | |
| 547 | /* read the last item's offset and preflight it */ |
| 548 | offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); |
| 549 | itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); |
| 550 | |
| 551 | if(U_SUCCESS(*pErrorCode)) { |
| 552 | return headerSize+offset+(uint32_t)itemLength; |
| 553 | } else { |
| 554 | return 0; |
| 555 | } |
| 556 | } else { |
| 557 | /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ |
| 558 | length-=headerSize; |
| 559 | if(length<4) { |
| 560 | /* itemCount does not fit */ |
| 561 | offset=0xffffffff; |
| 562 | itemCount=0; /* make compilers happy */ |
| 563 | } else { |
| 564 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); |
| 565 | if(itemCount==0) { |
| 566 | offset=4; |
| 567 | } else if((uint32_t)length<(4+8*itemCount)) { |
| 568 | /* ToC table does not fit */ |
| 569 | offset=0xffffffff; |
| 570 | } else { |
| 571 | /* offset of the last item plus at least 20 bytes for its header */ |
| 572 | offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); |
| 573 | } |
| 574 | } |
| 575 | if((uint32_t)length<offset) { |
| 576 | udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n", |
| 577 | length); |
| 578 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 579 | return 0; |
| 580 | } |
| 581 | |
| 582 | outBytes=(uint8_t *)outData+headerSize; |
| 583 | |
| 584 | /* swap the item count */ |
| 585 | ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); |
| 586 | |
| 587 | if(itemCount==0) { |
| 588 | /* no items: just return now */ |
| 589 | return headerSize+4; |
| 590 | } |
| 591 | |
| 592 | /* swap the item name strings */ |
| 593 | offset=4+8*itemCount; |
| 594 | itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); |
| 595 | udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); |
| 596 | if(U_FAILURE(*pErrorCode)) { |
| 597 | udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); |
| 598 | return 0; |
| 599 | } |
| 600 | /* keep offset and itemLength in case we allocate and copy the strings below */ |
| 601 | |
| 602 | /* swap the package names into the output charset */ |
| 603 | if(ds->outCharset!=U_CHARSET_FAMILY) { |
| 604 | UDataSwapper *ds2; |
| 605 | ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); |
| 606 | ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); |
| 607 | ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); |
| 608 | udata_closeSwapper(ds2); |
| 609 | if(U_FAILURE(*pErrorCode)) { |
| 610 | udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); |
| 611 | } |
| 612 | } |
| 613 | |
| 614 | /* change the prefix of each ToC entry name from the old to the new package name */ |
| 615 | { |
| 616 | char *entryName; |
| 617 | |
| 618 | for(i=0; i<itemCount; ++i) { |
| 619 | entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); |
| 620 | |
| 621 | if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { |
| 622 | uprv_memcpy(entryName, outPkgName, inPkgNameLength); |
| 623 | } else { |
| 624 | udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", |
| 625 | (long)i); |
| 626 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 627 | return 0; |
| 628 | } |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | /* |
| 633 | * Allocate the ToC table and, if necessary, a temporary buffer for |
| 634 | * pseudo-in-place swapping. |
| 635 | * |
| 636 | * We cannot swap in-place because: |
| 637 | * |
| 638 | * 1. If the swapping of an item fails mid-way, then in-place swapping |
| 639 | * has destroyed its data. |
| 640 | * Out-of-place swapping allows us to then copy its original data. |
| 641 | * |
| 642 | * 2. If swapping changes the charset family, then we must resort |
| 643 | * not only the ToC table but also the data items themselves. |
| 644 | * This requires a permutation and is best done with separate in/out |
| 645 | * buffers. |
| 646 | * |
| 647 | * We swapped the strings above to avoid the malloc below if string swapping fails. |
| 648 | */ |
| 649 | if(inData==outData) { |
| 650 | /* +15: prepare for extra padding of a newly-last item */ |
| 651 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15); |
| 652 | if(table!=NULL) { |
| 653 | outBytes=(uint8_t *)(table+itemCount); |
| 654 | |
| 655 | /* copy the item count and the swapped strings */ |
| 656 | uprv_memcpy(outBytes, inBytes, 4); |
| 657 | uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); |
| 658 | } |
| 659 | } else { |
| 660 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); |
| 661 | } |
| 662 | if(table==NULL) { |
| 663 | udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", |
| 664 | inData==outData ? |
| 665 | itemCount*sizeof(ToCEntry)+length+15 : |
| 666 | itemCount*sizeof(ToCEntry)); |
| 667 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 668 | return 0; |
| 669 | } |
| 670 | outEntries=(UDataOffsetTOCEntry *)(outBytes+4); |
| 671 | |
| 672 | /* read the ToC table */ |
| 673 | for(i=0; i<itemCount; ++i) { |
| 674 | table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); |
| 675 | table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); |
| 676 | if(i>0) { |
| 677 | table[i-1].length=table[i].inOffset-table[i-1].inOffset; |
| 678 | } |
| 679 | } |
| 680 | table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; |
| 681 | |
| 682 | if(ds->inCharset==ds->outCharset) { |
| 683 | /* no charset swapping, no resorting: keep item offsets the same */ |
| 684 | for(i=0; i<itemCount; ++i) { |
| 685 | table[i].outOffset=table[i].inOffset; |
| 686 | } |
| 687 | } else { |
| 688 | /* charset swapping: resort items by their swapped names */ |
| 689 | |
| 690 | /* |
| 691 | * Before the actual sorting, we need to make sure that each item |
| 692 | * has a length that is a multiple of 16 bytes so that all items |
| 693 | * are 16-aligned. |
| 694 | * Only the old last item may be missing up to 15 padding bytes. |
| 695 | * Add padding bytes for it. |
| 696 | * Since the icuswap main() function has already allocated enough |
| 697 | * input buffer space and set the last 15 bytes there to 0xaa, |
| 698 | * we only need to increase the total data length and the length |
| 699 | * of the last item here. |
| 700 | */ |
| 701 | if((length&0xf)!=0) { |
| 702 | int32_t delta=16-(length&0xf); |
| 703 | length+=delta; |
| 704 | table[itemCount-1].length+=(uint32_t)delta; |
| 705 | } |
| 706 | |
| 707 | uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), |
| 708 | compareToCEntries, outBytes, FALSE, pErrorCode); |
| 709 | |
| 710 | /* |
| 711 | * Note: Before sorting, the inOffset values were in order. |
| 712 | * Now the outOffset values are in order. |
| 713 | */ |
| 714 | |
| 715 | /* assign outOffset values */ |
| 716 | offset=table[0].inOffset; |
| 717 | for(i=0; i<itemCount; ++i) { |
| 718 | table[i].outOffset=offset; |
| 719 | offset+=table[i].length; |
| 720 | } |
| 721 | } |
| 722 | |
| 723 | /* write the output ToC table */ |
| 724 | for(i=0; i<itemCount; ++i) { |
| 725 | ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); |
| 726 | ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); |
| 727 | } |
| 728 | |
| 729 | /* swap each data item */ |
| 730 | for(i=0; i<itemCount; ++i) { |
| 731 | /* first copy the item bytes to make sure that unreachable bytes are copied */ |
| 732 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); |
| 733 | |
| 734 | /* swap the item */ |
| 735 | udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, |
| 736 | outBytes+table[i].outOffset, pErrorCode); |
| 737 | |
| 738 | if(U_FAILURE(*pErrorCode)) { |
| 739 | if(ds->outCharset==U_CHARSET_FAMILY) { |
| 740 | udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" |
| 741 | " at inOffset 0x%x length 0x%x - %s\n" |
| 742 | " the data item will be copied, not swapped\n\n", |
| 743 | (char *)outBytes+table[i].nameOffset, |
| 744 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); |
| 745 | } else { |
| 746 | udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" |
| 747 | " at inOffset 0x%x length 0x%x - %s\n" |
| 748 | " the data item will be copied, not swapped\n\n", |
| 749 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); |
| 750 | } |
| 751 | /* reset the error code, copy the data item, and continue */ |
| 752 | *pErrorCode=U_ZERO_ERROR; |
| 753 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); |
| 754 | } |
| 755 | } |
| 756 | |
| 757 | if(inData==outData) { |
| 758 | /* copy the data from the temporary buffer to the in-place buffer */ |
| 759 | uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); |
| 760 | } |
| 761 | uprv_free(table); |
| 762 | |
| 763 | return headerSize+length; |
| 764 | } |
| 765 | } |
| 766 | |
| 767 | /* |
| 768 | * Hey, Emacs, please set the following: |
| 769 | * |
| 770 | * Local Variables: |
| 771 | * indent-tabs-mode: nil |
| 772 | * End: |
| 773 | * |
| 774 | */ |