]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2003-2004, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: icuswap.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003aug08 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This tool takes an ICU data file and "swaps" it, that is, changes its | |
17 | * platform properties between big-/little-endianness and ASCII/EBCDIC charset | |
18 | * families. | |
19 | * The modified data file is written to a new file. | |
20 | * Useful as an install-time tool for shipping only one flavor of ICU data | |
21 | * and preparing data files for the target platform. | |
22 | * Will not work with data DLLs (shared libraries). | |
23 | */ | |
24 | ||
25 | #include "unicode/utypes.h" | |
26 | #include "unicode/putil.h" | |
27 | #include "unicode/udata.h" | |
28 | #include "cmemory.h" | |
29 | #include "cstring.h" | |
30 | #include "uinvchar.h" | |
31 | #include "uarrsort.h" | |
32 | #include "ucmndata.h" | |
33 | #include "udataswp.h" | |
34 | #include "toolutil.h" | |
35 | #include "uoptions.h" | |
36 | ||
37 | /* swapping implementations in common */ | |
38 | ||
39 | #include "uresdata.h" | |
40 | #include "ucnv_io.h" | |
41 | #include "uprops.h" | |
42 | #include "ucase.h" | |
43 | #include "ucol_swp.h" | |
44 | #include "ucnv_bld.h" | |
45 | #include "unormimp.h" | |
46 | #include "sprpimpl.h" | |
47 | #include "propname.h" | |
48 | #include "rbbidata.h" | |
49 | ||
50 | #include <stdio.h> | |
51 | #include <stdlib.h> | |
52 | #include <string.h> | |
53 | ||
54 | /* swapping implementations in i18n */ | |
55 | ||
56 | /* definitions */ | |
57 | ||
/* Number of elements in a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
59 | ||
/*
 * Command-line options handed to u_parseArgs() in main().
 * The enum constants below are used as indexes into this array,
 * so both lists must be kept in the same order.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
};

/* indexes into options[] */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_OUT_TYPE
};
71 | ||
/**
 * Determine the size of an open file and rewind it to the beginning.
 *
 * @param f an open, seekable stream
 * @return the file size in bytes, or -1 if the size cannot be determined
 *         or does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long pos;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    pos=ftell(f);

    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0 || pos<0 || pos>0x7fffffffL) {
        return -1;
    }
    return (int32_t)pos;
}
81 | ||
82 | /** | |
83 | * Identifies and then transforms the ICU data piece in-place, or determines | |
84 | * its length. See UDataSwapFn. | |
85 | * This function handles .dat data packages as well as single data pieces | |
86 | * and internally dispatches to per-type swap functions. | |
87 | * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized. | |
88 | * | |
89 | * @see UDataSwapFn | |
90 | * @see udata_openSwapper | |
91 | * @see udata_openSwapperForInputData | |
92 | * @draft ICU 2.8 | |
93 | */ | |
94 | static int32_t | |
95 | udata_swap(const UDataSwapper *ds, | |
96 | const void *inData, int32_t length, void *outData, | |
97 | UErrorCode *pErrorCode); | |
98 | ||
99 | /** | |
100 | * Swap an ICU .dat package, including swapping of enclosed items. | |
101 | */ | |
102 | U_CFUNC int32_t U_CALLCONV | |
103 | udata_swapPackage(const UDataSwapper *ds, | |
104 | const void *inData, int32_t length, void *outData, | |
105 | UErrorCode *pErrorCode); | |
106 | ||
107 | /* | |
108 | * udata_swapPackage() needs to rename ToC name entries from the old package | |
109 | * name to the new one. | |
110 | * We store the filenames here, and udata_swapPackage() will extract the | |
111 | * package names. | |
112 | */ | |
113 | static const char *inFilename, *outFilename; | |
114 | ||
115 | U_CDECL_BEGIN | |
116 | static void U_CALLCONV | |
117 | printError(void *context, const char *fmt, va_list args) { | |
118 | vfprintf((FILE *)context, fmt, args); | |
119 | } | |
120 | U_CDECL_END | |
121 | ||
122 | static int | |
123 | printUsage(const char *pname, UBool ishelp) { | |
124 | fprintf(stderr, | |
125 | "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", | |
126 | ishelp ? 'U' : 'u', pname); | |
127 | if(ishelp) { | |
128 | fprintf(stderr, | |
129 | "\nOptions: -h, -?, --help print this message and exit\n" | |
130 | " Read the input file, swap its platform properties according\n" | |
131 | " to the -t or --type option, and write the result to the output file.\n" | |
132 | " -tl change to little-endian/ASCII charset family\n" | |
133 | " -tb change to big-endian/ASCII charset family\n" | |
134 | " -te change to big-endian/EBCDIC charset family\n"); | |
135 | } | |
136 | ||
137 | return !ishelp; | |
138 | } | |
139 | ||
140 | extern int | |
141 | main(int argc, char *argv[]) { | |
142 | FILE *in, *out; | |
143 | const char *pname; | |
144 | char *data; | |
145 | int32_t length; | |
146 | UBool ishelp; | |
147 | int rc; | |
148 | ||
149 | UDataSwapper *ds; | |
150 | UErrorCode errorCode; | |
151 | uint8_t outCharset; | |
152 | UBool outIsBigEndian; | |
153 | ||
154 | U_MAIN_INIT_ARGS(argc, argv); | |
155 | ||
156 | /* get the program basename */ | |
157 | pname=strrchr(argv[0], U_FILE_SEP_CHAR); | |
158 | if(pname==NULL) { | |
159 | pname=strrchr(argv[0], '/'); | |
160 | } | |
161 | if(pname!=NULL) { | |
162 | ++pname; | |
163 | } else { | |
164 | pname=argv[0]; | |
165 | } | |
166 | ||
167 | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); | |
168 | ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; | |
169 | if(ishelp || argc!=3) { | |
170 | return printUsage(pname, ishelp); | |
171 | } | |
172 | ||
173 | /* parse the output type option */ | |
174 | data=(char *)options[OPT_OUT_TYPE].value; | |
175 | if(data[0]==0 || data[1]!=0) { | |
176 | /* the type must be exactly one letter */ | |
177 | return printUsage(pname, FALSE); | |
178 | } | |
179 | switch(data[0]) { | |
180 | case 'l': | |
181 | outIsBigEndian=FALSE; | |
182 | outCharset=U_ASCII_FAMILY; | |
183 | break; | |
184 | case 'b': | |
185 | outIsBigEndian=TRUE; | |
186 | outCharset=U_ASCII_FAMILY; | |
187 | break; | |
188 | case 'e': | |
189 | outIsBigEndian=TRUE; | |
190 | outCharset=U_EBCDIC_FAMILY; | |
191 | break; | |
192 | default: | |
193 | return printUsage(pname, FALSE); | |
194 | } | |
195 | ||
196 | in=out=NULL; | |
197 | data=NULL; | |
198 | ||
199 | /* udata_swapPackage() needs the filenames */ | |
200 | inFilename=argv[1]; | |
201 | outFilename=argv[2]; | |
202 | ||
203 | /* open the input file, get its length, allocate memory for it, read the file */ | |
204 | in=fopen(argv[1], "rb"); | |
205 | if(in==NULL) { | |
206 | fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); | |
207 | rc=2; | |
208 | goto done; | |
209 | } | |
210 | ||
211 | length=fileSize(in); | |
212 | if(length<=0) { | |
213 | fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]); | |
214 | rc=2; | |
215 | goto done; | |
216 | } | |
217 | ||
218 | /* | |
219 | * +15: udata_swapPackage() may need to add a few padding bytes to the | |
220 | * last item if charset swapping is done, | |
221 | * because the last item may be resorted into the middle and then needs | |
222 | * additional padding bytes | |
223 | */ | |
224 | data=(char *)malloc(length+15); | |
225 | if(data==NULL) { | |
226 | fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]); | |
227 | rc=2; | |
228 | goto done; | |
229 | } | |
230 | ||
231 | /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */ | |
232 | uprv_memset(data+length-15, 0xaa, 15); | |
233 | ||
234 | if(length!=(int32_t)fread(data, 1, length, in)) { | |
235 | fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]); | |
236 | rc=3; | |
237 | goto done; | |
238 | } | |
239 | ||
240 | fclose(in); | |
241 | in=NULL; | |
242 | ||
243 | /* swap the data in-place */ | |
244 | errorCode=U_ZERO_ERROR; | |
245 | ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode); | |
246 | if(U_FAILURE(errorCode)) { | |
247 | fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n", | |
248 | pname, argv[1], u_errorName(errorCode)); | |
249 | rc=4; | |
250 | goto done; | |
251 | } | |
252 | ||
253 | ds->printError=printError; | |
254 | ds->printErrorContext=stderr; | |
255 | ||
256 | length=udata_swap(ds, data, length, data, &errorCode); | |
257 | udata_closeSwapper(ds); | |
258 | if(U_FAILURE(errorCode)) { | |
259 | fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", | |
260 | pname, argv[1], u_errorName(errorCode)); | |
261 | rc=4; | |
262 | goto done; | |
263 | } | |
264 | ||
265 | out=fopen(argv[2], "wb"); | |
266 | if(out==NULL) { | |
267 | fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); | |
268 | rc=5; | |
269 | goto done; | |
270 | } | |
271 | ||
272 | if(length!=(int32_t)fwrite(data, 1, length, out)) { | |
273 | fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); | |
274 | rc=6; | |
275 | goto done; | |
276 | } | |
277 | ||
278 | fclose(out); | |
279 | out=NULL; | |
280 | ||
281 | /* all done */ | |
282 | rc=0; | |
283 | ||
284 | done: | |
285 | if(in!=NULL) { | |
286 | fclose(in); | |
287 | } | |
288 | if(out!=NULL) { | |
289 | fclose(out); | |
290 | } | |
291 | if(data!=NULL) { | |
292 | free(data); | |
293 | } | |
294 | return rc; | |
295 | } | |
296 | ||
297 | /* swap the data ------------------------------------------------------------ */ | |
298 | ||
/*
 * Dispatch table for udata_swap(): maps the 4-byte dataFormat signature of a
 * data piece to the function that swaps that format.
 * udata_swap() searches the table linearly, comparing each dataFormat with
 * the one in the data header.
 * Some entries are compiled in only if the corresponding UCONFIG_NO_* switch
 * is not set.
 */
static const struct {
    uint8_t dataFormat[4];
    UDataSwapFn *swapFn;
} swapFns[]={
    { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */
#if !UCONFIG_NO_LEGACY_CONVERSION
    { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */
#endif
    { { 0x43, 0x6d, 0x6e, 0x44 }, udata_swapPackage }, /* dataFormat="CmnD" */
#if !UCONFIG_NO_IDNA
    { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */
#endif
    /* insert data formats here, descending by expected frequency of occurrence */
    { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */

    { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
      ucase_swap }, /* dataFormat="cAsE" */

#if !UCONFIG_NO_NORMALIZATION
    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
#endif
#if !UCONFIG_NO_COLLATION
    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
    { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
#endif
#if !UCONFIG_NO_BREAK_ITERATION
    { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */
#endif
    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames } /* dataFormat="unam" */
};
331 | ||
332 | static int32_t | |
333 | udata_swap(const UDataSwapper *ds, | |
334 | const void *inData, int32_t length, void *outData, | |
335 | UErrorCode *pErrorCode) { | |
336 | char dataFormatChars[4]; | |
337 | const UDataInfo *pInfo; | |
338 | int32_t headerSize, i, swappedLength; | |
339 | ||
340 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
341 | return 0; | |
342 | } | |
343 | ||
344 | /* | |
345 | * Preflight the header first; checks for illegal arguments, too. | |
346 | * Do not swap the header right away because the format-specific swapper | |
347 | * will swap it, get the headerSize again, and also use the header | |
348 | * information. Otherwise we would have to pass some of the information | |
349 | * and not be able to use the UDataSwapFn signature. | |
350 | */ | |
351 | headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); | |
352 | ||
353 | /* | |
354 | * If we wanted udata_swap() to also handle non-loadable data like a UTrie, | |
355 | * then we could check here for further known magic values and structures. | |
356 | */ | |
357 | if(U_FAILURE(*pErrorCode)) { | |
358 | return 0; /* the data format was not recognized */ | |
359 | } | |
360 | ||
361 | pInfo=(const UDataInfo *)((const char *)inData+4); | |
362 | ||
363 | { | |
364 | /* convert the data format from ASCII to Unicode to the system charset */ | |
365 | UChar u[4]={ | |
366 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
367 | pInfo->dataFormat[2], pInfo->dataFormat[3] | |
368 | }; | |
369 | ||
370 | if(uprv_isInvariantUString(u, 4)) { | |
371 | u_UCharsToChars(u, dataFormatChars, 4); | |
372 | } else { | |
373 | dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; | |
374 | } | |
375 | } | |
376 | ||
377 | /* dispatch to the swap function for the dataFormat */ | |
378 | for(i=0; i<LENGTHOF(swapFns); ++i) { | |
379 | if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { | |
380 | swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); | |
381 | ||
382 | if(U_FAILURE(*pErrorCode)) { | |
383 | udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", | |
384 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
385 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
386 | dataFormatChars[0], dataFormatChars[1], | |
387 | dataFormatChars[2], dataFormatChars[3], | |
388 | u_errorName(*pErrorCode)); | |
389 | } else if(swappedLength<(length-15)) { | |
390 | /* swapped less than expected */ | |
391 | udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", | |
392 | swappedLength, length, | |
393 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
394 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
395 | dataFormatChars[0], dataFormatChars[1], | |
396 | dataFormatChars[2], dataFormatChars[3], | |
397 | u_errorName(*pErrorCode)); | |
398 | } | |
399 | ||
400 | return swappedLength; | |
401 | } | |
402 | } | |
403 | ||
404 | /* the dataFormat was not recognized */ | |
405 | udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", | |
406 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
407 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
408 | dataFormatChars[0], dataFormatChars[1], | |
409 | dataFormatChars[2], dataFormatChars[3]); | |
410 | ||
411 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
412 | return 0; | |
413 | } | |
414 | ||
415 | /* swap .dat package files -------------------------------------------------- */ | |
416 | ||
417 | static int32_t | |
418 | extractPackageName(const UDataSwapper *ds, const char *filename, | |
419 | char pkg[], int32_t capacity, | |
420 | UErrorCode *pErrorCode) { | |
421 | const char *basename; | |
422 | int32_t len; | |
423 | ||
424 | if(U_FAILURE(*pErrorCode)) { | |
425 | return 0; | |
426 | } | |
427 | ||
428 | basename=findBasename(filename); | |
429 | len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ | |
430 | ||
431 | if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { | |
432 | udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", | |
433 | basename); | |
434 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
435 | return 0; | |
436 | } | |
437 | ||
438 | if(len>=capacity) { | |
439 | udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", | |
440 | (long)capacity); | |
441 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
442 | return 0; | |
443 | } | |
444 | ||
445 | uprv_memcpy(pkg, basename, len); | |
446 | pkg[len]=0; | |
447 | return len; | |
448 | } | |
449 | ||
/* Working copy of one package ToC entry, used by udata_swapPackage(). */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name, as read from the input ToC */
    uint32_t inOffset;      /* offset of the item data in the input package */
    uint32_t outOffset;     /* offset of the item data in the output package */
    uint32_t length;        /* number of bytes of the item data */
};
453 | ||
454 | U_CDECL_BEGIN | |
455 | static int32_t U_CALLCONV | |
456 | compareToCEntries(const void *context, const void *left, const void *right) { | |
457 | const char *chars=(const char *)context; | |
458 | return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, | |
459 | chars+((const ToCEntry *)right)->nameOffset); | |
460 | } | |
461 | U_CDECL_END | |
462 | ||
463 | U_CFUNC int32_t U_CALLCONV | |
464 | udata_swapPackage(const UDataSwapper *ds, | |
465 | const void *inData, int32_t length, void *outData, | |
466 | UErrorCode *pErrorCode) { | |
467 | const UDataInfo *pInfo; | |
468 | int32_t headerSize; | |
469 | ||
470 | const uint8_t *inBytes; | |
471 | uint8_t *outBytes; | |
472 | ||
473 | uint32_t itemCount, offset, i; | |
474 | int32_t itemLength; | |
475 | ||
476 | const UDataOffsetTOCEntry *inEntries; | |
477 | UDataOffsetTOCEntry *outEntries; | |
478 | ||
479 | ToCEntry *table; | |
480 | ||
481 | char inPkgName[32], outPkgName[32]; | |
482 | int32_t inPkgNameLength, outPkgNameLength; | |
483 | ||
484 | /* udata_swapDataHeader checks the arguments */ | |
485 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); | |
486 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
487 | return 0; | |
488 | } | |
489 | ||
490 | /* check data format and format version */ | |
491 | pInfo=(const UDataInfo *)((const char *)inData+4); | |
492 | if(!( | |
493 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ | |
494 | pInfo->dataFormat[1]==0x6d && | |
495 | pInfo->dataFormat[2]==0x6e && | |
496 | pInfo->dataFormat[3]==0x44 && | |
497 | pInfo->formatVersion[0]==1 | |
498 | )) { | |
499 | udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", | |
500 | pInfo->dataFormat[0], pInfo->dataFormat[1], | |
501 | pInfo->dataFormat[2], pInfo->dataFormat[3], | |
502 | pInfo->formatVersion[0]); | |
503 | *pErrorCode=U_UNSUPPORTED_ERROR; | |
504 | return 0; | |
505 | } | |
506 | ||
507 | /* | |
508 | * We need to change the ToC name entries so that they have the correct | |
509 | * package name prefix. | |
510 | * Extract the package names from the in/out filenames. | |
511 | */ | |
512 | inPkgNameLength=extractPackageName( | |
513 | ds, inFilename, | |
514 | inPkgName, (int32_t)sizeof(inPkgName), | |
515 | pErrorCode); | |
516 | outPkgNameLength=extractPackageName( | |
517 | ds, outFilename, | |
518 | outPkgName, (int32_t)sizeof(outPkgName), | |
519 | pErrorCode); | |
520 | if(U_FAILURE(*pErrorCode)) { | |
521 | return 0; | |
522 | } | |
523 | ||
524 | /* | |
525 | * It is possible to work with inPkgNameLength!=outPkgNameLength, | |
526 | * but then the length of the data file would change more significantly, | |
527 | * which we are not currently prepared for. | |
528 | */ | |
529 | if(inPkgNameLength!=outPkgNameLength) { | |
530 | udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", | |
531 | inPkgName, outPkgName); | |
532 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
533 | return 0; | |
534 | } | |
535 | ||
536 | inBytes=(const uint8_t *)inData+headerSize; | |
537 | inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); | |
538 | ||
539 | if(length<0) { | |
540 | /* preflighting */ | |
541 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); | |
542 | if(itemCount==0) { | |
543 | /* no items: count only the item count and return */ | |
544 | return headerSize+4; | |
545 | } | |
546 | ||
547 | /* read the last item's offset and preflight it */ | |
548 | offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); | |
549 | itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); | |
550 | ||
551 | if(U_SUCCESS(*pErrorCode)) { | |
552 | return headerSize+offset+(uint32_t)itemLength; | |
553 | } else { | |
554 | return 0; | |
555 | } | |
556 | } else { | |
557 | /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ | |
558 | length-=headerSize; | |
559 | if(length<4) { | |
560 | /* itemCount does not fit */ | |
561 | offset=0xffffffff; | |
562 | itemCount=0; /* make compilers happy */ | |
563 | } else { | |
564 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); | |
565 | if(itemCount==0) { | |
566 | offset=4; | |
567 | } else if((uint32_t)length<(4+8*itemCount)) { | |
568 | /* ToC table does not fit */ | |
569 | offset=0xffffffff; | |
570 | } else { | |
571 | /* offset of the last item plus at least 20 bytes for its header */ | |
572 | offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); | |
573 | } | |
574 | } | |
575 | if((uint32_t)length<offset) { | |
576 | udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for unames.icu\n", | |
577 | length); | |
578 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
579 | return 0; | |
580 | } | |
581 | ||
582 | outBytes=(uint8_t *)outData+headerSize; | |
583 | ||
584 | /* swap the item count */ | |
585 | ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); | |
586 | ||
587 | if(itemCount==0) { | |
588 | /* no items: just return now */ | |
589 | return headerSize+4; | |
590 | } | |
591 | ||
592 | /* swap the item name strings */ | |
593 | offset=4+8*itemCount; | |
594 | itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); | |
595 | udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); | |
596 | if(U_FAILURE(*pErrorCode)) { | |
597 | udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); | |
598 | return 0; | |
599 | } | |
600 | /* keep offset and itemLength in case we allocate and copy the strings below */ | |
601 | ||
602 | /* swap the package names into the output charset */ | |
603 | if(ds->outCharset!=U_CHARSET_FAMILY) { | |
604 | UDataSwapper *ds2; | |
605 | ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); | |
606 | ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); | |
607 | ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); | |
608 | udata_closeSwapper(ds2); | |
609 | if(U_FAILURE(*pErrorCode)) { | |
610 | udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); | |
611 | } | |
612 | } | |
613 | ||
614 | /* change the prefix of each ToC entry name from the old to the new package name */ | |
615 | { | |
616 | char *entryName; | |
617 | ||
618 | for(i=0; i<itemCount; ++i) { | |
619 | entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); | |
620 | ||
621 | if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { | |
622 | uprv_memcpy(entryName, outPkgName, inPkgNameLength); | |
623 | } else { | |
624 | udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", | |
625 | (long)i); | |
626 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
627 | return 0; | |
628 | } | |
629 | } | |
630 | } | |
631 | ||
632 | /* | |
633 | * Allocate the ToC table and, if necessary, a temporary buffer for | |
634 | * pseudo-in-place swapping. | |
635 | * | |
636 | * We cannot swap in-place because: | |
637 | * | |
638 | * 1. If the swapping of an item fails mid-way, then in-place swapping | |
639 | * has destroyed its data. | |
640 | * Out-of-place swapping allows us to then copy its original data. | |
641 | * | |
642 | * 2. If swapping changes the charset family, then we must resort | |
643 | * not only the ToC table but also the data items themselves. | |
644 | * This requires a permutation and is best done with separate in/out | |
645 | * buffers. | |
646 | * | |
647 | * We swapped the strings above to avoid the malloc below if string swapping fails. | |
648 | */ | |
649 | if(inData==outData) { | |
650 | /* +15: prepare for extra padding of a newly-last item */ | |
651 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+15); | |
652 | if(table!=NULL) { | |
653 | outBytes=(uint8_t *)(table+itemCount); | |
654 | ||
655 | /* copy the item count and the swapped strings */ | |
656 | uprv_memcpy(outBytes, inBytes, 4); | |
657 | uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); | |
658 | } | |
659 | } else { | |
660 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); | |
661 | } | |
662 | if(table==NULL) { | |
663 | udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", | |
664 | inData==outData ? | |
665 | itemCount*sizeof(ToCEntry)+length+15 : | |
666 | itemCount*sizeof(ToCEntry)); | |
667 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
668 | return 0; | |
669 | } | |
670 | outEntries=(UDataOffsetTOCEntry *)(outBytes+4); | |
671 | ||
672 | /* read the ToC table */ | |
673 | for(i=0; i<itemCount; ++i) { | |
674 | table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); | |
675 | table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); | |
676 | if(i>0) { | |
677 | table[i-1].length=table[i].inOffset-table[i-1].inOffset; | |
678 | } | |
679 | } | |
680 | table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; | |
681 | ||
682 | if(ds->inCharset==ds->outCharset) { | |
683 | /* no charset swapping, no resorting: keep item offsets the same */ | |
684 | for(i=0; i<itemCount; ++i) { | |
685 | table[i].outOffset=table[i].inOffset; | |
686 | } | |
687 | } else { | |
688 | /* charset swapping: resort items by their swapped names */ | |
689 | ||
690 | /* | |
691 | * Before the actual sorting, we need to make sure that each item | |
692 | * has a length that is a multiple of 16 bytes so that all items | |
693 | * are 16-aligned. | |
694 | * Only the old last item may be missing up to 15 padding bytes. | |
695 | * Add padding bytes for it. | |
696 | * Since the icuswap main() function has already allocated enough | |
697 | * input buffer space and set the last 15 bytes there to 0xaa, | |
698 | * we only need to increase the total data length and the length | |
699 | * of the last item here. | |
700 | */ | |
701 | if((length&0xf)!=0) { | |
702 | int32_t delta=16-(length&0xf); | |
703 | length+=delta; | |
704 | table[itemCount-1].length+=(uint32_t)delta; | |
705 | } | |
706 | ||
707 | uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), | |
708 | compareToCEntries, outBytes, FALSE, pErrorCode); | |
709 | ||
710 | /* | |
711 | * Note: Before sorting, the inOffset values were in order. | |
712 | * Now the outOffset values are in order. | |
713 | */ | |
714 | ||
715 | /* assign outOffset values */ | |
716 | offset=table[0].inOffset; | |
717 | for(i=0; i<itemCount; ++i) { | |
718 | table[i].outOffset=offset; | |
719 | offset+=table[i].length; | |
720 | } | |
721 | } | |
722 | ||
723 | /* write the output ToC table */ | |
724 | for(i=0; i<itemCount; ++i) { | |
725 | ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); | |
726 | ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); | |
727 | } | |
728 | ||
729 | /* swap each data item */ | |
730 | for(i=0; i<itemCount; ++i) { | |
731 | /* first copy the item bytes to make sure that unreachable bytes are copied */ | |
732 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); | |
733 | ||
734 | /* swap the item */ | |
735 | udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, | |
736 | outBytes+table[i].outOffset, pErrorCode); | |
737 | ||
738 | if(U_FAILURE(*pErrorCode)) { | |
739 | if(ds->outCharset==U_CHARSET_FAMILY) { | |
740 | udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" | |
741 | " at inOffset 0x%x length 0x%x - %s\n" | |
742 | " the data item will be copied, not swapped\n\n", | |
743 | (char *)outBytes+table[i].nameOffset, | |
744 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); | |
745 | } else { | |
746 | udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" | |
747 | " at inOffset 0x%x length 0x%x - %s\n" | |
748 | " the data item will be copied, not swapped\n\n", | |
749 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); | |
750 | } | |
751 | /* reset the error code, copy the data item, and continue */ | |
752 | *pErrorCode=U_ZERO_ERROR; | |
753 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); | |
754 | } | |
755 | } | |
756 | ||
757 | if(inData==outData) { | |
758 | /* copy the data from the temporary buffer to the in-place buffer */ | |
759 | uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); | |
760 | } | |
761 | uprv_free(table); | |
762 | ||
763 | return headerSize+length; | |
764 | } | |
765 | } | |
766 | ||
767 | /* | |
768 | * Hey, Emacs, please set the following: | |
769 | * | |
770 | * Local Variables: | |
771 | * indent-tabs-mode: nil | |
772 | * End: | |
773 | * | |
774 | */ |