/*
*******************************************************************************
*
*   Copyright (C) 2003-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_swp.c
*   tab size:   8 (not used)
*
*   created on: 2003sep10
*   created by: Markus W. Scherer
*
*   Swap collation binaries.
*/
19 #include "unicode/udata.h" /* UDataInfo */
/* swapping ----------------------------------------------------------------- */
29 * This performs data swapping for a folded trie (see utrie.c for details).
32 U_CAPI
int32_t U_EXPORT2
33 utrie_swap(const UDataSwapper
*ds
,
34 const void *inData
, int32_t length
, void *outData
,
35 UErrorCode
*pErrorCode
) {
36 const UTrieHeader
*inTrie
;
41 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
44 if(ds
==NULL
|| inData
==NULL
|| (length
>=0 && outData
==NULL
)) {
45 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
49 /* setup and swapping */
50 if(length
>=0 && (uint32_t)length
<sizeof(UTrieHeader
)) {
51 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
55 inTrie
=(const UTrieHeader
*)inData
;
56 trie
.signature
=ds
->readUInt32(inTrie
->signature
);
57 trie
.options
=ds
->readUInt32(inTrie
->options
);
58 trie
.indexLength
=udata_readInt32(ds
, inTrie
->indexLength
);
59 trie
.dataLength
=udata_readInt32(ds
, inTrie
->dataLength
);
61 if( trie
.signature
!=0x54726965 ||
62 (trie
.options
&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_SHIFT
||
63 ((trie
.options
>>UTRIE_OPTIONS_INDEX_SHIFT
)&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_INDEX_SHIFT
||
64 trie
.indexLength
<UTRIE_BMP_INDEX_LENGTH
||
65 (trie
.indexLength
&(UTRIE_SURROGATE_BLOCK_COUNT
-1))!=0 ||
66 trie
.dataLength
<UTRIE_DATA_BLOCK_LENGTH
||
67 (trie
.dataLength
&(UTRIE_DATA_GRANULARITY
-1))!=0 ||
68 ((trie
.options
&UTRIE_OPTIONS_LATIN1_IS_LINEAR
)!=0 && trie
.dataLength
<(UTRIE_DATA_BLOCK_LENGTH
+0x100))
70 *pErrorCode
=U_INVALID_FORMAT_ERROR
; /* not a UTrie */
74 dataIs32
=(UBool
)((trie
.options
&UTRIE_OPTIONS_DATA_IS_32_BIT
)!=0);
75 size
=sizeof(UTrieHeader
)+trie
.indexLength
*2+trie
.dataLength
*(dataIs32
?4:2);
81 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
85 outTrie
=(UTrieHeader
*)outData
;
88 ds
->swapArray32(ds
, inTrie
, sizeof(UTrieHeader
), outTrie
, pErrorCode
);
90 /* swap the index and the data */
92 ds
->swapArray16(ds
, inTrie
+1, trie
.indexLength
*2, outTrie
+1, pErrorCode
);
93 ds
->swapArray32(ds
, (const uint16_t *)(inTrie
+1)+trie
.indexLength
, trie
.dataLength
*4,
94 (uint16_t *)(outTrie
+1)+trie
.indexLength
, pErrorCode
);
96 ds
->swapArray16(ds
, inTrie
+1, (trie
.indexLength
+trie
.dataLength
)*2, outTrie
+1, pErrorCode
);
103 #if !UCONFIG_NO_COLLATION
105 /* Modified copy of the beginning of ucol_swapBinary(). */
106 U_CAPI UBool U_EXPORT2
107 ucol_looksLikeCollationBinary(const UDataSwapper
*ds
,
108 const void *inData
, int32_t length
) {
109 const uint8_t *inBytes
;
110 const UCATableHeader
*inHeader
;
111 UCATableHeader header
;
113 if(ds
==NULL
|| inData
==NULL
|| length
<-1) {
117 inBytes
=(const uint8_t *)inData
;
118 inHeader
=(const UCATableHeader
*)inData
;
121 * The collation binary must contain at least the UCATableHeader,
122 * starting with its size field.
123 * sizeof(UCATableHeader)==42*4 in ICU 2.8
124 * check the length against the header size before reading the size field
126 uprv_memset(&header
, 0, sizeof(header
));
128 header
.size
=udata_readInt32(ds
, inHeader
->size
);
129 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
133 header
.magic
=ds
->readUInt32(inHeader
->magic
);
135 header
.magic
==UCOL_HEADER_MAGIC
&&
136 inHeader
->formatVersion
[0]==3 /*&&
137 inHeader->formatVersion[1]>=0*/
142 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
149 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
150 U_CAPI
int32_t U_EXPORT2
151 ucol_swapBinary(const UDataSwapper
*ds
,
152 const void *inData
, int32_t length
, void *outData
,
153 UErrorCode
*pErrorCode
) {
154 const uint8_t *inBytes
;
157 const UCATableHeader
*inHeader
;
158 UCATableHeader
*outHeader
;
159 UCATableHeader header
;
163 /* argument checking in case we were not called from ucol_swap() */
164 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
167 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
168 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
172 inBytes
=(const uint8_t *)inData
;
173 outBytes
=(uint8_t *)outData
;
175 inHeader
=(const UCATableHeader
*)inData
;
176 outHeader
=(UCATableHeader
*)outData
;
179 * The collation binary must contain at least the UCATableHeader,
180 * starting with its size field.
181 * sizeof(UCATableHeader)==42*4 in ICU 2.8
182 * check the length against the header size before reading the size field
184 uprv_memset(&header
, 0, sizeof(header
));
186 header
.size
=udata_readInt32(ds
, inHeader
->size
);
187 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
188 udata_printError(ds
, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
190 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
194 header
.magic
=ds
->readUInt32(inHeader
->magic
);
196 header
.magic
==UCOL_HEADER_MAGIC
&&
197 inHeader
->formatVersion
[0]==3 /*&&
198 inHeader->formatVersion[1]>=0*/
200 udata_printError(ds
, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
202 inHeader
->formatVersion
[0], inHeader
->formatVersion
[1]);
203 *pErrorCode
=U_UNSUPPORTED_ERROR
;
207 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
208 udata_printError(ds
, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
209 inHeader
->isBigEndian
, inHeader
->charSetFamily
);
210 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
215 /* copy everything, takes care of data that needs no swapping */
216 if(inBytes
!=outBytes
) {
217 uprv_memcpy(outBytes
, inBytes
, header
.size
);
220 /* swap the necessary pieces in the order of their occurrence in the data */
222 /* read more of the UCATableHeader (the size field was read above) */
223 header
.options
= ds
->readUInt32(inHeader
->options
);
224 header
.UCAConsts
= ds
->readUInt32(inHeader
->UCAConsts
);
225 header
.contractionUCACombos
= ds
->readUInt32(inHeader
->contractionUCACombos
);
226 header
.mappingPosition
= ds
->readUInt32(inHeader
->mappingPosition
);
227 header
.expansion
= ds
->readUInt32(inHeader
->expansion
);
228 header
.contractionIndex
= ds
->readUInt32(inHeader
->contractionIndex
);
229 header
.contractionCEs
= ds
->readUInt32(inHeader
->contractionCEs
);
230 header
.contractionSize
= ds
->readUInt32(inHeader
->contractionSize
);
231 header
.endExpansionCE
= ds
->readUInt32(inHeader
->endExpansionCE
);
232 header
.expansionCESize
= ds
->readUInt32(inHeader
->expansionCESize
);
233 header
.endExpansionCECount
= udata_readInt32(ds
, inHeader
->endExpansionCECount
);
234 header
.contractionUCACombosSize
=udata_readInt32(ds
, inHeader
->contractionUCACombosSize
);
235 header
.scriptToLeadByte
= ds
->readUInt32(inHeader
->scriptToLeadByte
);
236 header
.leadByteToScript
= ds
->readUInt32(inHeader
->leadByteToScript
);
238 /* swap the 32-bit integers in the header */
239 ds
->swapArray32(ds
, inHeader
, (int32_t)((const char *)&inHeader
->jamoSpecial
-(const char *)inHeader
),
240 outHeader
, pErrorCode
);
241 ds
->swapArray32(ds
, &(inHeader
->scriptToLeadByte
), sizeof(header
.scriptToLeadByte
) + sizeof(header
.leadByteToScript
),
242 &(outHeader
->scriptToLeadByte
), pErrorCode
);
243 /* set the output platform properties */
244 outHeader
->isBigEndian
=ds
->outIsBigEndian
;
245 outHeader
->charSetFamily
=ds
->outCharset
;
247 /* swap the options */
248 if(header
.options
!=0) {
249 ds
->swapArray32(ds
, inBytes
+header
.options
, header
.expansion
-header
.options
,
250 outBytes
+header
.options
, pErrorCode
);
253 /* swap the expansions */
254 if(header
.mappingPosition
!=0 && header
.expansion
!=0) {
255 if(header
.contractionIndex
!=0) {
256 /* expansions bounded by contractions */
257 count
=header
.contractionIndex
-header
.expansion
;
259 /* no contractions: expansions bounded by the main trie */
260 count
=header
.mappingPosition
-header
.expansion
;
262 ds
->swapArray32(ds
, inBytes
+header
.expansion
, (int32_t)count
,
263 outBytes
+header
.expansion
, pErrorCode
);
266 /* swap the contractions */
267 if(header
.contractionSize
!=0) {
268 /* contractionIndex: UChar[] */
269 ds
->swapArray16(ds
, inBytes
+header
.contractionIndex
, header
.contractionSize
*2,
270 outBytes
+header
.contractionIndex
, pErrorCode
);
272 /* contractionCEs: CEs[] */
273 ds
->swapArray32(ds
, inBytes
+header
.contractionCEs
, header
.contractionSize
*4,
274 outBytes
+header
.contractionCEs
, pErrorCode
);
277 /* swap the main trie */
278 if(header
.mappingPosition
!=0) {
279 count
=header
.endExpansionCE
-header
.mappingPosition
;
280 utrie_swap(ds
, inBytes
+header
.mappingPosition
, (int32_t)count
,
281 outBytes
+header
.mappingPosition
, pErrorCode
);
284 /* swap the max expansion table */
285 if(header
.endExpansionCECount
!=0) {
286 ds
->swapArray32(ds
, inBytes
+header
.endExpansionCE
, header
.endExpansionCECount
*4,
287 outBytes
+header
.endExpansionCE
, pErrorCode
);
290 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
292 /* swap UCA constants */
293 if(header
.UCAConsts
!=0) {
295 * if UCAConsts!=0 then contractionUCACombos because we are swapping
296 * the UCA data file, and we know that the UCA contains contractions
298 count
=header
.contractionUCACombos
-header
.UCAConsts
;
299 ds
->swapArray32(ds
, inBytes
+header
.UCAConsts
, header
.contractionUCACombos
-header
.UCAConsts
,
300 outBytes
+header
.UCAConsts
, pErrorCode
);
303 /* swap UCA contractions */
304 if(header
.contractionUCACombosSize
!=0) {
305 count
=header
.contractionUCACombosSize
*inHeader
->contractionUCACombosWidth
*U_SIZEOF_UCHAR
;
306 ds
->swapArray16(ds
, inBytes
+header
.contractionUCACombos
, (int32_t)count
,
307 outBytes
+header
.contractionUCACombos
, pErrorCode
);
310 /* swap the script to lead bytes */
311 if(header
.scriptToLeadByte
!=0) {
312 int indexCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.scriptToLeadByte
))); // each entry = 2 * uint16
313 int dataCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.scriptToLeadByte
+ 2))); // each entry = uint16
314 ds
->swapArray16(ds
, inBytes
+header
.scriptToLeadByte
,
315 4 + (4 * indexCount
) + (2 * dataCount
),
316 outBytes
+header
.scriptToLeadByte
, pErrorCode
);
319 /* swap the lead byte to scripts */
320 if(header
.leadByteToScript
!=0) {
321 int indexCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.leadByteToScript
))); // each entry = uint16
322 int dataCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.leadByteToScript
+ 2))); // each entry = uint16
323 ds
->swapArray16(ds
, inBytes
+header
.leadByteToScript
,
324 4 + (2 * indexCount
) + (2 * dataCount
),
325 outBytes
+header
.leadByteToScript
, pErrorCode
);
332 /* swap ICU collation data like ucadata.icu */
333 U_CAPI
int32_t U_EXPORT2
334 ucol_swap(const UDataSwapper
*ds
,
335 const void *inData
, int32_t length
, void *outData
,
336 UErrorCode
*pErrorCode
) {
338 const UDataInfo
*pInfo
;
339 int32_t headerSize
, collationSize
;
341 /* udata_swapDataHeader checks the arguments */
342 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
343 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
347 /* check data format and format version */
348 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
350 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UCol" */
351 pInfo
->dataFormat
[1]==0x43 &&
352 pInfo
->dataFormat
[2]==0x6f &&
353 pInfo
->dataFormat
[3]==0x6c &&
354 pInfo
->formatVersion
[0]==3 /*&&
355 pInfo->formatVersion[1]>=0*/
357 udata_printError(ds
, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
358 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
359 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
360 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
361 *pErrorCode
=U_UNSUPPORTED_ERROR
;
365 collationSize
=ucol_swapBinary(ds
,
366 (const char *)inData
+headerSize
,
367 length
>=0 ? length
-headerSize
: -1,
368 (char *)outData
+headerSize
,
370 if(U_SUCCESS(*pErrorCode
)) {
371 return headerSize
+collationSize
;
377 /* swap inverse UCA collation data (invuca.icu) */
378 U_CAPI
int32_t U_EXPORT2
379 ucol_swapInverseUCA(const UDataSwapper
*ds
,
380 const void *inData
, int32_t length
, void *outData
,
381 UErrorCode
*pErrorCode
) {
382 const UDataInfo
*pInfo
;
385 const uint8_t *inBytes
;
388 const InverseUCATableHeader
*inHeader
;
389 InverseUCATableHeader
*outHeader
;
390 InverseUCATableHeader header
={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
392 /* udata_swapDataHeader checks the arguments */
393 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
394 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
398 /* check data format and format version */
399 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
401 pInfo
->dataFormat
[0]==0x49 && /* dataFormat="InvC" */
402 pInfo
->dataFormat
[1]==0x6e &&
403 pInfo
->dataFormat
[2]==0x76 &&
404 pInfo
->dataFormat
[3]==0x43 &&
405 pInfo
->formatVersion
[0]==2 &&
406 pInfo
->formatVersion
[1]>=1
408 udata_printError(ds
, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
409 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
410 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
411 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
412 *pErrorCode
=U_UNSUPPORTED_ERROR
;
416 inBytes
=(const uint8_t *)inData
+headerSize
;
417 outBytes
=(uint8_t *)outData
+headerSize
;
419 inHeader
=(const InverseUCATableHeader
*)inBytes
;
420 outHeader
=(InverseUCATableHeader
*)outBytes
;
423 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
424 * starting with its size field.
425 * sizeof(UCATableHeader)==8*4 in ICU 2.8
426 * check the length against the header size before reading the size field
429 header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
);
431 ((length
-headerSize
)<(8*4) ||
432 (uint32_t)(length
-headerSize
)<(header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
)))
434 udata_printError(ds
, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
436 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
441 /* copy everything, takes care of data that needs no swapping */
442 if(inBytes
!=outBytes
) {
443 uprv_memcpy(outBytes
, inBytes
, header
.byteSize
);
446 /* swap the necessary pieces in the order of their occurrence in the data */
448 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
449 header
.tableSize
= ds
->readUInt32(inHeader
->tableSize
);
450 header
.contsSize
= ds
->readUInt32(inHeader
->contsSize
);
451 header
.table
= ds
->readUInt32(inHeader
->table
);
452 header
.conts
= ds
->readUInt32(inHeader
->conts
);
454 /* swap the 32-bit integers in the header */
455 ds
->swapArray32(ds
, inHeader
, 5*4, outHeader
, pErrorCode
);
457 /* swap the inverse table; tableSize counts uint32_t[3] rows */
458 ds
->swapArray32(ds
, inBytes
+header
.table
, header
.tableSize
*3*4,
459 outBytes
+header
.table
, pErrorCode
);
461 /* swap the continuation table; contsSize counts UChars */
462 ds
->swapArray16(ds
, inBytes
+header
.conts
, header
.contsSize
*U_SIZEOF_UCHAR
,
463 outBytes
+header
.conts
, pErrorCode
);
466 return headerSize
+header
.byteSize
;
469 #endif /* #if !UCONFIG_NO_COLLATION */