/*
*******************************************************************************
*   Copyright (C) 2003-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_swp.cpp
*   tab size:   8 (not used)
*
*   created on: 2003sep10
*   created by: Markus W. Scherer
*
*   Swap collation binaries.
*/
19 #include "unicode/udata.h" /* UDataInfo */
23 #include "ucol_data.h"
/* swapping ----------------------------------------------------------------- */
29 * This performs data swapping for a folded trie (see utrie.c for details).
32 U_CAPI
int32_t U_EXPORT2
33 utrie_swap(const UDataSwapper
*ds
,
34 const void *inData
, int32_t length
, void *outData
,
35 UErrorCode
*pErrorCode
) {
36 const UTrieHeader
*inTrie
;
41 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
44 if(ds
==NULL
|| inData
==NULL
|| (length
>=0 && outData
==NULL
)) {
45 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
49 /* setup and swapping */
50 if(length
>=0 && (uint32_t)length
<sizeof(UTrieHeader
)) {
51 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
55 inTrie
=(const UTrieHeader
*)inData
;
56 trie
.signature
=ds
->readUInt32(inTrie
->signature
);
57 trie
.options
=ds
->readUInt32(inTrie
->options
);
58 trie
.indexLength
=udata_readInt32(ds
, inTrie
->indexLength
);
59 trie
.dataLength
=udata_readInt32(ds
, inTrie
->dataLength
);
61 if( trie
.signature
!=0x54726965 ||
62 (trie
.options
&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_SHIFT
||
63 ((trie
.options
>>UTRIE_OPTIONS_INDEX_SHIFT
)&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_INDEX_SHIFT
||
64 trie
.indexLength
<UTRIE_BMP_INDEX_LENGTH
||
65 (trie
.indexLength
&(UTRIE_SURROGATE_BLOCK_COUNT
-1))!=0 ||
66 trie
.dataLength
<UTRIE_DATA_BLOCK_LENGTH
||
67 (trie
.dataLength
&(UTRIE_DATA_GRANULARITY
-1))!=0 ||
68 ((trie
.options
&UTRIE_OPTIONS_LATIN1_IS_LINEAR
)!=0 && trie
.dataLength
<(UTRIE_DATA_BLOCK_LENGTH
+0x100))
70 *pErrorCode
=U_INVALID_FORMAT_ERROR
; /* not a UTrie */
74 dataIs32
=(UBool
)((trie
.options
&UTRIE_OPTIONS_DATA_IS_32_BIT
)!=0);
75 size
=sizeof(UTrieHeader
)+trie
.indexLength
*2+trie
.dataLength
*(dataIs32
?4:2);
81 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
85 outTrie
=(UTrieHeader
*)outData
;
88 ds
->swapArray32(ds
, inTrie
, sizeof(UTrieHeader
), outTrie
, pErrorCode
);
90 /* swap the index and the data */
92 ds
->swapArray16(ds
, inTrie
+1, trie
.indexLength
*2, outTrie
+1, pErrorCode
);
93 ds
->swapArray32(ds
, (const uint16_t *)(inTrie
+1)+trie
.indexLength
, trie
.dataLength
*4,
94 (uint16_t *)(outTrie
+1)+trie
.indexLength
, pErrorCode
);
96 ds
->swapArray16(ds
, inTrie
+1, (trie
.indexLength
+trie
.dataLength
)*2, outTrie
+1, pErrorCode
);
103 #if !UCONFIG_NO_COLLATION
105 /* Modified copy of the beginning of ucol_swapBinary(). */
106 U_CAPI UBool U_EXPORT2
107 ucol_looksLikeCollationBinary(const UDataSwapper
*ds
,
108 const void *inData
, int32_t length
) {
109 const UCATableHeader
*inHeader
;
110 UCATableHeader header
;
112 if(ds
==NULL
|| inData
==NULL
|| length
<-1) {
116 inHeader
=(const UCATableHeader
*)inData
;
119 * The collation binary must contain at least the UCATableHeader,
120 * starting with its size field.
121 * sizeof(UCATableHeader)==42*4 in ICU 2.8
122 * check the length against the header size before reading the size field
124 uprv_memset(&header
, 0, sizeof(header
));
126 header
.size
=udata_readInt32(ds
, inHeader
->size
);
127 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
131 header
.magic
=ds
->readUInt32(inHeader
->magic
);
133 header
.magic
==UCOL_HEADER_MAGIC
&&
134 inHeader
->formatVersion
[0]==3 /*&&
135 inHeader->formatVersion[1]>=0*/
140 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
147 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
148 U_CAPI
int32_t U_EXPORT2
149 ucol_swapBinary(const UDataSwapper
*ds
,
150 const void *inData
, int32_t length
, void *outData
,
151 UErrorCode
*pErrorCode
) {
152 const uint8_t *inBytes
;
155 const UCATableHeader
*inHeader
;
156 UCATableHeader
*outHeader
;
157 UCATableHeader header
;
161 /* argument checking in case we were not called from ucol_swap() */
162 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
165 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
166 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
170 inBytes
=(const uint8_t *)inData
;
171 outBytes
=(uint8_t *)outData
;
173 inHeader
=(const UCATableHeader
*)inData
;
174 outHeader
=(UCATableHeader
*)outData
;
177 * The collation binary must contain at least the UCATableHeader,
178 * starting with its size field.
179 * sizeof(UCATableHeader)==42*4 in ICU 2.8
180 * check the length against the header size before reading the size field
182 uprv_memset(&header
, 0, sizeof(header
));
184 header
.size
=udata_readInt32(ds
, inHeader
->size
);
185 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
186 udata_printError(ds
, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
188 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
192 header
.magic
=ds
->readUInt32(inHeader
->magic
);
194 header
.magic
==UCOL_HEADER_MAGIC
&&
195 inHeader
->formatVersion
[0]==3 /*&&
196 inHeader->formatVersion[1]>=0*/
198 udata_printError(ds
, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
200 inHeader
->formatVersion
[0], inHeader
->formatVersion
[1]);
201 *pErrorCode
=U_UNSUPPORTED_ERROR
;
205 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
206 udata_printError(ds
, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
207 inHeader
->isBigEndian
, inHeader
->charSetFamily
);
208 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
213 /* copy everything, takes care of data that needs no swapping */
214 if(inBytes
!=outBytes
) {
215 uprv_memcpy(outBytes
, inBytes
, header
.size
);
218 /* swap the necessary pieces in the order of their occurrence in the data */
220 /* read more of the UCATableHeader (the size field was read above) */
221 header
.options
= ds
->readUInt32(inHeader
->options
);
222 header
.UCAConsts
= ds
->readUInt32(inHeader
->UCAConsts
);
223 header
.contractionUCACombos
= ds
->readUInt32(inHeader
->contractionUCACombos
);
224 header
.mappingPosition
= ds
->readUInt32(inHeader
->mappingPosition
);
225 header
.expansion
= ds
->readUInt32(inHeader
->expansion
);
226 header
.contractionIndex
= ds
->readUInt32(inHeader
->contractionIndex
);
227 header
.contractionCEs
= ds
->readUInt32(inHeader
->contractionCEs
);
228 header
.contractionSize
= ds
->readUInt32(inHeader
->contractionSize
);
229 header
.endExpansionCE
= ds
->readUInt32(inHeader
->endExpansionCE
);
230 header
.expansionCESize
= ds
->readUInt32(inHeader
->expansionCESize
);
231 header
.endExpansionCECount
= udata_readInt32(ds
, inHeader
->endExpansionCECount
);
232 header
.contractionUCACombosSize
=udata_readInt32(ds
, inHeader
->contractionUCACombosSize
);
233 header
.scriptToLeadByte
= ds
->readUInt32(inHeader
->scriptToLeadByte
);
234 header
.leadByteToScript
= ds
->readUInt32(inHeader
->leadByteToScript
);
236 /* swap the 32-bit integers in the header */
237 ds
->swapArray32(ds
, inHeader
, (int32_t)((const char *)&inHeader
->jamoSpecial
-(const char *)inHeader
),
238 outHeader
, pErrorCode
);
239 ds
->swapArray32(ds
, &(inHeader
->scriptToLeadByte
), sizeof(header
.scriptToLeadByte
) + sizeof(header
.leadByteToScript
),
240 &(outHeader
->scriptToLeadByte
), pErrorCode
);
241 /* set the output platform properties */
242 outHeader
->isBigEndian
=ds
->outIsBigEndian
;
243 outHeader
->charSetFamily
=ds
->outCharset
;
245 /* swap the options */
246 if(header
.options
!=0) {
247 ds
->swapArray32(ds
, inBytes
+header
.options
, header
.expansion
-header
.options
,
248 outBytes
+header
.options
, pErrorCode
);
251 /* swap the expansions */
252 if(header
.mappingPosition
!=0 && header
.expansion
!=0) {
253 if(header
.contractionIndex
!=0) {
254 /* expansions bounded by contractions */
255 count
=header
.contractionIndex
-header
.expansion
;
257 /* no contractions: expansions bounded by the main trie */
258 count
=header
.mappingPosition
-header
.expansion
;
260 ds
->swapArray32(ds
, inBytes
+header
.expansion
, (int32_t)count
,
261 outBytes
+header
.expansion
, pErrorCode
);
264 /* swap the contractions */
265 if(header
.contractionSize
!=0) {
266 /* contractionIndex: UChar[] */
267 ds
->swapArray16(ds
, inBytes
+header
.contractionIndex
, header
.contractionSize
*2,
268 outBytes
+header
.contractionIndex
, pErrorCode
);
270 /* contractionCEs: CEs[] */
271 ds
->swapArray32(ds
, inBytes
+header
.contractionCEs
, header
.contractionSize
*4,
272 outBytes
+header
.contractionCEs
, pErrorCode
);
275 /* swap the main trie */
276 if(header
.mappingPosition
!=0) {
277 count
=header
.endExpansionCE
-header
.mappingPosition
;
278 utrie_swap(ds
, inBytes
+header
.mappingPosition
, (int32_t)count
,
279 outBytes
+header
.mappingPosition
, pErrorCode
);
282 /* swap the max expansion table */
283 if(header
.endExpansionCECount
!=0) {
284 ds
->swapArray32(ds
, inBytes
+header
.endExpansionCE
, header
.endExpansionCECount
*4,
285 outBytes
+header
.endExpansionCE
, pErrorCode
);
288 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
290 /* swap UCA constants */
291 if(header
.UCAConsts
!=0) {
293 * if UCAConsts!=0 then contractionUCACombos because we are swapping
294 * the UCA data file, and we know that the UCA contains contractions
296 count
=header
.contractionUCACombos
-header
.UCAConsts
;
297 ds
->swapArray32(ds
, inBytes
+header
.UCAConsts
, header
.contractionUCACombos
-header
.UCAConsts
,
298 outBytes
+header
.UCAConsts
, pErrorCode
);
301 /* swap UCA contractions */
302 if(header
.contractionUCACombosSize
!=0) {
303 count
=header
.contractionUCACombosSize
*inHeader
->contractionUCACombosWidth
*U_SIZEOF_UCHAR
;
304 ds
->swapArray16(ds
, inBytes
+header
.contractionUCACombos
, (int32_t)count
,
305 outBytes
+header
.contractionUCACombos
, pErrorCode
);
308 /* swap the script to lead bytes */
309 if(header
.scriptToLeadByte
!=0) {
310 int indexCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.scriptToLeadByte
))); // each entry = 2 * uint16
311 int dataCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.scriptToLeadByte
+ 2))); // each entry = uint16
312 ds
->swapArray16(ds
, inBytes
+header
.scriptToLeadByte
,
313 4 + (4 * indexCount
) + (2 * dataCount
),
314 outBytes
+header
.scriptToLeadByte
, pErrorCode
);
317 /* swap the lead byte to scripts */
318 if(header
.leadByteToScript
!=0) {
319 int indexCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.leadByteToScript
))); // each entry = uint16
320 int dataCount
= ds
->readUInt16(*((uint16_t*)(inBytes
+header
.leadByteToScript
+ 2))); // each entry = uint16
321 ds
->swapArray16(ds
, inBytes
+header
.leadByteToScript
,
322 4 + (2 * indexCount
) + (2 * dataCount
),
323 outBytes
+header
.leadByteToScript
, pErrorCode
);
330 /* swap ICU collation data like ucadata.icu */
331 U_CAPI
int32_t U_EXPORT2
332 ucol_swap(const UDataSwapper
*ds
,
333 const void *inData
, int32_t length
, void *outData
,
334 UErrorCode
*pErrorCode
) {
336 const UDataInfo
*pInfo
;
337 int32_t headerSize
, collationSize
;
339 /* udata_swapDataHeader checks the arguments */
340 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
341 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
345 /* check data format and format version */
346 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
348 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UCol" */
349 pInfo
->dataFormat
[1]==0x43 &&
350 pInfo
->dataFormat
[2]==0x6f &&
351 pInfo
->dataFormat
[3]==0x6c &&
352 pInfo
->formatVersion
[0]==3 /*&&
353 pInfo->formatVersion[1]>=0*/
355 udata_printError(ds
, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
356 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
357 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
358 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
359 *pErrorCode
=U_UNSUPPORTED_ERROR
;
363 collationSize
=ucol_swapBinary(ds
,
364 (const char *)inData
+headerSize
,
365 length
>=0 ? length
-headerSize
: -1,
366 (char *)outData
+headerSize
,
368 if(U_SUCCESS(*pErrorCode
)) {
369 return headerSize
+collationSize
;
375 /* swap inverse UCA collation data (invuca.icu) */
376 U_CAPI
int32_t U_EXPORT2
377 ucol_swapInverseUCA(const UDataSwapper
*ds
,
378 const void *inData
, int32_t length
, void *outData
,
379 UErrorCode
*pErrorCode
) {
380 const UDataInfo
*pInfo
;
383 const uint8_t *inBytes
;
386 const InverseUCATableHeader
*inHeader
;
387 InverseUCATableHeader
*outHeader
;
388 InverseUCATableHeader header
={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
390 /* udata_swapDataHeader checks the arguments */
391 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
392 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
396 /* check data format and format version */
397 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
399 pInfo
->dataFormat
[0]==0x49 && /* dataFormat="InvC" */
400 pInfo
->dataFormat
[1]==0x6e &&
401 pInfo
->dataFormat
[2]==0x76 &&
402 pInfo
->dataFormat
[3]==0x43 &&
403 pInfo
->formatVersion
[0]==2 &&
404 pInfo
->formatVersion
[1]>=1
406 udata_printError(ds
, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
407 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
408 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
409 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
410 *pErrorCode
=U_UNSUPPORTED_ERROR
;
414 inBytes
=(const uint8_t *)inData
+headerSize
;
415 outBytes
=(uint8_t *)outData
+headerSize
;
417 inHeader
=(const InverseUCATableHeader
*)inBytes
;
418 outHeader
=(InverseUCATableHeader
*)outBytes
;
421 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
422 * starting with its size field.
423 * sizeof(UCATableHeader)==8*4 in ICU 2.8
424 * check the length against the header size before reading the size field
427 header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
);
429 ((length
-headerSize
)<(8*4) ||
430 (uint32_t)(length
-headerSize
)<(header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
)))
432 udata_printError(ds
, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
434 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
439 /* copy everything, takes care of data that needs no swapping */
440 if(inBytes
!=outBytes
) {
441 uprv_memcpy(outBytes
, inBytes
, header
.byteSize
);
444 /* swap the necessary pieces in the order of their occurrence in the data */
446 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
447 header
.tableSize
= ds
->readUInt32(inHeader
->tableSize
);
448 header
.contsSize
= ds
->readUInt32(inHeader
->contsSize
);
449 header
.table
= ds
->readUInt32(inHeader
->table
);
450 header
.conts
= ds
->readUInt32(inHeader
->conts
);
452 /* swap the 32-bit integers in the header */
453 ds
->swapArray32(ds
, inHeader
, 5*4, outHeader
, pErrorCode
);
455 /* swap the inverse table; tableSize counts uint32_t[3] rows */
456 ds
->swapArray32(ds
, inBytes
+header
.table
, header
.tableSize
*3*4,
457 outBytes
+header
.table
, pErrorCode
);
459 /* swap the continuation table; contsSize counts UChars */
460 ds
->swapArray16(ds
, inBytes
+header
.conts
, header
.contsSize
*U_SIZEOF_UCHAR
,
461 outBytes
+header
.conts
, pErrorCode
);
464 return headerSize
+header
.byteSize
;
467 #endif /* #if !UCONFIG_NO_COLLATION */