/*
*******************************************************************************
*
*   Copyright (C) 2003-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_swp.c
*   tab size:   8 (not used)
*
*   created on: 2003sep10
*   created by: Markus W. Scherer
*
*   Swap collation binaries.
*/
#include "unicode/udata.h" /* UDataInfo */
26 /* swapping ----------------------------------------------------------------- */
29 * This performs data swapping for a folded trie (see utrie.c for details).
32 U_CAPI
int32_t U_EXPORT2
33 utrie_swap(const UDataSwapper
*ds
,
34 const void *inData
, int32_t length
, void *outData
,
35 UErrorCode
*pErrorCode
) {
36 const UTrieHeader
*inTrie
;
41 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
44 if(ds
==NULL
|| inData
==NULL
|| (length
>=0 && outData
==NULL
)) {
45 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
49 /* setup and swapping */
50 if(length
>=0 && length
<sizeof(UTrieHeader
)) {
51 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
55 inTrie
=(const UTrieHeader
*)inData
;
56 trie
.signature
=ds
->readUInt32(inTrie
->signature
);
57 trie
.options
=ds
->readUInt32(inTrie
->options
);
58 trie
.indexLength
=udata_readInt32(ds
, inTrie
->indexLength
);
59 trie
.dataLength
=udata_readInt32(ds
, inTrie
->dataLength
);
61 if( trie
.signature
!=0x54726965 ||
62 (trie
.options
&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_SHIFT
||
63 ((trie
.options
>>UTRIE_OPTIONS_INDEX_SHIFT
)&UTRIE_OPTIONS_SHIFT_MASK
)!=UTRIE_INDEX_SHIFT
||
64 trie
.indexLength
<UTRIE_BMP_INDEX_LENGTH
||
65 (trie
.indexLength
&(UTRIE_SURROGATE_BLOCK_COUNT
-1))!=0 ||
66 trie
.dataLength
<UTRIE_DATA_BLOCK_LENGTH
||
67 (trie
.dataLength
&(UTRIE_DATA_GRANULARITY
-1))!=0 ||
68 ((trie
.options
&UTRIE_OPTIONS_LATIN1_IS_LINEAR
)!=0 && trie
.dataLength
<(UTRIE_DATA_BLOCK_LENGTH
+0x100))
70 *pErrorCode
=U_INVALID_FORMAT_ERROR
; /* not a UTrie */
74 dataIs32
=(UBool
)((trie
.options
&UTRIE_OPTIONS_DATA_IS_32_BIT
)!=0);
75 size
=sizeof(UTrieHeader
)+trie
.indexLength
*2+trie
.dataLength
*(dataIs32
?4:2);
81 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
85 outTrie
=(UTrieHeader
*)outData
;
88 ds
->swapArray32(ds
, inTrie
, sizeof(UTrieHeader
), outTrie
, pErrorCode
);
90 /* swap the index and the data */
92 ds
->swapArray16(ds
, inTrie
+1, trie
.indexLength
*2, outTrie
+1, pErrorCode
);
93 ds
->swapArray32(ds
, (const uint16_t *)(inTrie
+1)+trie
.indexLength
, trie
.dataLength
*4,
94 (uint16_t *)(outTrie
+1)+trie
.indexLength
, pErrorCode
);
96 ds
->swapArray16(ds
, inTrie
+1, (trie
.indexLength
+trie
.dataLength
)*2, outTrie
+1, pErrorCode
);
103 #if !UCONFIG_NO_COLLATION
105 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
106 U_CAPI
int32_t U_EXPORT2
107 ucol_swapBinary(const UDataSwapper
*ds
,
108 const void *inData
, int32_t length
, void *outData
,
109 UErrorCode
*pErrorCode
) {
110 const uint8_t *inBytes
;
113 const UCATableHeader
*inHeader
;
114 UCATableHeader
*outHeader
;
115 UCATableHeader header
={ 0 };
119 /* argument checking in case we were not called from ucol_swap() */
120 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
123 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
124 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
128 inBytes
=(const uint8_t *)inData
;
129 outBytes
=(uint8_t *)outData
;
131 inHeader
=(const UCATableHeader
*)inData
;
132 outHeader
=(UCATableHeader
*)outData
;
135 * The collation binary must contain at least the UCATableHeader,
136 * starting with its size field.
137 * sizeof(UCATableHeader)==42*4 in ICU 2.8
138 * check the length against the header size before reading the size field
141 header
.size
=udata_readInt32(ds
, inHeader
->size
);
142 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
143 udata_printError(ds
, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
145 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
149 header
.magic
=ds
->readUInt32(inHeader
->magic
);
151 header
.magic
==UCOL_HEADER_MAGIC
&&
152 inHeader
->formatVersion
[0]==2 &&
153 inHeader
->formatVersion
[1]>=3
155 udata_printError(ds
, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
157 inHeader
->formatVersion
[0], inHeader
->formatVersion
[1]);
158 *pErrorCode
=U_UNSUPPORTED_ERROR
;
162 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
163 udata_printError(ds
, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
164 inHeader
->isBigEndian
, inHeader
->charSetFamily
);
165 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
170 /* copy everything, takes care of data that needs no swapping */
171 if(inBytes
!=outBytes
) {
172 uprv_memcpy(outBytes
, inBytes
, header
.size
);
175 /* swap the necessary pieces in the order of their occurrence in the data */
177 /* read more of the UCATableHeader (the size field was read above) */
178 header
.options
= ds
->readUInt32(inHeader
->options
);
179 header
.UCAConsts
= ds
->readUInt32(inHeader
->UCAConsts
);
180 header
.contractionUCACombos
= ds
->readUInt32(inHeader
->contractionUCACombos
);
181 header
.mappingPosition
= ds
->readUInt32(inHeader
->mappingPosition
);
182 header
.expansion
= ds
->readUInt32(inHeader
->expansion
);
183 header
.contractionIndex
= ds
->readUInt32(inHeader
->contractionIndex
);
184 header
.contractionCEs
= ds
->readUInt32(inHeader
->contractionCEs
);
185 header
.contractionSize
= ds
->readUInt32(inHeader
->contractionSize
);
186 header
.endExpansionCE
= ds
->readUInt32(inHeader
->endExpansionCE
);
187 header
.expansionCESize
= ds
->readUInt32(inHeader
->expansionCESize
);
188 header
.endExpansionCECount
= udata_readInt32(ds
, inHeader
->endExpansionCECount
);
189 header
.contractionUCACombosSize
=udata_readInt32(ds
, inHeader
->contractionUCACombosSize
);
191 /* swap the 32-bit integers in the header */
192 ds
->swapArray32(ds
, inHeader
, (int32_t)((const char *)&inHeader
->jamoSpecial
-(const char *)inHeader
),
193 outHeader
, pErrorCode
);
195 /* set the output platform properties */
196 outHeader
->isBigEndian
=ds
->outIsBigEndian
;
197 outHeader
->charSetFamily
=ds
->outCharset
;
199 /* swap the options */
200 if(header
.options
!=0) {
201 ds
->swapArray32(ds
, inBytes
+header
.options
, header
.expansion
-header
.options
,
202 outBytes
+header
.options
, pErrorCode
);
205 /* swap the expansions */
206 if(header
.mappingPosition
!=0 && header
.expansion
!=0) {
207 if(header
.contractionIndex
!=0) {
208 /* expansions bounded by contractions */
209 count
=header
.contractionIndex
-header
.expansion
;
211 /* no contractions: expansions bounded by the main trie */
212 count
=header
.mappingPosition
-header
.expansion
;
214 ds
->swapArray32(ds
, inBytes
+header
.expansion
, (int32_t)count
,
215 outBytes
+header
.expansion
, pErrorCode
);
218 /* swap the contractions */
219 if(header
.contractionSize
!=0) {
220 /* contractionIndex: UChar[] */
221 ds
->swapArray16(ds
, inBytes
+header
.contractionIndex
, header
.contractionSize
*2,
222 outBytes
+header
.contractionIndex
, pErrorCode
);
224 /* contractionCEs: CEs[] */
225 ds
->swapArray32(ds
, inBytes
+header
.contractionCEs
, header
.contractionSize
*4,
226 outBytes
+header
.contractionCEs
, pErrorCode
);
229 /* swap the main trie */
230 if(header
.mappingPosition
!=0) {
231 count
=header
.endExpansionCE
-header
.mappingPosition
;
232 utrie_swap(ds
, inBytes
+header
.mappingPosition
, (int32_t)count
,
233 outBytes
+header
.mappingPosition
, pErrorCode
);
236 /* swap the max expansion table */
237 if(header
.endExpansionCECount
!=0) {
238 ds
->swapArray32(ds
, inBytes
+header
.endExpansionCE
, header
.endExpansionCECount
*4,
239 outBytes
+header
.endExpansionCE
, pErrorCode
);
242 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
244 /* swap UCA constants */
245 if(header
.UCAConsts
!=0) {
247 * if UCAConsts!=0 then contractionUCACombos because we are swapping
248 * the UCA data file, and we know that the UCA contains contractions
250 count
=header
.contractionUCACombos
-header
.UCAConsts
;
251 ds
->swapArray32(ds
, inBytes
+header
.UCAConsts
, header
.contractionUCACombos
-header
.UCAConsts
,
252 outBytes
+header
.UCAConsts
, pErrorCode
);
255 /* swap UCA contractions */
256 if(header
.contractionUCACombosSize
!=0) {
257 count
=header
.contractionUCACombosSize
*inHeader
->contractionUCACombosWidth
*U_SIZEOF_UCHAR
;
258 ds
->swapArray16(ds
, inBytes
+header
.contractionUCACombos
, (int32_t)count
,
259 outBytes
+header
.contractionUCACombos
, pErrorCode
);
266 /* swap ICU collation data like ucadata.icu */
267 U_CAPI
int32_t U_EXPORT2
268 ucol_swap(const UDataSwapper
*ds
,
269 const void *inData
, int32_t length
, void *outData
,
270 UErrorCode
*pErrorCode
) {
271 const UDataInfo
*pInfo
;
272 int32_t headerSize
, collationSize
;
274 /* udata_swapDataHeader checks the arguments */
275 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
276 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
280 /* check data format and format version */
281 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
283 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UCol" */
284 pInfo
->dataFormat
[1]==0x43 &&
285 pInfo
->dataFormat
[2]==0x6f &&
286 pInfo
->dataFormat
[3]==0x6c &&
287 pInfo
->formatVersion
[0]==2 &&
288 pInfo
->formatVersion
[1]>=3
290 udata_printError(ds
, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
291 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
292 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
293 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
294 *pErrorCode
=U_UNSUPPORTED_ERROR
;
298 collationSize
=ucol_swapBinary(ds
,
299 (const char *)inData
+headerSize
,
300 length
>=0 ? length
-headerSize
: -1,
301 (char *)outData
+headerSize
,
303 if(U_SUCCESS(*pErrorCode
)) {
304 return headerSize
+collationSize
;
310 /* swap inverse UCA collation data (invuca.icu) */
311 U_CAPI
int32_t U_EXPORT2
312 ucol_swapInverseUCA(const UDataSwapper
*ds
,
313 const void *inData
, int32_t length
, void *outData
,
314 UErrorCode
*pErrorCode
) {
315 const UDataInfo
*pInfo
;
318 const uint8_t *inBytes
;
321 const InverseUCATableHeader
*inHeader
;
322 InverseUCATableHeader
*outHeader
;
323 InverseUCATableHeader header
={ 0 };
325 /* udata_swapDataHeader checks the arguments */
326 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
327 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
331 /* check data format and format version */
332 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
334 pInfo
->dataFormat
[0]==0x49 && /* dataFormat="InvC" */
335 pInfo
->dataFormat
[1]==0x6e &&
336 pInfo
->dataFormat
[2]==0x76 &&
337 pInfo
->dataFormat
[3]==0x43 &&
338 pInfo
->formatVersion
[0]==2 &&
339 pInfo
->formatVersion
[1]>=1
341 udata_printError(ds
, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
342 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
343 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
344 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
345 *pErrorCode
=U_UNSUPPORTED_ERROR
;
349 inBytes
=(const uint8_t *)inData
+headerSize
;
350 outBytes
=(uint8_t *)outData
+headerSize
;
352 inHeader
=(const InverseUCATableHeader
*)inBytes
;
353 outHeader
=(InverseUCATableHeader
*)outBytes
;
356 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
357 * starting with its size field.
358 * sizeof(UCATableHeader)==8*4 in ICU 2.8
359 * check the length against the header size before reading the size field
362 header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
);
364 ((length
-headerSize
)<(8*4) ||
365 (uint32_t)(length
-headerSize
)<(header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
)))
367 udata_printError(ds
, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
369 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
374 /* copy everything, takes care of data that needs no swapping */
375 if(inBytes
!=outBytes
) {
376 uprv_memcpy(outBytes
, inBytes
, header
.byteSize
);
379 /* swap the necessary pieces in the order of their occurrence in the data */
381 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
382 header
.tableSize
= ds
->readUInt32(inHeader
->tableSize
);
383 header
.contsSize
= ds
->readUInt32(inHeader
->contsSize
);
384 header
.table
= ds
->readUInt32(inHeader
->table
);
385 header
.conts
= ds
->readUInt32(inHeader
->conts
);
387 /* swap the 32-bit integers in the header */
388 ds
->swapArray32(ds
, inHeader
, 5*4, outHeader
, pErrorCode
);
390 /* swap the inverse table; tableSize counts uint32_t[3] rows */
391 ds
->swapArray32(ds
, inBytes
+header
.table
, header
.tableSize
*3*4,
392 outBytes
+header
.table
, pErrorCode
);
394 /* swap the continuation table; contsSize counts UChars */
395 ds
->swapArray16(ds
, inBytes
+header
.conts
, header
.contsSize
*U_SIZEOF_UCHAR
,
396 outBytes
+header
.conts
, pErrorCode
);
399 return headerSize
+header
.byteSize
;
402 #endif /* #if !UCONFIG_NO_COLLATION */