/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_swp.c
*   tab size:   8 (not used)
*
*   created on: 2003sep10
*   created by: Markus W. Scherer
*
*   Swap collation binaries.
*/
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_COLLATION
23 #include "unicode/udata.h" /* UDataInfo */
30 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
31 U_CAPI
int32_t U_EXPORT2
32 ucol_swapBinary(const UDataSwapper
*ds
,
33 const void *inData
, int32_t length
, void *outData
,
34 UErrorCode
*pErrorCode
) {
35 const uint8_t *inBytes
;
38 const UCATableHeader
*inHeader
;
39 UCATableHeader
*outHeader
;
40 UCATableHeader header
={ 0 };
44 /* argument checking in case we were not called from ucol_swap() */
45 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
48 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
49 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
53 inBytes
=(const uint8_t *)inData
;
54 outBytes
=(uint8_t *)outData
;
56 inHeader
=(const UCATableHeader
*)inData
;
57 outHeader
=(UCATableHeader
*)outData
;
60 * The collation binary must contain at least the UCATableHeader,
61 * starting with its size field.
62 * sizeof(UCATableHeader)==42*4 in ICU 2.8
63 * check the length against the header size before reading the size field
66 header
.size
=udata_readInt32(ds
, inHeader
->size
);
67 } else if((length
<(42*4) || length
<(header
.size
=udata_readInt32(ds
, inHeader
->size
)))) {
68 udata_printError(ds
, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
70 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
74 header
.magic
=ds
->readUInt32(inHeader
->magic
);
76 header
.magic
==UCOL_HEADER_MAGIC
&&
77 inHeader
->formatVersion
[0]==2 &&
78 inHeader
->formatVersion
[1]>=3
80 udata_printError(ds
, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
82 inHeader
->formatVersion
[0], inHeader
->formatVersion
[1]);
83 *pErrorCode
=U_UNSUPPORTED_ERROR
;
87 if(inHeader
->isBigEndian
!=ds
->inIsBigEndian
|| inHeader
->charSetFamily
!=ds
->inCharset
) {
88 udata_printError(ds
, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
89 inHeader
->isBigEndian
, inHeader
->charSetFamily
);
90 *pErrorCode
=U_INVALID_FORMAT_ERROR
;
95 /* copy everything, takes care of data that needs no swapping */
96 if(inBytes
!=outBytes
) {
97 uprv_memcpy(outBytes
, inBytes
, header
.size
);
100 /* swap the necessary pieces in the order of their occurrence in the data */
102 /* read more of the UCATableHeader (the size field was read above) */
103 header
.options
= ds
->readUInt32(inHeader
->options
);
104 header
.UCAConsts
= ds
->readUInt32(inHeader
->UCAConsts
);
105 header
.contractionUCACombos
= ds
->readUInt32(inHeader
->contractionUCACombos
);
106 header
.mappingPosition
= ds
->readUInt32(inHeader
->mappingPosition
);
107 header
.expansion
= ds
->readUInt32(inHeader
->expansion
);
108 header
.contractionIndex
= ds
->readUInt32(inHeader
->contractionIndex
);
109 header
.contractionCEs
= ds
->readUInt32(inHeader
->contractionCEs
);
110 header
.contractionSize
= ds
->readUInt32(inHeader
->contractionSize
);
111 header
.endExpansionCE
= ds
->readUInt32(inHeader
->endExpansionCE
);
112 header
.expansionCESize
= ds
->readUInt32(inHeader
->expansionCESize
);
113 header
.endExpansionCECount
= udata_readInt32(ds
, inHeader
->endExpansionCECount
);
114 header
.contractionUCACombosSize
=udata_readInt32(ds
, inHeader
->contractionUCACombosSize
);
116 /* swap the 32-bit integers in the header */
117 ds
->swapArray32(ds
, inHeader
, (int32_t)((const char *)&inHeader
->jamoSpecial
-(const char *)inHeader
),
118 outHeader
, pErrorCode
);
120 /* set the output platform properties */
121 outHeader
->isBigEndian
=ds
->outIsBigEndian
;
122 outHeader
->charSetFamily
=ds
->outCharset
;
124 /* swap the options */
125 if(header
.options
!=0) {
126 ds
->swapArray32(ds
, inBytes
+header
.options
, header
.expansion
-header
.options
,
127 outBytes
+header
.options
, pErrorCode
);
130 /* swap the expansions */
131 if(header
.mappingPosition
!=0 && header
.expansion
!=0) {
132 if(header
.contractionIndex
!=0) {
133 /* expansions bounded by contractions */
134 count
=header
.contractionIndex
-header
.expansion
;
136 /* no contractions: expansions bounded by the main trie */
137 count
=header
.mappingPosition
-header
.expansion
;
139 ds
->swapArray32(ds
, inBytes
+header
.expansion
, (int32_t)count
,
140 outBytes
+header
.expansion
, pErrorCode
);
143 /* swap the contractions */
144 if(header
.contractionSize
!=0) {
145 /* contractionIndex: UChar[] */
146 ds
->swapArray16(ds
, inBytes
+header
.contractionIndex
, header
.contractionSize
*2,
147 outBytes
+header
.contractionIndex
, pErrorCode
);
149 /* contractionCEs: CEs[] */
150 ds
->swapArray32(ds
, inBytes
+header
.contractionCEs
, header
.contractionSize
*4,
151 outBytes
+header
.contractionCEs
, pErrorCode
);
154 /* swap the main trie */
155 if(header
.mappingPosition
!=0) {
156 count
=header
.endExpansionCE
-header
.mappingPosition
;
157 utrie_swap(ds
, inBytes
+header
.mappingPosition
, (int32_t)count
,
158 outBytes
+header
.mappingPosition
, pErrorCode
);
161 /* swap the max expansion table */
162 if(header
.endExpansionCECount
!=0) {
163 ds
->swapArray32(ds
, inBytes
+header
.endExpansionCE
, header
.endExpansionCECount
*4,
164 outBytes
+header
.endExpansionCE
, pErrorCode
);
167 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
169 /* swap UCA constants */
170 if(header
.UCAConsts
!=0) {
172 * if UCAConsts!=0 then contractionUCACombos because we are swapping
173 * the UCA data file, and we know that the UCA contains contractions
175 count
=header
.contractionUCACombos
-header
.UCAConsts
;
176 ds
->swapArray32(ds
, inBytes
+header
.UCAConsts
, header
.contractionUCACombos
-header
.UCAConsts
,
177 outBytes
+header
.UCAConsts
, pErrorCode
);
180 /* swap UCA contractions */
181 if(header
.contractionUCACombosSize
!=0) {
182 count
=header
.contractionUCACombosSize
*inHeader
->contractionUCACombosWidth
*U_SIZEOF_UCHAR
;
183 ds
->swapArray16(ds
, inBytes
+header
.contractionUCACombos
, (int32_t)count
,
184 outBytes
+header
.contractionUCACombos
, pErrorCode
);
191 /* swap ICU collation data like ucadata.icu */
192 U_CAPI
int32_t U_EXPORT2
193 ucol_swap(const UDataSwapper
*ds
,
194 const void *inData
, int32_t length
, void *outData
,
195 UErrorCode
*pErrorCode
) {
196 const UDataInfo
*pInfo
;
197 int32_t headerSize
, collationSize
;
199 /* udata_swapDataHeader checks the arguments */
200 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
201 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
205 /* check data format and format version */
206 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
208 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UCol" */
209 pInfo
->dataFormat
[1]==0x43 &&
210 pInfo
->dataFormat
[2]==0x6f &&
211 pInfo
->dataFormat
[3]==0x6c &&
212 pInfo
->formatVersion
[0]==2 &&
213 pInfo
->formatVersion
[1]>=3
215 udata_printError(ds
, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
216 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
217 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
218 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
219 *pErrorCode
=U_UNSUPPORTED_ERROR
;
223 collationSize
=ucol_swapBinary(ds
,
224 (const char *)inData
+headerSize
,
225 length
>=0 ? length
-headerSize
: -1,
226 (char *)outData
+headerSize
,
228 if(U_SUCCESS(*pErrorCode
)) {
229 return headerSize
+collationSize
;
235 /* swap inverse UCA collation data (invuca.icu) */
236 U_CAPI
int32_t U_EXPORT2
237 ucol_swapInverseUCA(const UDataSwapper
*ds
,
238 const void *inData
, int32_t length
, void *outData
,
239 UErrorCode
*pErrorCode
) {
240 const UDataInfo
*pInfo
;
243 const uint8_t *inBytes
;
246 const InverseUCATableHeader
*inHeader
;
247 InverseUCATableHeader
*outHeader
;
248 InverseUCATableHeader header
={ 0 };
250 /* udata_swapDataHeader checks the arguments */
251 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
252 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
256 /* check data format and format version */
257 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
259 pInfo
->dataFormat
[0]==0x49 && /* dataFormat="InvC" */
260 pInfo
->dataFormat
[1]==0x6e &&
261 pInfo
->dataFormat
[2]==0x76 &&
262 pInfo
->dataFormat
[3]==0x43 &&
263 pInfo
->formatVersion
[0]==2 &&
264 pInfo
->formatVersion
[1]>=1
266 udata_printError(ds
, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
267 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
268 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
269 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1]);
270 *pErrorCode
=U_UNSUPPORTED_ERROR
;
274 inBytes
=(const uint8_t *)inData
+headerSize
;
275 outBytes
=(uint8_t *)outData
+headerSize
;
277 inHeader
=(const InverseUCATableHeader
*)inBytes
;
278 outHeader
=(InverseUCATableHeader
*)outBytes
;
281 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
282 * starting with its size field.
283 * sizeof(UCATableHeader)==8*4 in ICU 2.8
284 * check the length against the header size before reading the size field
287 header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
);
289 ((length
-headerSize
)<(8*4) ||
290 (uint32_t)(length
-headerSize
)<(header
.byteSize
=udata_readInt32(ds
, inHeader
->byteSize
)))
292 udata_printError(ds
, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
294 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
299 /* copy everything, takes care of data that needs no swapping */
300 if(inBytes
!=outBytes
) {
301 uprv_memcpy(outBytes
, inBytes
, header
.byteSize
);
304 /* swap the necessary pieces in the order of their occurrence in the data */
306 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
307 header
.tableSize
= ds
->readUInt32(inHeader
->tableSize
);
308 header
.contsSize
= ds
->readUInt32(inHeader
->contsSize
);
309 header
.table
= ds
->readUInt32(inHeader
->table
);
310 header
.conts
= ds
->readUInt32(inHeader
->conts
);
312 /* swap the 32-bit integers in the header */
313 ds
->swapArray32(ds
, inHeader
, 5*4, outHeader
, pErrorCode
);
315 /* swap the inverse table; tableSize counts uint32_t[3] rows */
316 ds
->swapArray32(ds
, inBytes
+header
.table
, header
.tableSize
*3*4,
317 outBytes
+header
.table
, pErrorCode
);
319 /* swap the continuation table; contsSize counts UChars */
320 ds
->swapArray16(ds
, inBytes
+header
.conts
, header
.contsSize
*U_SIZEOF_UCHAR
,
321 outBytes
+header
.conts
, pErrorCode
);
324 return headerSize
+header
.byteSize
;
327 #endif /* #if !UCONFIG_NO_COLLATION */