]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucol_swp.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / ucol_swp.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ucol_swp.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003sep10
14 * created by: Markus W. Scherer
15 *
16 * Swap collation binaries.
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_COLLATION
22
23 #include "unicode/udata.h" /* UDataInfo */
24 #include "cmemory.h"
25 #include "utrie.h"
26 #include "udataswp.h"
27 #include "ucol_imp.h"
28 #include "ucol_swp.h"
29
30 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
31 U_CAPI int32_t U_EXPORT2
32 ucol_swapBinary(const UDataSwapper *ds,
33 const void *inData, int32_t length, void *outData,
34 UErrorCode *pErrorCode) {
35 const uint8_t *inBytes;
36 uint8_t *outBytes;
37
38 const UCATableHeader *inHeader;
39 UCATableHeader *outHeader;
40 UCATableHeader header={ 0 };
41
42 uint32_t count;
43
44 /* argument checking in case we were not called from ucol_swap() */
45 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
46 return 0;
47 }
48 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
49 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
50 return 0;
51 }
52
53 inBytes=(const uint8_t *)inData;
54 outBytes=(uint8_t *)outData;
55
56 inHeader=(const UCATableHeader *)inData;
57 outHeader=(UCATableHeader *)outData;
58
59 /*
60 * The collation binary must contain at least the UCATableHeader,
61 * starting with its size field.
62 * sizeof(UCATableHeader)==42*4 in ICU 2.8
63 * check the length against the header size before reading the size field
64 */
65 if(length<0) {
66 header.size=udata_readInt32(ds, inHeader->size);
67 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
68 udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
69 length);
70 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
71 return 0;
72 }
73
74 header.magic=ds->readUInt32(inHeader->magic);
75 if(!(
76 header.magic==UCOL_HEADER_MAGIC &&
77 inHeader->formatVersion[0]==2 &&
78 inHeader->formatVersion[1]>=3
79 )) {
80 udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
81 header.magic,
82 inHeader->formatVersion[0], inHeader->formatVersion[1]);
83 *pErrorCode=U_UNSUPPORTED_ERROR;
84 return 0;
85 }
86
87 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
88 udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
89 inHeader->isBigEndian, inHeader->charSetFamily);
90 *pErrorCode=U_INVALID_FORMAT_ERROR;
91 return 0;
92 }
93
94 if(length>=0) {
95 /* copy everything, takes care of data that needs no swapping */
96 if(inBytes!=outBytes) {
97 uprv_memcpy(outBytes, inBytes, header.size);
98 }
99
100 /* swap the necessary pieces in the order of their occurrence in the data */
101
102 /* read more of the UCATableHeader (the size field was read above) */
103 header.options= ds->readUInt32(inHeader->options);
104 header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
105 header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
106 header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
107 header.expansion= ds->readUInt32(inHeader->expansion);
108 header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
109 header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
110 header.contractionSize= ds->readUInt32(inHeader->contractionSize);
111 header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
112 header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
113 header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
114 header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
115
116 /* swap the 32-bit integers in the header */
117 ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
118 outHeader, pErrorCode);
119
120 /* set the output platform properties */
121 outHeader->isBigEndian=ds->outIsBigEndian;
122 outHeader->charSetFamily=ds->outCharset;
123
124 /* swap the options */
125 if(header.options!=0) {
126 ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
127 outBytes+header.options, pErrorCode);
128 }
129
130 /* swap the expansions */
131 if(header.mappingPosition!=0 && header.expansion!=0) {
132 if(header.contractionIndex!=0) {
133 /* expansions bounded by contractions */
134 count=header.contractionIndex-header.expansion;
135 } else {
136 /* no contractions: expansions bounded by the main trie */
137 count=header.mappingPosition-header.expansion;
138 }
139 ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
140 outBytes+header.expansion, pErrorCode);
141 }
142
143 /* swap the contractions */
144 if(header.contractionSize!=0) {
145 /* contractionIndex: UChar[] */
146 ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
147 outBytes+header.contractionIndex, pErrorCode);
148
149 /* contractionCEs: CEs[] */
150 ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
151 outBytes+header.contractionCEs, pErrorCode);
152 }
153
154 /* swap the main trie */
155 if(header.mappingPosition!=0) {
156 count=header.endExpansionCE-header.mappingPosition;
157 utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
158 outBytes+header.mappingPosition, pErrorCode);
159 }
160
161 /* swap the max expansion table */
162 if(header.endExpansionCECount!=0) {
163 ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
164 outBytes+header.endExpansionCE, pErrorCode);
165 }
166
167 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
168
169 /* swap UCA constants */
170 if(header.UCAConsts!=0) {
171 /*
172 * if UCAConsts!=0 then contractionUCACombos because we are swapping
173 * the UCA data file, and we know that the UCA contains contractions
174 */
175 count=header.contractionUCACombos-header.UCAConsts;
176 ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
177 outBytes+header.UCAConsts, pErrorCode);
178 }
179
180 /* swap UCA contractions */
181 if(header.contractionUCACombosSize!=0) {
182 count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
183 ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
184 outBytes+header.contractionUCACombos, pErrorCode);
185 }
186 }
187
188 return header.size;
189 }
190
191 /* swap ICU collation data like ucadata.icu */
192 U_CAPI int32_t U_EXPORT2
193 ucol_swap(const UDataSwapper *ds,
194 const void *inData, int32_t length, void *outData,
195 UErrorCode *pErrorCode) {
196 const UDataInfo *pInfo;
197 int32_t headerSize, collationSize;
198
199 /* udata_swapDataHeader checks the arguments */
200 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
201 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
202 return 0;
203 }
204
205 /* check data format and format version */
206 pInfo=(const UDataInfo *)((const char *)inData+4);
207 if(!(
208 pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */
209 pInfo->dataFormat[1]==0x43 &&
210 pInfo->dataFormat[2]==0x6f &&
211 pInfo->dataFormat[3]==0x6c &&
212 pInfo->formatVersion[0]==2 &&
213 pInfo->formatVersion[1]>=3
214 )) {
215 udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
216 pInfo->dataFormat[0], pInfo->dataFormat[1],
217 pInfo->dataFormat[2], pInfo->dataFormat[3],
218 pInfo->formatVersion[0], pInfo->formatVersion[1]);
219 *pErrorCode=U_UNSUPPORTED_ERROR;
220 return 0;
221 }
222
223 collationSize=ucol_swapBinary(ds,
224 (const char *)inData+headerSize,
225 length>=0 ? length-headerSize : -1,
226 (char *)outData+headerSize,
227 pErrorCode);
228 if(U_SUCCESS(*pErrorCode)) {
229 return headerSize+collationSize;
230 } else {
231 return 0;
232 }
233 }
234
235 /* swap inverse UCA collation data (invuca.icu) */
236 U_CAPI int32_t U_EXPORT2
237 ucol_swapInverseUCA(const UDataSwapper *ds,
238 const void *inData, int32_t length, void *outData,
239 UErrorCode *pErrorCode) {
240 const UDataInfo *pInfo;
241 int32_t headerSize;
242
243 const uint8_t *inBytes;
244 uint8_t *outBytes;
245
246 const InverseUCATableHeader *inHeader;
247 InverseUCATableHeader *outHeader;
248 InverseUCATableHeader header={ 0 };
249
250 /* udata_swapDataHeader checks the arguments */
251 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
252 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
253 return 0;
254 }
255
256 /* check data format and format version */
257 pInfo=(const UDataInfo *)((const char *)inData+4);
258 if(!(
259 pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
260 pInfo->dataFormat[1]==0x6e &&
261 pInfo->dataFormat[2]==0x76 &&
262 pInfo->dataFormat[3]==0x43 &&
263 pInfo->formatVersion[0]==2 &&
264 pInfo->formatVersion[1]>=1
265 )) {
266 udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
267 pInfo->dataFormat[0], pInfo->dataFormat[1],
268 pInfo->dataFormat[2], pInfo->dataFormat[3],
269 pInfo->formatVersion[0], pInfo->formatVersion[1]);
270 *pErrorCode=U_UNSUPPORTED_ERROR;
271 return 0;
272 }
273
274 inBytes=(const uint8_t *)inData+headerSize;
275 outBytes=(uint8_t *)outData+headerSize;
276
277 inHeader=(const InverseUCATableHeader *)inBytes;
278 outHeader=(InverseUCATableHeader *)outBytes;
279
280 /*
281 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
282 * starting with its size field.
283 * sizeof(UCATableHeader)==8*4 in ICU 2.8
284 * check the length against the header size before reading the size field
285 */
286 if(length<0) {
287 header.byteSize=udata_readInt32(ds, inHeader->byteSize);
288 } else if(
289 ((length-headerSize)<(8*4) ||
290 (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
291 ) {
292 udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
293 length);
294 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
295 return 0;
296 }
297
298 if(length>=0) {
299 /* copy everything, takes care of data that needs no swapping */
300 if(inBytes!=outBytes) {
301 uprv_memcpy(outBytes, inBytes, header.byteSize);
302 }
303
304 /* swap the necessary pieces in the order of their occurrence in the data */
305
306 /* read more of the InverseUCATableHeader (the byteSize field was read above) */
307 header.tableSize= ds->readUInt32(inHeader->tableSize);
308 header.contsSize= ds->readUInt32(inHeader->contsSize);
309 header.table= ds->readUInt32(inHeader->table);
310 header.conts= ds->readUInt32(inHeader->conts);
311
312 /* swap the 32-bit integers in the header */
313 ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
314
315 /* swap the inverse table; tableSize counts uint32_t[3] rows */
316 ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
317 outBytes+header.table, pErrorCode);
318
319 /* swap the continuation table; contsSize counts UChars */
320 ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
321 outBytes+header.conts, pErrorCode);
322 }
323
324 return headerSize+header.byteSize;
325 }
326
327 #endif /* #if !UCONFIG_NO_COLLATION */