/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationdatawriter.cpp
*
* created on: 2013aug06
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/tblcoll.h"
#include "unicode/udata.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "collationdata.h"
#include "collationdatabuilder.h"
#include "collationdatareader.h"
#include "collationdatawriter.h"
#include "collationfastlatin.h"
#include "collationsettings.h"
#include "collationtailoring.h"
#include "uassert.h"
#include "ucmndata.h"
33 RuleBasedCollator::cloneRuleData(int32_t &length
, UErrorCode
&errorCode
) const {
34 if(U_FAILURE(errorCode
)) { return NULL
; }
35 LocalMemory
<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
37 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
40 length
= cloneBinary(buffer
.getAlias(), 20000, errorCode
);
41 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
42 if(buffer
.allocateInsteadAndCopy(length
, 0) == NULL
) {
43 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
46 errorCode
= U_ZERO_ERROR
;
47 length
= cloneBinary(buffer
.getAlias(), length
, errorCode
);
49 if(U_FAILURE(errorCode
)) { return NULL
; }
50 return buffer
.orphan();
54 RuleBasedCollator::cloneBinary(uint8_t *dest
, int32_t capacity
, UErrorCode
&errorCode
) const {
55 int32_t indexes
[CollationDataReader::IX_TOTAL_SIZE
+ 1];
56 return CollationDataWriter::writeTailoring(
57 *tailoring
, *settings
, indexes
, dest
, capacity
,
61 static const UDataInfo dataInfo
= {
70 { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol"
71 { 4, 0, 0, 0 }, // formatVersion
72 { 6, 3, 0, 0 } // dataVersion
76 CollationDataWriter::writeBase(const CollationData
&data
, const CollationSettings
&settings
,
77 const void *rootElements
, int32_t rootElementsLength
,
78 int32_t indexes
[], uint8_t *dest
, int32_t capacity
,
79 UErrorCode
&errorCode
) {
80 return write(TRUE
, NULL
,
82 rootElements
, rootElementsLength
,
83 indexes
, dest
, capacity
, errorCode
);
87 CollationDataWriter::writeTailoring(const CollationTailoring
&t
, const CollationSettings
&settings
,
88 int32_t indexes
[], uint8_t *dest
, int32_t capacity
,
89 UErrorCode
&errorCode
) {
90 return write(FALSE
, t
.version
,
93 indexes
, dest
, capacity
, errorCode
);
97 CollationDataWriter::write(UBool isBase
, const UVersionInfo dataVersion
,
98 const CollationData
&data
, const CollationSettings
&settings
,
99 const void *rootElements
, int32_t rootElementsLength
,
100 int32_t indexes
[], uint8_t *dest
, int32_t capacity
,
101 UErrorCode
&errorCode
) {
102 if(U_FAILURE(errorCode
)) { return 0; }
103 if(capacity
< 0 || (capacity
> 0 && dest
== NULL
)) {
104 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
108 // Figure out which data items to write before settling on
109 // the indexes length and writing offsets.
110 // For any data item, we need to write the start and limit offsets,
111 // so the indexes length must be at least index-of-start-offset + 2.
112 int32_t indexesLength
;
114 UnicodeSet unsafeBackwardSet
;
115 const CollationData
*baseData
= data
.base
;
117 int32_t fastLatinVersion
;
118 if(data
.fastLatinTable
!= NULL
) {
119 fastLatinVersion
= (int32_t)CollationFastLatin::VERSION
<< 16;
121 fastLatinVersion
= 0;
123 int32_t fastLatinTableLength
= 0;
126 // For the root collator, we write an even number of indexes
127 // so that we start with an 8-aligned offset.
128 indexesLength
= CollationDataReader::IX_TOTAL_SIZE
+ 1;
129 U_ASSERT(settings
.reorderCodesLength
== 0);
131 unsafeBackwardSet
= *data
.unsafeBackwardSet
;
132 fastLatinTableLength
= data
.fastLatinTableLength
;
133 } else if(baseData
== NULL
) {
135 if(settings
.reorderCodesLength
== 0) {
137 indexesLength
= CollationDataReader::IX_OPTIONS
+ 1; // no limit offset here
139 // only options, reorder codes, and the reorder table
140 indexesLength
= CollationDataReader::IX_REORDER_TABLE_OFFSET
+ 2;
144 // Tailored mappings, and what else?
145 // Check in ascending order of optional tailoring data items.
146 indexesLength
= CollationDataReader::IX_CE32S_OFFSET
+ 2;
147 if(data
.contextsLength
!= 0) {
148 indexesLength
= CollationDataReader::IX_CONTEXTS_OFFSET
+ 2;
150 unsafeBackwardSet
.addAll(*data
.unsafeBackwardSet
).removeAll(*baseData
->unsafeBackwardSet
);
151 if(!unsafeBackwardSet
.isEmpty()) {
152 indexesLength
= CollationDataReader::IX_UNSAFE_BWD_OFFSET
+ 2;
154 if(data
.fastLatinTable
!= baseData
->fastLatinTable
) {
155 fastLatinTableLength
= data
.fastLatinTableLength
;
156 indexesLength
= CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET
+ 2;
162 headerSize
= 0; // udata_create() writes the header
165 header
.dataHeader
.magic1
= 0xda;
166 header
.dataHeader
.magic2
= 0x27;
167 uprv_memcpy(&header
.info
, &dataInfo
, sizeof(UDataInfo
));
168 uprv_memcpy(header
.info
.dataVersion
, dataVersion
, sizeof(UVersionInfo
));
169 headerSize
= (int32_t)sizeof(header
);
170 U_ASSERT((headerSize
& 3) == 0); // multiple of 4 bytes
171 if(hasMappings
&& data
.cesLength
!= 0) {
172 // Sum of the sizes of the data items which are
173 // not automatically multiples of 8 bytes and which are placed before the CEs.
174 int32_t sum
= headerSize
+ (indexesLength
+ settings
.reorderCodesLength
) * 4;
176 // We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
177 // We add to the header size here.
178 // Alternatively, we could increment the indexesLength
179 // or add a few bytes to the reorderTable.
183 header
.dataHeader
.headerSize
= (uint16_t)headerSize
;
184 if(headerSize
<= capacity
) {
185 uprv_memcpy(dest
, &header
, sizeof(header
));
186 // Write 00 bytes so that the padding is not mistaken for a copyright string.
187 uprv_memset(dest
+ sizeof(header
), 0, headerSize
- (int32_t)sizeof(header
));
189 capacity
-= headerSize
;
196 indexes
[CollationDataReader::IX_INDEXES_LENGTH
] = indexesLength
;
197 U_ASSERT((settings
.options
& ~0xffff) == 0);
198 indexes
[CollationDataReader::IX_OPTIONS
] =
199 data
.numericPrimary
| fastLatinVersion
| settings
.options
;
200 indexes
[CollationDataReader::IX_RESERVED2
] = 0;
201 indexes
[CollationDataReader::IX_RESERVED3
] = 0;
203 // Byte offsets of data items all start from the start of the indexes.
204 // We add the headerSize at the very end.
205 int32_t totalSize
= indexesLength
* 4;
207 if(hasMappings
&& (isBase
|| data
.jamoCE32s
!= baseData
->jamoCE32s
)) {
208 indexes
[CollationDataReader::IX_JAMO_CE32S_START
] = data
.jamoCE32s
- data
.ce32s
;
210 indexes
[CollationDataReader::IX_JAMO_CE32S_START
] = -1;
213 indexes
[CollationDataReader::IX_REORDER_CODES_OFFSET
] = totalSize
;
214 totalSize
+= settings
.reorderCodesLength
* 4;
216 indexes
[CollationDataReader::IX_REORDER_TABLE_OFFSET
] = totalSize
;
217 if(settings
.reorderTable
!= NULL
) {
221 indexes
[CollationDataReader::IX_TRIE_OFFSET
] = totalSize
;
223 UErrorCode errorCode2
= U_ZERO_ERROR
;
225 if(totalSize
< capacity
) {
226 length
= utrie2_serialize(data
.trie
, dest
+ totalSize
,
227 capacity
- totalSize
, &errorCode2
);
229 length
= utrie2_serialize(data
.trie
, NULL
, 0, &errorCode2
);
231 if(U_FAILURE(errorCode2
) && errorCode2
!= U_BUFFER_OVERFLOW_ERROR
) {
232 errorCode
= errorCode2
;
235 // The trie size should be a multiple of 8 bytes due to the way
236 // compactIndex2(UNewTrie2 *trie) currently works.
237 U_ASSERT((length
& 7) == 0);
241 indexes
[CollationDataReader::IX_RESERVED8_OFFSET
] = totalSize
;
242 indexes
[CollationDataReader::IX_CES_OFFSET
] = totalSize
;
243 if(hasMappings
&& data
.cesLength
!= 0) {
244 U_ASSERT(((headerSize
+ totalSize
) & 7) == 0);
245 totalSize
+= data
.cesLength
* 8;
248 indexes
[CollationDataReader::IX_RESERVED10_OFFSET
] = totalSize
;
249 indexes
[CollationDataReader::IX_CE32S_OFFSET
] = totalSize
;
251 totalSize
+= data
.ce32sLength
* 4;
254 indexes
[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET
] = totalSize
;
255 totalSize
+= rootElementsLength
* 4;
257 indexes
[CollationDataReader::IX_CONTEXTS_OFFSET
] = totalSize
;
259 totalSize
+= data
.contextsLength
* 2;
262 indexes
[CollationDataReader::IX_UNSAFE_BWD_OFFSET
] = totalSize
;
263 if(hasMappings
&& !unsafeBackwardSet
.isEmpty()) {
264 UErrorCode errorCode2
= U_ZERO_ERROR
;
266 if(totalSize
< capacity
) {
267 uint16_t *p
= reinterpret_cast<uint16_t *>(dest
+ totalSize
);
268 length
= unsafeBackwardSet
.serialize(
269 p
, (capacity
- totalSize
) / 2, errorCode2
);
271 length
= unsafeBackwardSet
.serialize(NULL
, 0, errorCode2
);
273 if(U_FAILURE(errorCode2
) && errorCode2
!= U_BUFFER_OVERFLOW_ERROR
) {
274 errorCode
= errorCode2
;
277 totalSize
+= length
* 2;
280 indexes
[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET
] = totalSize
;
281 totalSize
+= fastLatinTableLength
* 2;
283 indexes
[CollationDataReader::IX_SCRIPTS_OFFSET
] = totalSize
;
285 totalSize
+= data
.scriptsLength
* 2;
288 indexes
[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET
] = totalSize
;
293 indexes
[CollationDataReader::IX_RESERVED18_OFFSET
] = totalSize
;
294 indexes
[CollationDataReader::IX_TOTAL_SIZE
] = totalSize
;
296 if(totalSize
> capacity
) {
297 errorCode
= U_BUFFER_OVERFLOW_ERROR
;
298 return headerSize
+ totalSize
;
301 uprv_memcpy(dest
, indexes
, indexesLength
* 4);
302 copyData(indexes
, CollationDataReader::IX_REORDER_CODES_OFFSET
, settings
.reorderCodes
, dest
);
303 copyData(indexes
, CollationDataReader::IX_REORDER_TABLE_OFFSET
, settings
.reorderTable
, dest
);
304 // The trie has already been serialized into the dest buffer.
305 copyData(indexes
, CollationDataReader::IX_CES_OFFSET
, data
.ces
, dest
);
306 copyData(indexes
, CollationDataReader::IX_CE32S_OFFSET
, data
.ce32s
, dest
);
307 copyData(indexes
, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET
, rootElements
, dest
);
308 copyData(indexes
, CollationDataReader::IX_CONTEXTS_OFFSET
, data
.contexts
, dest
);
309 // The unsafeBackwardSet has already been serialized into the dest buffer.
310 copyData(indexes
, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET
, data
.fastLatinTable
, dest
);
311 copyData(indexes
, CollationDataReader::IX_SCRIPTS_OFFSET
, data
.scripts
, dest
);
312 copyData(indexes
, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET
, data
.compressibleBytes
, dest
);
314 return headerSize
+ totalSize
;
318 CollationDataWriter::copyData(const int32_t indexes
[], int32_t startIndex
,
319 const void *src
, uint8_t *dest
) {
320 int32_t start
= indexes
[startIndex
];
321 int32_t limit
= indexes
[startIndex
+ 1];
323 uprv_memcpy(dest
+ start
, src
, limit
- start
);
#endif  // !UCONFIG_NO_COLLATION