]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************* | |
b331163b | 5 | * Copyright (C) 2013-2015, International Business Machines |
57a6839d A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * collationsettings.cpp | |
9 | * | |
10 | * created on: 2013feb07 | |
11 | * created by: Markus W. Scherer | |
12 | */ | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | ||
16 | #if !UCONFIG_NO_COLLATION | |
17 | ||
18 | #include "unicode/ucol.h" | |
19 | #include "cmemory.h" | |
20 | #include "collation.h" | |
b331163b | 21 | #include "collationdata.h" |
57a6839d A |
22 | #include "collationsettings.h" |
23 | #include "sharedobject.h" | |
24 | #include "uassert.h" | |
25 | #include "umutex.h" | |
b331163b | 26 | #include "uvectr32.h" |
57a6839d A |
27 | |
28 | U_NAMESPACE_BEGIN | |
29 | ||
30 | CollationSettings::CollationSettings(const CollationSettings &other) | |
31 | : SharedObject(other), | |
32 | options(other.options), variableTop(other.variableTop), | |
33 | reorderTable(NULL), | |
b331163b A |
34 | minHighNoReorder(other.minHighNoReorder), |
35 | reorderRanges(NULL), reorderRangesLength(0), | |
57a6839d A |
36 | reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), |
37 | fastLatinOptions(other.fastLatinOptions) { | |
b331163b A |
38 | UErrorCode errorCode = U_ZERO_ERROR; |
39 | copyReorderingFrom(other, errorCode); | |
57a6839d A |
40 | if(fastLatinOptions >= 0) { |
41 | uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); | |
42 | } | |
43 | } | |
44 | ||
45 | CollationSettings::~CollationSettings() { | |
46 | if(reorderCodesCapacity != 0) { | |
47 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
48 | } | |
49 | } | |
50 | ||
51 | UBool | |
52 | CollationSettings::operator==(const CollationSettings &other) const { | |
53 | if(options != other.options) { return FALSE; } | |
54 | if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } | |
55 | if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } | |
56 | for(int32_t i = 0; i < reorderCodesLength; ++i) { | |
57 | if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } | |
58 | } | |
59 | return TRUE; | |
60 | } | |
61 | ||
62 | int32_t | |
63 | CollationSettings::hashCode() const { | |
64 | int32_t h = options << 8; | |
65 | if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } | |
66 | h ^= reorderCodesLength; | |
67 | for(int32_t i = 0; i < reorderCodesLength; ++i) { | |
68 | h ^= (reorderCodes[i] << i); | |
69 | } | |
70 | return h; | |
71 | } | |
72 | ||
73 | void | |
74 | CollationSettings::resetReordering() { | |
75 | // When we turn off reordering, we want to set a NULL permutation | |
76 | // rather than a no-op permutation. | |
77 | // Keep the memory via reorderCodes and its capacity. | |
78 | reorderTable = NULL; | |
b331163b A |
79 | minHighNoReorder = 0; |
80 | reorderRangesLength = 0; | |
57a6839d A |
81 | reorderCodesLength = 0; |
82 | } | |
83 | ||
84 | void | |
b331163b A |
85 | CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, |
86 | const uint32_t *ranges, int32_t rangesLength, | |
87 | const uint8_t *table, UErrorCode &errorCode) { | |
88 | if(U_FAILURE(errorCode)) { return; } | |
89 | if(table != NULL && | |
90 | (rangesLength == 0 ? | |
91 | !reorderTableHasSplitBytes(table) : | |
92 | rangesLength >= 2 && | |
93 | // The first offset must be 0. The last offset must not be 0. | |
94 | (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { | |
57a6839d A |
95 | // We need to release the memory before setting the alias pointer. |
96 | if(reorderCodesCapacity != 0) { | |
97 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
98 | reorderCodesCapacity = 0; | |
99 | } | |
100 | reorderTable = table; | |
101 | reorderCodes = codes; | |
102 | reorderCodesLength = length; | |
b331163b A |
103 | // Drop ranges before the first split byte. They are reordered by the table. |
104 | // This then speeds up reordering of the remaining ranges. | |
105 | int32_t firstSplitByteRangeIndex = 0; | |
106 | while(firstSplitByteRangeIndex < rangesLength && | |
107 | (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { | |
108 | // The second byte of the primary limit is 0. | |
109 | ++firstSplitByteRangeIndex; | |
110 | } | |
111 | if(firstSplitByteRangeIndex == rangesLength) { | |
112 | U_ASSERT(!reorderTableHasSplitBytes(table)); | |
113 | minHighNoReorder = 0; | |
114 | reorderRanges = NULL; | |
115 | reorderRangesLength = 0; | |
116 | } else { | |
117 | U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); | |
118 | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | |
119 | reorderRanges = ranges + firstSplitByteRangeIndex; | |
120 | reorderRangesLength = rangesLength - firstSplitByteRangeIndex; | |
121 | } | |
122 | return; | |
57a6839d | 123 | } |
b331163b A |
124 | // Regenerate missing data. |
125 | setReordering(data, codes, length, errorCode); | |
57a6839d A |
126 | } |
127 | ||
b331163b A |
128 | void |
129 | CollationSettings::setReordering(const CollationData &data, | |
130 | const int32_t *codes, int32_t codesLength, | |
131 | UErrorCode &errorCode) { | |
132 | if(U_FAILURE(errorCode)) { return; } | |
133 | if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { | |
57a6839d | 134 | resetReordering(); |
b331163b A |
135 | return; |
136 | } | |
137 | UVector32 rangesList(errorCode); | |
138 | data.makeReorderRanges(codes, codesLength, rangesList, errorCode); | |
139 | if(U_FAILURE(errorCode)) { return; } | |
140 | int32_t rangesLength = rangesList.size(); | |
141 | if(rangesLength == 0) { | |
142 | resetReordering(); | |
143 | return; | |
144 | } | |
145 | const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); | |
146 | // ranges[] contains at least two (limit, offset) pairs. | |
147 | // The first offset must be 0. The last offset must not be 0. | |
148 | // Separators (at the low end) and trailing weights (at the high end) | |
149 | // are never reordered. | |
150 | U_ASSERT(rangesLength >= 2); | |
151 | U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); | |
152 | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | |
153 | ||
154 | // Write the lead byte permutation table. | |
155 | // Set a 0 for each lead byte that has a range boundary in the middle. | |
156 | uint8_t table[256]; | |
157 | int32_t b = 0; | |
158 | int32_t firstSplitByteRangeIndex = -1; | |
159 | for(int32_t i = 0; i < rangesLength; ++i) { | |
160 | uint32_t pair = ranges[i]; | |
161 | int32_t limit1 = (int32_t)(pair >> 24); | |
162 | while(b < limit1) { | |
163 | table[b] = (uint8_t)(b + pair); | |
164 | ++b; | |
165 | } | |
166 | // Check the second byte of the limit. | |
167 | if((pair & 0xff0000) != 0) { | |
168 | table[limit1] = 0; | |
169 | b = limit1 + 1; | |
170 | if(firstSplitByteRangeIndex < 0) { | |
171 | firstSplitByteRangeIndex = i; | |
57a6839d | 172 | } |
57a6839d | 173 | } |
57a6839d | 174 | } |
b331163b A |
175 | while(b <= 0xff) { |
176 | table[b] = (uint8_t)b; | |
177 | ++b; | |
178 | } | |
179 | if(firstSplitByteRangeIndex < 0) { | |
180 | // The lead byte permutation table alone suffices for reordering. | |
181 | rangesLength = 0; | |
182 | } else { | |
183 | // Remove the ranges below the first split byte. | |
184 | ranges += firstSplitByteRangeIndex; | |
185 | rangesLength -= firstSplitByteRangeIndex; | |
186 | } | |
187 | setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); | |
188 | } | |
189 | ||
190 | void | |
191 | CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, | |
192 | const uint32_t *ranges, int32_t rangesLength, | |
193 | const uint8_t *table, UErrorCode &errorCode) { | |
194 | if(U_FAILURE(errorCode)) { return; } | |
195 | int32_t *ownedCodes; | |
196 | int32_t totalLength = codesLength + rangesLength; | |
197 | U_ASSERT(totalLength > 0); | |
198 | if(totalLength <= reorderCodesCapacity) { | |
199 | ownedCodes = const_cast<int32_t *>(reorderCodes); | |
200 | } else { | |
201 | // Allocate one memory block for the codes, the ranges, and the 16-aligned table. | |
202 | int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints | |
203 | ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); | |
204 | if(ownedCodes == NULL) { | |
205 | resetReordering(); | |
206 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
207 | return; | |
208 | } | |
209 | if(reorderCodesCapacity != 0) { | |
210 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
211 | } | |
212 | reorderCodes = ownedCodes; | |
213 | reorderCodesCapacity = capacity; | |
214 | } | |
215 | uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); | |
216 | uprv_memcpy(ownedCodes, codes, codesLength * 4); | |
217 | uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); | |
218 | reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); | |
219 | reorderCodesLength = codesLength; | |
220 | reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; | |
221 | reorderRangesLength = rangesLength; | |
222 | } | |
223 | ||
224 | void | |
225 | CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { | |
226 | if(U_FAILURE(errorCode)) { return; } | |
227 | if(!other.hasReordering()) { | |
228 | resetReordering(); | |
229 | return; | |
230 | } | |
231 | minHighNoReorder = other.minHighNoReorder; | |
232 | if(other.reorderCodesCapacity == 0) { | |
233 | // The reorder arrays are aliased to memory-mapped data. | |
234 | reorderTable = other.reorderTable; | |
235 | reorderRanges = other.reorderRanges; | |
236 | reorderRangesLength = other.reorderRangesLength; | |
237 | reorderCodes = other.reorderCodes; | |
238 | reorderCodesLength = other.reorderCodesLength; | |
239 | } else { | |
240 | setReorderArrays(other.reorderCodes, other.reorderCodesLength, | |
241 | other.reorderRanges, other.reorderRangesLength, | |
242 | other.reorderTable, errorCode); | |
243 | } | |
244 | } | |
245 | ||
246 | UBool | |
247 | CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { | |
248 | U_ASSERT(table[0] == 0); | |
249 | for(int32_t i = 1; i < 256; ++i) { | |
250 | if(table[i] == 0) { | |
251 | return TRUE; | |
252 | } | |
253 | } | |
254 | return FALSE; | |
255 | } | |
256 | ||
257 | uint32_t | |
258 | CollationSettings::reorderEx(uint32_t p) const { | |
259 | if(p >= minHighNoReorder) { return p; } | |
260 | // Round up p so that its lower 16 bits are >= any offset bits. | |
261 | // Then compare q directly with (limit, offset) pairs. | |
262 | uint32_t q = p | 0xffff; | |
263 | uint32_t r; | |
264 | const uint32_t *ranges = reorderRanges; | |
265 | while(q >= (r = *ranges)) { ++ranges; } | |
266 | return p + (r << 24); | |
57a6839d A |
267 | } |
268 | ||
269 | void | |
270 | CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | |
271 | if(U_FAILURE(errorCode)) { return; } | |
272 | int32_t noStrength = options & ~STRENGTH_MASK; | |
273 | switch(value) { | |
274 | case UCOL_PRIMARY: | |
275 | case UCOL_SECONDARY: | |
276 | case UCOL_TERTIARY: | |
277 | case UCOL_QUATERNARY: | |
278 | case UCOL_IDENTICAL: | |
279 | options = noStrength | (value << STRENGTH_SHIFT); | |
280 | break; | |
281 | case UCOL_DEFAULT: | |
282 | options = noStrength | (defaultOptions & STRENGTH_MASK); | |
283 | break; | |
284 | default: | |
285 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
286 | break; | |
287 | } | |
288 | } | |
289 | ||
290 | void | |
291 | CollationSettings::setFlag(int32_t bit, UColAttributeValue value, | |
292 | int32_t defaultOptions, UErrorCode &errorCode) { | |
293 | if(U_FAILURE(errorCode)) { return; } | |
294 | switch(value) { | |
295 | case UCOL_ON: | |
296 | options |= bit; | |
297 | break; | |
298 | case UCOL_OFF: | |
299 | options &= ~bit; | |
300 | break; | |
301 | case UCOL_DEFAULT: | |
302 | options = (options & ~bit) | (defaultOptions & bit); | |
303 | break; | |
304 | default: | |
305 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
306 | break; | |
307 | } | |
308 | } | |
309 | ||
310 | void | |
311 | CollationSettings::setCaseFirst(UColAttributeValue value, | |
312 | int32_t defaultOptions, UErrorCode &errorCode) { | |
313 | if(U_FAILURE(errorCode)) { return; } | |
314 | int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; | |
315 | switch(value) { | |
316 | case UCOL_OFF: | |
317 | options = noCaseFirst; | |
318 | break; | |
319 | case UCOL_LOWER_FIRST: | |
320 | options = noCaseFirst | CASE_FIRST; | |
321 | break; | |
322 | case UCOL_UPPER_FIRST: | |
323 | options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; | |
324 | break; | |
325 | case UCOL_DEFAULT: | |
326 | options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); | |
327 | break; | |
328 | default: | |
329 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
330 | break; | |
331 | } | |
332 | } | |
333 | ||
334 | void | |
335 | CollationSettings::setAlternateHandling(UColAttributeValue value, | |
336 | int32_t defaultOptions, UErrorCode &errorCode) { | |
337 | if(U_FAILURE(errorCode)) { return; } | |
338 | int32_t noAlternate = options & ~ALTERNATE_MASK; | |
339 | switch(value) { | |
340 | case UCOL_NON_IGNORABLE: | |
341 | options = noAlternate; | |
342 | break; | |
343 | case UCOL_SHIFTED: | |
344 | options = noAlternate | SHIFTED; | |
345 | break; | |
346 | case UCOL_DEFAULT: | |
347 | options = noAlternate | (defaultOptions & ALTERNATE_MASK); | |
348 | break; | |
349 | default: | |
350 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
351 | break; | |
352 | } | |
353 | } | |
354 | ||
355 | void | |
356 | CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | |
357 | if(U_FAILURE(errorCode)) { return; } | |
358 | int32_t noMax = options & ~MAX_VARIABLE_MASK; | |
359 | switch(value) { | |
360 | case MAX_VAR_SPACE: | |
361 | case MAX_VAR_PUNCT: | |
362 | case MAX_VAR_SYMBOL: | |
363 | case MAX_VAR_CURRENCY: | |
364 | options = noMax | (value << MAX_VARIABLE_SHIFT); | |
365 | break; | |
366 | case UCOL_DEFAULT: | |
367 | options = noMax | (defaultOptions & MAX_VARIABLE_MASK); | |
368 | break; | |
369 | default: | |
370 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
371 | break; | |
372 | } | |
373 | } | |
374 | ||
375 | U_NAMESPACE_END | |
376 | ||
377 | #endif // !UCONFIG_NO_COLLATION |