]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
b331163b | 3 | * Copyright (C) 2013-2015, International Business Machines |
57a6839d A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * collationsettings.cpp | |
7 | * | |
8 | * created on: 2013feb07 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_COLLATION | |
15 | ||
16 | #include "unicode/ucol.h" | |
17 | #include "cmemory.h" | |
18 | #include "collation.h" | |
b331163b | 19 | #include "collationdata.h" |
57a6839d A |
20 | #include "collationsettings.h" |
21 | #include "sharedobject.h" | |
22 | #include "uassert.h" | |
23 | #include "umutex.h" | |
b331163b | 24 | #include "uvectr32.h" |
57a6839d A |
25 | |
26 | U_NAMESPACE_BEGIN | |
27 | ||
28 | CollationSettings::CollationSettings(const CollationSettings &other) | |
29 | : SharedObject(other), | |
30 | options(other.options), variableTop(other.variableTop), | |
31 | reorderTable(NULL), | |
b331163b A |
32 | minHighNoReorder(other.minHighNoReorder), |
33 | reorderRanges(NULL), reorderRangesLength(0), | |
57a6839d A |
34 | reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), |
35 | fastLatinOptions(other.fastLatinOptions) { | |
b331163b A |
36 | UErrorCode errorCode = U_ZERO_ERROR; |
37 | copyReorderingFrom(other, errorCode); | |
57a6839d A |
38 | if(fastLatinOptions >= 0) { |
39 | uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); | |
40 | } | |
41 | } | |
42 | ||
43 | CollationSettings::~CollationSettings() { | |
44 | if(reorderCodesCapacity != 0) { | |
45 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
46 | } | |
47 | } | |
48 | ||
49 | UBool | |
50 | CollationSettings::operator==(const CollationSettings &other) const { | |
51 | if(options != other.options) { return FALSE; } | |
52 | if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } | |
53 | if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } | |
54 | for(int32_t i = 0; i < reorderCodesLength; ++i) { | |
55 | if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } | |
56 | } | |
57 | return TRUE; | |
58 | } | |
59 | ||
60 | int32_t | |
61 | CollationSettings::hashCode() const { | |
62 | int32_t h = options << 8; | |
63 | if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } | |
64 | h ^= reorderCodesLength; | |
65 | for(int32_t i = 0; i < reorderCodesLength; ++i) { | |
66 | h ^= (reorderCodes[i] << i); | |
67 | } | |
68 | return h; | |
69 | } | |
70 | ||
71 | void | |
72 | CollationSettings::resetReordering() { | |
73 | // When we turn off reordering, we want to set a NULL permutation | |
74 | // rather than a no-op permutation. | |
75 | // Keep the memory via reorderCodes and its capacity. | |
76 | reorderTable = NULL; | |
b331163b A |
77 | minHighNoReorder = 0; |
78 | reorderRangesLength = 0; | |
57a6839d A |
79 | reorderCodesLength = 0; |
80 | } | |
81 | ||
82 | void | |
b331163b A |
83 | CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, |
84 | const uint32_t *ranges, int32_t rangesLength, | |
85 | const uint8_t *table, UErrorCode &errorCode) { | |
86 | if(U_FAILURE(errorCode)) { return; } | |
87 | if(table != NULL && | |
88 | (rangesLength == 0 ? | |
89 | !reorderTableHasSplitBytes(table) : | |
90 | rangesLength >= 2 && | |
91 | // The first offset must be 0. The last offset must not be 0. | |
92 | (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { | |
57a6839d A |
93 | // We need to release the memory before setting the alias pointer. |
94 | if(reorderCodesCapacity != 0) { | |
95 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
96 | reorderCodesCapacity = 0; | |
97 | } | |
98 | reorderTable = table; | |
99 | reorderCodes = codes; | |
100 | reorderCodesLength = length; | |
b331163b A |
101 | // Drop ranges before the first split byte. They are reordered by the table. |
102 | // This then speeds up reordering of the remaining ranges. | |
103 | int32_t firstSplitByteRangeIndex = 0; | |
104 | while(firstSplitByteRangeIndex < rangesLength && | |
105 | (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { | |
106 | // The second byte of the primary limit is 0. | |
107 | ++firstSplitByteRangeIndex; | |
108 | } | |
109 | if(firstSplitByteRangeIndex == rangesLength) { | |
110 | U_ASSERT(!reorderTableHasSplitBytes(table)); | |
111 | minHighNoReorder = 0; | |
112 | reorderRanges = NULL; | |
113 | reorderRangesLength = 0; | |
114 | } else { | |
115 | U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); | |
116 | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | |
117 | reorderRanges = ranges + firstSplitByteRangeIndex; | |
118 | reorderRangesLength = rangesLength - firstSplitByteRangeIndex; | |
119 | } | |
120 | return; | |
57a6839d | 121 | } |
b331163b A |
122 | // Regenerate missing data. |
123 | setReordering(data, codes, length, errorCode); | |
57a6839d A |
124 | } |
125 | ||
b331163b A |
126 | void |
127 | CollationSettings::setReordering(const CollationData &data, | |
128 | const int32_t *codes, int32_t codesLength, | |
129 | UErrorCode &errorCode) { | |
130 | if(U_FAILURE(errorCode)) { return; } | |
131 | if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { | |
57a6839d | 132 | resetReordering(); |
b331163b A |
133 | return; |
134 | } | |
135 | UVector32 rangesList(errorCode); | |
136 | data.makeReorderRanges(codes, codesLength, rangesList, errorCode); | |
137 | if(U_FAILURE(errorCode)) { return; } | |
138 | int32_t rangesLength = rangesList.size(); | |
139 | if(rangesLength == 0) { | |
140 | resetReordering(); | |
141 | return; | |
142 | } | |
143 | const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); | |
144 | // ranges[] contains at least two (limit, offset) pairs. | |
145 | // The first offset must be 0. The last offset must not be 0. | |
146 | // Separators (at the low end) and trailing weights (at the high end) | |
147 | // are never reordered. | |
148 | U_ASSERT(rangesLength >= 2); | |
149 | U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); | |
150 | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | |
151 | ||
152 | // Write the lead byte permutation table. | |
153 | // Set a 0 for each lead byte that has a range boundary in the middle. | |
154 | uint8_t table[256]; | |
155 | int32_t b = 0; | |
156 | int32_t firstSplitByteRangeIndex = -1; | |
157 | for(int32_t i = 0; i < rangesLength; ++i) { | |
158 | uint32_t pair = ranges[i]; | |
159 | int32_t limit1 = (int32_t)(pair >> 24); | |
160 | while(b < limit1) { | |
161 | table[b] = (uint8_t)(b + pair); | |
162 | ++b; | |
163 | } | |
164 | // Check the second byte of the limit. | |
165 | if((pair & 0xff0000) != 0) { | |
166 | table[limit1] = 0; | |
167 | b = limit1 + 1; | |
168 | if(firstSplitByteRangeIndex < 0) { | |
169 | firstSplitByteRangeIndex = i; | |
57a6839d | 170 | } |
57a6839d | 171 | } |
57a6839d | 172 | } |
b331163b A |
173 | while(b <= 0xff) { |
174 | table[b] = (uint8_t)b; | |
175 | ++b; | |
176 | } | |
177 | if(firstSplitByteRangeIndex < 0) { | |
178 | // The lead byte permutation table alone suffices for reordering. | |
179 | rangesLength = 0; | |
180 | } else { | |
181 | // Remove the ranges below the first split byte. | |
182 | ranges += firstSplitByteRangeIndex; | |
183 | rangesLength -= firstSplitByteRangeIndex; | |
184 | } | |
185 | setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); | |
186 | } | |
187 | ||
188 | void | |
189 | CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, | |
190 | const uint32_t *ranges, int32_t rangesLength, | |
191 | const uint8_t *table, UErrorCode &errorCode) { | |
192 | if(U_FAILURE(errorCode)) { return; } | |
193 | int32_t *ownedCodes; | |
194 | int32_t totalLength = codesLength + rangesLength; | |
195 | U_ASSERT(totalLength > 0); | |
196 | if(totalLength <= reorderCodesCapacity) { | |
197 | ownedCodes = const_cast<int32_t *>(reorderCodes); | |
198 | } else { | |
199 | // Allocate one memory block for the codes, the ranges, and the 16-aligned table. | |
200 | int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints | |
201 | ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); | |
202 | if(ownedCodes == NULL) { | |
203 | resetReordering(); | |
204 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |
205 | return; | |
206 | } | |
207 | if(reorderCodesCapacity != 0) { | |
208 | uprv_free(const_cast<int32_t *>(reorderCodes)); | |
209 | } | |
210 | reorderCodes = ownedCodes; | |
211 | reorderCodesCapacity = capacity; | |
212 | } | |
213 | uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); | |
214 | uprv_memcpy(ownedCodes, codes, codesLength * 4); | |
215 | uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); | |
216 | reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); | |
217 | reorderCodesLength = codesLength; | |
218 | reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; | |
219 | reorderRangesLength = rangesLength; | |
220 | } | |
221 | ||
222 | void | |
223 | CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { | |
224 | if(U_FAILURE(errorCode)) { return; } | |
225 | if(!other.hasReordering()) { | |
226 | resetReordering(); | |
227 | return; | |
228 | } | |
229 | minHighNoReorder = other.minHighNoReorder; | |
230 | if(other.reorderCodesCapacity == 0) { | |
231 | // The reorder arrays are aliased to memory-mapped data. | |
232 | reorderTable = other.reorderTable; | |
233 | reorderRanges = other.reorderRanges; | |
234 | reorderRangesLength = other.reorderRangesLength; | |
235 | reorderCodes = other.reorderCodes; | |
236 | reorderCodesLength = other.reorderCodesLength; | |
237 | } else { | |
238 | setReorderArrays(other.reorderCodes, other.reorderCodesLength, | |
239 | other.reorderRanges, other.reorderRangesLength, | |
240 | other.reorderTable, errorCode); | |
241 | } | |
242 | } | |
243 | ||
244 | UBool | |
245 | CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { | |
246 | U_ASSERT(table[0] == 0); | |
247 | for(int32_t i = 1; i < 256; ++i) { | |
248 | if(table[i] == 0) { | |
249 | return TRUE; | |
250 | } | |
251 | } | |
252 | return FALSE; | |
253 | } | |
254 | ||
255 | uint32_t | |
256 | CollationSettings::reorderEx(uint32_t p) const { | |
257 | if(p >= minHighNoReorder) { return p; } | |
258 | // Round up p so that its lower 16 bits are >= any offset bits. | |
259 | // Then compare q directly with (limit, offset) pairs. | |
260 | uint32_t q = p | 0xffff; | |
261 | uint32_t r; | |
262 | const uint32_t *ranges = reorderRanges; | |
263 | while(q >= (r = *ranges)) { ++ranges; } | |
264 | return p + (r << 24); | |
57a6839d A |
265 | } |
266 | ||
267 | void | |
268 | CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | |
269 | if(U_FAILURE(errorCode)) { return; } | |
270 | int32_t noStrength = options & ~STRENGTH_MASK; | |
271 | switch(value) { | |
272 | case UCOL_PRIMARY: | |
273 | case UCOL_SECONDARY: | |
274 | case UCOL_TERTIARY: | |
275 | case UCOL_QUATERNARY: | |
276 | case UCOL_IDENTICAL: | |
277 | options = noStrength | (value << STRENGTH_SHIFT); | |
278 | break; | |
279 | case UCOL_DEFAULT: | |
280 | options = noStrength | (defaultOptions & STRENGTH_MASK); | |
281 | break; | |
282 | default: | |
283 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
284 | break; | |
285 | } | |
286 | } | |
287 | ||
288 | void | |
289 | CollationSettings::setFlag(int32_t bit, UColAttributeValue value, | |
290 | int32_t defaultOptions, UErrorCode &errorCode) { | |
291 | if(U_FAILURE(errorCode)) { return; } | |
292 | switch(value) { | |
293 | case UCOL_ON: | |
294 | options |= bit; | |
295 | break; | |
296 | case UCOL_OFF: | |
297 | options &= ~bit; | |
298 | break; | |
299 | case UCOL_DEFAULT: | |
300 | options = (options & ~bit) | (defaultOptions & bit); | |
301 | break; | |
302 | default: | |
303 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
304 | break; | |
305 | } | |
306 | } | |
307 | ||
308 | void | |
309 | CollationSettings::setCaseFirst(UColAttributeValue value, | |
310 | int32_t defaultOptions, UErrorCode &errorCode) { | |
311 | if(U_FAILURE(errorCode)) { return; } | |
312 | int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; | |
313 | switch(value) { | |
314 | case UCOL_OFF: | |
315 | options = noCaseFirst; | |
316 | break; | |
317 | case UCOL_LOWER_FIRST: | |
318 | options = noCaseFirst | CASE_FIRST; | |
319 | break; | |
320 | case UCOL_UPPER_FIRST: | |
321 | options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; | |
322 | break; | |
323 | case UCOL_DEFAULT: | |
324 | options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); | |
325 | break; | |
326 | default: | |
327 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
328 | break; | |
329 | } | |
330 | } | |
331 | ||
332 | void | |
333 | CollationSettings::setAlternateHandling(UColAttributeValue value, | |
334 | int32_t defaultOptions, UErrorCode &errorCode) { | |
335 | if(U_FAILURE(errorCode)) { return; } | |
336 | int32_t noAlternate = options & ~ALTERNATE_MASK; | |
337 | switch(value) { | |
338 | case UCOL_NON_IGNORABLE: | |
339 | options = noAlternate; | |
340 | break; | |
341 | case UCOL_SHIFTED: | |
342 | options = noAlternate | SHIFTED; | |
343 | break; | |
344 | case UCOL_DEFAULT: | |
345 | options = noAlternate | (defaultOptions & ALTERNATE_MASK); | |
346 | break; | |
347 | default: | |
348 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
349 | break; | |
350 | } | |
351 | } | |
352 | ||
353 | void | |
354 | CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | |
355 | if(U_FAILURE(errorCode)) { return; } | |
356 | int32_t noMax = options & ~MAX_VARIABLE_MASK; | |
357 | switch(value) { | |
358 | case MAX_VAR_SPACE: | |
359 | case MAX_VAR_PUNCT: | |
360 | case MAX_VAR_SYMBOL: | |
361 | case MAX_VAR_CURRENCY: | |
362 | options = noMax | (value << MAX_VARIABLE_SHIFT); | |
363 | break; | |
364 | case UCOL_DEFAULT: | |
365 | options = noMax | (defaultOptions & MAX_VARIABLE_MASK); | |
366 | break; | |
367 | default: | |
368 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
369 | break; | |
370 | } | |
371 | } | |
372 | ||
373 | U_NAMESPACE_END | |
374 | ||
375 | #endif // !UCONFIG_NO_COLLATION |