2 *******************************************************************************
3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationsettings.cpp
8 * created on: 2013feb07
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/ucol.h"
18 #include "collation.h"
19 #include "collationdata.h"
20 #include "collationsettings.h"
21 #include "sharedobject.h"
// Copy constructor.
// The initializer list copies options, variableTop, minHighNoReorder and
// fastLatinOptions from other, and starts the reorder ranges and reorder
// codes members out as NULL or 0.
28 CollationSettings::CollationSettings(const CollationSettings
&other
)
29 : SharedObject(other
),
30 options(other
.options
), variableTop(other
.variableTop
),
32 minHighNoReorder(other
.minHighNoReorder
),
33 reorderRanges(NULL
), reorderRangesLength(0),
34 reorderCodes(NULL
), reorderCodesLength(0), reorderCodesCapacity(0),
35 fastLatinOptions(other
.fastLatinOptions
) {
// The reordering data is brought over through copyReorderingFrom().
// NOTE(review): errorCode is a local and is not inspected between this call
// and the next statement, so a failure in copyReorderingFrom() is not
// reported to the caller - confirm this is intended.
36 UErrorCode errorCode
= U_ZERO_ERROR
;
37 copyReorderingFrom(other
, errorCode
);
// fastLatinPrimaries is copied only when fastLatinOptions is non-negative.
38 if(fastLatinOptions
>= 0) {
39 uprv_memcpy(fastLatinPrimaries
, other
.fastLatinPrimaries
, sizeof(fastLatinPrimaries
));
// Destructor.
// Frees the reorderCodes block only when reorderCodesCapacity is non-zero.
// Elsewhere in this file a zero capacity is used for reorder arrays that are
// aliased rather than allocated by this object.
43 CollationSettings::~CollationSettings() {
44 if(reorderCodesCapacity
!= 0) {
45 uprv_free(const_cast<int32_t *>(reorderCodes
));
// Equality comparison with another CollationSettings.
// Returns FALSE at the first mismatch among: options, variableTop (compared
// only when an ALTERNATE_MASK bit is set in options), the number of reorder
// codes, and each reorder code in order.
// NOTE(review): the return type line and the final return are not visible
// in this view.
50 CollationSettings::operator==(const CollationSettings
&other
) const {
51 if(options
!= other
.options
) { return FALSE
; }
// variableTop only matters when an ALTERNATE_MASK bit is set.
52 if((options
& ALTERNATE_MASK
) != 0 && variableTop
!= other
.variableTop
) { return FALSE
; }
53 if(reorderCodesLength
!= other
.reorderCodesLength
) { return FALSE
; }
// Lengths match here, so compare the codes element by element.
54 for(int32_t i
= 0; i
< reorderCodesLength
; ++i
) {
55 if(reorderCodes
[i
] != other
.reorderCodes
[i
]) { return FALSE
; }
// Computes a hash value from the same fields that operator== compares:
// options (shifted left by 8), variableTop (only when an ALTERNATE_MASK bit
// is set in options), the reorder codes length, and each reorder code
// shifted left by its index.
// NOTE(review): the shift count below is the loop index i. If
// reorderCodesLength can reach the bit width of int32_t, the shift is
// undefined behavior in C++ - confirm the upper bound or mask the count.
61 CollationSettings::hashCode() const {
62 int32_t h
= options
<< 8;
63 if((options
& ALTERNATE_MASK
) != 0) { h
^= variableTop
; }
64 h
^= reorderCodesLength
;
65 for(int32_t i
= 0; i
< reorderCodesLength
; ++i
) {
66 h
^= (reorderCodes
[i
] << i
);
// Turns reordering off.
// The visible lines set the reorder ranges length and the reorder codes
// length to 0; the allocated block itself is kept, as the comment below says.
// NOTE(review): the statements that clear the permutation itself are not
// visible in this view.
72 CollationSettings::resetReordering() {
73 // When we turn off reordering, we want to set a NULL permutation
74 // rather than a no-op permutation.
75 // Keep the memory via reorderCodes and its capacity.
78 reorderRangesLength
= 0;
79 reorderCodesLength
= 0;
// Tries to use the caller-supplied reordering arrays (codes, ranges, table)
// directly instead of copying them.
// Parameters:
//   data         - collation data, passed on to setReordering()
//   codes/length - the reorder codes and their count
//   ranges/rangesLength - (limit, offset) pairs; see the checks below
//   table        - lead byte permutation table
//   errorCode    - in/out error code; nothing is done if it already
//                  indicates failure
// NOTE(review): the opening of the validity condition and the end of the
// alias branch are not visible in this view. The setReordering() call at the
// end is presumably reached only when the supplied data cannot be aliased -
// confirm against the full source.
83 CollationSettings::aliasReordering(const CollationData
&data
, const int32_t *codes
, int32_t length
,
84 const uint32_t *ranges
, int32_t rangesLength
,
85 const uint8_t *table
, UErrorCode
&errorCode
) {
86 if(U_FAILURE(errorCode
)) { return; }
89 !reorderTableHasSplitBytes(table
) :
91 // The first offset must be 0. The last offset must not be 0.
92 (ranges
[0] & 0xffff) == 0 && (ranges
[rangesLength
- 1] & 0xffff) != 0)) {
93 // We need to release the memory before setting the alias pointer.
94 if(reorderCodesCapacity
!= 0) {
95 uprv_free(const_cast<int32_t *>(reorderCodes
));
// A capacity of 0 marks the arrays as not allocated by this object.
96 reorderCodesCapacity
= 0;
100 reorderCodesLength
= length
;
101 // Drop ranges before the first split byte. They are reordered by the table.
102 // This then speeds up reordering of the remaining ranges.
103 int32_t firstSplitByteRangeIndex
= 0;
104 while(firstSplitByteRangeIndex
< rangesLength
&&
105 (ranges
[firstSplitByteRangeIndex
] & 0xff0000) == 0) {
106 // The second byte of the primary limit is 0.
107 ++firstSplitByteRangeIndex
;
// No range has a non-zero second limit byte: no ranges are kept.
109 if(firstSplitByteRangeIndex
== rangesLength
) {
110 U_ASSERT(!reorderTableHasSplitBytes(table
));
111 minHighNoReorder
= 0;
112 reorderRanges
= NULL
;
113 reorderRangesLength
= 0;
// Otherwise keep the ranges from the first split byte onward.
// minHighNoReorder is the upper 16 bits of the last range entry.
115 U_ASSERT(table
[ranges
[firstSplitByteRangeIndex
] >> 24] == 0);
116 minHighNoReorder
= ranges
[rangesLength
- 1] & 0xffff0000;
117 reorderRanges
= ranges
+ firstSplitByteRangeIndex
;
118 reorderRangesLength
= rangesLength
- firstSplitByteRangeIndex
;
122 // Regenerate missing data.
123 setReordering(data
, codes
, length
, errorCode
);
// Builds reordering data for the given reorder codes.
// Visible steps: obtain (limit, offset) range pairs from
// data.makeReorderRanges(), derive minHighNoReorder from the last pair,
// write a lead byte permutation table, drop the ranges below the first
// split byte, and hand codes, ranges and table to setReorderArrays().
// Does nothing if errorCode already indicates failure.
// NOTE(review): several lines of this function are not visible in this view,
// including the handling of the empty / UCOL_REORDER_CODE_NONE case, the
// handling of an empty ranges list, the table declaration and the inner
// loops over lead bytes.
127 CollationSettings::setReordering(const CollationData
&data
,
128 const int32_t *codes
, int32_t codesLength
,
129 UErrorCode
&errorCode
) {
130 if(U_FAILURE(errorCode
)) { return; }
// Special case: no codes, or the single code UCOL_REORDER_CODE_NONE.
131 if(codesLength
== 0 || (codesLength
== 1 && codes
[0] == UCOL_REORDER_CODE_NONE
)) {
135 UVector32
rangesList(errorCode
);
136 data
.makeReorderRanges(codes
, codesLength
, rangesList
, errorCode
);
137 if(U_FAILURE(errorCode
)) { return; }
138 int32_t rangesLength
= rangesList
.size();
139 if(rangesLength
== 0) {
// The int32_t buffer of the vector is reinterpreted as unsigned pairs.
143 const uint32_t *ranges
= reinterpret_cast<uint32_t *>(rangesList
.getBuffer());
144 // ranges[] contains at least two (limit, offset) pairs.
145 // The first offset must be 0. The last offset must not be 0.
146 // Separators (at the low end) and trailing weights (at the high end)
147 // are never reordered.
148 U_ASSERT(rangesLength
>= 2);
149 U_ASSERT((ranges
[0] & 0xffff) == 0 && (ranges
[rangesLength
- 1] & 0xffff) != 0);
150 minHighNoReorder
= ranges
[rangesLength
- 1] & 0xffff0000;
152 // Write the lead byte permutation table.
153 // Set a 0 for each lead byte that has a range boundary in the middle.
156 int32_t firstSplitByteRangeIndex
= -1;
157 for(int32_t i
= 0; i
< rangesLength
; ++i
) {
158 uint32_t pair
= ranges
[i
];
// The top byte of the pair is the lead byte of the range limit.
159 int32_t limit1
= (int32_t)(pair
>> 24);
// Only the low 8 bits of b + pair survive the uint8_t cast.
161 table
[b
] = (uint8_t)(b
+ pair
);
164 // Check the second byte of the limit.
165 if((pair
& 0xff0000) != 0) {
// Remember the first range whose limit has a non-zero second byte.
168 if(firstSplitByteRangeIndex
< 0) {
169 firstSplitByteRangeIndex
= i
;
174 table
[b
] = (uint8_t)b
;
177 if(firstSplitByteRangeIndex
< 0) {
178 // The lead byte permutation table alone suffices for reordering.
181 // Remove the ranges below the first split byte.
182 ranges
+= firstSplitByteRangeIndex
;
183 rangesLength
-= firstSplitByteRangeIndex
;
185 setReorderArrays(codes
, codesLength
, ranges
, rangesLength
, table
, errorCode
);
// Copies the reorder codes, the reorder ranges and the 256-byte table into
// one memory block referenced by reorderCodes, then points reorderTable and
// reorderRanges into that block and updates the lengths.
// The current block is reused when codesLength + rangesLength fits in
// reorderCodesCapacity; otherwise a new block is allocated and any previously
// allocated block is freed. If allocation fails, errorCode is set to
// U_MEMORY_ALLOCATION_ERROR.
// Does nothing if errorCode already indicates failure.
// Block layout in int32_t units: codes at [0, codesLength), ranges directly
// after the codes, table starting at index reorderCodesCapacity.
// NOTE(review): the declaration of ownedCodes and the else / return lines
// are not visible in this view.
189 CollationSettings::setReorderArrays(const int32_t *codes
, int32_t codesLength
,
190 const uint32_t *ranges
, int32_t rangesLength
,
191 const uint8_t *table
, UErrorCode
&errorCode
) {
192 if(U_FAILURE(errorCode
)) { return; }
194 int32_t totalLength
= codesLength
+ rangesLength
;
195 U_ASSERT(totalLength
> 0);
// Reuse the existing block when it is large enough.
196 if(totalLength
<= reorderCodesCapacity
) {
197 ownedCodes
= const_cast<int32_t *>(reorderCodes
);
199 // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
200 int32_t capacity
= (totalLength
+ 3) & ~3; // round up to a multiple of 4 ints
// capacity counts int32_t units (4 bytes each); the extra 256 bytes hold the table.
201 ownedCodes
= (int32_t *)uprv_malloc(capacity
* 4 + 256);
202 if(ownedCodes
== NULL
) {
204 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
// Release the previously allocated block before adopting the new one.
207 if(reorderCodesCapacity
!= 0) {
208 uprv_free(const_cast<int32_t *>(reorderCodes
));
210 reorderCodes
= ownedCodes
;
211 reorderCodesCapacity
= capacity
;
// The table goes after the capacity-sized int area; codes and ranges go first.
213 uprv_memcpy(ownedCodes
+ reorderCodesCapacity
, table
, 256);
214 uprv_memcpy(ownedCodes
, codes
, codesLength
* 4);
215 uprv_memcpy(ownedCodes
+ codesLength
, ranges
, rangesLength
* 4);
216 reorderTable
= reinterpret_cast<const uint8_t *>(reorderCodes
+ reorderCodesCapacity
);
217 reorderCodesLength
= codesLength
;
218 reorderRanges
= reinterpret_cast<uint32_t *>(ownedCodes
) + codesLength
;
219 reorderRangesLength
= rangesLength
;
// Copies the reordering state of other into this object.
// minHighNoReorder is always copied. When other.reorderCodesCapacity is 0,
// the table, ranges and codes pointers and their lengths are shared with
// other; the setReorderArrays() call makes a copy of the arrays instead.
// Does nothing if errorCode already indicates failure.
// NOTE(review): the handling of the case where other has no reordering, and
// the else that presumably separates the alias path from the copy path, are
// not visible in this view.
223 CollationSettings::copyReorderingFrom(const CollationSettings
&other
, UErrorCode
&errorCode
) {
224 if(U_FAILURE(errorCode
)) { return; }
225 if(!other
.hasReordering()) {
229 minHighNoReorder
= other
.minHighNoReorder
;
230 if(other
.reorderCodesCapacity
== 0) {
231 // The reorder arrays are aliased to memory-mapped data.
232 reorderTable
= other
.reorderTable
;
233 reorderRanges
= other
.reorderRanges
;
234 reorderRangesLength
= other
.reorderRangesLength
;
235 reorderCodes
= other
.reorderCodes
;
236 reorderCodesLength
= other
.reorderCodesLength
;
// Copy path: duplicate the arrays of other into a block referenced by this object.
238 setReorderArrays(other
.reorderCodes
, other
.reorderCodesLength
,
239 other
.reorderRanges
, other
.reorderRangesLength
,
240 other
.reorderTable
, errorCode
);
// Examines a 256-entry lead byte permutation table.
// Entry 0 is asserted to be 0, and the loop visits entries 1 through 255.
// NOTE(review): the loop body and the return statements are not visible in
// this view. Going by the name and by the comment in setReordering() that a
// 0 entry marks a lead byte with a range boundary in the middle, this
// presumably reports whether any entry from 1 to 255 is 0 - confirm.
245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table
[256]) {
246 U_ASSERT(table
[0] == 0);
247 for(int32_t i
= 1; i
< 256; ++i
) {
// Reorders the primary weight p using the reorder ranges.
// Returns p unchanged when p >= minHighNoReorder. Otherwise scans
// reorderRanges for the first entry r that is greater than q = p | 0xffff
// and returns p + (r << 24).
// The scan has no length check. Elsewhere in this file minHighNoReorder is
// set to the upper 16 bits of the last range entry, so any p below it has a
// q below that last entry.
// NOTE(review): the declaration of r is not visible in this view.
256 CollationSettings::reorderEx(uint32_t p
) const {
257 if(p
>= minHighNoReorder
) { return p
; }
258 // Round up p so that its lower 16 bits are >= any offset bits.
259 // Then compare q directly with (limit, offset) pairs.
260 uint32_t q
= p
| 0xffff;
262 const uint32_t *ranges
= reorderRanges
;
263 while(q
>= (r
= *ranges
)) { ++ranges
; }
// Shifting r left by 24 leaves only its low 8 bits, moved into the top byte.
264 return p
+ (r
<< 24);
// Sets the strength bits in options.
// The visible outcomes are: store value shifted by STRENGTH_SHIFT, copy the
// STRENGTH_MASK bits from defaultOptions, or set errorCode to
// U_ILLEGAL_ARGUMENT_ERROR. All other option bits are preserved through
// noStrength. Does nothing if errorCode already indicates failure.
// NOTE(review): the switch statement and all case labels except
// UCOL_QUATERNARY are not visible in this view.
268 CollationSettings::setStrength(int32_t value
, int32_t defaultOptions
, UErrorCode
&errorCode
) {
269 if(U_FAILURE(errorCode
)) { return; }
// options with the strength bits cleared.
270 int32_t noStrength
= options
& ~STRENGTH_MASK
;
275 case UCOL_QUATERNARY
:
277 options
= noStrength
| (value
<< STRENGTH_SHIFT
);
280 options
= noStrength
| (defaultOptions
& STRENGTH_MASK
);
283 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Updates the option bit(s) selected by bit according to value.
// The visible outcomes are: copy the selected bit(s) from defaultOptions
// while leaving all other option bits unchanged, or set errorCode to
// U_ILLEGAL_ARGUMENT_ERROR. Does nothing if errorCode already indicates
// failure.
// NOTE(review): the switch on value and the branches that turn the flag on
// or off are not visible in this view.
289 CollationSettings::setFlag(int32_t bit
, UColAttributeValue value
,
290 int32_t defaultOptions
, UErrorCode
&errorCode
) {
291 if(U_FAILURE(errorCode
)) { return; }
300 options
= (options
& ~bit
) | (defaultOptions
& bit
);
303 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Sets the case-first bits in options according to value.
// Visible outcomes:
//   - bits cleared (case label not visible in this view)
//   - UCOL_LOWER_FIRST: CASE_FIRST is set
//   - UCOL_UPPER_FIRST: CASE_FIRST_AND_UPPER_MASK is set
//   - bits copied from defaultOptions (case label not visible)
//   - errorCode set to U_ILLEGAL_ARGUMENT_ERROR (label not visible)
// All other option bits are preserved through noCaseFirst.
// Does nothing if errorCode already indicates failure.
309 CollationSettings::setCaseFirst(UColAttributeValue value
,
310 int32_t defaultOptions
, UErrorCode
&errorCode
) {
311 if(U_FAILURE(errorCode
)) { return; }
// options with the case-first bits cleared.
312 int32_t noCaseFirst
= options
& ~CASE_FIRST_AND_UPPER_MASK
;
315 options
= noCaseFirst
;
317 case UCOL_LOWER_FIRST
:
318 options
= noCaseFirst
| CASE_FIRST
;
320 case UCOL_UPPER_FIRST
:
321 options
= noCaseFirst
| CASE_FIRST_AND_UPPER_MASK
;
324 options
= noCaseFirst
| (defaultOptions
& CASE_FIRST_AND_UPPER_MASK
);
327 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Sets the alternate-handling bits in options according to value.
// Visible outcomes:
//   - UCOL_NON_IGNORABLE: the ALTERNATE_MASK bits are cleared
//   - SHIFTED is set (case label not visible in this view)
//   - bits copied from defaultOptions (case label not visible)
//   - errorCode set to U_ILLEGAL_ARGUMENT_ERROR (label not visible)
// All other option bits are preserved through noAlternate.
// Does nothing if errorCode already indicates failure.
333 CollationSettings::setAlternateHandling(UColAttributeValue value
,
334 int32_t defaultOptions
, UErrorCode
&errorCode
) {
335 if(U_FAILURE(errorCode
)) { return; }
// options with the alternate-handling bits cleared.
336 int32_t noAlternate
= options
& ~ALTERNATE_MASK
;
338 case UCOL_NON_IGNORABLE
:
339 options
= noAlternate
;
342 options
= noAlternate
| SHIFTED
;
345 options
= noAlternate
| (defaultOptions
& ALTERNATE_MASK
);
348 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Sets the max-variable bits in options.
// The visible outcomes are: store value shifted by MAX_VARIABLE_SHIFT, copy
// the MAX_VARIABLE_MASK bits from defaultOptions, or set errorCode to
// U_ILLEGAL_ARGUMENT_ERROR. All other option bits are preserved through
// noMax. Does nothing if errorCode already indicates failure.
// NOTE(review): the switch statement and all case labels except
// MAX_VAR_CURRENCY are not visible in this view.
354 CollationSettings::setMaxVariable(int32_t value
, int32_t defaultOptions
, UErrorCode
&errorCode
) {
355 if(U_FAILURE(errorCode
)) { return; }
// options with the max-variable bits cleared.
356 int32_t noMax
= options
& ~MAX_VARIABLE_MASK
;
361 case MAX_VAR_CURRENCY
:
362 options
= noMax
| (value
<< MAX_VARIABLE_SHIFT
);
365 options
= noMax
| (defaultOptions
& MAX_VARIABLE_MASK
);
368 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
375 #endif // !UCONFIG_NO_COLLATION