1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2013-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * collationsettings.cpp
10 * created on: 2013feb07
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "unicode/ucol.h"
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "collationsettings.h"
23 #include "sharedobject.h"
// Copy constructor.
// Copies options, variableTop, minHighNoReorder and fastLatinOptions straight
// from `other`. The reorder ranges/codes members start out empty (NULL / 0)
// and are then populated by copyReorderingFrom().
30 CollationSettings::CollationSettings(const CollationSettings
&other
)
31 : SharedObject(other
),
32 options(other
.options
), variableTop(other
.variableTop
),
34 minHighNoReorder(other
.minHighNoReorder
),
35 reorderRanges(NULL
), reorderRangesLength(0),
36 reorderCodes(NULL
), reorderCodesLength(0), reorderCodesCapacity(0),
37 fastLatinOptions(other
.fastLatinOptions
) {
// A local error code is handed to copyReorderingFrom().
// NOTE(review): the visible lines never inspect it afterwards, so a failed
// copy (e.g. an allocation failure) would go unreported here -- confirm.
38 UErrorCode errorCode
= U_ZERO_ERROR
;
39 copyReorderingFrom(other
, errorCode
);
// The fastLatinPrimaries array is copied only when fastLatinOptions is
// non-negative; sizeof() of the member gives the full array size in bytes.
40 if(fastLatinOptions
>= 0) {
41 uprv_memcpy(fastLatinPrimaries
, other
.fastLatinPrimaries
, sizeof(fastLatinPrimaries
));
// Destructor.
// Frees the reorderCodes block only when reorderCodesCapacity is non-zero.
// A capacity of 0 means the arrays are not owned by this object
// (copyReorderingFrom() treats that state as aliased data).
45 CollationSettings::~CollationSettings() {
46 if(reorderCodesCapacity
!= 0) {
// const_cast removes the const qualifier so the pointer can be passed to uprv_free().
47 uprv_free(const_cast<int32_t *>(reorderCodes
));
// Equality comparison against `other`.
// Each check below returns FALSE at the first difference found:
//   1. the options word,
//   2. variableTop, but only when an ALTERNATE_MASK bit is set in options,
//   3. the number of reorder codes,
//   4. the reorder codes, element by element.
// NOTE(review): the result for the all-equal case is outside the lines shown.
52 CollationSettings::operator==(const CollationSettings
&other
) const {
53 if(options
!= other
.options
) { return FALSE
; }
// options are known to be equal here, so testing this object's options suffices.
54 if((options
& ALTERNATE_MASK
) != 0 && variableTop
!= other
.variableTop
) { return FALSE
; }
55 if(reorderCodesLength
!= other
.reorderCodesLength
) { return FALSE
; }
56 for(int32_t i
= 0; i
< reorderCodesLength
; ++i
) {
57 if(reorderCodes
[i
] != other
.reorderCodes
[i
]) { return FALSE
; }
// Computes a hash from the same fields that operator== compares:
// options (shifted left by 8), variableTop (only when an ALTERNATE_MASK bit
// is set in options), the number of reorder codes, and every reorder code.
63 CollationSettings::hashCode() const {
64 int32_t h
= options
<< 8;
65 if((options
& ALTERNATE_MASK
) != 0) { h
^= variableTop
; }
66 h
^= reorderCodesLength
;
// Each code is shifted by its index before being mixed in, so the position
// of a code influences the hash.
// NOTE(review): a shift count of i is only well-defined while i is smaller
// than the bit width of the shifted operand -- confirm that
// reorderCodesLength is bounded accordingly.
67 for(int32_t i
= 0; i
< reorderCodesLength
; ++i
) {
68 h
^= (reorderCodes
[i
] << i
);
// Turns reordering off.
// The visible statements zero the range and code lengths; the reorderCodes
// allocation and its capacity are left untouched so the block can be reused.
74 CollationSettings::resetReordering() {
75 // When we turn off reordering, we want to set a NULL permutation
76 // rather than a no-op permutation.
77 // Keep the memory via reorderCodes and its capacity.
80 reorderRangesLength
= 0;
81 reorderCodesLength
= 0;
// Points this object at caller-provided reordering data without copying it.
//
// Parameters:
//   data          collation data, forwarded to setReordering() at the end
//   codes/length  the reorder codes to alias
//   ranges/rangesLength
//                 32-bit range entries; the code below reads the low 16 bits
//                 as an "offset" and the upper bits as a primary "limit"
//   table         lead byte permutation table (indexed by a primary's top byte)
//   errorCode     in/out ICU error code; nothing happens if it already
//                 indicates a failure
//
// When the supplied table/ranges pass the validation below, any block owned by
// this object is freed and the members are set to alias the arguments.
// NOTE(review): the setReordering() call at the end is presumably the fallback
// for data that fails validation; the branch structure is not fully visible.
85 CollationSettings::aliasReordering(const CollationData
&data
, const int32_t *codes
, int32_t length
,
86 const uint32_t *ranges
, int32_t rangesLength
,
87 const uint8_t *table
, UErrorCode
&errorCode
) {
88 if(U_FAILURE(errorCode
)) { return; }
// Validation (the beginning of this condition is not visible here): either
// the table must have no split bytes, or the ranges must satisfy the offset
// constraints stated below.
91 !reorderTableHasSplitBytes(table
) :
93 // The first offset must be 0. The last offset must not be 0.
94 (ranges
[0] & 0xffff) == 0 && (ranges
[rangesLength
- 1] & 0xffff) != 0)) {
95 // We need to release the memory before setting the alias pointer.
96 if(reorderCodesCapacity
!= 0) {
97 uprv_free(const_cast<int32_t *>(reorderCodes
));
// Capacity 0 marks the arrays as not owned from now on.
98 reorderCodesCapacity
= 0;
100 reorderTable
= table
;
101 reorderCodes
= codes
;
102 reorderCodesLength
= length
;
103 // Drop ranges before the first split byte. They are reordered by the table.
104 // This then speeds up reordering of the remaining ranges.
105 int32_t firstSplitByteRangeIndex
= 0;
106 while(firstSplitByteRangeIndex
< rangesLength
&&
107 (ranges
[firstSplitByteRangeIndex
] & 0xff0000) == 0) {
108 // The second byte of the primary limit is 0.
109 ++firstSplitByteRangeIndex
;
// Every range was skipped: the table alone handles reordering, so no ranges
// are kept and minHighNoReorder is cleared.
111 if(firstSplitByteRangeIndex
== rangesLength
) {
112 U_ASSERT(!reorderTableHasSplitBytes(table
));
113 minHighNoReorder
= 0;
114 reorderRanges
= NULL
;
115 reorderRangesLength
= 0;
// Otherwise keep the ranges starting at the first split byte. The table entry
// for that range's lead byte is expected to be 0.
117 U_ASSERT(table
[ranges
[firstSplitByteRangeIndex
] >> 24] == 0);
// Upper 16 bits of the last range entry: primaries at or above this value are
// returned unchanged by reorderEx().
118 minHighNoReorder
= ranges
[rangesLength
- 1] & 0xffff0000;
119 reorderRanges
= ranges
+ firstSplitByteRangeIndex
;
120 reorderRangesLength
= rangesLength
- firstSplitByteRangeIndex
;
124 // Regenerate missing data.
125 setReordering(data
, codes
, length
, errorCode
);
// Builds this object's reordering state from a list of reorder codes.
//
// Parameters:
//   data               collation data; supplies the ranges via makeReorderRanges()
//   codes/codesLength  the requested reorder codes
//   errorCode          in/out ICU error code; nothing happens if it already
//                      indicates a failure
//
// Visible steps: obtain the reorder ranges for the codes, derive
// minHighNoReorder, fill a lead byte permutation table, trim the ranges that
// the table fully covers, and store the result via setReorderArrays().
// NOTE(review): several lines of this function (the table declaration, inner
// loops, and some branch bodies) are not visible in this listing.
129 CollationSettings::setReordering(const CollationData
&data
,
130 const int32_t *codes
, int32_t codesLength
,
131 UErrorCode
&errorCode
) {
132 if(U_FAILURE(errorCode
)) { return; }
// An empty list, or the single code UCOL_REORDER_CODE_NONE, is special-cased.
133 if(codesLength
== 0 || (codesLength
== 1 && codes
[0] == UCOL_REORDER_CODE_NONE
)) {
// Collect the reorder ranges implied by the codes into a temporary vector.
137 UVector32
rangesList(errorCode
);
138 data
.makeReorderRanges(codes
, codesLength
, rangesList
, errorCode
);
139 if(U_FAILURE(errorCode
)) { return; }
140 int32_t rangesLength
= rangesList
.size();
// No ranges at all is handled as its own case.
141 if(rangesLength
== 0) {
// View the vector's buffer as unsigned 32-bit (limit, offset) pairs.
145 const uint32_t *ranges
= reinterpret_cast<uint32_t *>(rangesList
.getBuffer());
146 // ranges[] contains at least two (limit, offset) pairs.
147 // The first offset must be 0. The last offset must not be 0.
148 // Separators (at the low end) and trailing weights (at the high end)
149 // are never reordered.
150 U_ASSERT(rangesLength
>= 2);
151 U_ASSERT((ranges
[0] & 0xffff) == 0 && (ranges
[rangesLength
- 1] & 0xffff) != 0);
// Upper 16 bits of the last pair: reorderEx() returns primaries at or above
// this value unchanged.
152 minHighNoReorder
= ranges
[rangesLength
- 1] & 0xffff0000;
154 // Write the lead byte permutation table.
155 // Set a 0 for each lead byte that has a range boundary in the middle.
// -1 means "no range with a split lead byte seen yet".
158 int32_t firstSplitByteRangeIndex
= -1;
159 for(int32_t i
= 0; i
< rangesLength
; ++i
) {
160 uint32_t pair
= ranges
[i
];
// limit1 is the top byte of this pair.
161 int32_t limit1
= (int32_t)(pair
>> 24);
// The uint8_t cast keeps only the low 8 bits of b + pair.
163 table
[b
] = (uint8_t)(b
+ pair
);
166 // Check the second byte of the limit.
167 if((pair
& 0xff0000) != 0) {
// Record the index of the first pair whose limit has a non-zero second byte.
170 if(firstSplitByteRangeIndex
< 0) {
171 firstSplitByteRangeIndex
= i
;
// Identity mapping for the lead byte b.
176 table
[b
] = (uint8_t)b
;
179 if(firstSplitByteRangeIndex
< 0) {
180 // The lead byte permutation table alone suffices for reordering.
183 // Remove the ranges below the first split byte.
184 ranges
+= firstSplitByteRangeIndex
;
185 rangesLength
-= firstSplitByteRangeIndex
;
// Copy codes, the remaining ranges and the table into storage owned by this object.
187 setReorderArrays(codes
, codesLength
, ranges
, rangesLength
, table
, errorCode
);
// Copies the codes, the ranges and the 256-byte table into a single block
// owned by this object and points the reorder* members at the copies.
//
// Block layout, in int32_t units from the start of the block:
//   [0, codesLength)                             reorder codes
//   [codesLength, codesLength + rangesLength)    reorder ranges
//   [reorderCodesCapacity, ...)                  256-byte table
//
// Parameters:
//   codes/codesLength    reorder codes to copy
//   ranges/rangesLength  range entries to copy
//   table                256-byte table to copy
//   errorCode            in/out ICU error code; set to
//                        U_MEMORY_ALLOCATION_ERROR if allocation fails
191 CollationSettings::setReorderArrays(const int32_t *codes
, int32_t codesLength
,
192 const uint32_t *ranges
, int32_t rangesLength
,
193 const uint8_t *table
, UErrorCode
&errorCode
) {
194 if(U_FAILURE(errorCode
)) { return; }
196 int32_t totalLength
= codesLength
+ rangesLength
;
197 U_ASSERT(totalLength
> 0);
// Reuse the existing block when it already has room for codes + ranges.
198 if(totalLength
<= reorderCodesCapacity
) {
199 ownedCodes
= const_cast<int32_t *>(reorderCodes
);
201 // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
202 int32_t capacity
= (totalLength
+ 3) & ~3; // round up to a multiple of 4 ints
// capacity * 4 bytes for the int32_t slots plus 256 bytes for the table.
203 ownedCodes
= (int32_t *)uprv_malloc(capacity
* 4 + 256);
204 if(ownedCodes
== NULL
) {
206 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
// Release the previous owned block (if any) before adopting the new one.
209 if(reorderCodesCapacity
!= 0) {
210 uprv_free(const_cast<int32_t *>(reorderCodes
));
212 reorderCodes
= ownedCodes
;
213 reorderCodesCapacity
= capacity
;
// The table goes right after the capacity-sized int32_t area.
215 uprv_memcpy(ownedCodes
+ reorderCodesCapacity
, table
, 256);
// Lengths are in elements; * 4 converts to bytes for 32-bit entries.
216 uprv_memcpy(ownedCodes
, codes
, codesLength
* 4);
217 uprv_memcpy(ownedCodes
+ codesLength
, ranges
, rangesLength
* 4);
// Point the members at the copies inside the owned block.
218 reorderTable
= reinterpret_cast<const uint8_t *>(reorderCodes
+ reorderCodesCapacity
);
219 reorderCodesLength
= codesLength
;
220 reorderRanges
= reinterpret_cast<uint32_t *>(ownedCodes
) + codesLength
;
221 reorderRangesLength
= rangesLength
;
// Makes this object's reordering state match that of `other`.
//
// Parameters:
//   other      the settings to copy from
//   errorCode  in/out ICU error code; nothing happens if it already
//              indicates a failure
//
// When `other` has reordering, minHighNoReorder is copied and then either
//   - the pointers are shared, if `other` does not own its arrays
//     (reorderCodesCapacity == 0), or
//   - the arrays are deep-copied through setReorderArrays().
// NOTE(review): the handling of the no-reordering case is not visible here.
225 CollationSettings::copyReorderingFrom(const CollationSettings
&other
, UErrorCode
&errorCode
) {
226 if(U_FAILURE(errorCode
)) { return; }
227 if(!other
.hasReordering()) {
231 minHighNoReorder
= other
.minHighNoReorder
;
232 if(other
.reorderCodesCapacity
== 0) {
233 // The reorder arrays are aliased to memory-mapped data.
234 reorderTable
= other
.reorderTable
;
235 reorderRanges
= other
.reorderRanges
;
236 reorderRangesLength
= other
.reorderRangesLength
;
237 reorderCodes
= other
.reorderCodes
;
238 reorderCodesLength
= other
.reorderCodesLength
;
// `other` owns its arrays: make a private copy of codes, ranges and table.
240 setReorderArrays(other
.reorderCodes
, other
.reorderCodesLength
,
241 other
.reorderRanges
, other
.reorderRangesLength
,
242 other
.reorderTable
, errorCode
);
// Examines a 256-entry lead byte permutation table.
// Entry 0 is asserted to be 0; the loop visits entries 1 through 255.
// NOTE(review): the loop body and the return value are not visible in this
// listing. Presumably the function reports whether any of those entries is 0,
// which is how setReordering() marks a lead byte with a range boundary in the
// middle -- confirm against the full source.
247 CollationSettings::reorderTableHasSplitBytes(const uint8_t table
[256]) {
248 U_ASSERT(table
[0] == 0);
249 for(int32_t i
= 1; i
< 256; ++i
) {
// Reorders the primary weight p using the reorderRanges entries.
// Primaries at or above minHighNoReorder are returned unchanged. Otherwise the
// first range entry greater than q = p | 0xffff is located, and that entry
// shifted left by 24 bits is added to p.
258 CollationSettings::reorderEx(uint32_t p
) const {
259 if(p
>= minHighNoReorder
) { return p
; }
260 // Round up p so that its lower 16 bits are >= any offset bits.
261 // Then compare q directly with (limit, offset) pairs.
262 uint32_t q
= p
| 0xffff;
// The scan has no explicit length check.
// NOTE(review): it appears to rely on p < minHighNoReorder guaranteeing that
// some entry exceeds q (minHighNoReorder is taken from the last range entry
// where the ranges are set up) -- confirm.
264 const uint32_t *ranges
= reorderRanges
;
265 while(q
>= (r
= *ranges
)) { ++ranges
; }
// Shifting by 24 discards all but the low 8 bits of the entry and applies them
// to the top byte of p.
266 return p
+ (r
<< 24);
// Updates the strength bits of `options`.
//
// Parameters:
//   value           the requested strength
//   defaultOptions  options word that supplies the strength bits in the
//                   "use default" branch
//   errorCode       in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR in
//                   the rejecting branch
//
// NOTE(review): the switch header and most case labels are not visible in
// this listing; only UCOL_QUATERNARY is shown.
270 CollationSettings::setStrength(int32_t value
, int32_t defaultOptions
, UErrorCode
&errorCode
) {
271 if(U_FAILURE(errorCode
)) { return; }
// options with the strength bits cleared; every accepting branch ORs new bits into this.
272 int32_t noStrength
= options
& ~STRENGTH_MASK
;
277 case UCOL_QUATERNARY
:
// Store the explicit strength value in its bit field.
279 options
= noStrength
| (value
<< STRENGTH_SHIFT
);
// Take the strength bits from defaultOptions instead.
282 options
= noStrength
| (defaultOptions
& STRENGTH_MASK
);
// Remaining case: the value is rejected.
285 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Updates the option flag(s) selected by `bit` according to `value`.
//
// Parameters:
//   bit             mask of the option bit(s) to change
//   value           the requested attribute value
//   defaultOptions  options word that supplies the bit(s) in the
//                   "use default" branch
//   errorCode       in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR in
//                   the rejecting branch
//
// NOTE(review): the switch over `value` and its other branches are not
// visible in this listing.
291 CollationSettings::setFlag(int32_t bit
, UColAttributeValue value
,
292 int32_t defaultOptions
, UErrorCode
&errorCode
) {
293 if(U_FAILURE(errorCode
)) { return; }
// Clear the selected bit(s), then copy their state from defaultOptions.
302 options
= (options
& ~bit
) | (defaultOptions
& bit
);
// Remaining case: the value is rejected.
305 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Updates the case-first bits of `options`.
//
// Parameters:
//   value           the requested case-first setting
//   defaultOptions  options word that supplies the case-first bits in the
//                   "use default" branch
//   errorCode       in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR in
//                   the rejecting branch
//
// NOTE(review): the switch header and some case labels are not visible in
// this listing.
311 CollationSettings::setCaseFirst(UColAttributeValue value
,
312 int32_t defaultOptions
, UErrorCode
&errorCode
) {
313 if(U_FAILURE(errorCode
)) { return; }
// options with all case-first bits cleared.
314 int32_t noCaseFirst
= options
& ~CASE_FIRST_AND_UPPER_MASK
;
// Leave all case-first bits cleared.
317 options
= noCaseFirst
;
319 case UCOL_LOWER_FIRST
:
320 options
= noCaseFirst
| CASE_FIRST
;
322 case UCOL_UPPER_FIRST
:
// Upper-first sets every bit of the mask.
323 options
= noCaseFirst
| CASE_FIRST_AND_UPPER_MASK
;
// Take the case-first bits from defaultOptions instead.
326 options
= noCaseFirst
| (defaultOptions
& CASE_FIRST_AND_UPPER_MASK
);
// Remaining case: the value is rejected.
329 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Updates the alternate-handling bits of `options`.
//
// Parameters:
//   value           the requested alternate-handling setting
//   defaultOptions  options word that supplies the alternate bits in the
//                   "use default" branch
//   errorCode       in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR in
//                   the rejecting branch
//
// NOTE(review): the switch header and some case labels are not visible in
// this listing.
335 CollationSettings::setAlternateHandling(UColAttributeValue value
,
336 int32_t defaultOptions
, UErrorCode
&errorCode
) {
337 if(U_FAILURE(errorCode
)) { return; }
// options with the alternate bits cleared.
338 int32_t noAlternate
= options
& ~ALTERNATE_MASK
;
340 case UCOL_NON_IGNORABLE
:
// Non-ignorable is represented by all alternate bits being clear.
341 options
= noAlternate
;
// Set the SHIFTED bit(s).
344 options
= noAlternate
| SHIFTED
;
// Take the alternate bits from defaultOptions instead.
347 options
= noAlternate
| (defaultOptions
& ALTERNATE_MASK
);
// Remaining case: the value is rejected.
350 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Updates the max-variable bits of `options`.
//
// Parameters:
//   value           the requested max-variable setting
//   defaultOptions  options word that supplies the max-variable bits in the
//                   "use default" branch
//   errorCode       in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR in
//                   the rejecting branch
//
// NOTE(review): the switch header and most case labels are not visible in
// this listing; only MAX_VAR_CURRENCY is shown.
356 CollationSettings::setMaxVariable(int32_t value
, int32_t defaultOptions
, UErrorCode
&errorCode
) {
357 if(U_FAILURE(errorCode
)) { return; }
// options with the max-variable bits cleared.
358 int32_t noMax
= options
& ~MAX_VARIABLE_MASK
;
363 case MAX_VAR_CURRENCY
:
// Store the explicit value in its bit field.
364 options
= noMax
| (value
<< MAX_VARIABLE_SHIFT
);
// Take the max-variable bits from defaultOptions instead.
367 options
= noMax
| (defaultOptions
& MAX_VARIABLE_MASK
);
// Remaining case: the value is rejected.
370 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
377 #endif // !UCONFIG_NO_COLLATION