]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collationrootelements.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2013-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * collationrootelements.cpp
10 * created on: 2013mar05
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "collation.h"
19 #include "collationrootelements.h"
25 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p
) const {
26 if(p
== 0) { return 0; }
27 U_ASSERT(p
> elements
[elements
[IX_FIRST_PRIMARY_INDEX
]]);
28 int32_t index
= findP(p
);
29 uint32_t q
= elements
[index
];
31 if(p
== (q
& 0xffffff00)) {
32 // p == elements[index] is a root primary. Find the CE before it.
33 // We must not be in a primary range.
34 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
35 secTer
= elements
[index
- 1];
36 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) {
37 // Primary CE just before p.
38 p
= secTer
& 0xffffff00;
39 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
41 // secTer = last secondary & tertiary for the previous primary
45 if((p
& SEC_TER_DELTA_FLAG
) == 0) {
53 // p > elements[index] which is the previous primary.
54 // Find the last secondary & tertiary weights for it.
56 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
58 q
= elements
[++index
];
59 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
60 // We must not be in a primary range.
61 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
67 return ((int64_t)p
<< 32) | (secTer
& ~SEC_TER_DELTA_FLAG
);
71 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p
) const {
72 if(p
== 0) { return 0; }
73 int32_t index
= findP(p
);
74 if(p
!= (elements
[index
] & 0xffffff00)) {
76 p
= elements
[++index
];
77 if((p
& SEC_TER_DELTA_FLAG
) == 0) {
78 // First primary after p. We must not be in a primary range.
79 U_ASSERT((p
& PRIMARY_STEP_MASK
) == 0);
84 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85 return ((int64_t)p
<< 32) | Collation::COMMON_SEC_AND_TER_CE
;
89 CollationRootElements::getPrimaryBefore(uint32_t p
, UBool isCompressible
) const {
90 int32_t index
= findPrimary(p
);
92 uint32_t q
= elements
[index
];
93 if(p
== (q
& 0xffffff00)) {
94 // Found p itself. Return the previous primary.
95 // See if p is at the end of a previous range.
96 step
= (int32_t)q
& PRIMARY_STEP_MASK
;
98 // p is not at the end of a range. Look for the previous primary.
100 p
= elements
[--index
];
101 } while((p
& SEC_TER_DELTA_FLAG
) != 0);
102 return p
& 0xffffff00;
105 // p is in a range, and not at the start.
106 uint32_t nextElement
= elements
[index
+ 1];
107 U_ASSERT(isEndOfPrimaryRange(nextElement
));
108 step
= (int32_t)nextElement
& PRIMARY_STEP_MASK
;
110 // Return the previous range primary.
111 if((p
& 0xffff) == 0) {
112 return Collation::decTwoBytePrimaryByOneStep(p
, isCompressible
, step
);
114 return Collation::decThreeBytePrimaryByOneStep(p
, isCompressible
, step
);
119 CollationRootElements::getSecondaryBefore(uint32_t p
, uint32_t s
) const {
121 uint32_t previousSec
, sec
;
123 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
124 // Gap at the beginning of the secondary CE range.
126 sec
= elements
[index
] >> 16;
128 index
= findPrimary(p
) + 1;
129 previousSec
= Collation::BEFORE_WEIGHT16
;
130 sec
= getFirstSecTerForPrimary(index
) >> 16;
135 U_ASSERT((elements
[index
] & SEC_TER_DELTA_FLAG
) != 0);
136 sec
= elements
[index
++] >> 16;
143 CollationRootElements::getTertiaryBefore(uint32_t p
, uint32_t s
, uint32_t t
) const {
144 U_ASSERT((t
& ~Collation::ONLY_TERTIARY_MASK
) == 0);
146 uint32_t previousTer
, secTer
;
149 index
= (int32_t)elements
[IX_FIRST_TERTIARY_INDEX
];
150 // Gap at the beginning of the tertiary CE range.
153 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
154 previousTer
= Collation::BEFORE_WEIGHT16
;
156 secTer
= elements
[index
] & ~SEC_TER_DELTA_FLAG
;
158 index
= findPrimary(p
) + 1;
159 previousTer
= Collation::BEFORE_WEIGHT16
;
160 secTer
= getFirstSecTerForPrimary(index
);
162 uint32_t st
= (s
<< 16) | t
;
164 if((secTer
>> 16) == s
) { previousTer
= secTer
; }
165 U_ASSERT((elements
[index
] & SEC_TER_DELTA_FLAG
) != 0);
166 secTer
= elements
[index
++] & ~SEC_TER_DELTA_FLAG
;
168 U_ASSERT(secTer
== st
);
169 return previousTer
& 0xffff;
173 CollationRootElements::getPrimaryAfter(uint32_t p
, int32_t index
, UBool isCompressible
) const {
174 U_ASSERT(p
== (elements
[index
] & 0xffffff00) || isEndOfPrimaryRange(elements
[index
+ 1]));
175 uint32_t q
= elements
[++index
];
177 if((q
& SEC_TER_DELTA_FLAG
) == 0 && (step
= (int32_t)q
& PRIMARY_STEP_MASK
) != 0) {
178 // Return the next primary in this range.
179 if((p
& 0xffff) == 0) {
180 return Collation::incTwoBytePrimaryByOffset(p
, isCompressible
, step
);
182 return Collation::incThreeBytePrimaryByOffset(p
, isCompressible
, step
);
185 // Return the next primary in the list.
186 while((q
& SEC_TER_DELTA_FLAG
) != 0) {
187 q
= elements
[++index
];
189 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
195 CollationRootElements::getSecondaryAfter(int32_t index
, uint32_t s
) const {
201 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
202 secTer
= elements
[index
];
203 // Gap at the end of the secondary CE range.
206 U_ASSERT(index
>= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
]);
207 secTer
= getFirstSecTerForPrimary(index
+ 1);
208 // If this is an explicit sec/ter unit, then it will be read once more.
209 // Gap for secondaries of primary CEs.
210 secLimit
= getSecondaryBoundary();
213 uint32_t sec
= secTer
>> 16;
214 if(sec
> s
) { return sec
; }
215 secTer
= elements
[++index
];
216 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) { return secLimit
; }
221 CollationRootElements::getTertiaryAfter(int32_t index
, uint32_t s
, uint32_t t
) const {
228 index
= (int32_t)elements
[IX_FIRST_TERTIARY_INDEX
];
229 // Gap at the end of the tertiary CE range.
232 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
233 // Gap for tertiaries of primary/secondary CEs.
234 terLimit
= getTertiaryBoundary();
236 secTer
= elements
[index
] & ~SEC_TER_DELTA_FLAG
;
238 U_ASSERT(index
>= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
]);
239 secTer
= getFirstSecTerForPrimary(index
+ 1);
240 // If this is an explicit sec/ter unit, then it will be read once more.
241 terLimit
= getTertiaryBoundary();
243 uint32_t st
= (s
<< 16) | t
;
246 U_ASSERT((secTer
>> 16) == s
);
247 return secTer
& 0xffff;
249 secTer
= elements
[++index
];
250 // No tertiary greater than t for this primary+secondary.
251 if((secTer
& SEC_TER_DELTA_FLAG
) == 0 || (secTer
>> 16) > s
) { return terLimit
; }
252 secTer
&= ~SEC_TER_DELTA_FLAG
;
257 CollationRootElements::getFirstSecTerForPrimary(int32_t index
) const {
258 uint32_t secTer
= elements
[index
];
259 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) {
261 return Collation::COMMON_SEC_AND_TER_CE
;
263 secTer
&= ~SEC_TER_DELTA_FLAG
;
264 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
266 return Collation::COMMON_SEC_AND_TER_CE
;
268 // Explicit sec/ter below common/common.
273 CollationRootElements::findPrimary(uint32_t p
) const {
274 // Requirement: p must occur as a root primary.
275 U_ASSERT((p
& 0xff) == 0); // at most a 3-byte primary
276 int32_t index
= findP(p
);
277 // If p is in a range, then we just assume that p is an actual primary in this range.
278 // (Too cumbersome/expensive to check.)
279 // Otherwise, it must be an exact match.
280 U_ASSERT(isEndOfPrimaryRange(elements
[index
+ 1]) || p
== (elements
[index
] & 0xffffff00));
285 CollationRootElements::findP(uint32_t p
) const {
286 // p need not occur as a root primary.
287 // For example, it might be a reordering group boundary.
288 U_ASSERT((p
>> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE
);
289 // modified binary search
290 int32_t start
= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
];
291 U_ASSERT(p
>= elements
[start
]);
292 int32_t limit
= length
- 1;
293 U_ASSERT(elements
[limit
] >= PRIMARY_SENTINEL
);
294 U_ASSERT(p
< elements
[limit
]);
295 while((start
+ 1) < limit
) {
296 // Invariant: elements[start] and elements[limit] are primaries,
297 // and elements[start]<=p<=elements[limit].
298 int32_t i
= (start
+ limit
) / 2;
299 uint32_t q
= elements
[i
];
300 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
301 // Find the next primary.
304 if(j
== limit
) { break; }
306 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
312 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
313 // Find the preceding primary.
316 if(j
== start
) { break; }
318 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
324 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
325 // No primary between start and limit.
330 if(p
< (q
& 0xffffff00)) { // Reset the "step" bits of a range end primary.
341 #endif // !UCONFIG_NO_COLLATION