]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * Copyright (C) 2013-2014, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ******************************************************************************* | |
8 | * collationrootelements.cpp | |
9 | * | |
10 | * created on: 2013mar05 | |
11 | * created by: Markus W. Scherer | |
12 | */ | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | ||
16 | #if !UCONFIG_NO_COLLATION | |
17 | ||
18 | #include "collation.h" | |
19 | #include "collationrootelements.h" | |
20 | #include "uassert.h" | |
21 | ||
22 | U_NAMESPACE_BEGIN | |
23 | ||
24 | int64_t | |
25 | CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { | |
26 | if(p == 0) { return 0; } | |
27 | U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); | |
28 | int32_t index = findP(p); | |
29 | uint32_t q = elements[index]; | |
30 | uint32_t secTer; | |
31 | if(p == (q & 0xffffff00)) { | |
32 | // p == elements[index] is a root primary. Find the CE before it. | |
33 | // We must not be in a primary range. | |
34 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
35 | secTer = elements[index - 1]; | |
36 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { | |
37 | // Primary CE just before p. | |
38 | p = secTer & 0xffffff00; | |
39 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
40 | } else { | |
41 | // secTer = last secondary & tertiary for the previous primary | |
42 | index -= 2; | |
43 | for(;;) { | |
44 | p = elements[index]; | |
45 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
46 | p &= 0xffffff00; | |
47 | break; | |
48 | } | |
49 | --index; | |
50 | } | |
51 | } | |
52 | } else { | |
53 | // p > elements[index] which is the previous primary. | |
54 | // Find the last secondary & tertiary weights for it. | |
55 | p = q & 0xffffff00; | |
56 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
57 | for(;;) { | |
58 | q = elements[++index]; | |
59 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
60 | // We must not be in a primary range. | |
61 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
62 | break; | |
63 | } | |
64 | secTer = q; | |
65 | } | |
66 | } | |
67 | return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); | |
68 | } | |
69 | ||
70 | int64_t | |
71 | CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { | |
72 | if(p == 0) { return 0; } | |
73 | int32_t index = findP(p); | |
74 | if(p != (elements[index] & 0xffffff00)) { | |
75 | for(;;) { | |
76 | p = elements[++index]; | |
77 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
78 | // First primary after p. We must not be in a primary range. | |
79 | U_ASSERT((p & PRIMARY_STEP_MASK) == 0); | |
80 | break; | |
81 | } | |
82 | } | |
83 | } | |
84 | // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. | |
85 | return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; | |
86 | } | |
87 | ||
88 | uint32_t | |
89 | CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { | |
90 | int32_t index = findPrimary(p); | |
91 | int32_t step; | |
92 | uint32_t q = elements[index]; | |
93 | if(p == (q & 0xffffff00)) { | |
94 | // Found p itself. Return the previous primary. | |
95 | // See if p is at the end of a previous range. | |
96 | step = (int32_t)q & PRIMARY_STEP_MASK; | |
97 | if(step == 0) { | |
98 | // p is not at the end of a range. Look for the previous primary. | |
99 | do { | |
100 | p = elements[--index]; | |
101 | } while((p & SEC_TER_DELTA_FLAG) != 0); | |
102 | return p & 0xffffff00; | |
103 | } | |
104 | } else { | |
105 | // p is in a range, and not at the start. | |
106 | uint32_t nextElement = elements[index + 1]; | |
107 | U_ASSERT(isEndOfPrimaryRange(nextElement)); | |
108 | step = (int32_t)nextElement & PRIMARY_STEP_MASK; | |
109 | } | |
110 | // Return the previous range primary. | |
111 | if((p & 0xffff) == 0) { | |
112 | return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); | |
113 | } else { | |
114 | return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); | |
115 | } | |
116 | } | |
117 | ||
118 | uint32_t | |
119 | CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { | |
120 | int32_t index; | |
121 | uint32_t previousSec, sec; | |
122 | if(p == 0) { | |
123 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
124 | // Gap at the beginning of the secondary CE range. | |
125 | previousSec = 0; | |
126 | sec = elements[index] >> 16; | |
127 | } else { | |
128 | index = findPrimary(p) + 1; | |
129 | previousSec = Collation::BEFORE_WEIGHT16; | |
130 | sec = getFirstSecTerForPrimary(index) >> 16; | |
131 | } | |
132 | U_ASSERT(s >= sec); | |
133 | while(s > sec) { | |
134 | previousSec = sec; | |
135 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
136 | sec = elements[index++] >> 16; | |
137 | } | |
138 | U_ASSERT(sec == s); | |
139 | return previousSec; | |
140 | } | |
141 | ||
142 | uint32_t | |
143 | CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { | |
144 | U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); | |
145 | int32_t index; | |
146 | uint32_t previousTer, secTer; | |
147 | if(p == 0) { | |
148 | if(s == 0) { | |
149 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | |
150 | // Gap at the beginning of the tertiary CE range. | |
151 | previousTer = 0; | |
152 | } else { | |
153 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
154 | previousTer = Collation::BEFORE_WEIGHT16; | |
155 | } | |
156 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | |
157 | } else { | |
158 | index = findPrimary(p) + 1; | |
159 | previousTer = Collation::BEFORE_WEIGHT16; | |
160 | secTer = getFirstSecTerForPrimary(index); | |
161 | } | |
162 | uint32_t st = (s << 16) | t; | |
163 | while(st > secTer) { | |
164 | if((secTer >> 16) == s) { previousTer = secTer; } | |
165 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
166 | secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; | |
167 | } | |
168 | U_ASSERT(secTer == st); | |
169 | return previousTer & 0xffff; | |
170 | } | |
171 | ||
172 | uint32_t | |
173 | CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { | |
174 | U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); | |
175 | uint32_t q = elements[++index]; | |
176 | int32_t step; | |
177 | if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { | |
178 | // Return the next primary in this range. | |
179 | if((p & 0xffff) == 0) { | |
180 | return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); | |
181 | } else { | |
182 | return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); | |
183 | } | |
184 | } else { | |
185 | // Return the next primary in the list. | |
186 | while((q & SEC_TER_DELTA_FLAG) != 0) { | |
187 | q = elements[++index]; | |
188 | } | |
189 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
190 | return q; | |
191 | } | |
192 | } | |
193 | ||
194 | uint32_t | |
195 | CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { | |
196 | uint32_t secTer; | |
197 | uint32_t secLimit; | |
198 | if(index == 0) { | |
199 | // primary = 0 | |
200 | U_ASSERT(s != 0); | |
201 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
202 | secTer = elements[index]; | |
203 | // Gap at the end of the secondary CE range. | |
204 | secLimit = 0x10000; | |
205 | } else { | |
206 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
207 | secTer = getFirstSecTerForPrimary(index + 1); | |
208 | // If this is an explicit sec/ter unit, then it will be read once more. | |
209 | // Gap for secondaries of primary CEs. | |
210 | secLimit = getSecondaryBoundary(); | |
211 | } | |
212 | for(;;) { | |
213 | uint32_t sec = secTer >> 16; | |
214 | if(sec > s) { return sec; } | |
215 | secTer = elements[++index]; | |
216 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } | |
217 | } | |
218 | } | |
219 | ||
220 | uint32_t | |
221 | CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { | |
222 | uint32_t secTer; | |
223 | uint32_t terLimit; | |
224 | if(index == 0) { | |
225 | // primary = 0 | |
226 | if(s == 0) { | |
227 | U_ASSERT(t != 0); | |
228 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | |
229 | // Gap at the end of the tertiary CE range. | |
230 | terLimit = 0x4000; | |
231 | } else { | |
232 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
233 | // Gap for tertiaries of primary/secondary CEs. | |
234 | terLimit = getTertiaryBoundary(); | |
235 | } | |
236 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | |
237 | } else { | |
238 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
239 | secTer = getFirstSecTerForPrimary(index + 1); | |
240 | // If this is an explicit sec/ter unit, then it will be read once more. | |
241 | terLimit = getTertiaryBoundary(); | |
242 | } | |
243 | uint32_t st = (s << 16) | t; | |
244 | for(;;) { | |
245 | if(secTer > st) { | |
246 | U_ASSERT((secTer >> 16) == s); | |
247 | return secTer & 0xffff; | |
248 | } | |
249 | secTer = elements[++index]; | |
250 | // No tertiary greater than t for this primary+secondary. | |
251 | if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } | |
252 | secTer &= ~SEC_TER_DELTA_FLAG; | |
253 | } | |
254 | } | |
255 | ||
256 | uint32_t | |
257 | CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { | |
258 | uint32_t secTer = elements[index]; | |
259 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { | |
260 | // No sec/ter delta. | |
261 | return Collation::COMMON_SEC_AND_TER_CE; | |
262 | } | |
263 | secTer &= ~SEC_TER_DELTA_FLAG; | |
264 | if(secTer > Collation::COMMON_SEC_AND_TER_CE) { | |
265 | // Implied sec/ter. | |
266 | return Collation::COMMON_SEC_AND_TER_CE; | |
267 | } | |
268 | // Explicit sec/ter below common/common. | |
269 | return secTer; | |
270 | } | |
271 | ||
272 | int32_t | |
273 | CollationRootElements::findPrimary(uint32_t p) const { | |
274 | // Requirement: p must occur as a root primary. | |
275 | U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary | |
276 | int32_t index = findP(p); | |
277 | // If p is in a range, then we just assume that p is an actual primary in this range. | |
278 | // (Too cumbersome/expensive to check.) | |
279 | // Otherwise, it must be an exact match. | |
280 | U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); | |
281 | return index; | |
282 | } | |
283 | ||
284 | int32_t | |
285 | CollationRootElements::findP(uint32_t p) const { | |
286 | // p need not occur as a root primary. | |
287 | // For example, it might be a reordering group boundary. | |
288 | U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); | |
289 | // modified binary search | |
290 | int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; | |
291 | U_ASSERT(p >= elements[start]); | |
292 | int32_t limit = length - 1; | |
293 | U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); | |
294 | U_ASSERT(p < elements[limit]); | |
295 | while((start + 1) < limit) { | |
296 | // Invariant: elements[start] and elements[limit] are primaries, | |
297 | // and elements[start]<=p<=elements[limit]. | |
298 | int32_t i = (start + limit) / 2; | |
299 | uint32_t q = elements[i]; | |
300 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
301 | // Find the next primary. | |
302 | int32_t j = i + 1; | |
303 | for(;;) { | |
304 | if(j == limit) { break; } | |
305 | q = elements[j]; | |
306 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
307 | i = j; | |
308 | break; | |
309 | } | |
310 | ++j; | |
311 | } | |
312 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
313 | // Find the preceding primary. | |
314 | j = i - 1; | |
315 | for(;;) { | |
316 | if(j == start) { break; } | |
317 | q = elements[j]; | |
318 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
319 | i = j; | |
320 | break; | |
321 | } | |
322 | --j; | |
323 | } | |
324 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
325 | // No primary between start and limit. | |
326 | break; | |
327 | } | |
328 | } | |
329 | } | |
330 | if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. | |
331 | limit = i; | |
332 | } else { | |
333 | start = i; | |
334 | } | |
335 | } | |
336 | return start; | |
337 | } | |
338 | ||
339 | U_NAMESPACE_END | |
340 | ||
341 | #endif // !UCONFIG_NO_COLLATION |