]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2013-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * collationrootelements.cpp | |
7 | * | |
8 | * created on: 2013mar05 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_COLLATION | |
15 | ||
16 | #include "collation.h" | |
17 | #include "collationrootelements.h" | |
18 | #include "uassert.h" | |
19 | ||
20 | U_NAMESPACE_BEGIN | |
21 | ||
22 | int64_t | |
23 | CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { | |
24 | if(p == 0) { return 0; } | |
25 | U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); | |
26 | int32_t index = findP(p); | |
27 | uint32_t q = elements[index]; | |
28 | uint32_t secTer; | |
29 | if(p == (q & 0xffffff00)) { | |
30 | // p == elements[index] is a root primary. Find the CE before it. | |
31 | // We must not be in a primary range. | |
32 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
33 | secTer = elements[index - 1]; | |
34 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { | |
35 | // Primary CE just before p. | |
36 | p = secTer & 0xffffff00; | |
37 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
38 | } else { | |
39 | // secTer = last secondary & tertiary for the previous primary | |
40 | index -= 2; | |
41 | for(;;) { | |
42 | p = elements[index]; | |
43 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
44 | p &= 0xffffff00; | |
45 | break; | |
46 | } | |
47 | --index; | |
48 | } | |
49 | } | |
50 | } else { | |
51 | // p > elements[index] which is the previous primary. | |
52 | // Find the last secondary & tertiary weights for it. | |
53 | p = q & 0xffffff00; | |
54 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
55 | for(;;) { | |
56 | q = elements[++index]; | |
57 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
58 | // We must not be in a primary range. | |
59 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
60 | break; | |
61 | } | |
62 | secTer = q; | |
63 | } | |
64 | } | |
65 | return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); | |
66 | } | |
67 | ||
68 | int64_t | |
69 | CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { | |
70 | if(p == 0) { return 0; } | |
71 | int32_t index = findP(p); | |
72 | if(p != (elements[index] & 0xffffff00)) { | |
73 | for(;;) { | |
74 | p = elements[++index]; | |
75 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
76 | // First primary after p. We must not be in a primary range. | |
77 | U_ASSERT((p & PRIMARY_STEP_MASK) == 0); | |
78 | break; | |
79 | } | |
80 | } | |
81 | } | |
82 | // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. | |
83 | return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; | |
84 | } | |
85 | ||
86 | uint32_t | |
87 | CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { | |
88 | int32_t index = findPrimary(p); | |
89 | int32_t step; | |
90 | uint32_t q = elements[index]; | |
91 | if(p == (q & 0xffffff00)) { | |
92 | // Found p itself. Return the previous primary. | |
93 | // See if p is at the end of a previous range. | |
94 | step = (int32_t)q & PRIMARY_STEP_MASK; | |
95 | if(step == 0) { | |
96 | // p is not at the end of a range. Look for the previous primary. | |
97 | do { | |
98 | p = elements[--index]; | |
99 | } while((p & SEC_TER_DELTA_FLAG) != 0); | |
100 | return p & 0xffffff00; | |
101 | } | |
102 | } else { | |
103 | // p is in a range, and not at the start. | |
104 | uint32_t nextElement = elements[index + 1]; | |
105 | U_ASSERT(isEndOfPrimaryRange(nextElement)); | |
106 | step = (int32_t)nextElement & PRIMARY_STEP_MASK; | |
107 | } | |
108 | // Return the previous range primary. | |
109 | if((p & 0xffff) == 0) { | |
110 | return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); | |
111 | } else { | |
112 | return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); | |
113 | } | |
114 | } | |
115 | ||
116 | uint32_t | |
117 | CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { | |
118 | int32_t index; | |
119 | uint32_t previousSec, sec; | |
120 | if(p == 0) { | |
121 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
122 | // Gap at the beginning of the secondary CE range. | |
123 | previousSec = 0; | |
124 | sec = elements[index] >> 16; | |
125 | } else { | |
126 | index = findPrimary(p) + 1; | |
b331163b A |
127 | previousSec = Collation::BEFORE_WEIGHT16; |
128 | sec = getFirstSecTerForPrimary(index) >> 16; | |
57a6839d A |
129 | } |
130 | U_ASSERT(s >= sec); | |
131 | while(s > sec) { | |
132 | previousSec = sec; | |
133 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
134 | sec = elements[index++] >> 16; | |
135 | } | |
136 | U_ASSERT(sec == s); | |
137 | return previousSec; | |
138 | } | |
139 | ||
140 | uint32_t | |
141 | CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { | |
142 | U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); | |
143 | int32_t index; | |
144 | uint32_t previousTer, secTer; | |
145 | if(p == 0) { | |
146 | if(s == 0) { | |
147 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | |
148 | // Gap at the beginning of the tertiary CE range. | |
149 | previousTer = 0; | |
150 | } else { | |
151 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
b331163b | 152 | previousTer = Collation::BEFORE_WEIGHT16; |
57a6839d A |
153 | } |
154 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | |
155 | } else { | |
156 | index = findPrimary(p) + 1; | |
b331163b A |
157 | previousTer = Collation::BEFORE_WEIGHT16; |
158 | secTer = getFirstSecTerForPrimary(index); | |
57a6839d A |
159 | } |
160 | uint32_t st = (s << 16) | t; | |
161 | while(st > secTer) { | |
162 | if((secTer >> 16) == s) { previousTer = secTer; } | |
163 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
164 | secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; | |
165 | } | |
166 | U_ASSERT(secTer == st); | |
167 | return previousTer & 0xffff; | |
168 | } | |
169 | ||
170 | uint32_t | |
171 | CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { | |
172 | U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); | |
173 | uint32_t q = elements[++index]; | |
174 | int32_t step; | |
175 | if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { | |
176 | // Return the next primary in this range. | |
177 | if((p & 0xffff) == 0) { | |
178 | return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); | |
179 | } else { | |
180 | return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); | |
181 | } | |
182 | } else { | |
183 | // Return the next primary in the list. | |
184 | while((q & SEC_TER_DELTA_FLAG) != 0) { | |
185 | q = elements[++index]; | |
186 | } | |
187 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
188 | return q; | |
189 | } | |
190 | } | |
191 | ||
192 | uint32_t | |
193 | CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { | |
b331163b | 194 | uint32_t secTer; |
57a6839d A |
195 | uint32_t secLimit; |
196 | if(index == 0) { | |
197 | // primary = 0 | |
b331163b | 198 | U_ASSERT(s != 0); |
57a6839d | 199 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
b331163b | 200 | secTer = elements[index]; |
57a6839d A |
201 | // Gap at the end of the secondary CE range. |
202 | secLimit = 0x10000; | |
203 | } else { | |
204 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
b331163b A |
205 | secTer = getFirstSecTerForPrimary(index + 1); |
206 | // If this is an explicit sec/ter unit, then it will be read once more. | |
57a6839d A |
207 | // Gap for secondaries of primary CEs. |
208 | secLimit = getSecondaryBoundary(); | |
209 | } | |
210 | for(;;) { | |
57a6839d A |
211 | uint32_t sec = secTer >> 16; |
212 | if(sec > s) { return sec; } | |
b331163b A |
213 | secTer = elements[++index]; |
214 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } | |
57a6839d A |
215 | } |
216 | } | |
217 | ||
218 | uint32_t | |
219 | CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { | |
b331163b | 220 | uint32_t secTer; |
57a6839d A |
221 | uint32_t terLimit; |
222 | if(index == 0) { | |
223 | // primary = 0 | |
224 | if(s == 0) { | |
b331163b | 225 | U_ASSERT(t != 0); |
57a6839d A |
226 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; |
227 | // Gap at the end of the tertiary CE range. | |
228 | terLimit = 0x4000; | |
229 | } else { | |
230 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
231 | // Gap for tertiaries of primary/secondary CEs. | |
232 | terLimit = getTertiaryBoundary(); | |
233 | } | |
b331163b | 234 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; |
57a6839d A |
235 | } else { |
236 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
b331163b A |
237 | secTer = getFirstSecTerForPrimary(index + 1); |
238 | // If this is an explicit sec/ter unit, then it will be read once more. | |
57a6839d A |
239 | terLimit = getTertiaryBoundary(); |
240 | } | |
241 | uint32_t st = (s << 16) | t; | |
242 | for(;;) { | |
b331163b A |
243 | if(secTer > st) { |
244 | U_ASSERT((secTer >> 16) == s); | |
245 | return secTer & 0xffff; | |
246 | } | |
247 | secTer = elements[++index]; | |
57a6839d A |
248 | // No tertiary greater than t for this primary+secondary. |
249 | if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } | |
250 | secTer &= ~SEC_TER_DELTA_FLAG; | |
57a6839d A |
251 | } |
252 | } | |
253 | ||
b331163b A |
254 | uint32_t |
255 | CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { | |
256 | uint32_t secTer = elements[index]; | |
257 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { | |
258 | // No sec/ter delta. | |
259 | return Collation::COMMON_SEC_AND_TER_CE; | |
260 | } | |
261 | secTer &= ~SEC_TER_DELTA_FLAG; | |
262 | if(secTer > Collation::COMMON_SEC_AND_TER_CE) { | |
263 | // Implied sec/ter. | |
264 | return Collation::COMMON_SEC_AND_TER_CE; | |
265 | } | |
266 | // Explicit sec/ter below common/common. | |
267 | return secTer; | |
268 | } | |
269 | ||
57a6839d A |
270 | int32_t |
271 | CollationRootElements::findPrimary(uint32_t p) const { | |
272 | // Requirement: p must occur as a root primary. | |
273 | U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary | |
274 | int32_t index = findP(p); | |
275 | // If p is in a range, then we just assume that p is an actual primary in this range. | |
276 | // (Too cumbersome/expensive to check.) | |
277 | // Otherwise, it must be an exact match. | |
278 | U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); | |
279 | return index; | |
280 | } | |
281 | ||
282 | int32_t | |
283 | CollationRootElements::findP(uint32_t p) const { | |
284 | // p need not occur as a root primary. | |
285 | // For example, it might be a reordering group boundary. | |
286 | U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); | |
287 | // modified binary search | |
288 | int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; | |
289 | U_ASSERT(p >= elements[start]); | |
290 | int32_t limit = length - 1; | |
291 | U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); | |
292 | U_ASSERT(p < elements[limit]); | |
293 | while((start + 1) < limit) { | |
294 | // Invariant: elements[start] and elements[limit] are primaries, | |
295 | // and elements[start]<=p<=elements[limit]. | |
296 | int32_t i = (start + limit) / 2; | |
297 | uint32_t q = elements[i]; | |
298 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
299 | // Find the next primary. | |
300 | int32_t j = i + 1; | |
301 | for(;;) { | |
302 | if(j == limit) { break; } | |
303 | q = elements[j]; | |
304 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
305 | i = j; | |
306 | break; | |
307 | } | |
308 | ++j; | |
309 | } | |
310 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
311 | // Find the preceding primary. | |
312 | j = i - 1; | |
313 | for(;;) { | |
314 | if(j == start) { break; } | |
315 | q = elements[j]; | |
316 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
317 | i = j; | |
318 | break; | |
319 | } | |
320 | --j; | |
321 | } | |
322 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
323 | // No primary between start and limit. | |
324 | break; | |
325 | } | |
326 | } | |
327 | } | |
328 | if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. | |
329 | limit = i; | |
330 | } else { | |
331 | start = i; | |
332 | } | |
333 | } | |
334 | return start; | |
335 | } | |
336 | ||
337 | U_NAMESPACE_END | |
338 | ||
339 | #endif // !UCONFIG_NO_COLLATION |