]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2013-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * collationrootelements.cpp | |
7 | * | |
8 | * created on: 2013mar05 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_COLLATION | |
15 | ||
16 | #include "collation.h" | |
17 | #include "collationrootelements.h" | |
18 | #include "uassert.h" | |
19 | ||
20 | U_NAMESPACE_BEGIN | |
21 | ||
22 | int64_t | |
23 | CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { | |
24 | if(p == 0) { return 0; } | |
25 | U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); | |
26 | int32_t index = findP(p); | |
27 | uint32_t q = elements[index]; | |
28 | uint32_t secTer; | |
29 | if(p == (q & 0xffffff00)) { | |
30 | // p == elements[index] is a root primary. Find the CE before it. | |
31 | // We must not be in a primary range. | |
32 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
33 | secTer = elements[index - 1]; | |
34 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { | |
35 | // Primary CE just before p. | |
36 | p = secTer & 0xffffff00; | |
37 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
38 | } else { | |
39 | // secTer = last secondary & tertiary for the previous primary | |
40 | index -= 2; | |
41 | for(;;) { | |
42 | p = elements[index]; | |
43 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
44 | p &= 0xffffff00; | |
45 | break; | |
46 | } | |
47 | --index; | |
48 | } | |
49 | } | |
50 | } else { | |
51 | // p > elements[index] which is the previous primary. | |
52 | // Find the last secondary & tertiary weights for it. | |
53 | p = q & 0xffffff00; | |
54 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
55 | for(;;) { | |
56 | q = elements[++index]; | |
57 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
58 | // We must not be in a primary range. | |
59 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
60 | break; | |
61 | } | |
62 | secTer = q; | |
63 | } | |
64 | } | |
65 | return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); | |
66 | } | |
67 | ||
68 | int64_t | |
69 | CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { | |
70 | if(p == 0) { return 0; } | |
71 | int32_t index = findP(p); | |
72 | if(p != (elements[index] & 0xffffff00)) { | |
73 | for(;;) { | |
74 | p = elements[++index]; | |
75 | if((p & SEC_TER_DELTA_FLAG) == 0) { | |
76 | // First primary after p. We must not be in a primary range. | |
77 | U_ASSERT((p & PRIMARY_STEP_MASK) == 0); | |
78 | break; | |
79 | } | |
80 | } | |
81 | } | |
82 | // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. | |
83 | return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; | |
84 | } | |
85 | ||
86 | uint32_t | |
87 | CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { | |
88 | int32_t index = findPrimary(p); | |
89 | int32_t step; | |
90 | uint32_t q = elements[index]; | |
91 | if(p == (q & 0xffffff00)) { | |
92 | // Found p itself. Return the previous primary. | |
93 | // See if p is at the end of a previous range. | |
94 | step = (int32_t)q & PRIMARY_STEP_MASK; | |
95 | if(step == 0) { | |
96 | // p is not at the end of a range. Look for the previous primary. | |
97 | do { | |
98 | p = elements[--index]; | |
99 | } while((p & SEC_TER_DELTA_FLAG) != 0); | |
100 | return p & 0xffffff00; | |
101 | } | |
102 | } else { | |
103 | // p is in a range, and not at the start. | |
104 | uint32_t nextElement = elements[index + 1]; | |
105 | U_ASSERT(isEndOfPrimaryRange(nextElement)); | |
106 | step = (int32_t)nextElement & PRIMARY_STEP_MASK; | |
107 | } | |
108 | // Return the previous range primary. | |
109 | if((p & 0xffff) == 0) { | |
110 | return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); | |
111 | } else { | |
112 | return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); | |
113 | } | |
114 | } | |
115 | ||
116 | uint32_t | |
117 | CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { | |
118 | int32_t index; | |
119 | uint32_t previousSec, sec; | |
120 | if(p == 0) { | |
121 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
122 | // Gap at the beginning of the secondary CE range. | |
123 | previousSec = 0; | |
124 | sec = elements[index] >> 16; | |
125 | } else { | |
126 | index = findPrimary(p) + 1; | |
127 | previousSec = Collation::MERGE_SEPARATOR_WEIGHT16; | |
128 | sec = Collation::COMMON_WEIGHT16; | |
129 | } | |
130 | U_ASSERT(s >= sec); | |
131 | while(s > sec) { | |
132 | previousSec = sec; | |
133 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
134 | sec = elements[index++] >> 16; | |
135 | } | |
136 | U_ASSERT(sec == s); | |
137 | return previousSec; | |
138 | } | |
139 | ||
140 | uint32_t | |
141 | CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { | |
142 | U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); | |
143 | int32_t index; | |
144 | uint32_t previousTer, secTer; | |
145 | if(p == 0) { | |
146 | if(s == 0) { | |
147 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | |
148 | // Gap at the beginning of the tertiary CE range. | |
149 | previousTer = 0; | |
150 | } else { | |
151 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
152 | previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; | |
153 | } | |
154 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; | |
155 | } else { | |
156 | index = findPrimary(p) + 1; | |
157 | previousTer = Collation::MERGE_SEPARATOR_WEIGHT16; | |
158 | secTer = Collation::COMMON_SEC_AND_TER_CE; | |
159 | } | |
160 | uint32_t st = (s << 16) | t; | |
161 | while(st > secTer) { | |
162 | if((secTer >> 16) == s) { previousTer = secTer; } | |
163 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); | |
164 | secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; | |
165 | } | |
166 | U_ASSERT(secTer == st); | |
167 | return previousTer & 0xffff; | |
168 | } | |
169 | ||
170 | uint32_t | |
171 | CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { | |
172 | U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); | |
173 | uint32_t q = elements[++index]; | |
174 | int32_t step; | |
175 | if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { | |
176 | // Return the next primary in this range. | |
177 | if((p & 0xffff) == 0) { | |
178 | return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); | |
179 | } else { | |
180 | return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); | |
181 | } | |
182 | } else { | |
183 | // Return the next primary in the list. | |
184 | while((q & SEC_TER_DELTA_FLAG) != 0) { | |
185 | q = elements[++index]; | |
186 | } | |
187 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); | |
188 | return q; | |
189 | } | |
190 | } | |
191 | ||
192 | uint32_t | |
193 | CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { | |
194 | uint32_t secLimit; | |
195 | if(index == 0) { | |
196 | // primary = 0 | |
197 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
198 | // Gap at the end of the secondary CE range. | |
199 | secLimit = 0x10000; | |
200 | } else { | |
201 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
202 | ++index; | |
203 | // Gap for secondaries of primary CEs. | |
204 | secLimit = getSecondaryBoundary(); | |
205 | } | |
206 | for(;;) { | |
207 | uint32_t secTer = elements[index]; | |
208 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } | |
209 | uint32_t sec = secTer >> 16; | |
210 | if(sec > s) { return sec; } | |
211 | ++index; | |
212 | } | |
213 | } | |
214 | ||
215 | uint32_t | |
216 | CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { | |
217 | uint32_t terLimit; | |
218 | if(index == 0) { | |
219 | // primary = 0 | |
220 | if(s == 0) { | |
221 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; | |
222 | // Gap at the end of the tertiary CE range. | |
223 | terLimit = 0x4000; | |
224 | } else { | |
225 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; | |
226 | // Gap for tertiaries of primary/secondary CEs. | |
227 | terLimit = getTertiaryBoundary(); | |
228 | } | |
229 | } else { | |
230 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); | |
231 | ++index; | |
232 | terLimit = getTertiaryBoundary(); | |
233 | } | |
234 | uint32_t st = (s << 16) | t; | |
235 | for(;;) { | |
236 | uint32_t secTer = elements[index]; | |
237 | // No tertiary greater than t for this primary+secondary. | |
238 | if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } | |
239 | secTer &= ~SEC_TER_DELTA_FLAG; | |
240 | if(secTer > st) { return secTer & 0xffff; } | |
241 | ++index; | |
242 | } | |
243 | } | |
244 | ||
245 | int32_t | |
246 | CollationRootElements::findPrimary(uint32_t p) const { | |
247 | // Requirement: p must occur as a root primary. | |
248 | U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary | |
249 | int32_t index = findP(p); | |
250 | // If p is in a range, then we just assume that p is an actual primary in this range. | |
251 | // (Too cumbersome/expensive to check.) | |
252 | // Otherwise, it must be an exact match. | |
253 | U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); | |
254 | return index; | |
255 | } | |
256 | ||
257 | int32_t | |
258 | CollationRootElements::findP(uint32_t p) const { | |
259 | // p need not occur as a root primary. | |
260 | // For example, it might be a reordering group boundary. | |
261 | U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); | |
262 | // modified binary search | |
263 | int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; | |
264 | U_ASSERT(p >= elements[start]); | |
265 | int32_t limit = length - 1; | |
266 | U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); | |
267 | U_ASSERT(p < elements[limit]); | |
268 | while((start + 1) < limit) { | |
269 | // Invariant: elements[start] and elements[limit] are primaries, | |
270 | // and elements[start]<=p<=elements[limit]. | |
271 | int32_t i = (start + limit) / 2; | |
272 | uint32_t q = elements[i]; | |
273 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
274 | // Find the next primary. | |
275 | int32_t j = i + 1; | |
276 | for(;;) { | |
277 | if(j == limit) { break; } | |
278 | q = elements[j]; | |
279 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
280 | i = j; | |
281 | break; | |
282 | } | |
283 | ++j; | |
284 | } | |
285 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
286 | // Find the preceding primary. | |
287 | j = i - 1; | |
288 | for(;;) { | |
289 | if(j == start) { break; } | |
290 | q = elements[j]; | |
291 | if((q & SEC_TER_DELTA_FLAG) == 0) { | |
292 | i = j; | |
293 | break; | |
294 | } | |
295 | --j; | |
296 | } | |
297 | if((q & SEC_TER_DELTA_FLAG) != 0) { | |
298 | // No primary between start and limit. | |
299 | break; | |
300 | } | |
301 | } | |
302 | } | |
303 | if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. | |
304 | limit = i; | |
305 | } else { | |
306 | start = i; | |
307 | } | |
308 | } | |
309 | return start; | |
310 | } | |
311 | ||
312 | U_NAMESPACE_END | |
313 | ||
314 | #endif // !UCONFIG_NO_COLLATION |