]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collationrootelements.cpp
2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationrootelements.cpp
8 * created on: 2013mar05
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "collation.h"
17 #include "collationrootelements.h"
23 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p
) const {
24 if(p
== 0) { return 0; }
25 U_ASSERT(p
> elements
[elements
[IX_FIRST_PRIMARY_INDEX
]]);
26 int32_t index
= findP(p
);
27 uint32_t q
= elements
[index
];
29 if(p
== (q
& 0xffffff00)) {
30 // p == elements[index] is a root primary. Find the CE before it.
31 // We must not be in a primary range.
32 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
33 secTer
= elements
[index
- 1];
34 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) {
35 // Primary CE just before p.
36 p
= secTer
& 0xffffff00;
37 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
39 // secTer = last secondary & tertiary for the previous primary
43 if((p
& SEC_TER_DELTA_FLAG
) == 0) {
51 // p > elements[index] which is the previous primary.
52 // Find the last secondary & tertiary weights for it.
54 secTer
= Collation::COMMON_SEC_AND_TER_CE
;
56 q
= elements
[++index
];
57 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
58 // We must not be in a primary range.
59 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
65 return ((int64_t)p
<< 32) | (secTer
& ~SEC_TER_DELTA_FLAG
);
69 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p
) const {
70 if(p
== 0) { return 0; }
71 int32_t index
= findP(p
);
72 if(p
!= (elements
[index
] & 0xffffff00)) {
74 p
= elements
[++index
];
75 if((p
& SEC_TER_DELTA_FLAG
) == 0) {
76 // First primary after p. We must not be in a primary range.
77 U_ASSERT((p
& PRIMARY_STEP_MASK
) == 0);
82 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
83 return ((int64_t)p
<< 32) | Collation::COMMON_SEC_AND_TER_CE
;
87 CollationRootElements::getPrimaryBefore(uint32_t p
, UBool isCompressible
) const {
88 int32_t index
= findPrimary(p
);
90 uint32_t q
= elements
[index
];
91 if(p
== (q
& 0xffffff00)) {
92 // Found p itself. Return the previous primary.
93 // See if p is at the end of a previous range.
94 step
= (int32_t)q
& PRIMARY_STEP_MASK
;
96 // p is not at the end of a range. Look for the previous primary.
98 p
= elements
[--index
];
99 } while((p
& SEC_TER_DELTA_FLAG
) != 0);
100 return p
& 0xffffff00;
103 // p is in a range, and not at the start.
104 uint32_t nextElement
= elements
[index
+ 1];
105 U_ASSERT(isEndOfPrimaryRange(nextElement
));
106 step
= (int32_t)nextElement
& PRIMARY_STEP_MASK
;
108 // Return the previous range primary.
109 if((p
& 0xffff) == 0) {
110 return Collation::decTwoBytePrimaryByOneStep(p
, isCompressible
, step
);
112 return Collation::decThreeBytePrimaryByOneStep(p
, isCompressible
, step
);
117 CollationRootElements::getSecondaryBefore(uint32_t p
, uint32_t s
) const {
119 uint32_t previousSec
, sec
;
121 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
122 // Gap at the beginning of the secondary CE range.
124 sec
= elements
[index
] >> 16;
126 index
= findPrimary(p
) + 1;
127 previousSec
= Collation::BEFORE_WEIGHT16
;
128 sec
= getFirstSecTerForPrimary(index
) >> 16;
133 U_ASSERT((elements
[index
] & SEC_TER_DELTA_FLAG
) != 0);
134 sec
= elements
[index
++] >> 16;
141 CollationRootElements::getTertiaryBefore(uint32_t p
, uint32_t s
, uint32_t t
) const {
142 U_ASSERT((t
& ~Collation::ONLY_TERTIARY_MASK
) == 0);
144 uint32_t previousTer
, secTer
;
147 index
= (int32_t)elements
[IX_FIRST_TERTIARY_INDEX
];
148 // Gap at the beginning of the tertiary CE range.
151 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
152 previousTer
= Collation::BEFORE_WEIGHT16
;
154 secTer
= elements
[index
] & ~SEC_TER_DELTA_FLAG
;
156 index
= findPrimary(p
) + 1;
157 previousTer
= Collation::BEFORE_WEIGHT16
;
158 secTer
= getFirstSecTerForPrimary(index
);
160 uint32_t st
= (s
<< 16) | t
;
162 if((secTer
>> 16) == s
) { previousTer
= secTer
; }
163 U_ASSERT((elements
[index
] & SEC_TER_DELTA_FLAG
) != 0);
164 secTer
= elements
[index
++] & ~SEC_TER_DELTA_FLAG
;
166 U_ASSERT(secTer
== st
);
167 return previousTer
& 0xffff;
171 CollationRootElements::getPrimaryAfter(uint32_t p
, int32_t index
, UBool isCompressible
) const {
172 U_ASSERT(p
== (elements
[index
] & 0xffffff00) || isEndOfPrimaryRange(elements
[index
+ 1]));
173 uint32_t q
= elements
[++index
];
175 if((q
& SEC_TER_DELTA_FLAG
) == 0 && (step
= (int32_t)q
& PRIMARY_STEP_MASK
) != 0) {
176 // Return the next primary in this range.
177 if((p
& 0xffff) == 0) {
178 return Collation::incTwoBytePrimaryByOffset(p
, isCompressible
, step
);
180 return Collation::incThreeBytePrimaryByOffset(p
, isCompressible
, step
);
183 // Return the next primary in the list.
184 while((q
& SEC_TER_DELTA_FLAG
) != 0) {
185 q
= elements
[++index
];
187 U_ASSERT((q
& PRIMARY_STEP_MASK
) == 0);
193 CollationRootElements::getSecondaryAfter(int32_t index
, uint32_t s
) const {
199 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
200 secTer
= elements
[index
];
201 // Gap at the end of the secondary CE range.
204 U_ASSERT(index
>= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
]);
205 secTer
= getFirstSecTerForPrimary(index
+ 1);
206 // If this is an explicit sec/ter unit, then it will be read once more.
207 // Gap for secondaries of primary CEs.
208 secLimit
= getSecondaryBoundary();
211 uint32_t sec
= secTer
>> 16;
212 if(sec
> s
) { return sec
; }
213 secTer
= elements
[++index
];
214 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) { return secLimit
; }
219 CollationRootElements::getTertiaryAfter(int32_t index
, uint32_t s
, uint32_t t
) const {
226 index
= (int32_t)elements
[IX_FIRST_TERTIARY_INDEX
];
227 // Gap at the end of the tertiary CE range.
230 index
= (int32_t)elements
[IX_FIRST_SECONDARY_INDEX
];
231 // Gap for tertiaries of primary/secondary CEs.
232 terLimit
= getTertiaryBoundary();
234 secTer
= elements
[index
] & ~SEC_TER_DELTA_FLAG
;
236 U_ASSERT(index
>= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
]);
237 secTer
= getFirstSecTerForPrimary(index
+ 1);
238 // If this is an explicit sec/ter unit, then it will be read once more.
239 terLimit
= getTertiaryBoundary();
241 uint32_t st
= (s
<< 16) | t
;
244 U_ASSERT((secTer
>> 16) == s
);
245 return secTer
& 0xffff;
247 secTer
= elements
[++index
];
248 // No tertiary greater than t for this primary+secondary.
249 if((secTer
& SEC_TER_DELTA_FLAG
) == 0 || (secTer
>> 16) > s
) { return terLimit
; }
250 secTer
&= ~SEC_TER_DELTA_FLAG
;
255 CollationRootElements::getFirstSecTerForPrimary(int32_t index
) const {
256 uint32_t secTer
= elements
[index
];
257 if((secTer
& SEC_TER_DELTA_FLAG
) == 0) {
259 return Collation::COMMON_SEC_AND_TER_CE
;
261 secTer
&= ~SEC_TER_DELTA_FLAG
;
262 if(secTer
> Collation::COMMON_SEC_AND_TER_CE
) {
264 return Collation::COMMON_SEC_AND_TER_CE
;
266 // Explicit sec/ter below common/common.
271 CollationRootElements::findPrimary(uint32_t p
) const {
272 // Requirement: p must occur as a root primary.
273 U_ASSERT((p
& 0xff) == 0); // at most a 3-byte primary
274 int32_t index
= findP(p
);
275 // If p is in a range, then we just assume that p is an actual primary in this range.
276 // (Too cumbersome/expensive to check.)
277 // Otherwise, it must be an exact match.
278 U_ASSERT(isEndOfPrimaryRange(elements
[index
+ 1]) || p
== (elements
[index
] & 0xffffff00));
283 CollationRootElements::findP(uint32_t p
) const {
284 // p need not occur as a root primary.
285 // For example, it might be a reordering group boundary.
286 U_ASSERT((p
>> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE
);
287 // modified binary search
288 int32_t start
= (int32_t)elements
[IX_FIRST_PRIMARY_INDEX
];
289 U_ASSERT(p
>= elements
[start
]);
290 int32_t limit
= length
- 1;
291 U_ASSERT(elements
[limit
] >= PRIMARY_SENTINEL
);
292 U_ASSERT(p
< elements
[limit
]);
293 while((start
+ 1) < limit
) {
294 // Invariant: elements[start] and elements[limit] are primaries,
295 // and elements[start]<=p<=elements[limit].
296 int32_t i
= (start
+ limit
) / 2;
297 uint32_t q
= elements
[i
];
298 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
299 // Find the next primary.
302 if(j
== limit
) { break; }
304 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
310 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
311 // Find the preceding primary.
314 if(j
== start
) { break; }
316 if((q
& SEC_TER_DELTA_FLAG
) == 0) {
322 if((q
& SEC_TER_DELTA_FLAG
) != 0) {
323 // No primary between start and limit.
328 if(p
< (q
& 0xffffff00)) { // Reset the "step" bits of a range end primary.
339 #endif // !UCONFIG_NO_COLLATION