]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
4388f060 | 3 | * Copyright (C) 2001-2011, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ****************************************************************************** | |
6 | * | |
7 | * File ucoleitr.cpp | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 02/15/2001 synwee Modified all methods to process its own function | |
13 | * instead of calling the equivalent c++ api (coleitr.h) | |
14 | ******************************************************************************/ | |
15 | ||
16 | #include "unicode/utypes.h" | |
17 | ||
18 | #if !UCONFIG_NO_COLLATION | |
19 | ||
20 | #include "unicode/ucoleitr.h" | |
21 | #include "unicode/ustring.h" | |
22 | #include "unicode/sortkey.h" | |
46f4442e | 23 | #include "unicode/uobject.h" |
b75a7d8f A |
24 | #include "ucol_imp.h" |
25 | #include "cmemory.h" | |
26 | ||
27 | U_NAMESPACE_USE | |
28 | ||
/* Legacy size constant; not referenced by the macros below. */
#define BUFFER_LENGTH 100

/* Number of entries held in the inline storage of RCEBuffer / PCEBuffer. */
#define DEFAULT_BUFFER_SIZE 16
/* Number of entries added each time a full RCEBuffer / PCEBuffer is enlarged. */
#define BUFFER_GROW 8

/*
 * Element count of a true array (not a pointer).
 * The argument is parenthesized so that any array-valued expression can be
 * passed safely.
 */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/* Copy `count` elements from `src` to `dst`; the element size is taken from `src`. */
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])

/* Allocate uninitialized storage for `count` objects of `type`; the result must be checked for NULL. */
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))

/* Resize `array` to hold `newSize` elements; yields the untyped pointer returned by uprv_realloc. */
#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])

/* Release storage obtained through NEW_ARRAY / GROW_ARRAY. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
43 | ||
4388f060 | 44 | typedef struct icu::collIterate collIterator; |
b75a7d8f | 45 | |
46f4442e A |
/**
 * A raw (unprocessed) 32-bit collation element together with the pair of
 * iterator offsets recorded around it.
 */
struct RCEI
{
    uint32_t ce;    /* raw collation element */
    int32_t  low;   /* lower offset associated with the element */
    int32_t  high;  /* upper offset associated with the element */
};
52 | ||
53 | U_NAMESPACE_BEGIN | |
54 | ||
55 | struct RCEBuffer | |
56 | { | |
57 | RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; | |
58 | RCEI *buffer; | |
59 | int32_t bufferIndex; | |
60 | int32_t bufferSize; | |
61 | ||
62 | RCEBuffer(); | |
63 | ~RCEBuffer(); | |
64 | ||
65 | UBool empty() const; | |
66 | void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); | |
67 | const RCEI *get(); | |
68 | }; | |
69 | ||
70 | RCEBuffer::RCEBuffer() | |
71 | { | |
72 | buffer = defaultBuffer; | |
73 | bufferIndex = 0; | |
74 | bufferSize = DEFAULT_BUFFER_SIZE; | |
75 | } | |
76 | ||
77 | RCEBuffer::~RCEBuffer() | |
78 | { | |
79 | if (buffer != defaultBuffer) { | |
80 | DELETE_ARRAY(buffer); | |
81 | } | |
82 | } | |
83 | ||
84 | UBool RCEBuffer::empty() const | |
85 | { | |
86 | return bufferIndex <= 0; | |
87 | } | |
88 | ||
89 | void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh) | |
90 | { | |
91 | if (bufferIndex >= bufferSize) { | |
92 | RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); | |
93 | ||
94 | ARRAY_COPY(newBuffer, buffer, bufferSize); | |
95 | ||
96 | if (buffer != defaultBuffer) { | |
97 | DELETE_ARRAY(buffer); | |
98 | } | |
99 | ||
100 | buffer = newBuffer; | |
101 | bufferSize += BUFFER_GROW; | |
102 | } | |
103 | ||
104 | buffer[bufferIndex].ce = ce; | |
105 | buffer[bufferIndex].low = ixLow; | |
106 | buffer[bufferIndex].high = ixHigh; | |
107 | ||
108 | bufferIndex += 1; | |
109 | } | |
110 | ||
111 | const RCEI *RCEBuffer::get() | |
112 | { | |
113 | if (bufferIndex > 0) { | |
114 | return &buffer[--bufferIndex]; | |
115 | } | |
116 | ||
117 | return NULL; | |
118 | } | |
119 | ||
/**
 * A processed 64-bit collation element together with the pair of iterator
 * offsets recorded around the raw element it was built from.
 */
struct PCEI
{
    uint64_t ce;    /* processed collation element */
    int32_t  low;   /* lower offset associated with the element */
    int32_t  high;  /* upper offset associated with the element */
};
126 | ||
127 | struct PCEBuffer | |
128 | { | |
129 | PCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; | |
130 | PCEI *buffer; | |
131 | int32_t bufferIndex; | |
132 | int32_t bufferSize; | |
133 | ||
134 | PCEBuffer(); | |
135 | ~PCEBuffer(); | |
136 | ||
137 | void reset(); | |
138 | UBool empty() const; | |
139 | void put(uint64_t ce, int32_t ixLow, int32_t ixHigh); | |
140 | const PCEI *get(); | |
141 | }; | |
142 | ||
143 | PCEBuffer::PCEBuffer() | |
144 | { | |
145 | buffer = defaultBuffer; | |
146 | bufferIndex = 0; | |
147 | bufferSize = DEFAULT_BUFFER_SIZE; | |
148 | } | |
149 | ||
150 | PCEBuffer::~PCEBuffer() | |
151 | { | |
152 | if (buffer != defaultBuffer) { | |
153 | DELETE_ARRAY(buffer); | |
154 | } | |
155 | } | |
156 | ||
157 | void PCEBuffer::reset() | |
158 | { | |
159 | bufferIndex = 0; | |
160 | } | |
161 | ||
162 | UBool PCEBuffer::empty() const | |
163 | { | |
164 | return bufferIndex <= 0; | |
165 | } | |
166 | ||
167 | void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh) | |
168 | { | |
169 | if (bufferIndex >= bufferSize) { | |
170 | PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); | |
171 | ||
172 | ARRAY_COPY(newBuffer, buffer, bufferSize); | |
173 | ||
174 | if (buffer != defaultBuffer) { | |
175 | DELETE_ARRAY(buffer); | |
176 | } | |
177 | ||
178 | buffer = newBuffer; | |
179 | bufferSize += BUFFER_GROW; | |
180 | } | |
181 | ||
182 | buffer[bufferIndex].ce = ce; | |
183 | buffer[bufferIndex].low = ixLow; | |
184 | buffer[bufferIndex].high = ixHigh; | |
185 | ||
186 | bufferIndex += 1; | |
187 | } | |
188 | ||
189 | const PCEI *PCEBuffer::get() | |
190 | { | |
191 | if (bufferIndex > 0) { | |
192 | return &buffer[--bufferIndex]; | |
193 | } | |
194 | ||
195 | return NULL; | |
196 | } | |
197 | ||
198 | /* | |
199 | * This inherits from UObject so that | |
200 | * it can be allocated by new and the | |
201 | * constructor for PCEBuffer is called. | |
202 | */ | |
203 | struct UCollationPCE : public UObject | |
204 | { | |
205 | PCEBuffer pceBuffer; | |
206 | UCollationStrength strength; | |
207 | UBool toShift; | |
208 | UBool isShifted; | |
209 | uint32_t variableTop; | |
210 | ||
211 | UCollationPCE(UCollationElements *elems); | |
212 | ~UCollationPCE(); | |
213 | ||
214 | void init(const UCollator *coll); | |
215 | ||
216 | virtual UClassID getDynamicClassID() const; | |
217 | static UClassID getStaticClassID(); | |
218 | }; | |
219 | ||
220 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE) | |
221 | ||
222 | UCollationPCE::UCollationPCE(UCollationElements *elems) | |
223 | { | |
224 | init(elems->iteratordata_.coll); | |
225 | } | |
226 | ||
227 | void UCollationPCE::init(const UCollator *coll) | |
228 | { | |
229 | UErrorCode status = U_ZERO_ERROR; | |
230 | ||
231 | strength = ucol_getStrength(coll); | |
232 | toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; | |
233 | isShifted = FALSE; | |
234 | variableTop = coll->variableTopValue << 16; | |
235 | } | |
236 | ||
237 | UCollationPCE::~UCollationPCE() | |
238 | { | |
239 | // nothing to do | |
240 | } | |
241 | ||
242 | ||
243 | U_NAMESPACE_END | |
244 | ||
245 | ||
246 | inline uint64_t processCE(UCollationElements *elems, uint32_t ce) | |
247 | { | |
248 | uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; | |
249 | ||
250 | // This is clean, but somewhat slow... | |
251 | // We could apply the mask to ce and then | |
252 | // just get all three orders... | |
253 | switch(elems->pce->strength) { | |
254 | default: | |
255 | tertiary = ucol_tertiaryOrder(ce); | |
256 | /* note fall-through */ | |
257 | ||
258 | case UCOL_SECONDARY: | |
259 | secondary = ucol_secondaryOrder(ce); | |
260 | /* note fall-through */ | |
261 | ||
262 | case UCOL_PRIMARY: | |
263 | primary = ucol_primaryOrder(ce); | |
264 | } | |
265 | ||
729e4ab9 A |
266 | // **** This should probably handle continuations too. **** |
267 | // **** That means that we need 24 bits for the primary **** | |
268 | // **** instead of the 16 that we're currently using. **** | |
269 | // **** So we can lay out the 64 bits as: 24.12.12.16. **** | |
270 | // **** Another complication with continuations is that **** | |
271 | // **** the *second* CE is marked as a continuation, so **** | |
272 | // **** we always have to peek ahead to know how long **** | |
273 | // **** the primary is... **** | |
274 | if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) | |
46f4442e A |
275 | || (elems->pce->isShifted && primary == 0)) { |
276 | ||
277 | if (primary == 0) { | |
278 | return UCOL_IGNORABLE; | |
279 | } | |
280 | ||
281 | if (elems->pce->strength >= UCOL_QUATERNARY) { | |
282 | quaternary = primary; | |
283 | } | |
284 | ||
285 | primary = secondary = tertiary = 0; | |
286 | elems->pce->isShifted = TRUE; | |
287 | } else { | |
288 | if (elems->pce->strength >= UCOL_QUATERNARY) { | |
289 | quaternary = 0xFFFF; | |
290 | } | |
291 | ||
292 | elems->pce->isShifted = FALSE; | |
293 | } | |
294 | ||
46f4442e A |
295 | return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; |
296 | } | |
297 | ||
298 | U_CAPI void U_EXPORT2 | |
299 | uprv_init_pce(const UCollationElements *elems) | |
300 | { | |
301 | if (elems->pce != NULL) { | |
302 | elems->pce->init(elems->iteratordata_.coll); | |
303 | } | |
304 | } | |
305 | ||
306 | ||
307 | ||
b75a7d8f A |
308 | /* public methods ---------------------------------------------------- */ |
309 | ||
b75a7d8f A |
310 | U_CAPI UCollationElements* U_EXPORT2 |
311 | ucol_openElements(const UCollator *coll, | |
312 | const UChar *text, | |
313 | int32_t textLength, | |
314 | UErrorCode *status) | |
315 | { | |
46f4442e A |
316 | if (U_FAILURE(*status)) { |
317 | return NULL; | |
318 | } | |
b75a7d8f | 319 | |
729e4ab9 | 320 | UCollationElements *result = new UCollationElements; |
46f4442e A |
321 | if (result == NULL) { |
322 | *status = U_MEMORY_ALLOCATION_ERROR; | |
323 | return NULL; | |
324 | } | |
729e4ab9 | 325 | |
46f4442e A |
326 | result->reset_ = TRUE; |
327 | result->isWritable = FALSE; | |
328 | result->pce = NULL; | |
b75a7d8f | 329 | |
46f4442e A |
330 | if (text == NULL) { |
331 | textLength = 0; | |
332 | } | |
729e4ab9 | 333 | uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status); |
b75a7d8f | 334 | |
46f4442e | 335 | return result; |
b75a7d8f A |
336 | } |
337 | ||
729e4ab9 | 338 | |
b75a7d8f A |
339 | U_CAPI void U_EXPORT2 |
340 | ucol_closeElements(UCollationElements *elems) | |
341 | { | |
46f4442e A |
342 | if (elems != NULL) { |
343 | collIterate *ci = &elems->iteratordata_; | |
344 | ||
729e4ab9 A |
345 | if (ci->extendCEs) { |
346 | uprv_free(ci->extendCEs); | |
347 | } | |
46f4442e | 348 | |
729e4ab9 A |
349 | if (ci->offsetBuffer) { |
350 | uprv_free(ci->offsetBuffer); | |
46f4442e A |
351 | } |
352 | ||
353 | if (elems->isWritable && elems->iteratordata_.string != NULL) | |
354 | { | |
729e4ab9 | 355 | uprv_free((UChar *)elems->iteratordata_.string); |
46f4442e A |
356 | } |
357 | ||
358 | if (elems->pce != NULL) { | |
359 | delete elems->pce; | |
360 | } | |
361 | ||
729e4ab9 | 362 | delete elems; |
46f4442e | 363 | } |
b75a7d8f A |
364 | } |
365 | ||
366 | U_CAPI void U_EXPORT2 | |
367 | ucol_reset(UCollationElements *elems) | |
368 | { | |
46f4442e A |
369 | collIterate *ci = &(elems->iteratordata_); |
370 | elems->reset_ = TRUE; | |
371 | ci->pos = ci->string; | |
372 | if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) { | |
373 | ci->endp = ci->string + u_strlen(ci->string); | |
374 | } | |
375 | ci->CEpos = ci->toReturn = ci->CEs; | |
729e4ab9 | 376 | ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN; |
46f4442e A |
377 | if (ci->coll->normalizationMode == UCOL_ON) { |
378 | ci->flags |= UCOL_ITER_NORM; | |
379 | } | |
380 | ||
729e4ab9 | 381 | ci->writableBuffer.remove(); |
46f4442e A |
382 | ci->fcdPosition = NULL; |
383 | ||
384 | //ci->offsetReturn = ci->offsetStore = NULL; | |
385 | ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
b75a7d8f A |
386 | } |
387 | ||
729e4ab9 A |
388 | U_CAPI void U_EXPORT2 |
389 | ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status) | |
390 | { | |
391 | if (U_FAILURE(*status)) { | |
392 | return; | |
393 | } | |
394 | ||
395 | if (elems == NULL) { | |
396 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
397 | return; | |
398 | } | |
399 | ||
400 | elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT; | |
401 | } | |
402 | ||
b75a7d8f A |
403 | U_CAPI int32_t U_EXPORT2 |
404 | ucol_next(UCollationElements *elems, | |
405 | UErrorCode *status) | |
406 | { | |
46f4442e A |
407 | int32_t result; |
408 | if (U_FAILURE(*status)) { | |
409 | return UCOL_NULLORDER; | |
410 | } | |
b75a7d8f | 411 | |
46f4442e | 412 | elems->reset_ = FALSE; |
b75a7d8f | 413 | |
46f4442e A |
414 | result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll, |
415 | &elems->iteratordata_, | |
416 | status); | |
417 | ||
418 | if (result == UCOL_NO_MORE_CES) { | |
419 | result = UCOL_NULLORDER; | |
420 | } | |
421 | return result; | |
422 | } | |
423 | ||
424 | U_CAPI int64_t U_EXPORT2 | |
425 | ucol_nextProcessed(UCollationElements *elems, | |
426 | int32_t *ixLow, | |
427 | int32_t *ixHigh, | |
428 | UErrorCode *status) | |
429 | { | |
430 | const UCollator *coll = elems->iteratordata_.coll; | |
431 | int64_t result = UCOL_IGNORABLE; | |
432 | uint32_t low = 0, high = 0; | |
433 | ||
434 | if (U_FAILURE(*status)) { | |
435 | return UCOL_PROCESSED_NULLORDER; | |
436 | } | |
437 | ||
438 | if (elems->pce == NULL) { | |
439 | elems->pce = new UCollationPCE(elems); | |
440 | } else { | |
441 | elems->pce->pceBuffer.reset(); | |
442 | } | |
443 | ||
444 | elems->reset_ = FALSE; | |
445 | ||
446 | do { | |
447 | low = ucol_getOffset(elems); | |
448 | uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status); | |
449 | high = ucol_getOffset(elems); | |
450 | ||
451 | if (ce == UCOL_NO_MORE_CES) { | |
452 | result = UCOL_PROCESSED_NULLORDER; | |
453 | break; | |
454 | } | |
455 | ||
456 | result = processCE(elems, ce); | |
457 | } while (result == UCOL_IGNORABLE); | |
458 | ||
459 | if (ixLow != NULL) { | |
460 | *ixLow = low; | |
461 | } | |
462 | ||
463 | if (ixHigh != NULL) { | |
464 | *ixHigh = high; | |
465 | } | |
466 | ||
467 | return result; | |
b75a7d8f A |
468 | } |
469 | ||
470 | U_CAPI int32_t U_EXPORT2 | |
471 | ucol_previous(UCollationElements *elems, | |
472 | UErrorCode *status) | |
473 | { | |
46f4442e A |
474 | if(U_FAILURE(*status)) { |
475 | return UCOL_NULLORDER; | |
476 | } | |
477 | else | |
478 | { | |
479 | int32_t result; | |
480 | ||
481 | if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) { | |
482 | if (elems->iteratordata_.endp == NULL) { | |
483 | elems->iteratordata_.endp = elems->iteratordata_.string + | |
484 | u_strlen(elems->iteratordata_.string); | |
485 | elems->iteratordata_.flags |= UCOL_ITER_HASLEN; | |
486 | } | |
487 | elems->iteratordata_.pos = elems->iteratordata_.endp; | |
488 | elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; | |
489 | } | |
490 | ||
491 | elems->reset_ = FALSE; | |
492 | ||
493 | result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll, | |
494 | &(elems->iteratordata_), | |
495 | status); | |
496 | ||
497 | if (result == UCOL_NO_MORE_CES) { | |
498 | result = UCOL_NULLORDER; | |
499 | } | |
500 | ||
501 | return result; | |
502 | } | |
503 | } | |
504 | ||
505 | U_CAPI int64_t U_EXPORT2 | |
506 | ucol_previousProcessed(UCollationElements *elems, | |
507 | int32_t *ixLow, | |
508 | int32_t *ixHigh, | |
509 | UErrorCode *status) | |
510 | { | |
511 | const UCollator *coll = elems->iteratordata_.coll; | |
512 | int64_t result = UCOL_IGNORABLE; | |
513 | // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; | |
514 | // UCollationStrength strength = ucol_getStrength(coll); | |
515 | // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; | |
516 | // uint32_t variableTop = coll->variableTopValue; | |
517 | int32_t low = 0, high = 0; | |
518 | ||
519 | if (U_FAILURE(*status)) { | |
520 | return UCOL_PROCESSED_NULLORDER; | |
521 | } | |
b75a7d8f A |
522 | |
523 | if (elems->reset_ && | |
524 | (elems->iteratordata_.pos == elems->iteratordata_.string)) { | |
525 | if (elems->iteratordata_.endp == NULL) { | |
526 | elems->iteratordata_.endp = elems->iteratordata_.string + | |
527 | u_strlen(elems->iteratordata_.string); | |
528 | elems->iteratordata_.flags |= UCOL_ITER_HASLEN; | |
529 | } | |
46f4442e | 530 | |
b75a7d8f A |
531 | elems->iteratordata_.pos = elems->iteratordata_.endp; |
532 | elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; | |
533 | } | |
534 | ||
46f4442e A |
535 | if (elems->pce == NULL) { |
536 | elems->pce = new UCollationPCE(elems); | |
537 | } else { | |
538 | //elems->pce->pceBuffer.reset(); | |
539 | } | |
540 | ||
b75a7d8f A |
541 | elems->reset_ = FALSE; |
542 | ||
46f4442e A |
543 | while (elems->pce->pceBuffer.empty()) { |
544 | // buffer raw CEs up to non-ignorable primary | |
545 | RCEBuffer rceb; | |
546 | uint32_t ce; | |
547 | ||
548 | // **** do we need to reset rceb, or will it always be empty at this point **** | |
549 | do { | |
550 | high = ucol_getOffset(elems); | |
551 | ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); | |
552 | low = ucol_getOffset(elems); | |
553 | ||
554 | if (ce == UCOL_NO_MORE_CES) { | |
555 | if (! rceb.empty()) { | |
556 | break; | |
557 | } | |
558 | ||
559 | goto finish; | |
560 | } | |
561 | ||
562 | rceb.put(ce, low, high); | |
563 | } while ((ce & UCOL_PRIMARYMASK) == 0); | |
564 | ||
565 | // process the raw CEs | |
566 | while (! rceb.empty()) { | |
567 | const RCEI *rcei = rceb.get(); | |
568 | ||
569 | result = processCE(elems, rcei->ce); | |
570 | ||
571 | if (result != UCOL_IGNORABLE) { | |
572 | elems->pce->pceBuffer.put(result, rcei->low, rcei->high); | |
573 | } | |
574 | } | |
575 | } | |
b75a7d8f | 576 | |
46f4442e A |
577 | finish: |
578 | if (elems->pce->pceBuffer.empty()) { | |
579 | // **** Is -1 the right value for ixLow, ixHigh? **** | |
580 | if (ixLow != NULL) { | |
581 | *ixLow = -1; | |
582 | } | |
583 | ||
584 | if (ixHigh != NULL) { | |
585 | *ixHigh = -1 | |
586 | ; | |
587 | } | |
588 | return UCOL_PROCESSED_NULLORDER; | |
b75a7d8f A |
589 | } |
590 | ||
46f4442e A |
591 | const PCEI *pcei = elems->pce->pceBuffer.get(); |
592 | ||
593 | if (ixLow != NULL) { | |
594 | *ixLow = pcei->low; | |
595 | } | |
596 | ||
597 | if (ixHigh != NULL) { | |
598 | *ixHigh = pcei->high; | |
599 | } | |
600 | ||
601 | return pcei->ce; | |
b75a7d8f A |
602 | } |
603 | ||
604 | U_CAPI int32_t U_EXPORT2 | |
605 | ucol_getMaxExpansion(const UCollationElements *elems, | |
606 | int32_t order) | |
607 | { | |
46f4442e A |
608 | uint8_t result; |
609 | ||
610 | #if 0 | |
611 | UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); | |
612 | #else | |
613 | const UCollator *coll = elems->iteratordata_.coll; | |
614 | const uint32_t *start; | |
615 | const uint32_t *limit; | |
616 | const uint32_t *mid; | |
617 | uint32_t strengthMask = 0; | |
618 | uint32_t mOrder = (uint32_t) order; | |
619 | ||
620 | switch (coll->strength) | |
621 | { | |
622 | default: | |
623 | strengthMask |= UCOL_TERTIARYORDERMASK; | |
624 | /* fall through */ | |
625 | ||
626 | case UCOL_SECONDARY: | |
627 | strengthMask |= UCOL_SECONDARYORDERMASK; | |
628 | /* fall through */ | |
629 | ||
630 | case UCOL_PRIMARY: | |
631 | strengthMask |= UCOL_PRIMARYORDERMASK; | |
632 | } | |
633 | ||
634 | mOrder &= strengthMask; | |
635 | start = (coll)->endExpansionCE; | |
636 | limit = (coll)->lastEndExpansionCE; | |
637 | ||
638 | while (start < limit - 1) { | |
639 | mid = start + ((limit - start) >> 1); | |
640 | if (mOrder <= (*mid & strengthMask)) { | |
641 | limit = mid; | |
642 | } else { | |
643 | start = mid; | |
644 | } | |
645 | } | |
646 | ||
647 | // FIXME: with a masked search, there might be more than one hit, | |
648 | // so we need to look forward and backward from the match to find all | |
649 | // of the hits... | |
650 | if ((*start & strengthMask) == mOrder) { | |
651 | result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE)); | |
652 | } else if ((*limit & strengthMask) == mOrder) { | |
653 | result = *(coll->expansionCESize + (limit - coll->endExpansionCE)); | |
654 | } else if ((mOrder & 0xFFFF) == 0x00C0) { | |
655 | result = 2; | |
656 | } else { | |
657 | result = 1; | |
658 | } | |
659 | #endif | |
660 | ||
661 | return result; | |
b75a7d8f A |
662 | } |
663 | ||
664 | U_CAPI void U_EXPORT2 | |
665 | ucol_setText( UCollationElements *elems, | |
666 | const UChar *text, | |
667 | int32_t textLength, | |
668 | UErrorCode *status) | |
669 | { | |
46f4442e A |
670 | if (U_FAILURE(*status)) { |
671 | return; | |
672 | } | |
b75a7d8f | 673 | |
46f4442e A |
674 | if (elems->isWritable && elems->iteratordata_.string != NULL) |
675 | { | |
729e4ab9 | 676 | uprv_free((UChar *)elems->iteratordata_.string); |
46f4442e A |
677 | } |
678 | ||
679 | if (text == NULL) { | |
680 | textLength = 0; | |
681 | } | |
b75a7d8f | 682 | |
46f4442e A |
683 | elems->isWritable = FALSE; |
684 | ||
685 | /* free offset buffer to avoid memory leak before initializing. */ | |
729e4ab9 A |
686 | ucol_freeOffsetBuffer(&(elems->iteratordata_)); |
687 | /* Ensure that previously allocated extendCEs is freed before setting to NULL. */ | |
688 | if (elems->iteratordata_.extendCEs != NULL) { | |
689 | uprv_free(elems->iteratordata_.extendCEs); | |
690 | } | |
46f4442e | 691 | uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, |
729e4ab9 | 692 | &elems->iteratordata_, status); |
b75a7d8f | 693 | |
46f4442e | 694 | elems->reset_ = TRUE; |
b75a7d8f A |
695 | } |
696 | ||
697 | U_CAPI int32_t U_EXPORT2 | |
698 | ucol_getOffset(const UCollationElements *elems) | |
699 | { | |
700 | const collIterate *ci = &(elems->iteratordata_); | |
46f4442e A |
701 | |
702 | if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) { | |
703 | return ci->offsetRepeatValue; | |
704 | } | |
705 | ||
706 | if (ci->offsetReturn != NULL) { | |
707 | return *ci->offsetReturn; | |
708 | } | |
709 | ||
b75a7d8f A |
710 | // while processing characters in normalization buffer getOffset will |
711 | // return the next non-normalized character. | |
712 | // should be inline with the old implementation since the old codes uses | |
713 | // nextDecomp in normalizer which also decomposes the string till the | |
714 | // first base character is found. | |
715 | if (ci->flags & UCOL_ITER_INNORMBUF) { | |
716 | if (ci->fcdPosition == NULL) { | |
717 | return 0; | |
718 | } | |
719 | return (int32_t)(ci->fcdPosition - ci->string); | |
720 | } | |
721 | else { | |
722 | return (int32_t)(ci->pos - ci->string); | |
723 | } | |
724 | } | |
725 | ||
726 | U_CAPI void U_EXPORT2 | |
727 | ucol_setOffset(UCollationElements *elems, | |
728 | int32_t offset, | |
729 | UErrorCode *status) | |
730 | { | |
46f4442e A |
731 | if (U_FAILURE(*status)) { |
732 | return; | |
733 | } | |
b75a7d8f | 734 | |
46f4442e A |
735 | // this methods will clean up any use of the writable buffer and points to |
736 | // the original string | |
737 | collIterate *ci = &(elems->iteratordata_); | |
738 | ci->pos = ci->string + offset; | |
739 | ci->CEpos = ci->toReturn = ci->CEs; | |
740 | if (ci->flags & UCOL_ITER_INNORMBUF) { | |
741 | ci->flags = ci->origFlags; | |
742 | } | |
743 | if ((ci->flags & UCOL_ITER_HASLEN) == 0) { | |
744 | ci->endp = ci->string + u_strlen(ci->string); | |
745 | ci->flags |= UCOL_ITER_HASLEN; | |
746 | } | |
747 | ci->fcdPosition = NULL; | |
748 | elems->reset_ = FALSE; | |
749 | ||
750 | ci->offsetReturn = NULL; | |
751 | ci->offsetStore = ci->offsetBuffer; | |
752 | ci->offsetRepeatCount = ci->offsetRepeatValue = 0; | |
b75a7d8f A |
753 | } |
754 | ||
755 | U_CAPI int32_t U_EXPORT2 | |
756 | ucol_primaryOrder (int32_t order) | |
757 | { | |
46f4442e A |
758 | order &= UCOL_PRIMARYMASK; |
759 | return (order >> UCOL_PRIMARYORDERSHIFT); | |
b75a7d8f A |
760 | } |
761 | ||
762 | U_CAPI int32_t U_EXPORT2 | |
763 | ucol_secondaryOrder (int32_t order) | |
764 | { | |
46f4442e A |
765 | order &= UCOL_SECONDARYMASK; |
766 | return (order >> UCOL_SECONDARYORDERSHIFT); | |
b75a7d8f A |
767 | } |
768 | ||
769 | U_CAPI int32_t U_EXPORT2 | |
770 | ucol_tertiaryOrder (int32_t order) | |
771 | { | |
46f4442e | 772 | return (order & UCOL_TERTIARYMASK); |
b75a7d8f A |
773 | } |
774 | ||
729e4ab9 A |
775 | |
776 | void ucol_freeOffsetBuffer(collIterate *s) { | |
777 | if (s != NULL && s->offsetBuffer != NULL) { | |
778 | uprv_free(s->offsetBuffer); | |
779 | s->offsetBuffer = NULL; | |
780 | s->offsetBufferSize = 0; | |
781 | } | |
782 | } | |
783 | ||
b75a7d8f | 784 | #endif /* #if !UCONFIG_NO_COLLATION */ |