]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uitercollationiterator.cpp
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * uitercollationiterator.cpp
8 * created on: 2012sep23 (from utf16collationiterator.cpp)
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/uiter.h"
19 #include "collation.h"
20 #include "collationdata.h"
21 #include "collationfcd.h"
22 #include "collationiterator.h"
23 #include "normalizer2impl.h"
25 #include "uitercollationiterator.h"
// Out-of-line definition of the UIterCollationIterator destructor.
// The body is empty: no explicit cleanup is performed here.
29 UIterCollationIterator::~UIterCollationIterator() {}
32 UIterCollationIterator::resetToOffset(int32_t newOffset
) {
34 iter
.move(&iter
, newOffset
, UITER_START
);
38 UIterCollationIterator::getOffset() const {
39 return iter
.getIndex(&iter
, UITER_CURRENT
);
43 UIterCollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
& /*errorCode*/) {
46 return Collation::FALLBACK_CE32
;
48 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
52 UIterCollationIterator::handleGetTrailSurrogate() {
53 UChar32 trail
= iter
.next(&iter
);
54 if(!U16_IS_TRAIL(trail
) && trail
>= 0) { iter
.previous(&iter
); }
59 UIterCollationIterator::nextCodePoint(UErrorCode
& /*errorCode*/) {
60 return uiter_next32(&iter
);
64 UIterCollationIterator::previousCodePoint(UErrorCode
& /*errorCode*/) {
65 return uiter_previous32(&iter
);
69 UIterCollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
70 while(num
> 0 && (uiter_next32(&iter
)) >= 0) {
76 UIterCollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
77 while(num
> 0 && (uiter_previous32(&iter
)) >= 0) {
82 // FCDUIterCollationIterator ----------------------------------------------- ***
// Out-of-line definition of the FCDUIterCollationIterator destructor.
// The body is empty: no explicit cleanup is performed here.
84 FCDUIterCollationIterator::~FCDUIterCollationIterator() {}
87 FCDUIterCollationIterator::resetToOffset(int32_t newOffset
) {
88 UIterCollationIterator::resetToOffset(newOffset
);
90 state
= ITER_CHECK_FWD
;
94 FCDUIterCollationIterator::getOffset() const {
95 if(state
<= ITER_CHECK_BWD
) {
96 return iter
.getIndex(&iter
, UITER_CURRENT
);
97 } else if(state
== ITER_IN_FCD_SEGMENT
) {
107 FCDUIterCollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
&errorCode
) {
109 if(state
== ITER_CHECK_FWD
) {
110 c
= iter
.next(&iter
);
112 return Collation::FALLBACK_CE32
;
114 if(CollationFCD::hasTccc(c
)) {
115 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
116 CollationFCD::hasLccc(iter
.current(&iter
))) {
117 iter
.previous(&iter
);
118 if(!nextSegment(errorCode
)) {
120 return Collation::FALLBACK_CE32
;
126 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= limit
) {
127 c
= iter
.next(&iter
);
131 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= normalized
.length()) {
132 c
= normalized
[pos
++];
138 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
142 FCDUIterCollationIterator::handleGetTrailSurrogate() {
143 if(state
<= ITER_IN_FCD_SEGMENT
) {
144 UChar32 trail
= iter
.next(&iter
);
145 if(U16_IS_TRAIL(trail
)) {
146 if(state
== ITER_IN_FCD_SEGMENT
) { ++pos
; }
147 } else if(trail
>= 0) {
148 iter
.previous(&iter
);
152 U_ASSERT(pos
< normalized
.length());
154 if(U16_IS_TRAIL(trail
= normalized
[pos
])) { ++pos
; }
160 FCDUIterCollationIterator::nextCodePoint(UErrorCode
&errorCode
) {
163 if(state
== ITER_CHECK_FWD
) {
164 c
= iter
.next(&iter
);
168 if(CollationFCD::hasTccc(c
)) {
169 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
170 CollationFCD::hasLccc(iter
.current(&iter
))) {
171 iter
.previous(&iter
);
172 if(!nextSegment(errorCode
)) {
179 UChar32 trail
= iter
.next(&iter
);
180 if(U16_IS_TRAIL(trail
)) {
181 return U16_GET_SUPPLEMENTARY(c
, trail
);
182 } else if(trail
>= 0) {
183 iter
.previous(&iter
);
187 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= limit
) {
188 c
= uiter_next32(&iter
);
189 pos
+= U16_LENGTH(c
);
192 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= normalized
.length()) {
193 c
= normalized
.char32At(pos
);
194 pos
+= U16_LENGTH(c
);
203 FCDUIterCollationIterator::previousCodePoint(UErrorCode
&errorCode
) {
206 if(state
== ITER_CHECK_BWD
) {
207 c
= iter
.previous(&iter
);
210 state
= ITER_IN_FCD_SEGMENT
;
213 if(CollationFCD::hasLccc(c
)) {
214 UChar32 prev
= U_SENTINEL
;
215 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
216 CollationFCD::hasTccc(prev
= iter
.previous(&iter
))) {
221 if(!previousSegment(errorCode
)) {
226 // hasLccc(trail)=true for all trail surrogates
227 if(U16_IS_TRAIL(c
)) {
229 prev
= iter
.previous(&iter
);
231 if(U16_IS_LEAD(prev
)) {
232 return U16_GET_SUPPLEMENTARY(prev
, c
);
240 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= start
) {
241 c
= uiter_previous32(&iter
);
242 pos
-= U16_LENGTH(c
);
245 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= 0) {
246 c
= normalized
.char32At(pos
- 1);
247 pos
-= U16_LENGTH(c
);
256 FCDUIterCollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
257 // Specify the class to avoid a virtual-function indirection.
258 // In Java, we would declare this class final.
259 while(num
> 0 && FCDUIterCollationIterator::nextCodePoint(errorCode
) >= 0) {
265 FCDUIterCollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
266 // Specify the class to avoid a virtual-function indirection.
267 // In Java, we would declare this class final.
268 while(num
> 0 && FCDUIterCollationIterator::previousCodePoint(errorCode
) >= 0) {
274 FCDUIterCollationIterator::switchToForward() {
275 U_ASSERT(state
== ITER_CHECK_BWD
||
276 (state
== ITER_IN_FCD_SEGMENT
&& pos
== limit
) ||
277 (state
>= IN_NORM_ITER_AT_LIMIT
&& pos
== normalized
.length()));
278 if(state
== ITER_CHECK_BWD
) {
279 // Turn around from backward checking.
280 start
= pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
282 state
= ITER_CHECK_FWD
; // Check forward.
283 } else { // pos < limit
284 state
= ITER_IN_FCD_SEGMENT
; // Stay in FCD segment.
287 // Reached the end of the FCD segment.
288 if(state
== ITER_IN_FCD_SEGMENT
) {
289 // The input text segment is FCD, extend it forward.
291 // The input text segment needed to be normalized.
292 // Switch to checking forward from it.
293 if(state
== IN_NORM_ITER_AT_START
) {
294 iter
.move(&iter
, limit
- start
, UITER_CURRENT
);
298 state
= ITER_CHECK_FWD
;
303 FCDUIterCollationIterator::nextSegment(UErrorCode
&errorCode
) {
304 if(U_FAILURE(errorCode
)) { return FALSE
; }
305 U_ASSERT(state
== ITER_CHECK_FWD
);
306 // The input text [start..(iter index)[ passes the FCD check.
307 pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
308 // Collect the characters being checked, in case they need to be normalized.
312 // Fetch the next character and its fcd16 value.
313 UChar32 c
= uiter_next32(&iter
);
315 uint16_t fcd16
= nfcImpl
.getFCD16(c
);
316 uint8_t leadCC
= (uint8_t)(fcd16
>> 8);
317 if(leadCC
== 0 && !s
.isEmpty()) {
318 // FCD boundary before this character.
319 uiter_previous32(&iter
);
323 if(leadCC
!= 0 && (prevCC
> leadCC
|| CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
324 // Fails FCD check. Find the next FCD boundary and normalize.
326 c
= uiter_next32(&iter
);
328 if(nfcImpl
.getFCD16(c
) <= 0xff) {
329 uiter_previous32(&iter
);
334 if(!normalize(s
, errorCode
)) { return FALSE
; }
336 limit
= pos
+ s
.length();
337 state
= IN_NORM_ITER_AT_LIMIT
;
341 prevCC
= (uint8_t)fcd16
;
343 // FCD boundary after the last character.
347 limit
= pos
+ s
.length();
348 U_ASSERT(pos
!= limit
);
349 iter
.move(&iter
, -s
.length(), UITER_CURRENT
);
350 state
= ITER_IN_FCD_SEGMENT
;
355 FCDUIterCollationIterator::switchToBackward() {
356 U_ASSERT(state
== ITER_CHECK_FWD
||
357 (state
== ITER_IN_FCD_SEGMENT
&& pos
== start
) ||
358 (state
>= IN_NORM_ITER_AT_LIMIT
&& pos
== 0));
359 if(state
== ITER_CHECK_FWD
) {
360 // Turn around from forward checking.
361 limit
= pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
363 state
= ITER_CHECK_BWD
; // Check backward.
364 } else { // pos > start
365 state
= ITER_IN_FCD_SEGMENT
; // Stay in FCD segment.
368 // Reached the start of the FCD segment.
369 if(state
== ITER_IN_FCD_SEGMENT
) {
370 // The input text segment is FCD, extend it backward.
372 // The input text segment needed to be normalized.
373 // Switch to checking backward from it.
374 if(state
== IN_NORM_ITER_AT_LIMIT
) {
375 iter
.move(&iter
, start
- limit
, UITER_CURRENT
);
379 state
= ITER_CHECK_BWD
;
384 FCDUIterCollationIterator::previousSegment(UErrorCode
&errorCode
) {
385 if(U_FAILURE(errorCode
)) { return FALSE
; }
386 U_ASSERT(state
== ITER_CHECK_BWD
);
387 // The input text [(iter index)..limit[ passes the FCD check.
388 pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
389 // Collect the characters being checked, in case they need to be normalized.
393 // Fetch the previous character and its fcd16 value.
394 UChar32 c
= uiter_previous32(&iter
);
396 uint16_t fcd16
= nfcImpl
.getFCD16(c
);
397 uint8_t trailCC
= (uint8_t)fcd16
;
398 if(trailCC
== 0 && !s
.isEmpty()) {
399 // FCD boundary after this character.
404 if(trailCC
!= 0 && ((nextCC
!= 0 && trailCC
> nextCC
) ||
405 CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
406 // Fails FCD check. Find the previous FCD boundary and normalize.
407 while(fcd16
> 0xff) {
408 c
= uiter_previous32(&iter
);
410 fcd16
= nfcImpl
.getFCD16(c
);
412 (void)uiter_next32(&iter
);
418 if(!normalize(s
, errorCode
)) { return FALSE
; }
420 start
= pos
- s
.length();
421 state
= IN_NORM_ITER_AT_START
;
422 pos
= normalized
.length();
425 nextCC
= (uint8_t)(fcd16
>> 8);
427 // FCD boundary before the following character.
431 start
= pos
- s
.length();
432 U_ASSERT(pos
!= start
);
433 iter
.move(&iter
, s
.length(), UITER_CURRENT
);
434 state
= ITER_IN_FCD_SEGMENT
;
439 FCDUIterCollationIterator::normalize(const UnicodeString
&s
, UErrorCode
&errorCode
) {
440 // NFD without argument checking.
441 U_ASSERT(U_SUCCESS(errorCode
));
442 nfcImpl
.decompose(s
, normalized
, errorCode
);
443 return U_SUCCESS(errorCode
);
448 #endif // !UCONFIG_NO_COLLATION