]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uitercollationiterator.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2012-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * uitercollationiterator.cpp
10 * created on: 2012sep23 (from utf16collationiterator.cpp)
11 * created by: Markus W. Scherer
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "unicode/uiter.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "collationfcd.h"
24 #include "collationiterator.h"
25 #include "normalizer2impl.h"
27 #include "uitercollationiterator.h"
// Destructor with an empty body: no explicit cleanup is performed here.
31 UIterCollationIterator::~UIterCollationIterator() {}
// Repositions the iterator `iter` by calling iter.move() with newOffset and
// the origin UITER_START.
// NOTE(review): the return type, the closing brace and possibly further
// statements are not visible in this listing — confirm against the full file.
34 UIterCollationIterator::resetToOffset(int32_t newOffset
) {
36 iter
.move(&iter
, newOffset
, UITER_START
);
// Reports the current position: returns the value of iter.getIndex() queried
// with UITER_CURRENT. The method is declared const.
// NOTE(review): the return type and closing brace are not visible in this listing.
40 UIterCollationIterator::getOffset() const {
41 return iter
.getIndex(&iter
, UITER_CURRENT
);
// Produces a 32-bit collation element value for c, which is taken by
// reference. Two return paths are visible: Collation::FALLBACK_CE32, and a
// lookup of c in `trie` via UTRIE2_GET32_FROM_U16_SINGLE_LEAD.
// The UErrorCode parameter is intentionally left unnamed, so it is unused.
// NOTE(review): the statement that assigns c and the condition guarding the
// fallback return are not visible here; presumably the fallback covers the
// "no more input" case — confirm against the full file.
45 UIterCollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
& /*errorCode*/) {
48 return Collation::FALLBACK_CE32
;
// Regular path: fetch the value stored in the trie for c.
50 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
// Reads one unit from `iter` as a candidate trail surrogate.
// If the unit is not a trail surrogate and is non-negative, the read is undone
// with iter.previous() so that the unit is not consumed.
// NOTE(review): a negative value presumably means that nothing was read (end
// of input), which would explain why no step back is made in that case —
// confirm. The return statement is not visible in this listing.
54 UIterCollationIterator::handleGetTrailSurrogate() {
55 UChar32 trail
= iter
.next(&iter
);
56 if(!U16_IS_TRAIL(trail
) && trail
>= 0) { iter
.previous(&iter
); }
// Returns the result of uiter_next32(&iter), i.e. advances `iter` and hands
// back what that helper reports. The UErrorCode parameter is unnamed and unused.
// NOTE(review): the return type and closing brace are not visible in this listing.
61 UIterCollationIterator::nextCodePoint(UErrorCode
& /*errorCode*/) {
62 return uiter_next32(&iter
);
// Returns the result of uiter_previous32(&iter), i.e. moves `iter` backward
// and hands back what that helper reports. The UErrorCode parameter is
// unnamed and unused.
// NOTE(review): the return type and closing brace are not visible in this listing.
66 UIterCollationIterator::previousCodePoint(UErrorCode
& /*errorCode*/) {
67 return uiter_previous32(&iter
);
// Moves `iter` forward: the loop keeps calling uiter_next32(&iter) while
// num > 0 and the call returns a non-negative value.
// The UErrorCode parameter is unnamed and unused.
// NOTE(review): the loop body is not visible in this listing; presumably it
// decrements num so that at most num code points are skipped — confirm.
71 UIterCollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
72 while(num
> 0 && (uiter_next32(&iter
)) >= 0) {
// Moves `iter` backward: the loop keeps calling uiter_previous32(&iter) while
// num > 0 and the call returns a non-negative value.
// The UErrorCode parameter is unnamed and unused.
// NOTE(review): the loop body is not visible in this listing; presumably it
// decrements num so that at most num code points are skipped — confirm.
78 UIterCollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
79 while(num
> 0 && (uiter_previous32(&iter
)) >= 0) {
84 // FCDUIterCollationIterator ----------------------------------------------- ***
// Destructor with an empty body: no explicit cleanup is performed here.
86 FCDUIterCollationIterator::~FCDUIterCollationIterator() {}
// Repositions the iterator by delegating to
// UIterCollationIterator::resetToOffset(newOffset) and then puts the state
// machine into ITER_CHECK_FWD.
// NOTE(review): statements between the two visible ones (and the return type /
// closing brace) may be missing from this listing — confirm.
89 FCDUIterCollationIterator::resetToOffset(int32_t newOffset
) {
90 UIterCollationIterator::resetToOffset(newOffset
);
92 state
= ITER_CHECK_FWD
;
// Reports the current offset, choosing the source by state:
//  - state <= ITER_CHECK_BWD: the index reported by iter.getIndex() with
//    UITER_CURRENT is returned directly;
//  - state == ITER_IN_FCD_SEGMENT: handled by a separate branch;
//  - otherwise the result depends on whether pos == 0.
// NOTE(review): the values returned by the last three cases are not visible
// in this listing — confirm against the full file.
96 FCDUIterCollationIterator::getOffset() const {
97 if(state
<= ITER_CHECK_BWD
) {
98 return iter
.getIndex(&iter
, UITER_CURRENT
);
99 } else if(state
== ITER_IN_FCD_SEGMENT
) {
101 } else if(pos
== 0) {
// Fetches the next code unit into c (taken by reference) and returns its
// value from `trie`, switching on the iterator state:
//  - ITER_CHECK_FWD: c is read from `iter`. If CollationFCD::hasTccc(c) holds
//    and either c may be a Tibetan composite vowel or the unit now current in
//    `iter` has hasLccc, the read is undone and nextSegment() is invoked;
//    if nextSegment() fails, Collation::FALLBACK_CE32 is returned.
//  - ITER_IN_FCD_SEGMENT with pos != limit: c is read from `iter`.
//  - state >= IN_NORM_ITER_AT_LIMIT with pos != normalized.length(): c is
//    read from the `normalized` buffer and pos is advanced.
// NOTE(review): several lines are missing from this listing (the condition
// guarding the first FALLBACK_CE32 return, the remaining branch, and what
// looks like an enclosing retry loop) — confirm against the full file.
109 FCDUIterCollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
&errorCode
) {
111 if(state
== ITER_CHECK_FWD
) {
112 c
= iter
.next(&iter
);
114 return Collation::FALLBACK_CE32
;
116 if(CollationFCD::hasTccc(c
)) {
117 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
118 CollationFCD::hasLccc(iter
.current(&iter
))) {
// Undo the read of c before handing over to nextSegment().
119 iter
.previous(&iter
);
120 if(!nextSegment(errorCode
)) {
122 return Collation::FALLBACK_CE32
;
128 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= limit
) {
129 c
= iter
.next(&iter
);
133 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= normalized
.length()) {
134 c
= normalized
[pos
++];
// Look up the value stored in the trie for c.
140 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
// Reads a candidate trail surrogate from the current text source.
//  - state <= ITER_IN_FCD_SEGMENT: one unit is read from `iter`. If it is a
//    trail surrogate it stays consumed, and pos is incremented as well when
//    state == ITER_IN_FCD_SEGMENT. If it is not a trail surrogate but is
//    non-negative, the read is undone with iter.previous().
//  - otherwise: the unit at normalized[pos] is inspected (pos is asserted to
//    be inside the buffer) and pos is incremented only for a trail surrogate.
// NOTE(review): the `else` that separates the two cases, the declaration of
// `trail` for the second case and the return statement are not visible in
// this listing — confirm against the full file.
144 FCDUIterCollationIterator::handleGetTrailSurrogate() {
145 if(state
<= ITER_IN_FCD_SEGMENT
) {
146 UChar32 trail
= iter
.next(&iter
);
147 if(U16_IS_TRAIL(trail
)) {
148 if(state
== ITER_IN_FCD_SEGMENT
) { ++pos
; }
149 } else if(trail
>= 0) {
150 iter
.previous(&iter
);
154 U_ASSERT(pos
< normalized
.length());
156 if(U16_IS_TRAIL(trail
= normalized
[pos
])) { ++pos
; }
// Returns the next code point, switching on the iterator state:
//  - ITER_CHECK_FWD: c is read from `iter`. If CollationFCD::hasTccc(c) holds
//    and either c may be a Tibetan composite vowel or the unit now current in
//    `iter` has hasLccc, the read is undone and nextSegment() is invoked.
//    A following unit is then read as `trail`: a trail surrogate is combined
//    with c through U16_GET_SUPPLEMENTARY and returned; any other
//    non-negative unit is pushed back with iter.previous().
//  - ITER_IN_FCD_SEGMENT with pos != limit: c comes from uiter_next32() and
//    pos grows by U16_LENGTH(c).
//  - state >= IN_NORM_ITER_AT_LIMIT with pos != normalized.length(): c comes
//    from normalized.char32At(pos) and pos grows by U16_LENGTH(c).
// NOTE(review): the declaration of c, the handling of a failed nextSegment(),
// the condition under which `trail` is read, the remaining branch and the
// final returns are not visible in this listing — confirm against the full file.
162 FCDUIterCollationIterator::nextCodePoint(UErrorCode
&errorCode
) {
165 if(state
== ITER_CHECK_FWD
) {
166 c
= iter
.next(&iter
);
170 if(CollationFCD::hasTccc(c
)) {
171 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
172 CollationFCD::hasLccc(iter
.current(&iter
))) {
// Undo the read of c before handing over to nextSegment().
173 iter
.previous(&iter
);
174 if(!nextSegment(errorCode
)) {
181 UChar32 trail
= iter
.next(&iter
);
182 if(U16_IS_TRAIL(trail
)) {
183 return U16_GET_SUPPLEMENTARY(c
, trail
);
184 } else if(trail
>= 0) {
// Not a trail surrogate: undo the read so the unit is not consumed.
185 iter
.previous(&iter
);
189 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= limit
) {
190 c
= uiter_next32(&iter
);
191 pos
+= U16_LENGTH(c
);
194 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= normalized
.length()) {
195 c
= normalized
.char32At(pos
);
196 pos
+= U16_LENGTH(c
);
// Returns the previous code point, switching on the iterator state:
//  - ITER_CHECK_BWD: c is read backward from `iter`. If CollationFCD::hasLccc(c)
//    holds and either c may be a Tibetan composite vowel or the unit before it
//    (read into `prev`) has hasTccc, previousSegment() is invoked. When c is a
//    trail surrogate and `prev` is a lead surrogate, the pair is combined
//    through U16_GET_SUPPLEMENTARY and returned.
//  - ITER_IN_FCD_SEGMENT with pos != start: c comes from uiter_previous32()
//    and pos shrinks by U16_LENGTH(c).
//  - state >= IN_NORM_ITER_AT_LIMIT with pos != 0: c comes from
//    normalized.char32At(pos - 1) and pos shrinks by U16_LENGTH(c).
// NOTE(review): many lines are missing from this listing — the declaration of
// c, the condition guarding the assignment state = ITER_IN_FCD_SEGMENT, the
// code that restores the iterator position after reading `prev`, the handling
// of a failed previousSegment(), the remaining branch and the final returns.
// Confirm against the full file before relying on this summary.
205 FCDUIterCollationIterator::previousCodePoint(UErrorCode
&errorCode
) {
208 if(state
== ITER_CHECK_BWD
) {
209 c
= iter
.previous(&iter
);
212 state
= ITER_IN_FCD_SEGMENT
;
215 if(CollationFCD::hasLccc(c
)) {
216 UChar32 prev
= U_SENTINEL
;
217 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
218 CollationFCD::hasTccc(prev
= iter
.previous(&iter
))) {
223 if(!previousSegment(errorCode
)) {
228 // hasLccc(trail)=true for all trail surrogates
229 if(U16_IS_TRAIL(c
)) {
231 prev
= iter
.previous(&iter
);
233 if(U16_IS_LEAD(prev
)) {
234 return U16_GET_SUPPLEMENTARY(prev
, c
);
242 } else if(state
== ITER_IN_FCD_SEGMENT
&& pos
!= start
) {
243 c
= uiter_previous32(&iter
);
244 pos
-= U16_LENGTH(c
);
247 } else if(state
>= IN_NORM_ITER_AT_LIMIT
&& pos
!= 0) {
248 c
= normalized
.char32At(pos
- 1);
249 pos
-= U16_LENGTH(c
);
// Skips forward: the loop keeps calling this class's own nextCodePoint() while
// num > 0 and the call returns a non-negative value. errorCode is passed
// through to nextCodePoint().
// NOTE(review): the loop body is not visible in this listing; presumably it
// decrements num — confirm.
258 FCDUIterCollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
259 // Specify the class to avoid a virtual-function indirection.
260 // In Java, we would declare this class final.
261 while(num
> 0 && FCDUIterCollationIterator::nextCodePoint(errorCode
) >= 0) {
// Skips backward: the loop keeps calling this class's own previousCodePoint()
// while num > 0 and the call returns a non-negative value. errorCode is passed
// through to previousCodePoint().
// NOTE(review): the loop body is not visible in this listing; presumably it
// decrements num — confirm.
267 FCDUIterCollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
268 // Specify the class to avoid a virtual-function indirection.
269 // In Java, we would declare this class final.
270 while(num
> 0 && FCDUIterCollationIterator::previousCodePoint(errorCode
) >= 0) {
// Changes the state machine so that iteration can continue in the forward
// direction. The assertion lists the states in which it may be called:
// ITER_CHECK_BWD, ITER_IN_FCD_SEGMENT with pos == limit, or a normalized
// state (>= IN_NORM_ITER_AT_LIMIT) with pos at the end of `normalized`.
//  - From ITER_CHECK_BWD: start and pos are set to the current index of
//    `iter`, and the state becomes either ITER_CHECK_FWD or
//    ITER_IN_FCD_SEGMENT.
//  - Otherwise (end of the current segment reached): for a normalized segment
//    in state IN_NORM_ITER_AT_START, `iter` is moved by limit - start relative
//    to UITER_CURRENT; the state then becomes ITER_CHECK_FWD.
// NOTE(review): the condition choosing between ITER_CHECK_FWD and
// ITER_IN_FCD_SEGMENT, the body of the "extend it forward" branch and the
// block delimiters are not visible in this listing — confirm.
276 FCDUIterCollationIterator::switchToForward() {
277 U_ASSERT(state
== ITER_CHECK_BWD
||
278 (state
== ITER_IN_FCD_SEGMENT
&& pos
== limit
) ||
279 (state
>= IN_NORM_ITER_AT_LIMIT
&& pos
== normalized
.length()));
280 if(state
== ITER_CHECK_BWD
) {
281 // Turn around from backward checking.
282 start
= pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
284 state
= ITER_CHECK_FWD
; // Check forward.
285 } else { // pos < limit
286 state
= ITER_IN_FCD_SEGMENT
; // Stay in FCD segment.
289 // Reached the end of the FCD segment.
290 if(state
== ITER_IN_FCD_SEGMENT
) {
291 // The input text segment is FCD, extend it forward.
293 // The input text segment needed to be normalized.
294 // Switch to checking forward from it.
295 if(state
== IN_NORM_ITER_AT_START
) {
296 iter
.move(&iter
, limit
- start
, UITER_CURRENT
);
300 state
= ITER_CHECK_FWD
;
// Examines the text ahead of `iter` and sets up the next segment to read.
// Returns FALSE immediately if errorCode already indicates failure, and FALSE
// if normalize() fails. Must be entered in state ITER_CHECK_FWD (asserted).
//
// Visible steps:
//  - pos is set to the current index of `iter`.
//  - Each fetched code point c gets its fcd16 value from nfcImpl.getFCD16();
//    leadCC is the upper byte (fcd16 >> 8), and prevCC is later set to the
//    lower byte of fcd16.
//  - leadCC == 0 with a non-empty s marks a boundary before c, so the read of
//    c is undone.
//  - If leadCC != 0 and either prevCC > leadCC or fcd16 is that of a Tibetan
//    composite vowel, the check fails: reading continues until a code point
//    with getFCD16() <= 0xff is found (that read is undone), s is passed to
//    normalize(), limit becomes pos + s.length() and the state becomes
//    IN_NORM_ITER_AT_LIMIT.
//  - If the check never fails: limit becomes pos + s.length(), `iter` is moved
//    back by s.length() relative to UITER_CURRENT, and the state becomes
//    ITER_IN_FCD_SEGMENT.
// NOTE(review): the declarations of s and prevCC, the loop headers, the
// end-of-input checks, the appends to s, the reset of pos and the return
// statements are not visible in this listing — confirm against the full file.
305 FCDUIterCollationIterator::nextSegment(UErrorCode
&errorCode
) {
306 if(U_FAILURE(errorCode
)) { return FALSE
; }
307 U_ASSERT(state
== ITER_CHECK_FWD
);
308 // The input text [start..(iter index)[ passes the FCD check.
309 pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
310 // Collect the characters being checked, in case they need to be normalized.
314 // Fetch the next character and its fcd16 value.
315 UChar32 c
= uiter_next32(&iter
);
317 uint16_t fcd16
= nfcImpl
.getFCD16(c
);
318 uint8_t leadCC
= (uint8_t)(fcd16
>> 8);
319 if(leadCC
== 0 && !s
.isEmpty()) {
320 // FCD boundary before this character.
321 uiter_previous32(&iter
);
325 if(leadCC
!= 0 && (prevCC
> leadCC
|| CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
326 // Fails FCD check. Find the next FCD boundary and normalize.
328 c
= uiter_next32(&iter
);
330 if(nfcImpl
.getFCD16(c
) <= 0xff) {
331 uiter_previous32(&iter
);
336 if(!normalize(s
, errorCode
)) { return FALSE
; }
338 limit
= pos
+ s
.length();
339 state
= IN_NORM_ITER_AT_LIMIT
;
343 prevCC
= (uint8_t)fcd16
;
345 // FCD boundary after the last character.
349 limit
= pos
+ s
.length();
350 U_ASSERT(pos
!= limit
);
351 iter
.move(&iter
, -s
.length(), UITER_CURRENT
);
352 state
= ITER_IN_FCD_SEGMENT
;
// Changes the state machine so that iteration can continue in the backward
// direction. The assertion lists the states in which it may be called:
// ITER_CHECK_FWD, ITER_IN_FCD_SEGMENT with pos == start, or a normalized
// state (>= IN_NORM_ITER_AT_LIMIT) with pos == 0.
//  - From ITER_CHECK_FWD: limit and pos are set to the current index of
//    `iter`, and the state becomes either ITER_CHECK_BWD or
//    ITER_IN_FCD_SEGMENT.
//  - Otherwise (start of the current segment reached): for a normalized
//    segment in state IN_NORM_ITER_AT_LIMIT, `iter` is moved by start - limit
//    relative to UITER_CURRENT; the state then becomes ITER_CHECK_BWD.
// NOTE(review): the condition choosing between ITER_CHECK_BWD and
// ITER_IN_FCD_SEGMENT, the body of the "extend it backward" branch and the
// block delimiters are not visible in this listing — confirm.
357 FCDUIterCollationIterator::switchToBackward() {
358 U_ASSERT(state
== ITER_CHECK_FWD
||
359 (state
== ITER_IN_FCD_SEGMENT
&& pos
== start
) ||
360 (state
>= IN_NORM_ITER_AT_LIMIT
&& pos
== 0));
361 if(state
== ITER_CHECK_FWD
) {
362 // Turn around from forward checking.
363 limit
= pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
365 state
= ITER_CHECK_BWD
; // Check backward.
366 } else { // pos > start
367 state
= ITER_IN_FCD_SEGMENT
; // Stay in FCD segment.
370 // Reached the start of the FCD segment.
371 if(state
== ITER_IN_FCD_SEGMENT
) {
372 // The input text segment is FCD, extend it backward.
374 // The input text segment needed to be normalized.
375 // Switch to checking backward from it.
376 if(state
== IN_NORM_ITER_AT_LIMIT
) {
377 iter
.move(&iter
, start
- limit
, UITER_CURRENT
);
381 state
= ITER_CHECK_BWD
;
// Examines the text behind `iter` and sets up the previous segment to read.
// Returns FALSE immediately if errorCode already indicates failure, and FALSE
// if normalize() fails. Must be entered in state ITER_CHECK_BWD (asserted).
//
// Visible steps:
//  - pos is set to the current index of `iter`.
//  - Each code point c fetched backward gets its fcd16 value from
//    nfcImpl.getFCD16(); trailCC is the lower byte of fcd16, and nextCC is
//    later set to the upper byte (fcd16 >> 8).
//  - trailCC == 0 with a non-empty s marks a boundary after c.
//  - If trailCC != 0 and either (nextCC != 0 and trailCC > nextCC) or fcd16
//    is that of a Tibetan composite vowel, the check fails: reading continues
//    backward while fcd16 > 0xff, s is passed to normalize(), start becomes
//    pos - s.length(), the state becomes IN_NORM_ITER_AT_START and pos is set
//    to normalized.length().
//  - If the check never fails: start becomes pos - s.length(), `iter` is moved
//    forward by s.length() relative to UITER_CURRENT, and the state becomes
//    ITER_IN_FCD_SEGMENT.
// NOTE(review): the declarations of s and nextCC, the outer loop header, the
// start-of-input checks, the updates of s, the condition around the
// uiter_next32() step and the return statements are not visible in this
// listing — confirm against the full file.
386 FCDUIterCollationIterator::previousSegment(UErrorCode
&errorCode
) {
387 if(U_FAILURE(errorCode
)) { return FALSE
; }
388 U_ASSERT(state
== ITER_CHECK_BWD
);
389 // The input text [(iter index)..limit[ passes the FCD check.
390 pos
= iter
.getIndex(&iter
, UITER_CURRENT
);
391 // Collect the characters being checked, in case they need to be normalized.
395 // Fetch the previous character and its fcd16 value.
396 UChar32 c
= uiter_previous32(&iter
);
398 uint16_t fcd16
= nfcImpl
.getFCD16(c
);
399 uint8_t trailCC
= (uint8_t)fcd16
;
400 if(trailCC
== 0 && !s
.isEmpty()) {
401 // FCD boundary after this character.
406 if(trailCC
!= 0 && ((nextCC
!= 0 && trailCC
> nextCC
) ||
407 CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
408 // Fails FCD check. Find the previous FCD boundary and normalize.
409 while(fcd16
> 0xff) {
410 c
= uiter_previous32(&iter
);
412 fcd16
= nfcImpl
.getFCD16(c
);
414 (void)uiter_next32(&iter
);
420 if(!normalize(s
, errorCode
)) { return FALSE
; }
422 start
= pos
- s
.length();
423 state
= IN_NORM_ITER_AT_START
;
424 pos
= normalized
.length();
427 nextCC
= (uint8_t)(fcd16
>> 8);
429 // FCD boundary before the following character.
433 start
= pos
- s
.length();
434 U_ASSERT(pos
!= start
);
435 iter
.move(&iter
, s
.length(), UITER_CURRENT
);
436 state
= ITER_IN_FCD_SEGMENT
;
// Decomposes s into the member buffer `normalized` by calling
// nfcImpl.decompose(s, normalized, errorCode).
// Precondition (asserted): errorCode indicates success on entry.
// Returns U_SUCCESS(errorCode) as evaluated after the decompose() call, so a
// failure reported by decompose() yields a false result.
// NOTE(review): the return type and closing brace are not visible in this listing.
441 FCDUIterCollationIterator::normalize(const UnicodeString
&s
, UErrorCode
&errorCode
) {
442 // NFD without argument checking.
443 U_ASSERT(U_SUCCESS(errorCode
));
444 nfcImpl
.decompose(s
, normalized
, errorCode
);
445 return U_SUCCESS(errorCode
);
450 #endif // !UCONFIG_NO_COLLATION