2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * utf16collationiterator.cpp
8 * created on: 2010oct27
9 * created by: Markus W. Scherer
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
18 #include "collation.h"
19 #include "collationdata.h"
20 #include "collationfcd.h"
21 #include "collationiterator.h"
22 #include "normalizer2impl.h"
24 #include "utf16collationiterator.h"
// Constructor that copies the state of `other` while re-pointing the text pointers at newText.
// The CollationIterator base is initialized from `other`.
// pos is placed at the same distance from newText as other.pos is from other.start.
// limit is rebased the same way, except that a NULL other.limit stays NULL.
// NOTE(review): the declaration of newText and any initializer for `start` are not
// visible in this excerpt (embedded numbering skips 29 and 31) -- confirm against the full file.
28 UTF16CollationIterator::UTF16CollationIterator(const UTF16CollationIterator
&other
,
30 : CollationIterator(other
),
32 pos(newText
+ (other
.pos
- other
.start
)),
33 limit(other
.limit
== NULL
? NULL
: newText
+ (other
.limit
- other
.start
)) {
// Destructor. The body is empty: nothing is released explicitly here.
36 UTF16CollationIterator::~UTF16CollationIterator() {}
// Equality comparison against another CollationIterator.
// Returns FALSE as soon as the base-class CollationIterator::operator== reports a mismatch.
// Otherwise `other` is static_cast to UTF16CollationIterator and the result is whether
// both iterators are at the same distance from their respective start pointers.
// The text itself is not compared (see the comment below).
// NOTE(review): the static_cast presumably relies on the base-class comparison having
// already established that the dynamic types match -- TODO confirm.
39 UTF16CollationIterator::operator==(const CollationIterator
&other
) const {
40 if(!CollationIterator::operator==(other
)) { return FALSE
; }
41 const UTF16CollationIterator
&o
= static_cast<const UTF16CollationIterator
&>(other
);
42 // Compare the iterator state but not the text: Assume that the caller does that.
43 return (pos
- start
) == (o
.pos
- o
.start
);
// Repositions the iterator: pos becomes start + newOffset.
// No range check on newOffset is visible in this excerpt.
// NOTE(review): the embedded numbering skips a line (48) before the assignment, so an
// additional statement may exist that is not shown here.
47 UTF16CollationIterator::resetToOffset(int32_t newOffset
) {
49 pos
= start
+ newOffset
;
// Returns the current position as an offset from start: (pos - start), cast to int32_t.
53 UTF16CollationIterator::getOffset() const {
54 return (int32_t)(pos
- start
);
// Returns a CE32 value for the code unit passed back through the reference parameter c.
// Two exits are visible:
//  - Collation::FALLBACK_CE32, and
//  - the lookup UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c).
// The UErrorCode parameter is unnamed, so it is not used in this function.
// NOTE(review): the condition guarding the fallback return and the code that assigns c
// are not visible in this excerpt.
58 UTF16CollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
& /*errorCode*/) {
61 return Collation::FALLBACK_CE32
;
64 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
// Looks at the code unit at pos for a trail surrogate.
// Returns 0 when pos has already reached limit.
// Otherwise *pos is stored in `trail`; if it satisfies U16_IS_TRAIL, pos is advanced past it.
// NOTE(review): the declaration of `trail` and the final return statement are not
// visible in this excerpt.
68 UTF16CollationIterator::handleGetTrailSurrogate() {
69 if(pos
== limit
) { return 0; }
71 if(U16_IS_TRAIL(trail
= *pos
)) { ++pos
; }
// NOTE(review): only the opening line of this definition is visible in this excerpt;
// the body is not shown, so its behavior cannot be documented from here.
76 UTF16CollationIterator::foundNULTerminator() {
// Reads a code point in the forward direction.
// Visible logic:
//  - c == 0 together with limit == NULL is singled out as a special case
//    (its handling is not visible in this excerpt);
//  - if c is a lead surrogate, pos has not reached limit, and *pos is a trail surrogate,
//    the combined value U16_GET_SUPPLEMENTARY(c, trail) is returned.
// The UErrorCode parameter is unnamed, so it is not used in this function.
// NOTE(review): the code that reads c, advances pos, and returns in the other cases
// is not visible here.
86 UTF16CollationIterator::nextCodePoint(UErrorCode
& /*errorCode*/) {
91 if(c
== 0 && limit
== NULL
) {
97 if(U16_IS_LEAD(c
) && pos
!= limit
&& U16_IS_TRAIL(trail
= *pos
)) {
99 return U16_GET_SUPPLEMENTARY(c
, trail
);
// Reads a code point in the backward direction.
// Visible logic: if c is a trail surrogate, pos has not reached start, and the unit
// just before pos is a lead surrogate, the combined value
// U16_GET_SUPPLEMENTARY(lead, c) is returned.
// The UErrorCode parameter is unnamed, so it is not used in this function.
// NOTE(review): the code that reads c, moves pos, and returns in the other cases
// is not visible in this excerpt.
106 UTF16CollationIterator::previousCodePoint(UErrorCode
& /*errorCode*/) {
112 if(U16_IS_TRAIL(c
) && pos
!= start
&& U16_IS_LEAD(lead
= *(pos
- 1))) {
114 return U16_GET_SUPPLEMENTARY(lead
, c
);
// Moves forward over code points, bounded by num.
// The loop continues while num > 0 and pos has not reached limit.
// Inside the loop two conditions are tested:
//  - c == 0 with limit == NULL (handling not visible in this excerpt);
//  - c is a lead surrogate, pos != limit, and *pos is a trail surrogate
//    (presumably so that the pair is stepped over as one code point -- TODO confirm,
//    the branch body is not visible).
// The UErrorCode parameter is unnamed, so it is not used in this function.
121 UTF16CollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
122 while(num
> 0 && pos
!= limit
) {
124 if(c
== 0 && limit
== NULL
) {
130 if(U16_IS_LEAD(c
) && pos
!= limit
&& U16_IS_TRAIL(*pos
)) {
// Moves backward over code points, bounded by num.
// The loop continues while num > 0 and pos has not reached start.
// Inside the loop: if c is a trail surrogate, pos != start, and the unit just before
// pos is a lead surrogate, the branch is taken (presumably so that the pair is stepped
// over as one code point -- TODO confirm, the branch body is not visible in this excerpt).
// The UErrorCode parameter is unnamed, so it is not used in this function.
137 UTF16CollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
& /*errorCode*/) {
138 while(num
> 0 && pos
!= start
) {
141 if(U16_IS_TRAIL(c
) && pos
!= start
&& U16_IS_LEAD(*(pos
-1))) {
147 // FCDUTF16CollationIterator ----------------------------------------------- ***
// Constructor that copies the state of `other` while re-pointing the raw-text pointers at newText.
// Initializer list:
//  - the UTF16CollationIterator base is initialized from `other`;
//  - segmentStart, segmentLimit and rawLimit keep their distance from other.rawStart,
//    now measured from newText; a NULL segmentLimit or rawLimit stays NULL;
//  - nfcImpl, normalized and checkDir are copied from `other`.
// Body: start/pos/limit are set in one of two ways (see the comments below).
// NOTE(review): an initializer between the base class and segmentStart (embedded line 152)
// and the line joining the two statement groups in the body (embedded line 163) are not
// visible in this excerpt.
149 FCDUTF16CollationIterator::FCDUTF16CollationIterator(const FCDUTF16CollationIterator
&other
,
150 const UChar
*newText
)
151 : UTF16CollationIterator(other
),
153 segmentStart(newText
+ (other
.segmentStart
- other
.rawStart
)),
154 segmentLimit(other
.segmentLimit
== NULL
? NULL
: newText
+ (other
.segmentLimit
- other
.rawStart
)),
155 rawLimit(other
.rawLimit
== NULL
? NULL
: newText
+ (other
.rawLimit
- other
.rawStart
)),
156 nfcImpl(other
.nfcImpl
),
157 normalized(other
.normalized
),
158 checkDir(other
.checkDir
) {
// When checkDir != 0 or other.start == other.segmentStart, start/pos/limit are rebased
// onto newText using their distance from other.rawStart (a NULL limit stays NULL).
159 if(checkDir
!= 0 || other
.start
== other
.segmentStart
) {
160 start
= newText
+ (other
.start
- other
.rawStart
);
161 pos
= newText
+ (other
.pos
- other
.rawStart
);
162 limit
= other
.limit
== NULL
? NULL
: newText
+ (other
.limit
- other
.rawStart
);
// These statements point start/pos/limit into this object's own `normalized` buffer,
// keeping pos at the same distance from start as in `other`.
// Presumably this is the alternative branch of the test above -- TODO confirm.
164 start
= normalized
.getBuffer();
165 pos
= start
+ (other
.pos
- other
.start
);
166 limit
= start
+ normalized
.length();
// Destructor. The body is empty: nothing is released explicitly here.
170 FCDUTF16CollationIterator::~FCDUTF16CollationIterator() {}
// Equality comparison against another CollationIterator.
// Calls CollationIterator::operator== directly (bypassing UTF16CollationIterator) and
// returns FALSE on a mismatch; then `other` is static_cast to FCDUTF16CollationIterator.
// Further checks, in order:
//  1. checkDir must be equal in both iterators;
//  2. when checkDir == 0, both must agree on whether start == segmentStart;
//  3. if checkDir != 0 or start == segmentStart, the result is whether pos is at the
//     same distance from rawStart in both;
//  4. the remaining return compares the distance of segmentStart from rawStart and the
//     distance of pos from start.
// The text itself is not compared.
// NOTE(review): the static_cast presumably relies on the base-class comparison having
// already established that the dynamic types match -- TODO confirm.
173 FCDUTF16CollationIterator::operator==(const CollationIterator
&other
) const {
174 // Skip the UTF16CollationIterator and call its parent.
175 if(!CollationIterator::operator==(other
)) { return FALSE
; }
176 const FCDUTF16CollationIterator
&o
= static_cast<const FCDUTF16CollationIterator
&>(other
);
177 // Compare the iterator state but not the text: Assume that the caller does that.
178 if(checkDir
!= o
.checkDir
) { return FALSE
; }
179 if(checkDir
== 0 && (start
== segmentStart
) != (o
.start
== o
.segmentStart
)) { return FALSE
; }
180 if(checkDir
!= 0 || start
== segmentStart
) {
181 return (pos
- rawStart
) == (o
.pos
- o
.rawStart
);
183 return (segmentStart
- rawStart
) == (o
.segmentStart
- o
.rawStart
) &&
184 (pos
- start
) == (o
.pos
- o
.start
);
// Repositions the iterator: start, segmentStart and pos are all set to rawStart + newOffset.
// No range check on newOffset is visible in this excerpt.
// NOTE(review): embedded numbering skips lines around the assignment (190, 192 and following),
// so further statements may exist that are not shown here.
189 FCDUTF16CollationIterator::resetToOffset(int32_t newOffset
) {
191 start
= segmentStart
= pos
= rawStart
+ newOffset
;
// Returns an offset relative to rawStart, cast to int32_t. Three cases are visible:
//  - checkDir != 0 or start == segmentStart: the offset of pos;
//  - otherwise, pos == start: the offset of segmentStart;
//  - the remaining return: the offset of segmentLimit.
// In the last two cases pos is not measured against rawStart directly.
// NOTE(review): presumably because pos then points into the normalized buffer rather
// than the raw text -- TODO confirm.
197 FCDUTF16CollationIterator::getOffset() const {
198 if(checkDir
!= 0 || start
== segmentStart
) {
199 return (int32_t)(pos
- rawStart
);
200 } else if(pos
== start
) {
201 return (int32_t)(segmentStart
- rawStart
);
203 return (int32_t)(segmentLimit
- rawStart
);
// Returns a CE32 value for the code unit passed back through the reference parameter c.
// Visible logic:
//  - an early exit returning Collation::FALLBACK_CE32 (its guard is not visible);
//  - if CollationFCD::hasTccc(c) holds and either
//    CollationFCD::maybeTibetanCompositeVowel(c) holds or the next unit *pos (with
//    pos != limit) satisfies CollationFCD::hasLccc, nextSegment(errorCode) is called;
//    when that returns false, Collation::FALLBACK_CE32 is returned;
//  - a branch on checkDir == 0 with pos != limit (its body is not visible);
//  - the final lookup UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c).
// NOTE(review): the enclosing loop/branch structure and the code that assigns c are
// not visible in this excerpt.
208 FCDUTF16CollationIterator::handleNextCE32(UChar32
&c
, UErrorCode
&errorCode
) {
213 return Collation::FALLBACK_CE32
;
216 if(CollationFCD::hasTccc(c
)) {
217 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
218 (pos
!= limit
&& CollationFCD::hasLccc(*pos
))) {
220 if(!nextSegment(errorCode
)) {
222 return Collation::FALLBACK_CE32
;
228 } else if(checkDir
== 0 && pos
!= limit
) {
235 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie
, c
);
// Steps pos back by one unit (--pos) and sets both limit and rawLimit to that position.
// NOTE(review): presumably pos had just been advanced past a NUL terminator, so the
// decrement places the limits on the terminator itself -- TODO confirm against callers.
// Embedded numbering skips lines 240 and 242 and following, so further statements may exist.
239 FCDUTF16CollationIterator::foundNULTerminator() {
241 limit
= rawLimit
= --pos
;
// Reads a code point in the forward direction.
// Visible logic:
//  - if CollationFCD::hasTccc(c) holds and either
//    CollationFCD::maybeTibetanCompositeVowel(c) holds or the next unit *pos (with
//    pos != limit) satisfies CollationFCD::hasLccc, nextSegment(errorCode) is called
//    (handling of its failure is not visible);
//  - else if c == 0 with limit == NULL: pos is stepped back by one and limit and
//    rawLimit are both set to that position;
//  - a branch on checkDir == 0 with pos != limit (its body is not visible);
//  - if c is a lead surrogate, pos != limit, and *pos is a trail surrogate, the combined
//    value U16_GET_SUPPLEMENTARY(c, trail) is returned.
// NOTE(review): the enclosing loop/branch structure, the reading of c, and the other
// return statements are not visible in this excerpt.
249 FCDUTF16CollationIterator::nextCodePoint(UErrorCode
&errorCode
) {
257 if(CollationFCD::hasTccc(c
)) {
258 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
259 (pos
!= limit
&& CollationFCD::hasLccc(*pos
))) {
261 if(!nextSegment(errorCode
)) {
266 } else if(c
== 0 && limit
== NULL
) {
267 limit
= rawLimit
= --pos
;
271 } else if(checkDir
== 0 && pos
!= limit
) {
279 if(U16_IS_LEAD(c
) && pos
!= limit
&& U16_IS_TRAIL(trail
= *pos
)) {
281 return U16_GET_SUPPLEMENTARY(c
, trail
);
// Reads a code point in the backward direction.
// Visible logic:
//  - if CollationFCD::hasLccc(c) holds and either
//    CollationFCD::maybeTibetanCompositeVowel(c) holds or the unit before pos (with
//    pos != start) satisfies CollationFCD::hasTccc, previousSegment(errorCode) is called
//    (handling of its failure is not visible);
//  - a branch on checkDir == 0 with pos != start (its body is not visible);
//  - if c is a trail surrogate, pos != start, and the unit before pos is a lead
//    surrogate, the combined value U16_GET_SUPPLEMENTARY(lead, c) is returned.
// NOTE(review): the enclosing loop/branch structure, the reading of c, and the other
// return statements are not visible in this excerpt.
288 FCDUTF16CollationIterator::previousCodePoint(UErrorCode
&errorCode
) {
296 if(CollationFCD::hasLccc(c
)) {
297 if(CollationFCD::maybeTibetanCompositeVowel(c
) ||
298 (pos
!= start
&& CollationFCD::hasTccc(*(pos
- 1)))) {
300 if(!previousSegment(errorCode
)) {
307 } else if(checkDir
== 0 && pos
!= start
) {
315 if(U16_IS_TRAIL(c
) && pos
!= start
&& U16_IS_LEAD(lead
= *(pos
- 1))) {
317 return U16_GET_SUPPLEMENTARY(lead
, c
);
// Moves forward over code points by repeatedly calling
// FCDUTF16CollationIterator::nextCodePoint(errorCode).
// The loop stops when num is no longer > 0 or when nextCodePoint returns a negative value.
// The call is class-qualified on purpose (see the comment below).
// NOTE(review): the loop body (presumably decrementing num) is not visible in this excerpt.
324 FCDUTF16CollationIterator::forwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
325 // Specify the class to avoid a virtual-function indirection.
326 // In Java, we would declare this class final.
327 while(num
> 0 && FCDUTF16CollationIterator::nextCodePoint(errorCode
) >= 0) {
// Moves backward over code points by repeatedly calling
// FCDUTF16CollationIterator::previousCodePoint(errorCode).
// The loop stops when num is no longer > 0 or when previousCodePoint returns a negative value.
// The call is class-qualified on purpose (see the comment below).
// NOTE(review): the loop body (presumably decrementing num) is not visible in this excerpt.
333 FCDUTF16CollationIterator::backwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
) {
334 // Specify the class to avoid a virtual-function indirection.
335 // In Java, we would declare this class final.
336 while(num
> 0 && FCDUTF16CollationIterator::previousCodePoint(errorCode
) >= 0) {
// Changes the iterator state so that iteration can continue in the forward direction.
// Asserted precondition: checkDir < 0, or checkDir == 0 with pos == limit.
// Two situations are handled in the visible code:
//  - turning around from backward checking: start and segmentStart are set to pos;
//    checkDir becomes 1 if pos == segmentLimit, otherwise 0;
//  - having reached the end of the FCD segment: if start == segmentStart the segment is
//    extended forward (that code is not visible); otherwise pos, start and segmentStart
//    are all set to segmentLimit.
// NOTE(review): the conditions separating these situations and several statements
// (embedded lines 344, 348, 352-353, 357, 365 and following) are not visible in this excerpt.
342 FCDUTF16CollationIterator::switchToForward() {
343 U_ASSERT(checkDir
< 0 || (checkDir
== 0 && pos
== limit
));
345 // Turn around from backward checking.
346 start
= segmentStart
= pos
;
347 if(pos
== segmentLimit
) {
349 checkDir
= 1; // Check forward.
350 } else { // pos < segmentLimit
351 checkDir
= 0; // Stay in FCD segment.
354 // Reached the end of the FCD segment.
355 if(start
== segmentStart
) {
356 // The input text segment is FCD, extend it forward.
358 // The input text segment needed to be normalized.
359 // Switch to checking forward from it.
360 pos
= start
= segmentStart
= segmentLimit
;
361 // Note: If this segment is at the end of the input text,
362 // then it might help to return FALSE to indicate that, so that
363 // we do not have to re-check and normalize when we turn around and go backwards.
364 // However, that would complicate the call sites for an optimization of an unusual case.
// Scans forward from pos to find the end of the next text segment and, when the FCD
// check fails, normalizes the offending span.
// Returns FALSE immediately if errorCode already indicates failure, and also when
// normalize() fails.
// Asserted precondition: checkDir > 0 and pos != limit. Asserted at the end: pos != limit.
// Visible per-character logic:
//  - fcd16 is fetched with nfcImpl.nextFCD16(p, rawLimit); leadCC is its upper 8 bits;
//  - leadCC == 0 with q != pos: limit and segmentLimit are set to q;
//  - leadCC != 0 and (prevCC > leadCC or
//    CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16)): the scan continues while
//    p != rawLimit and nextFCD16 returns a value > 0xff, then normalize(pos, q, errorCode)
//    is called;
//  - prevCC is then set to the lower 8 bits of fcd16; if p == rawLimit or prevCC == 0,
//    limit and segmentLimit are set to p.
// NOTE(review): the declarations of q and prevCC, the loop headers, and the final return
// are not visible in this excerpt.
372 FCDUTF16CollationIterator::nextSegment(UErrorCode
&errorCode
) {
373 if(U_FAILURE(errorCode
)) { return FALSE
; }
374 U_ASSERT(checkDir
> 0 && pos
!= limit
);
375 // The input text [segmentStart..pos[ passes the FCD check.
376 const UChar
*p
= pos
;
379 // Fetch the next character's fcd16 value.
381 uint16_t fcd16
= nfcImpl
.nextFCD16(p
, rawLimit
);
382 uint8_t leadCC
= (uint8_t)(fcd16
>> 8);
383 if(leadCC
== 0 && q
!= pos
) {
384 // FCD boundary before the [q, p[ character.
385 limit
= segmentLimit
= q
;
388 if(leadCC
!= 0 && (prevCC
> leadCC
|| CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
389 // Fails FCD check. Find the next FCD boundary and normalize.
392 } while(p
!= rawLimit
&& nfcImpl
.nextFCD16(p
, rawLimit
) > 0xff);
393 if(!normalize(pos
, q
, errorCode
)) { return FALSE
; }
397 prevCC
= (uint8_t)fcd16
;
398 if(p
== rawLimit
|| prevCC
== 0) {
399 // FCD boundary after the last character.
400 limit
= segmentLimit
= p
;
404 U_ASSERT(pos
!= limit
);
// Changes the iterator state so that iteration can continue in the backward direction.
// Asserted precondition: checkDir > 0, or checkDir == 0 with pos == start.
// Two situations are handled in the visible code:
//  - turning around from forward checking: limit and segmentLimit are set to pos;
//    checkDir becomes -1 if pos == segmentStart, otherwise 0;
//  - having reached the start of the FCD segment: if start == segmentStart the segment
//    is extended backward (that code is not visible); otherwise pos, limit and
//    segmentLimit are all set to segmentStart.
// NOTE(review): the conditions separating these situations and several statements
// (embedded lines 412, 416, 420-421, 425, 429 and following) are not visible in this excerpt.
410 FCDUTF16CollationIterator::switchToBackward() {
411 U_ASSERT(checkDir
> 0 || (checkDir
== 0 && pos
== start
));
413 // Turn around from forward checking.
414 limit
= segmentLimit
= pos
;
415 if(pos
== segmentStart
) {
417 checkDir
= -1; // Check backward.
418 } else { // pos > segmentStart
419 checkDir
= 0; // Stay in FCD segment.
422 // Reached the start of the FCD segment.
423 if(start
== segmentStart
) {
424 // The input text segment is FCD, extend it backward.
426 // The input text segment needed to be normalized.
427 // Switch to checking backward from it.
428 pos
= limit
= segmentLimit
= segmentStart
;
// Scans backward from pos to find the start of the previous text segment and, when the
// FCD check fails, normalizes the offending span.
// Returns FALSE immediately if errorCode already indicates failure, and also when
// normalize() fails.
// Asserted precondition: checkDir < 0 and pos != start. Asserted at the end: pos != start.
// Visible per-character logic:
//  - fcd16 is fetched with nfcImpl.previousFCD16(rawStart, p); trailCC is its lower 8 bits;
//  - trailCC == 0 with q != pos: start and segmentStart are set to q;
//  - trailCC != 0 and ((nextCC != 0 and trailCC > nextCC) or
//    CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16)): the scan continues while
//    fcd16 > 0xff, p != rawStart, and the next previousFCD16 result is non-zero, then
//    normalize(q, pos, errorCode) is called;
//  - nextCC is then set to the upper 8 bits of fcd16; if p == rawStart or nextCC == 0,
//    start and segmentStart are set to p.
// NOTE(review): the declarations of q and nextCC, the loop headers, and the final return
// are not visible in this excerpt.
436 FCDUTF16CollationIterator::previousSegment(UErrorCode
&errorCode
) {
437 if(U_FAILURE(errorCode
)) { return FALSE
; }
438 U_ASSERT(checkDir
< 0 && pos
!= start
);
439 // The input text [pos..segmentLimit[ passes the FCD check.
440 const UChar
*p
= pos
;
443 // Fetch the previous character's fcd16 value.
445 uint16_t fcd16
= nfcImpl
.previousFCD16(rawStart
, p
);
446 uint8_t trailCC
= (uint8_t)fcd16
;
447 if(trailCC
== 0 && q
!= pos
) {
448 // FCD boundary after the [p, q[ character.
449 start
= segmentStart
= q
;
452 if(trailCC
!= 0 && ((nextCC
!= 0 && trailCC
> nextCC
) ||
453 CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16
))) {
454 // Fails FCD check. Find the previous FCD boundary and normalize.
457 } while(fcd16
> 0xff && p
!= rawStart
&&
458 (fcd16
= nfcImpl
.previousFCD16(rawStart
, p
)) != 0);
459 if(!normalize(q
, pos
, errorCode
)) { return FALSE
; }
463 nextCC
= (uint8_t)(fcd16
>> 8);
464 if(p
== rawStart
|| nextCC
== 0) {
465 // FCD boundary before the following character.
466 start
= segmentStart
= p
;
470 U_ASSERT(pos
!= start
);
// Decomposes the text range [from, to[ into the `normalized` buffer and points the
// iterator at that buffer.
// Asserts that errorCode indicates success on entry (no other argument checking).
// Calls nfcImpl.decompose(from, to, normalized, (int32_t)(to - from), errorCode) and
// returns FALSE if errorCode indicates failure afterwards.
// On success, start is set to normalized.getBuffer() and limit to start + normalized.length().
// NOTE(review): embedded numbering skips lines 483-484 and 487 and following, so the updates
// to the segment pointers / pos and the final return are not visible in this excerpt.
476 FCDUTF16CollationIterator::normalize(const UChar
*from
, const UChar
*to
, UErrorCode
&errorCode
) {
477 // NFD without argument checking.
478 U_ASSERT(U_SUCCESS(errorCode
));
479 nfcImpl
.decompose(from
, to
, normalized
, (int32_t)(to
- from
), errorCode
);
480 if(U_FAILURE(errorCode
)) { return FALSE
; }
481 // Switch collation processing into the FCD buffer
482 // with the result of normalizing [segmentStart, segmentLimit[.
485 start
= normalized
.getBuffer();
486 limit
= start
+ normalized
.length();
492 #endif // !UCONFIG_NO_COLLATION