2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * utf16collationiterator.h
8 * created on: 2010oct27
9 * created by: Markus W. Scherer
12 #ifndef __UTF16COLLATIONITERATOR_H__
13 #define __UTF16COLLATIONITERATOR_H__
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_COLLATION
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "collationiterator.h"
23 #include "normalizer2impl.h"
28 * UTF-16 collation element and character iterator.
29 * Handles normalized UTF-16 text inline, with length or NUL-terminated.
30 * Unnormalized text is handled by a subclass.
32 class U_I18N_API UTF16CollationIterator
: public CollationIterator
{
// Constructs an iterator over UTF-16 text.
// d and numeric are passed straight through to the CollationIterator base constructor;
// s, p and lim initialize the start, pos and limit text pointers, in that order.
// The body is empty, so the pointers are stored as given without any validation here.
34 UTF16CollationIterator(const CollationData
*d
, UBool numeric
,
35 const UChar
*s
, const UChar
*p
, const UChar
*lim
)
36 : CollationIterator(d
, numeric
),
37 start(s
), pos(p
), limit(lim
) {}
// Constructs from an existing UTF16CollationIterator plus a separate text pointer newText.
// Declaration only; the definition is not part of this header section.
// NOTE(review): presumably re-bases other's start/pos/limit onto newText -- confirm in the definition.
39 UTF16CollationIterator(const UTF16CollationIterator
&other
, const UChar
*newText
);
// Virtual destructor; only declared here, not defined inline.
41 virtual ~UTF16CollationIterator();
// Virtual, const equality test against any CollationIterator (not just this subclass).
// Declaration only; the comparison criteria are defined elsewhere.
43 virtual UBool
operator==(const CollationIterator
&other
) const;
// Virtual; takes the target offset as an int32_t and returns nothing.
// NOTE(review): presumably the offset is counted in UChar units from start -- confirm in the definition.
45 virtual void resetToOffset(int32_t newOffset
);
// Virtual, const accessor returning an int32_t offset.
// NOTE(review): presumably the counterpart of resetToOffset() (distance of pos from start) -- confirm.
47 virtual int32_t getOffset() const;
// Non-virtual and defined inline; takes a text pointer s and a limit pointer lim.
// NOTE(review): presumably re-targets the iterator at the text [s, lim[ -- confirm against the full body.
49 void setText(const UChar
*s
, const UChar
*lim
) {
// Virtual; returns a UChar32 and takes the error code by non-const reference.
// NOTE(review): presumably reads the code point at pos and moves pos forward past it -- confirm.
55 virtual UChar32
nextCodePoint(UErrorCode
&errorCode
);
// Virtual; returns a UChar32 and takes the error code by non-const reference.
// NOTE(review): presumably reads the code point before pos and moves pos back over it -- confirm.
57 virtual UChar32
previousCodePoint(UErrorCode
&errorCode
);
60 // Copy constructor only for subclasses which set the pointers.
// Copies just the CollationIterator base state from other.
// start, pos and limit are deliberately initialized to NULL rather than copied,
// so the object is not usable for text access until the subclass assigns them.
61 UTF16CollationIterator(const UTF16CollationIterator
&other
)
62 : CollationIterator(other
),
63 start(NULL
), pos(NULL
), limit(NULL
) {}
// Virtual hook returning a uint32_t.
// c is a non-const UChar32 reference, so the implementation can hand a code point back to the caller;
// errorCode is likewise passed by non-const reference.
// NOTE(review): presumably returns the 32-bit collation element for the next code point -- confirm.
65 virtual uint32_t handleNextCE32(UChar32
&c
, UErrorCode
&errorCode
);
// Virtual hook taking no arguments and returning a single UChar.
// NOTE(review): presumably yields the trail surrogate at pos (if any) and consumes it -- confirm.
67 virtual UChar
handleGetTrailSurrogate();
// Virtual hook taking no arguments and returning a UBool.
// NOTE(review): presumably relevant only when limit is NULL (NUL-terminated text) -- confirm.
69 virtual UBool
foundNULTerminator();
// Virtual; takes a count num (int32_t) and the error code by non-const reference, returns nothing.
// NOTE(review): presumably advances pos by num code points -- confirm.
71 virtual void forwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
);
// Virtual; takes a count num (int32_t) and the error code by non-const reference, returns nothing.
// NOTE(review): presumably moves pos back by num code points -- confirm.
73 virtual void backwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
);
75 // UTF-16 string pointers.
76 // limit can be NULL for NUL-terminated strings.
// All three point to const UChar, so the text is never modified through them.
// The inline constructor sets them from its s, p and lim arguments;
// the subclass-only copy constructor leaves all three NULL.
77 const UChar
*start
, *pos
, *limit
;
81 * Incrementally checks the input text for FCD and normalizes where necessary.
83 class U_I18N_API FCDUTF16CollationIterator
: public UTF16CollationIterator
{
// Constructs an FCD-checking iterator over UTF-16 text.
// data, numeric, s, p and lim are forwarded unchanged to the UTF16CollationIterator constructor.
// The raw-text bounds are recorded separately: rawStart = s and rawLimit = lim.
// segmentStart begins at p, and segmentLimit begins as NULL.
// nfcImpl is a reference member bound to data->nfcImpl, so data must be non-NULL here.
// NOTE(review): the initializer list continues past nfcImpl (trailing comma).
85 FCDUTF16CollationIterator(const CollationData
*data
, UBool numeric
,
86 const UChar
*s
, const UChar
*p
, const UChar
*lim
)
87 : UTF16CollationIterator(data
, numeric
, s
, p
, lim
),
88 rawStart(s
), segmentStart(p
), segmentLimit(NULL
), rawLimit(lim
),
89 nfcImpl(data
->nfcImpl
),
// Constructs from an existing FCDUTF16CollationIterator plus a separate text pointer newText.
// Mirrors the (other, newText) constructor declared on UTF16CollationIterator.
// Declaration only; the definition is not part of this header section.
92 FCDUTF16CollationIterator(const FCDUTF16CollationIterator
&other
, const UChar
*newText
);
// Virtual destructor; only declared here, not defined inline.
94 virtual ~FCDUTF16CollationIterator();
// Same signature as UTF16CollationIterator::operator==, redeclared here
// so that this subclass supplies its own comparison.
96 virtual UBool
operator==(const CollationIterator
&other
) const;
// Same signature as UTF16CollationIterator::resetToOffset, redeclared for this subclass.
// NOTE(review): presumably also has to reset the FCD segment pointers and check direction -- confirm.
98 virtual void resetToOffset(int32_t newOffset
);
// Same signature as UTF16CollationIterator::getOffset, redeclared for this subclass.
// NOTE(review): presumably reports an offset into the raw input text even while iterating
// over the normalized buffer -- confirm in the definition.
100 virtual int32_t getOffset() const;
// Same signature as UTF16CollationIterator::nextCodePoint, redeclared for this subclass.
// NOTE(review): presumably adds the incremental forward FCD check -- confirm.
102 virtual UChar32
nextCodePoint(UErrorCode
&errorCode
);
// Same signature as UTF16CollationIterator::previousCodePoint, redeclared for this subclass.
// NOTE(review): presumably adds the incremental backward FCD check -- confirm.
104 virtual UChar32
previousCodePoint(UErrorCode
&errorCode
);
// Same signature as UTF16CollationIterator::handleNextCE32, redeclared for this subclass.
// c and errorCode are both non-const references, i.e. writable by the implementation.
107 virtual uint32_t handleNextCE32(UChar32
&c
, UErrorCode
&errorCode
);
// Same signature as UTF16CollationIterator::foundNULTerminator, redeclared for this subclass.
// NOTE(review): presumably must also fix up rawLimit, which can be NULL for NUL-terminated text -- confirm.
109 virtual UBool
foundNULTerminator();
// Same signature as UTF16CollationIterator::forwardNumCodePoints, redeclared for this subclass.
111 virtual void forwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
);
// Same signature as UTF16CollationIterator::backwardNumCodePoints, redeclared for this subclass.
113 virtual void backwardNumCodePoints(int32_t num
, UErrorCode
&errorCode
);
117 * Switches to forward checking if possible.
118 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
119 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
// Non-virtual helper; takes no arguments, returns nothing, and cannot report an error.
121 void switchToForward();
124 * Extend the FCD text segment forward or normalize around pos.
125 * To be called when checkDir > 0 && pos != limit.
126 * @return TRUE if success, checkDir == 0 and pos != limit
// Non-virtual helper; returns a UBool and takes the error code by non-const reference.
128 UBool
nextSegment(UErrorCode
&errorCode
);
131 * Switches to backward checking.
132 * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
133 * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
// Non-virtual helper; takes no arguments, returns nothing, and cannot report an error.
135 void switchToBackward();
138 * Extend the FCD text segment backward or normalize around pos.
139 * To be called when checkDir < 0 && pos != start.
140 * @return TRUE if success, checkDir == 0 and pos != start
// Non-virtual helper; returns a UBool and takes the error code by non-const reference.
142 UBool
previousSegment(UErrorCode
&errorCode
);
// Non-virtual helper; returns a UBool and takes the error code by non-const reference.
// from and to are const UChar pointers delimiting the text to work on.
// NOTE(review): presumably normalizes [from, to[ into the `normalized` member and points
// start/pos/limit at that buffer, as the text-pointer comments describe -- confirm.
144 UBool
normalize(const UChar
*from
, const UChar
*to
, UErrorCode
&errorCode
);
146 // Text pointers: The input text is [rawStart, rawLimit[
147 // where rawLimit can be NULL for NUL-terminated text.
//
// checkDir > 0:
//
151 // The input text [segmentStart..pos[ passes the FCD check.
152 // Moving forward checks incrementally.
153 // segmentLimit is undefined. limit == rawLimit.
//
// checkDir < 0:
//
156 // The input text [pos..segmentLimit[ passes the FCD check.
157 // Moving backward checks incrementally.
158 // segmentStart is undefined, start == rawStart.
//
// checkDir == 0:
//
162 // The input text [segmentStart..segmentLimit[ is being processed.
163 // These pointers are at FCD boundaries.
164 // Either this text segment already passes the FCD check
165 // and segmentStart==start<=pos<=limit==segmentLimit,
166 // or the current segment had to be normalized so that
167 // [segmentStart..segmentLimit[ turned into the normalized string,
168 // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
// Start of the raw input text; set from the constructor's s argument.
169 const UChar
*rawStart
;
// Start of the current FCD segment; set from the constructor's p argument.
170 const UChar
*segmentStart
;
// End of the current FCD segment; the constructor initializes it to NULL.
171 const UChar
*segmentLimit
;
172 // rawLimit==NULL for a NUL-terminated string.
// Set from the constructor's lim argument.
173 const UChar
*rawLimit
;
// Reference member bound to data->nfcImpl in the constructor; never rebound afterwards.
175 const Normalizer2Impl
&nfcImpl
;
// Holds the normalized form of a segment that failed the FCD check;
// start/pos/limit then point into this string's buffer.
176 UnicodeString normalized
;
177 // Direction of incremental FCD check. See comments before rawStart.
183 #endif // !UCONFIG_NO_COLLATION
184 #endif // __UTF16COLLATIONITERATOR_H__