]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/utf16collationiterator.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / utf16collationiterator.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * utf16collationiterator.h
7 *
8 * created on: 2010oct27
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __UTF16COLLATIONITERATOR_H__
13 #define __UTF16COLLATIONITERATOR_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "cmemory.h"
20 #include "collation.h"
21 #include "collationdata.h"
22 #include "collationiterator.h"
23 #include "normalizer2impl.h"
24
25 U_NAMESPACE_BEGIN
26
27 /**
28 * UTF-16 collation element and character iterator.
29 * Handles normalized UTF-16 text inline, with length or NUL-terminated.
30 * Unnormalized text is handled by a subclass.
31 */
32 class U_I18N_API UTF16CollationIterator : public CollationIterator {
33 public:
34 UTF16CollationIterator(const CollationData *d, UBool numeric,
35 const UChar *s, const UChar *p, const UChar *lim)
36 : CollationIterator(d, numeric),
37 start(s), pos(p), limit(lim) {}
38
39 UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
40
41 virtual ~UTF16CollationIterator();
42
43 virtual UBool operator==(const CollationIterator &other) const;
44
45 virtual void resetToOffset(int32_t newOffset);
46
47 virtual int32_t getOffset() const;
48
49 void setText(const UChar *s, const UChar *lim) {
50 reset();
51 start = pos = s;
52 limit = lim;
53 }
54
55 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
56
57 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
58
59 protected:
60 // Copy constructor only for subclasses which set the pointers.
61 UTF16CollationIterator(const UTF16CollationIterator &other)
62 : CollationIterator(other),
63 start(NULL), pos(NULL), limit(NULL) {}
64
65 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
66
67 virtual UChar handleGetTrailSurrogate();
68
69 virtual UBool foundNULTerminator();
70
71 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
72
73 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
74
75 // UTF-16 string pointers.
76 // limit can be NULL for NUL-terminated strings.
77 const UChar *start, *pos, *limit;
78 };
79
80 /**
81 * Incrementally checks the input text for FCD and normalizes where necessary.
82 */
83 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
84 public:
85 FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
86 const UChar *s, const UChar *p, const UChar *lim)
87 : UTF16CollationIterator(data, numeric, s, p, lim),
88 rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
89 nfcImpl(data->nfcImpl),
90 checkDir(1) {}
91
92 FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
93
94 virtual ~FCDUTF16CollationIterator();
95
96 virtual UBool operator==(const CollationIterator &other) const;
97
98 virtual void resetToOffset(int32_t newOffset);
99
100 virtual int32_t getOffset() const;
101
102 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
103
104 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
105
106 protected:
107 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
108
109 virtual UBool foundNULTerminator();
110
111 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
112
113 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
114
115 private:
116 /**
117 * Switches to forward checking if possible.
118 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
119 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
120 */
121 void switchToForward();
122
123 /**
124 * Extend the FCD text segment forward or normalize around pos.
125 * To be called when checkDir > 0 && pos != limit.
126 * @return TRUE if success, checkDir == 0 and pos != limit
127 */
128 UBool nextSegment(UErrorCode &errorCode);
129
130 /**
131 * Switches to backward checking.
132 * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
133 * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
134 */
135 void switchToBackward();
136
137 /**
138 * Extend the FCD text segment backward or normalize around pos.
139 * To be called when checkDir < 0 && pos != start.
140 * @return TRUE if success, checkDir == 0 and pos != start
141 */
142 UBool previousSegment(UErrorCode &errorCode);
143
144 UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
145
146 // Text pointers: The input text is [rawStart, rawLimit[
147 // where rawLimit can be NULL for NUL-terminated text.
148 //
149 // checkDir > 0:
150 //
151 // The input text [segmentStart..pos[ passes the FCD check.
152 // Moving forward checks incrementally.
153 // segmentLimit is undefined. limit == rawLimit.
154 //
155 // checkDir < 0:
156 // The input text [pos..segmentLimit[ passes the FCD check.
157 // Moving backward checks incrementally.
158 // segmentStart is undefined, start == rawStart.
159 //
160 // checkDir == 0:
161 //
162 // The input text [segmentStart..segmentLimit[ is being processed.
163 // These pointers are at FCD boundaries.
164 // Either this text segment already passes the FCD check
165 // and segmentStart==start<=pos<=limit==segmentLimit,
166 // or the current segment had to be normalized so that
167 // [segmentStart..segmentLimit[ turned into the normalized string,
168 // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
169 const UChar *rawStart;
170 const UChar *segmentStart;
171 const UChar *segmentLimit;
172 // rawLimit==NULL for a NUL-terminated string.
173 const UChar *rawLimit;
174
175 const Normalizer2Impl &nfcImpl;
176 UnicodeString normalized;
177 // Direction of incremental FCD check. See comments before rawStart.
178 int8_t checkDir;
179 };
180
181 U_NAMESPACE_END
182
183 #endif // !UCONFIG_NO_COLLATION
184 #endif // __UTF16COLLATIONITERATOR_H__