]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uitercollationiterator.h
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uitercollationiterator.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * uitercollationiterator.h
7 *
8 * created on: 2012sep23 (from utf16collationiterator.h)
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __UITERCOLLATIONITERATOR_H__
13 #define __UITERCOLLATIONITERATOR_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "unicode/uiter.h"
20 #include "cmemory.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "collationiterator.h"
24 #include "normalizer2impl.h"
25
26 U_NAMESPACE_BEGIN
27
28 /**
29 * UCharIterator-based collation element and character iterator.
30 * Handles normalized text inline, with length or NUL-terminated.
31 * Unnormalized text is handled by a subclass.
32 */
33 class U_I18N_API UIterCollationIterator : public CollationIterator {
34 public:
35 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
36 : CollationIterator(d, numeric), iter(ui) {}
37
38 virtual ~UIterCollationIterator();
39
40 virtual void resetToOffset(int32_t newOffset);
41
42 virtual int32_t getOffset() const;
43
44 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
45
46 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
47
48 protected:
49 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
50
51 virtual UChar handleGetTrailSurrogate();
52
53 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
54
55 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
56
57 UCharIterator &iter;
58 };
59
60 /**
61 * Incrementally checks the input text for FCD and normalizes where necessary.
62 */
63 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
64 public:
65 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
66 : UIterCollationIterator(data, numeric, ui),
67 state(ITER_CHECK_FWD), start(startIndex),
68 nfcImpl(data->nfcImpl) {}
69
70 virtual ~FCDUIterCollationIterator();
71
72 virtual void resetToOffset(int32_t newOffset);
73
74 virtual int32_t getOffset() const;
75
76 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
77
78 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
79
80 protected:
81 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
82
83 virtual UChar handleGetTrailSurrogate();
84
85 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
86
87 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
88
89 private:
90 /**
91 * Switches to forward checking if possible.
92 */
93 void switchToForward();
94
95 /**
96 * Extends the FCD text segment forward or normalizes around pos.
97 * @return TRUE if success
98 */
99 UBool nextSegment(UErrorCode &errorCode);
100
101 /**
102 * Switches to backward checking.
103 */
104 void switchToBackward();
105
106 /**
107 * Extends the FCD text segment backward or normalizes around pos.
108 * @return TRUE if success
109 */
110 UBool previousSegment(UErrorCode &errorCode);
111
112 UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
113
114 enum State {
115 /**
116 * The input text [start..(iter index)[ passes the FCD check.
117 * Moving forward checks incrementally.
118 * pos & limit are undefined.
119 */
120 ITER_CHECK_FWD,
121 /**
122 * The input text [(iter index)..limit[ passes the FCD check.
123 * Moving backward checks incrementally.
124 * start & pos are undefined.
125 */
126 ITER_CHECK_BWD,
127 /**
128 * The input text [start..limit[ passes the FCD check.
129 * pos tracks the current text index.
130 */
131 ITER_IN_FCD_SEGMENT,
132 /**
133 * The input text [start..limit[ failed the FCD check and was normalized.
134 * pos tracks the current index in the normalized string.
135 * The text iterator is at the limit index.
136 */
137 IN_NORM_ITER_AT_LIMIT,
138 /**
139 * The input text [start..limit[ failed the FCD check and was normalized.
140 * pos tracks the current index in the normalized string.
141 * The text iterator is at the start index.
142 */
143 IN_NORM_ITER_AT_START
144 };
145
146 State state;
147
148 int32_t start;
149 int32_t pos;
150 int32_t limit;
151
152 const Normalizer2Impl &nfcImpl;
153 UnicodeString normalized;
154 };
155
156 U_NAMESPACE_END
157
158 #endif // !UCONFIG_NO_COLLATION
159 #endif // __UITERCOLLATIONITERATOR_H__