]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/uitercollationiterator.h
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / i18n / uitercollationiterator.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * uitercollationiterator.h
7 *
8 * created on: 2012sep23 (from utf16collationiterator.h)
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __UITERCOLLATIONITERATOR_H__
13 #define __UITERCOLLATIONITERATOR_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "unicode/uiter.h"
20 #include "cmemory.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "normalizer2impl.h"
24
25 U_NAMESPACE_BEGIN
26
27 /**
28 * UCharIterator-based collation element and character iterator.
29 * Handles normalized text inline, with length or NUL-terminated.
30 * Unnormalized text is handled by a subclass.
31 */
32 class U_I18N_API UIterCollationIterator : public CollationIterator {  // NOTE(review): no #include in this header is named after CollationIterator; confirm where the base class declaration comes from
33 public:
34 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)  // d and numeric are passed straight to the CollationIterator base constructor
35 : CollationIterator(d, numeric), iter(ui) {}  // ui is bound by reference, not copied, so it must outlive this object
36 /** Virtual destructor; only declared here, there is no body in this header. */
37 virtual ~UIterCollationIterator();
38 /** Declared only. NOTE(review): presumably repositions the iteration at newOffset -- confirm against the definition. */
39 virtual void resetToOffset(int32_t newOffset);
40 /** Declared only; const member. NOTE(review): presumably reports the current offset in the text -- confirm. */
41 virtual int32_t getOffset() const;
42 /** Declared only. NOTE(review): presumably reads one code point moving forward; errorCode looks like an in/out status -- confirm. */
43 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
44 /** Declared only. NOTE(review): presumably the backward counterpart of nextCodePoint() -- confirm. */
45 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
46
47 protected:  // everything below is reachable only from this class and its subclasses; the functions are declared only
48 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);  // c is a non-const reference, so the callee is able to write to it
49 /** Declared only. NOTE(review): the name suggests it supplies the trail surrogate of a surrogate pair -- confirm. */
50 virtual UChar handleGetTrailSurrogate();
51 /** Declared only. NOTE(review): presumably moves forward by num code points -- confirm. */
52 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
53 /** Declared only. NOTE(review): presumably moves backward by num code points -- confirm. */
54 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
55 /** The UCharIterator handed to the constructor, held by reference (no copy is made). */
56 UCharIterator &iter;
57 };
58
59 /**
60 * Incrementally checks the input text for FCD and normalizes where necessary.
61 */
62 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
63 public:
64 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)  // data is dereferenced in the initializer list, so it must not be NULL
65 : UIterCollationIterator(data, numeric, ui),  // the base class keeps the reference to ui
66 state(ITER_CHECK_FWD), start(startIndex),  // starts in the forward-checking state; pos and limit are left uninitialized here
67 nfcImpl(data->nfcImpl) {}  // nfcImpl is a reference member bound to data->nfcImpl; normalized is default-constructed
68 /** Virtual destructor; only declared here, there is no body in this header. */
69 virtual ~FCDUIterCollationIterator();
70 /** Overrides UIterCollationIterator::resetToOffset(); declared only, no body in this header. */
71 virtual void resetToOffset(int32_t newOffset);
72 /** Overrides UIterCollationIterator::getOffset(); declared only, no body in this header. */
73 virtual int32_t getOffset() const;
74 /** Overrides UIterCollationIterator::nextCodePoint(); declared only, no body in this header. */
75 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
76 /** Overrides UIterCollationIterator::previousCodePoint(); declared only, no body in this header. */
77 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
78
79 protected:  // overrides of the protected virtuals declared in UIterCollationIterator; all are declared only
80 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
81
82 virtual UChar handleGetTrailSurrogate();
83
84 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
85
86 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
87
88 private:  // non-virtual helpers and the state they operate on
89 /**
90 * Switches to forward checking if possible.
91 */
92 void switchToForward();
93
94 /**
95 * Extends the FCD text segment forward or normalizes around pos.
96 * @return TRUE if success
97 */
98 UBool nextSegment(UErrorCode &errorCode);
99
100 /**
101 * Switches to backward checking.
102 */
103 void switchToBackward();
104
105 /**
106 * Extends the FCD text segment backward or normalizes around pos.
107 * @return TRUE if success
108 */
109 UBool previousSegment(UErrorCode &errorCode);
110 /** Declared only. NOTE(review): presumably normalizes s into the normalized member and returns TRUE on success like the Segment helpers -- confirm. */
111 UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
112 /** Iteration states; the comment on each constant says which of start, pos and limit are meaningful in that state. */
113 enum State {
114 /**
115 * The input text [start..(iter index)[ passes the FCD check.
116 * Moving forward checks incrementally.
117 * pos & limit are undefined.
118 */
119 ITER_CHECK_FWD,
120 /**
121 * The input text [(iter index)..limit[ passes the FCD check.
122 * Moving backward checks incrementally.
123 * start & pos are undefined.
124 */
125 ITER_CHECK_BWD,
126 /**
127 * The input text [start..limit[ passes the FCD check.
128 * pos tracks the current text index.
129 */
130 ITER_IN_FCD_SEGMENT,
131 /**
132 * The input text [start..limit[ failed the FCD check and was normalized.
133 * pos tracks the current index in the normalized string.
134 * The text iterator is at the limit index.
135 */
136 IN_NORM_ITER_AT_LIMIT,
137 /**
138 * The input text [start..limit[ failed the FCD check and was normalized.
139 * pos tracks the current index in the normalized string.
140 * The text iterator is at the start index.
141 */
142 IN_NORM_ITER_AT_START
143 };
144 /** Current state; the constructor sets it to ITER_CHECK_FWD. */
145 State state;
146 /** Segment indexes described in the State comments; only start is set by the constructor (from startIndex). */
147 int32_t start;
148 int32_t pos;
149 int32_t limit;
150 /** Bound to data->nfcImpl by the constructor. NOTE(review): presumably used for the FCD checks and normalization -- confirm. */
151 const Normalizer2Impl &nfcImpl;
152 UnicodeString normalized;  // NOTE(review): presumably the normalized text that pos indexes in the two IN_NORM states -- confirm
153 };
154
155 U_NAMESPACE_END
156
157 #endif // !UCONFIG_NO_COLLATION
158 #endif // __UITERCOLLATIONITERATOR_H__