]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/uitercollationiterator.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uitercollationiterator.h
CommitLineData
57a6839d
A
1/*
2*******************************************************************************
2ca993e8 3* Copyright (C) 2012-2016, International Business Machines
57a6839d
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* uitercollationiterator.h
7*
8* created on: 2012sep23 (from utf16collationiterator.h)
9* created by: Markus W. Scherer
10*/
11
12#ifndef __UITERCOLLATIONITERATOR_H__
13#define __UITERCOLLATIONITERATOR_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/uiter.h"
20#include "cmemory.h"
21#include "collation.h"
22#include "collationdata.h"
2ca993e8 23#include "collationiterator.h"
57a6839d
A
24#include "normalizer2impl.h"
25
26U_NAMESPACE_BEGIN
27
28/**
29 * UCharIterator-based collation element and character iterator.
30 * Handles normalized text inline, with length or NUL-terminated.
31 * Unnormalized text is handled by a subclass.
32 */
33class U_I18N_API UIterCollationIterator : public CollationIterator {
34public:
35 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
36 : CollationIterator(d, numeric), iter(ui) {}
37
38 virtual ~UIterCollationIterator();
39
40 virtual void resetToOffset(int32_t newOffset);
41
42 virtual int32_t getOffset() const;
43
44 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
45
46 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
47
48protected:
49 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
50
51 virtual UChar handleGetTrailSurrogate();
52
53 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
54
55 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
56
57 UCharIterator &iter;
58};
59
60/**
61 * Incrementally checks the input text for FCD and normalizes where necessary.
62 */
63class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
64public:
65 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
66 : UIterCollationIterator(data, numeric, ui),
67 state(ITER_CHECK_FWD), start(startIndex),
68 nfcImpl(data->nfcImpl) {}
69
70 virtual ~FCDUIterCollationIterator();
71
72 virtual void resetToOffset(int32_t newOffset);
73
74 virtual int32_t getOffset() const;
75
76 virtual UChar32 nextCodePoint(UErrorCode &errorCode);
77
78 virtual UChar32 previousCodePoint(UErrorCode &errorCode);
79
80protected:
81 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
82
83 virtual UChar handleGetTrailSurrogate();
84
85 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
86
87 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
88
89private:
90 /**
91 * Switches to forward checking if possible.
92 */
93 void switchToForward();
94
95 /**
96 * Extends the FCD text segment forward or normalizes around pos.
97 * @return TRUE if success
98 */
99 UBool nextSegment(UErrorCode &errorCode);
100
101 /**
102 * Switches to backward checking.
103 */
104 void switchToBackward();
105
106 /**
107 * Extends the FCD text segment backward or normalizes around pos.
108 * @return TRUE if success
109 */
110 UBool previousSegment(UErrorCode &errorCode);
111
112 UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
113
114 enum State {
115 /**
116 * The input text [start..(iter index)[ passes the FCD check.
117 * Moving forward checks incrementally.
118 * pos & limit are undefined.
119 */
120 ITER_CHECK_FWD,
121 /**
122 * The input text [(iter index)..limit[ passes the FCD check.
123 * Moving backward checks incrementally.
124 * start & pos are undefined.
125 */
126 ITER_CHECK_BWD,
127 /**
128 * The input text [start..limit[ passes the FCD check.
129 * pos tracks the current text index.
130 */
131 ITER_IN_FCD_SEGMENT,
132 /**
133 * The input text [start..limit[ failed the FCD check and was normalized.
134 * pos tracks the current index in the normalized string.
135 * The text iterator is at the limit index.
136 */
137 IN_NORM_ITER_AT_LIMIT,
138 /**
139 * The input text [start..limit[ failed the FCD check and was normalized.
140 * pos tracks the current index in the normalized string.
141 * The text iterator is at the start index.
142 */
143 IN_NORM_ITER_AT_START
144 };
145
146 State state;
147
148 int32_t start;
149 int32_t pos;
150 int32_t limit;
151
152 const Normalizer2Impl &nfcImpl;
153 UnicodeString normalized;
154};
155
156U_NAMESPACE_END
157
158#endif // !UCONFIG_NO_COLLATION
159#endif // __UITERCOLLATIONITERATOR_H__