]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/usetiter.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / unicode / usetiter.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (c) 2002-2003, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* $Source: /cvs/root/ICU/icuSources/common/unicode/usetiter.h,v $
7**********************************************************************
8*/
9#ifndef USETITER_H
10#define USETITER_H
11
12#include "unicode/utypes.h"
13#include "unicode/uobject.h"
14#include "unicode/unistr.h"
15
16U_NAMESPACE_BEGIN
17
// Forward declarations: within this header both types are used only
// through pointers and references.
18class UnicodeSet;
19class UnicodeString;
20
21/**
22 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
23 * iterates over either code points or code point ranges. After all
24 * code points or ranges have been returned, it returns the
25 * multicharacter strings of the UnicodeSet, if any.
26 *
27 * <p>To iterate over code points, use a loop like this:
28 * <pre>
29 * UnicodeSetIterator it(set);
30 * while (it.next()) {
31 * if (it.isString()) {
32 * processString(it.getString());
33 * } else {
34 * processCodepoint(it.getCodepoint());
35 * }
36 * }
37 * </pre>
38 *
39 * <p>To iterate over code point ranges, use a loop like this:
40 * <pre>
41 * UnicodeSetIterator it(set);
42 * while (it.nextRange()) {
43 * if (it.isString()) {
44 * processString(it.getString());
45 * } else {
46 * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
47 * }
48 * }
49 * </pre>
50 * @author M. Davis
51 * @draft ICU 2.2
52 */
53class U_COMMON_API UnicodeSetIterator : public UObject {
54
55 protected:
56
57 /**
58 * Value of <tt>codepoint</tt> if the iterator points to a string.
59 * If <tt>codepoint == IS_STRING</tt>, then examine
60 * <tt>string</tt> for the current iteration result.
61 * @draft ICU 2.4
62 */
63 enum { IS_STRING = -1 };
64
65 /**
66 * Current code point, or the special value <tt>IS_STRING</tt>, if
67 * the iterator points to a string.
68 * @draft ICU 2.4
69 */
70 UChar32 codepoint;
71
72 /**
73 * When iterating over ranges using <tt>nextRange()</tt>,
74 * <tt>codepointEnd</tt> contains the inclusive end of the
75 * iteration range, if <tt>codepoint != IS_STRING</tt>. If
76 * iterating over code points using <tt>next()</tt>, or if
77 * <tt>codepoint == IS_STRING</tt>, then the value of
78 * <tt>codepointEnd</tt> is undefined.
79 * @draft ICU 2.4
80 */
81 UChar32 codepointEnd;
82
83 /**
84 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
85 * to the current string. If <tt>codepoint != IS_STRING</tt>, the
86 * value of <tt>string</tt> is undefined.
87 * @draft ICU 2.4
88 */
89 const UnicodeString* string;
90
91 public:
92
93 /**
94 * Create an iterator over the given set. The iterator is valid
95 * only so long as <tt>set</tt> is valid.
96 * @param set set to iterate over
97 * @draft ICU 2.4
98 */
99 UnicodeSetIterator(const UnicodeSet& set);
100
101 /**
102 * Create an iterator over nothing. <tt>next()</tt> and
103 * <tt>nextRange()</tt> return false. This is a convenience
104 * constructor allowing the target to be set later.
105 * @draft ICU 2.4
106 */
107 UnicodeSetIterator();
108
109 /**
110 * Destructor.
111 * @draft ICU 2.4
112 */
113 virtual ~UnicodeSetIterator();
114
115 /**
116 * Returns true if the current element is a string. If so, the
117 * caller can retrieve it with <tt>getString()</tt>. If this
118 * method returns false, the current element is a code point or
119 * code point range, depending on whether <tt>next()</tt> or
120 * <tt>nextRange()</tt> was called, and the caller can retrieve it
121 * with <tt>getCodepoint()</tt> and, for a range,
122 * <tt>getCodepointEnd()</tt>.
123 * @draft ICU 2.4
124 */
125 inline UBool isString() const;
126
127 /**
128 * Returns the current code point, if <tt>isString()</tt> returned
129 * false. Otherwise returns an undefined result.
130 * @draft ICU 2.4
131 */
132 inline UChar32 getCodepoint() const;
133
134 /**
135 * Returns the end of the current code point range, if
136 * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
137 * called. Otherwise returns an undefined result.
138 * @draft ICU 2.4
139 */
140 inline UChar32 getCodepointEnd() const;
141
142 /**
143 * Returns the current string, if <tt>isString()</tt> returned
144 * true. Otherwise returns an undefined result.
145 * @draft ICU 2.4
146 */
147 inline const UnicodeString& getString() const;
148
149 /**
150 * Returns the next element in the set, either a single code point
151 * or a string. If there are no more elements in the set, return
152 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
153 * string in the <tt>string</tt> field. Otherwise the value is a
154 * single code point in the <tt>codepoint</tt> field.
155 *
156 * <p>The order of iteration is all code points in sorted order,
157 * followed by all strings in sorted order. <tt>codepointEnd</tt> is
158 * undefined after calling this method. <tt>string</tt> is
159 * undefined unless <tt>codepoint == IS_STRING</tt>. Do not mix
160 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
161 * calling <tt>reset()</tt> between them. The results of doing so
162 * are undefined.
163 *
164 * @return true if there was another element in the set and this
165 * object contains the element.
166 * @draft ICU 2.4
167 */
168 UBool next();
169
170 /**
171 * Returns the next element in the set, either a code point range
172 * or a string. If there are no more elements in the set, return
173 * false. If <tt>codepoint == IS_STRING</tt>, the value is a
174 * string in the <tt>string</tt> field. Otherwise the value is a
175 * range of one or more code points from <tt>codepoint</tt> to
176 * <tt>codepointEnd</tt> inclusive.
177 *
178 * <p>The order of iteration is all code point ranges in sorted
179 * order, followed by all strings in sorted order. Ranges are
180 * disjoint and non-contiguous. <tt>string</tt> is undefined
181 * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to
182 * <tt>next()</tt> and <tt>nextRange()</tt> without calling
183 * <tt>reset()</tt> between them. The results of doing so are
184 * undefined.
185 *
186 * @return true if there was another element in the set and this
187 * object contains the element.
188 * @draft ICU 2.4
189 */
190 UBool nextRange();
191
192 /**
193 * Sets this iterator to visit the elements of the given set and
194 * resets it to the start of that set. The iterator is valid only
195 * so long as <tt>set</tt> is valid.
196 * @param set the set to iterate over.
197 * @draft ICU 2.4
198 */
199 void reset(const UnicodeSet& set);
200
201 /**
202 * Resets this iterator to the start of the set.
203 * @draft ICU 2.4
204 */
205 void reset();
206
207 /**
208 * ICU "poor man's RTTI", returns a UClassID for the actual class.
209 *
210 * @draft ICU 2.2
211 */
212 virtual inline UClassID getDynamicClassID() const;
213
214 /**
215 * ICU "poor man's RTTI", returns a UClassID for this class.
216 *
217 * @draft ICU 2.2
218 */
219 static inline UClassID getStaticClassID();
220
221 // ============ IMPLEMENTATION DETAILS (protected) ============
222
223 protected:
224
225 // endElement and nextElement are really UChar32's, but we keep
226 // them as signed int32_t's so we can do comparisons with
227 // endElement set to -1. Leave them as int32_t's.
228 /** The set
229 * @draft ICU 2.4
230 */
231 const UnicodeSet* set;
232 /** End range
233 * @draft ICU 2.4
234 */
235 int32_t endRange;
236 /** Range
237 * @draft ICU 2.4
238 */
239 int32_t range;
240 /** End element
241 * @draft ICU 2.4
242 */
243 int32_t endElement;
244 /** Next element
245 * @draft ICU 2.4
246 */
247 int32_t nextElement;
248 //UBool abbreviated;
249 /** Next string
250 * @draft ICU 2.4
251 */
252 int32_t nextString;
253 /** String count
254 * @draft ICU 2.4
255 */
256 int32_t stringCount;
257
258 /** Copy constructor. Disallowed.
259 * @draft ICU 2.4
260 */
261 UnicodeSetIterator(const UnicodeSetIterator&); // disallow
262
263 /** Assignment operator. Disallowed.
264 * @draft ICU 2.4
265 */
266 UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
267
268 /** Load range
269 * @draft ICU 2.4
270 */
271 virtual void loadRange(int32_t range);
272
273private:
274
275 /**
276 * The address of this static class variable serves as this class's ID
277 * for ICU "poor man's RTTI".
278 */
279 static const char fgClassID;
280};
281
282inline UClassID
283UnicodeSetIterator::getStaticClassID()
284{ return (UClassID)&fgClassID; }
285
286inline UClassID
287UnicodeSetIterator::getDynamicClassID() const
288{ return UnicodeSetIterator::getStaticClassID(); }
289
290inline UBool UnicodeSetIterator::isString() const {
291 return codepoint == (UChar32)IS_STRING;
292}
293
294inline UChar32 UnicodeSetIterator::getCodepoint() const {
295 return codepoint;
296}
297
298inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
299 return codepointEnd;
300}
301
302inline const UnicodeString& UnicodeSetIterator::getString() const {
303 return *string;
304}
305
306U_NAMESPACE_END
307
308#endif