]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/unicode/coleitr.h
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / i18n / unicode / coleitr.h
... / ...
CommitLineData
1/*
2 ******************************************************************************
3 * Copyright (C) 1997-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 */
7
8/**
9 * \file
10 * \brief C++ API: Collation Element Iterator.
11 */
12
13/**
14* File coleitr.h
15*
16*
17*
18* Created by: Helena Shih
19*
20* Modification History:
21*
22* Date Name Description
23*
24* 8/18/97 helena Added internal API documentation.
25* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
26* 12/10/99 aliu Ported Thai collation support from Java.
27* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h)
28* 02/19/01 swquek Removed CollationElementsIterator() since it is
29* private constructor and no calls are made to it
30*/
31
32#ifndef COLEITR_H
33#define COLEITR_H
34
35#include "unicode/utypes.h"
36
37
38#if !UCONFIG_NO_COLLATION
39
40#include "unicode/uobject.h"
41#include "unicode/tblcoll.h"
42#include "unicode/ucoleitr.h"
43
44/**
45 * The UCollationElements struct, declared here by name only (this header does not define it).
46 * For usage in C programs; CollationElementIterator stores a pointer to it (m_data_).
47 * @stable ICU 2.0
48 */
49typedef struct UCollationElements UCollationElements;
50
51U_NAMESPACE_BEGIN
52
53/**
54* The CollationElementIterator class is used as an iterator to walk through
55* each character of an international string. Use the iterator to return the
56* ordering priority of the positioned character. The ordering priority of a
57* character, which we refer to as a key, defines how a character is collated in
58* the given collation object.
59* For example, consider the following in Spanish:
60* <pre>
61* "ca" -> the first key is key('c') and second key is key('a').
62* "cha" -> the first key is key('ch') and second key is key('a').</pre>
63* And in German,
64* <pre> \htmlonly "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
65* the third key is key('b'). \endhtmlonly </pre>
66* The key of a character is an integer composed of primary order(short),
67* secondary order(char), and tertiary order(char). Java strictly defines the
68* size and signedness of its primitive data types. Therefore, the static
69* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
70* int32_t to ensure the correctness of the key value.
71* <p>Example of the iterator usage: (without error checking)
72* <pre>
73* \code
74* void CollationElementIterator_Example()
75* {
76* UnicodeString str = "This is a test";
77* UErrorCode success = U_ZERO_ERROR;
78* RuleBasedCollator* rbc =
79* (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
80* CollationElementIterator* c =
81* rbc->createCollationElementIterator( str );
82* int32_t order = c->next(success);
83* c->reset();
84* order = c->previous(success);
85* delete c;
86* delete rbc;
87* }
88* \endcode
89* </pre>
90* <p>
91* CollationElementIterator::next returns the collation order of the next
92* character based on the comparison level of the collator.
93* CollationElementIterator::previous returns the collation order of the
94* previous character based on the comparison level of the collator.
95* The Collation Element Iterator moves only in one direction between calls to
96* CollationElementIterator::reset. That is, CollationElementIterator::next()
97* and CollationElementIterator::previous cannot be interleaved. Whenever
98* CollationElementIterator::previous is to be called after
99* CollationElementIterator::next() or vice versa,
100* CollationElementIterator::reset has to be called first to reset the status,
101* shifting pointers to either the end or the start of the string. Hence at the
102* next call of CollationElementIterator::previous or
103* CollationElementIterator::next(), the first or last collation order will be
104* returned.
105* If a change of direction is done without a CollationElementIterator::reset(),
106* the result is undefined.
107* The result of a forward iterate (CollationElementIterator::next) and
108* reversed result of the backward iterate (CollationElementIterator::previous)
109* on the same string are equivalent, if collation orders with the value
110* UCOL_IGNORABLE are ignored.
111* Each collation order is based on the comparison level of the collator. A collation order
112* consists of primary order, secondary order and tertiary order. The data
113* type of the collation order is <strong>int32_t</strong>.
114*
115* Note, CollationElementIterator should not be subclassed.
116* @see Collator
117* @see RuleBasedCollator
118* @version 1.8 Jan 16 2001
119*/
120class U_I18N_API CollationElementIterator : public UObject {
121public:
122
123 // CollationElementIterator public data member ------------------------------
124
125 enum {
126 /**
127 * NULLORDER indicates that an error has occurred while processing; next() and previous() also return it once the end or the start of the string has been reached
128 * @stable ICU 2.0
129 */
130 NULLORDER = (int32_t)0xffffffff
131 };
132
133 // CollationElementIterator public constructor/destructor -------------------
134
135 /**
136 * Copy constructor.
137 *
138 * @param other the object to be copied from
139 * @stable ICU 2.0
140 */
141 CollationElementIterator(const CollationElementIterator& other);
142
143 /**
144 * Destructor
145 * @stable ICU 2.0
146 */
147 virtual ~CollationElementIterator();
148
149 // CollationElementIterator public methods ----------------------------------
150
151 /**
152 * Returns true if "other" is the same as "this"
153 *
154 * @param other the object to be compared
155 * @return true if "other" is the same as "this"
156 * @stable ICU 2.0
157 */
158 UBool operator==(const CollationElementIterator& other) const;
159
160 /**
161 * Returns true if "other" is not the same as "this".
162 *
163 * @param other the object to be compared
164 * @return true if "other" is not the same as "this"
165 * @stable ICU 2.0
166 */
167 UBool operator!=(const CollationElementIterator& other) const;
168
169 /**
170 * Resets the cursor to the beginning of the string.
171 * @stable ICU 2.0
172 */
173 void reset(void);
174
175 /**
176 * Gets the ordering priority of the next character in the string.
177 * @param status the error code status.
178 * @return the next character's ordering. otherwise returns NULLORDER if an
179 * error has occurred or if the end of string has been reached
180 * @stable ICU 2.0
181 */
182 int32_t next(UErrorCode& status);
183
184 /**
185 * Get the ordering priority of the previous collation element in the string.
186 * @param status the error code status.
187 * @return the previous element's ordering. otherwise returns NULLORDER if an
188 * error has occurred or if the start of string has been reached
189 * @stable ICU 2.0
190 */
191 int32_t previous(UErrorCode& status);
192
193 /**
194 * Gets the primary order of a collation order.
195 * @param order the collation order
196 * @return the primary order of a collation order.
197 * @stable ICU 2.0
198 */
199 static inline int32_t primaryOrder(int32_t order);
200
201 /**
202 * Gets the secondary order of a collation order.
203 * @param order the collation order
204 * @return the secondary order of a collation order.
205 * @stable ICU 2.0
206 */
207 static inline int32_t secondaryOrder(int32_t order);
208
209 /**
210 * Gets the tertiary order of a collation order.
211 * @param order the collation order
212 * @return the tertiary order of a collation order.
213 * @stable ICU 2.0
214 */
215 static inline int32_t tertiaryOrder(int32_t order);
216
217 /**
218 * Return the maximum length of any expansion sequences that end with the
219 * specified comparison order.
220 * @param order a collation order returned by previous or next.
221 * @return maximum size of the expansion sequences ending with the collation
222 * element or 1 if collation element does not occur at the end of any
223 * expansion sequence
224 * @stable ICU 2.0
225 */
226 int32_t getMaxExpansion(int32_t order) const;
227
228 /**
229 * Gets the comparison order in the desired strength. Ignore the other
230 * differences.
231 * @param order The order value
232 * @stable ICU 2.0
233 */
234 int32_t strengthOrder(int32_t order) const;
235
236 /**
237 * Sets the source string.
238 * @param str the source string.
239 * @param status the error code status.
240 * @stable ICU 2.0
241 */
242 void setText(const UnicodeString& str, UErrorCode& status);
243
244 /**
245 * Sets the source string.
246 * @param str the source character iterator.
247 * @param status the error code status.
248 * @stable ICU 2.0
249 */
250 void setText(CharacterIterator& str, UErrorCode& status);
251
252 /**
253 * Checks if a comparison order is ignorable.
254 * @param order the collation order.
255 * @return TRUE if a character is ignorable, FALSE otherwise.
256 * @stable ICU 2.0
257 */
258 static inline UBool isIgnorable(int32_t order);
259
260 /**
261 * Gets the offset of the currently processed character in the source string.
262 * @return the offset of the character.
263 * @stable ICU 2.0
264 */
265 int32_t getOffset(void) const;
266
267 /**
268 * Sets the offset of the currently processed character in the source string.
269 * @param newOffset the new offset.
270 * @param status the error code status.
271 * This method is declared void and returns no value.
272 * @stable ICU 2.0
273 */
274 void setOffset(int32_t newOffset, UErrorCode& status);
275
276 /**
277 * ICU "poor man's RTTI", returns a UClassID for the actual class.
278 *
279 * @stable ICU 2.2
280 */
281 virtual UClassID getDynamicClassID() const;
282
283 /**
284 * ICU "poor man's RTTI", returns a UClassID for this class.
285 *
286 * @stable ICU 2.2
287 */
288 static UClassID U_EXPORT2 getStaticClassID();
289
290protected:
291
292 // CollationElementIterator protected constructors --------------------------
293 /** Friend declaration: gives RuleBasedCollator access to the non-public members of this class, including the protected constructors below.
294 * @stable ICU 2.0
295 */
296 friend class RuleBasedCollator;
297
298 /**
299 * CollationElementIterator constructor. This takes the source string and the
300 * collation object. The cursor will walk thru the source string based on the
301 * predefined collation rules. If the source string is empty, NULLORDER will
302 * be returned on the calls to next().
303 * @param sourceText the source string.
304 * @param order the collation object.
305 * @param status the error code status.
306 * @stable ICU 2.0
307 */
308 CollationElementIterator(const UnicodeString& sourceText,
309 const RuleBasedCollator* order, UErrorCode& status);
310
311 /**
312 * CollationElementIterator constructor. This takes the source character iterator and the
313 * collation object. The cursor will walk thru the source text based on the
314 * predefined collation rules. If the source text is empty, NULLORDER will
315 * be returned on the calls to next().
316 * @param sourceText the source character iterator.
317 * @param order the collation object.
318 * @param status the error code status.
319 * @stable ICU 2.0
320 */
321 CollationElementIterator(const CharacterIterator& sourceText,
322 const RuleBasedCollator* order, UErrorCode& status);
323
324 // CollationElementIterator protected methods -------------------------------
325
326 /**
327 * Assignment operator
328 *
329 * @param other the object to be copied
330 * @stable ICU 2.0
331 */
332 const CollationElementIterator&
333 operator=(const CollationElementIterator& other);
334
335private:
336 CollationElementIterator(); // default constructor not implemented
337
338 // CollationElementIterator private data members ----------------------------
339
340 /**
341 * Data wrapper for collation elements
342 */
343 UCollationElements *m_data_;
344
345 /**
346 * Indicates if m_data_ belongs to this object.
347 */
348 UBool isDataOwned_;
349
350};
351
352// CollationElementIterator inline method definitions -------------------------
353
354/**
355* Get the primary order of a collation order.
356* @param order the collation order
357* @return the primary order of a collation order.
358*/
359inline int32_t CollationElementIterator::primaryOrder(int32_t order)
360{
361 order &= RuleBasedCollator::PRIMARYORDERMASK;
362 return (order >> RuleBasedCollator::PRIMARYORDERSHIFT);
363}
364
365/**
366* Get the secondary order of a collation order.
367* @param order the collation order
368* @return the secondary order of a collation order.
369*/
370inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
371{
372 order = order & RuleBasedCollator::SECONDARYORDERMASK;
373 return (order >> RuleBasedCollator::SECONDARYORDERSHIFT);
374}
375
376/**
377* Get the tertiary order of a collation order.
378* @param order the collation order
379* @return the tertiary order of a collation order.
380*/
381inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
382{
383 return (order &= RuleBasedCollator::TERTIARYORDERMASK);
384}
385
386inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
387{
388 return ucol_getMaxExpansion(m_data_, (uint32_t)order);
389}
390
391inline UBool CollationElementIterator::isIgnorable(int32_t order)
392{
393 return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE);
394}
395
396U_NAMESPACE_END
397
398#endif /* #if !UCONFIG_NO_COLLATION */
399
400#endif