]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2002-2004, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: uiter.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2002jan18 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef __UITER_H__ | |
18 | #define __UITER_H__ | |
19 | ||
20 | /** | |
21 | * \file | |
22 | * \brief C API: Unicode Character Iteration | |
23 | * | |
24 | * @see UCharIterator | |
25 | */ | |
26 | ||
27 | #include "unicode/utypes.h" | |
28 | ||
29 | #ifdef XP_CPLUSPLUS | |
30 | U_NAMESPACE_BEGIN | |
31 | ||
32 | class CharacterIterator; /* forward declaration for uiter_setCharacterIterator() */ | |
33 | class Replaceable; /* forward declaration for uiter_setReplaceable() */ | |
34 | ||
35 | U_NAMESPACE_END | |
36 | #endif /* XP_CPLUSPLUS */ | |
37 | ||
38 | U_CDECL_BEGIN | |
39 | ||
40 | struct UCharIterator; /* forward declaration; the structure itself is defined later in this header */ | |
41 | typedef struct UCharIterator UCharIterator; /**< C typedef so that UCharIterator can be used without the struct keyword. @stable ICU 2.1 */ | |
42 | ||
43 | /** | |
44 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(); they select the 0, start, current, limit, or length index as the reference point. | |
45 | * @see UCharIteratorGetIndex, UCharIteratorMove | |
46 | * @see UCharIterator | |
47 | * @stable ICU 2.1 | |
48 | */ | |
49 | typedef enum UCharIteratorOrigin { | |
50 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | |
51 | } UCharIteratorOrigin; | |
52 | ||
53 | /** Special index constants for UCharIterator functions. @stable ICU 2.6 */ | |
54 | enum { | |
55 | /** | |
56 | * Constant value that may be returned by UCharIteratorMove (UCharIterator.move()) | |
57 | * indicating that the final UTF-16 index is not known, but that the move succeeded. | |
58 | * This can occur when moving relative to limit or length, or | |
59 | * when moving relative to the current index after a setState() | |
60 | * when the current UTF-16 index is not known. | |
61 | * | |
62 | * It would be very inefficient to have to count from the beginning of the text | |
63 | * just to get the current/limit/length index after moving relative to it. | |
64 | * The actual index can be determined with getIndex(UITER_CURRENT) | |
65 | * which will count the UChars if necessary. | |
66 | * | |
67 | * @stable ICU 2.6 | |
68 | */ | |
69 | UITER_UNKNOWN_INDEX=-2 | |
70 | }; | |
71 | ||
72 | ||
73 | /** | |
74 | * Constant for UCharIterator getState() indicating an error or | |
75 | * an unknown state. | |
76 | * Returned by uiter_getState()/UCharIteratorGetState | |
77 | * when an error occurs. The value is a 32-bit word with all bits set (0xffffffff). | |
78 | * Also, some UCharIterator implementations may not be able to return | |
79 | * a valid state for each position. This will be clearly documented | |
80 | * for each such iterator (none of the public ones here). | |
81 | * | |
82 | * @stable ICU 2.6 | |
83 | */ | |
84 | #define UITER_NO_STATE ((uint32_t)0xffffffff) | |
85 | ||
86 | /** | |
87 | * Function type declaration for UCharIterator.getIndex(). | |
88 | * | |
89 | * Gets the current position, or the start or limit of the | |
90 | * iteration range. | |
91 | * | |
92 | * This function may perform slowly for UITER_CURRENT after setState() was called, | |
93 | * or for UITER_LENGTH, because an iterator implementation may have to count | |
94 | * UChars if the underlying storage is not UTF-16. | |
95 | * | |
96 | * @param iter the UCharIterator structure ("this pointer") | |
97 | * @param origin which index to get: UITER_ZERO, UITER_START, UITER_LIMIT, UITER_LENGTH, or UITER_CURRENT | |
98 | * @return the requested index, or U_SENTINEL in an error condition | |
99 | * | |
100 | * @see UCharIteratorOrigin | |
101 | * @see UCharIterator | |
102 | * @stable ICU 2.1 | |
103 | */ | |
104 | typedef int32_t U_CALLCONV | |
105 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | |
106 | ||
107 | /** | |
108 | * Function type declaration for UCharIterator.move(). | |
109 | * | |
110 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | |
111 | * | |
112 | * Moves the current position relative to the start or limit of the | |
113 | * iteration range, or relative to the current position itself. | |
114 | * The movement is expressed in numbers of code units forward | |
115 | * or backward by specifying a positive or negative delta. | |
116 | * Out of bounds movement will be pinned to the start or limit. | |
117 | * | |
118 | * This function may perform slowly for moving relative to UITER_LENGTH | |
119 | * because an iterator implementation may have to count the rest of the | |
120 | * UChars if the native storage is not UTF-16. | |
121 | * | |
122 | * When moving relative to the limit or length, or | |
123 | * relative to the current position after setState() was called, | |
124 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | |
125 | * determination of the actual UTF-16 index. | |
126 | * The actual index can be determined with getIndex(UITER_CURRENT) | |
127 | * which will count the UChars if necessary. | |
128 | * See UITER_UNKNOWN_INDEX for details. | |
129 | * | |
130 | * @param iter the UCharIterator structure ("this pointer") | |
131 | * @param delta number of code units to move; positive moves forward, negative moves backward, zero is allowed | |
132 | * @param origin reference point for the move: UITER_ZERO, UITER_START, UITER_LIMIT, UITER_LENGTH, or UITER_CURRENT | |
133 | * @return the new index, or U_SENTINEL on an error condition, | |
134 | * or UITER_UNKNOWN_INDEX when the index is not known. | |
135 | * | |
136 | * @see UCharIteratorOrigin | |
137 | * @see UCharIterator | |
138 | * @see UITER_UNKNOWN_INDEX | |
139 | * @stable ICU 2.1 | |
140 | */ | |
141 | typedef int32_t U_CALLCONV | |
142 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); | |
143 | ||
144 | /** | |
145 | * Function type declaration for UCharIterator.hasNext(). | |
146 | * | |
147 | * Checks whether current() and next() can still | |
148 | * return another code unit. | |
149 | * | |
150 | * @param iter the UCharIterator structure ("this pointer") | |
151 | * @return a UBool telling whether current() and next() can still return another code unit | |
152 | * | |
153 | * @see UCharIterator | |
154 | * @stable ICU 2.1 | |
155 | */ | |
156 | typedef UBool U_CALLCONV | |
157 | UCharIteratorHasNext(UCharIterator *iter); | |
158 | ||
159 | /** | |
160 | * Function type declaration for UCharIterator.hasPrevious(). | |
161 | * | |
162 | * Checks whether previous() can still return another code unit. | |
163 | * | |
164 | * @param iter the UCharIterator structure ("this pointer") | |
165 | * @return a UBool telling whether previous() can still return another code unit | |
166 | * | |
167 | * @see UCharIterator | |
168 | * @stable ICU 2.1 | |
169 | */ | |
170 | typedef UBool U_CALLCONV | |
171 | UCharIteratorHasPrevious(UCharIterator *iter); | |
172 | ||
173 | /** | |
174 | * Function type declaration for UCharIterator.current(). | |
175 | * | |
176 | * Return the code unit at the current position, | |
177 | * or U_SENTINEL if there is none (index is at the limit). | |
178 | * | |
179 | * @param iter the UCharIterator structure ("this pointer") | |
180 | * @return the current code unit, or U_SENTINEL if the index is at the limit | |
181 | * | |
182 | * @see UCharIterator | |
183 | * @stable ICU 2.1 | |
184 | */ | |
185 | typedef UChar32 U_CALLCONV | |
186 | UCharIteratorCurrent(UCharIterator *iter); | |
187 | ||
188 | /** | |
189 | * Function type declaration for UCharIterator.next(). | |
190 | * | |
191 | * Return the code unit at the current index and increment | |
192 | * the index (post-increment, like s[i++]), | |
193 | * or return U_SENTINEL if there is none (index is at the limit). | |
194 | * | |
195 | * @param iter the UCharIterator structure ("this pointer") | |
196 | * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit | |
197 | * | |
198 | * @see UCharIterator | |
199 | * @stable ICU 2.1 | |
200 | */ | |
201 | typedef UChar32 U_CALLCONV | |
202 | UCharIteratorNext(UCharIterator *iter); | |
203 | ||
204 | /** | |
205 | * Function type declaration for UCharIterator.previous(). | |
206 | * | |
207 | * Decrement the index and return the code unit from there | |
208 | * (pre-decrement, like s[--i]), | |
209 | * or return U_SENTINEL if there is none (index is at the start). | |
210 | * | |
211 | * @param iter the UCharIterator structure ("this pointer") | |
212 | * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start | |
213 | * | |
214 | * @see UCharIterator | |
215 | * @stable ICU 2.1 | |
216 | */ | |
217 | typedef UChar32 U_CALLCONV | |
218 | UCharIteratorPrevious(UCharIterator *iter); | |
219 | ||
220 | /** | |
221 | * Function type declaration for UCharIterator.reservedFn(). | |
222 | * Reserved for future use; no semantics are defined for it yet. | |
223 | * | |
224 | * @param iter the UCharIterator structure ("this pointer") | |
225 | * @param something some integer argument (meaning not yet defined) | |
226 | * @return some integer (meaning not yet defined) | |
227 | * | |
228 | * @see UCharIterator | |
229 | * @stable ICU 2.1 | |
230 | */ | |
231 | typedef int32_t U_CALLCONV | |
232 | UCharIteratorReserved(UCharIterator *iter, int32_t something); | |
233 | ||
234 | /** | |
235 | * Function type declaration for UCharIterator.getState(). | |
236 | * | |
237 | * Get the "state" of the iterator in the form of a single 32-bit word. | |
238 | * It is recommended that the state value be calculated to be as small as | |
239 | * is feasible. For strings with limited lengths, fewer than 32 bits may | |
240 | * be sufficient. | |
241 | * | |
242 | * This is used together with setState()/UCharIteratorSetState | |
243 | * to save and restore the iterator position more efficiently than with | |
244 | * getIndex()/move(). | |
245 | * | |
246 | * The iterator state is defined as a uint32_t value because it is designed | |
247 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | |
248 | * of the character iterator. | |
249 | * | |
250 | * With some UCharIterator implementations (e.g., UTF-8), | |
251 | * getting and setting the UTF-16 index with existing functions | |
252 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | |
253 | * relatively slow because the iterator has to "walk" from a known index | |
254 | * to the requested one. | |
255 | * This takes more time the farther it needs to go. | |
256 | * | |
257 | * An opaque state value allows an iterator implementation to provide | |
258 | * an internal index (UTF-8: the source byte array index) for | |
259 | * fast, constant-time restoration. | |
260 | * | |
261 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
262 | * the UTF-16 index may not be restored as well, but the iterator can deliver | |
263 | * the correct text contents and move relative to the current position | |
264 | * without performance degradation. | |
265 | * | |
266 | * Some UCharIterator implementations may not be able to return | |
267 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
268 | * This will be clearly documented for each such iterator (none of the public ones here). | |
269 | * | |
270 | * @param iter the UCharIterator structure ("this pointer") | |
271 | * @return the state word, or UITER_NO_STATE if no valid state can be returned for the current position | |
272 | * | |
273 | * @see UCharIterator | |
274 | * @see UCharIteratorSetState | |
275 | * @see UITER_NO_STATE | |
276 | * @stable ICU 2.6 | |
277 | */ | |
278 | typedef uint32_t U_CALLCONV | |
279 | UCharIteratorGetState(const UCharIterator *iter); | |
280 | ||
281 | /** | |
282 | * Function type declaration for UCharIterator.setState(). | |
283 | * | |
284 | * Restore the "state" of the iterator using a state word from a getState() call. | |
285 | * The iterator object need not be the same one on which getState() was called, | |
286 | * but it must be of the same type (set up using the same uiter_setXYZ function) | |
287 | * and it must iterate over the same string | |
288 | * (binary identical regardless of memory address). | |
289 | * For more about the state word see UCharIteratorGetState. | |
290 | * | |
291 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
292 | * the UTF-16 index may not be restored as well, but the iterator can deliver | |
293 | * the correct text contents and move relative to the current position | |
294 | * without performance degradation. | |
295 | * | |
296 | * @param iter the UCharIterator structure ("this pointer") | |
297 | * @param state the state word from a getState() call | |
298 | * on a same-type, same-string iterator | |
299 | * @param pErrorCode Must be a valid pointer to an error code value, | |
300 | * which must not indicate a failure before the function call. | |
301 | * | |
302 | * @see UCharIterator | |
303 | * @see UCharIteratorGetState | |
304 | * @stable ICU 2.6 | |
305 | */ | |
306 | typedef void U_CALLCONV | |
307 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
308 | ||
309 | ||
310 | /** | |
311 | * C API for code unit iteration. | |
312 | * This can be used as a C wrapper around | |
313 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. | |
314 | * | |
315 | * There are two roles for using UCharIterator: | |
316 | * | |
317 | * A "provider" sets the necessary function pointers and controls the "protected" | |
318 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator | |
319 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. | |
320 | * | |
321 | * Implementations of such C APIs are "callers" of UCharIterator functions; | |
322 | * they only use the "public" function pointers and never access the "protected" | |
323 | * fields directly. | |
324 | * | |
325 | * The current() and next() functions only check the current index against the | |
326 | * limit, and previous() only checks the current index against the start, | |
327 | * to see if the iterator already reached the end of the iteration range. | |
328 | * | |
329 | * The assumption - in all iterators - is that the index is moved via the API, | |
330 | * which means it won't go out of bounds, or the index is modified by | |
331 | * user code that knows enough about the iterator implementation to set valid | |
332 | * index values. | |
333 | * | |
334 | * UCharIterator functions return code unit values 0..0xffff, | |
335 | * or U_SENTINEL if the iteration bounds are reached. | |
336 | * | |
337 | * @stable ICU 2.1 | |
338 | */ | |
339 | struct UCharIterator { | |
340 | /** | |
341 | * (protected) Pointer to string or wrapped object or similar. | |
342 | * Not used by caller. | |
343 | * @stable ICU 2.1 | |
344 | */ | |
345 | const void *context; | |
346 | ||
347 | /** | |
348 | * (protected) Length of string or similar. | |
349 | * Not used by caller. | |
350 | * @stable ICU 2.1 | |
351 | */ | |
352 | int32_t length; | |
353 | ||
354 | /** | |
355 | * (protected) Start index or similar. | |
356 | * Not used by caller. | |
357 | * @stable ICU 2.1 | |
358 | */ | |
359 | int32_t start; | |
360 | ||
361 | /** | |
362 | * (protected) Current index or similar. | |
363 | * Not used by caller. | |
364 | * @stable ICU 2.1 | |
365 | */ | |
366 | int32_t index; | |
367 | ||
368 | /** | |
369 | * (protected) Limit index or similar. | |
370 | * Not used by caller. | |
371 | * @stable ICU 2.1 | |
372 | */ | |
373 | int32_t limit; | |
374 | ||
375 | /** | |
376 | * (protected) Used by UTF-8 iterators and possibly others. | |
377 | * @stable ICU 2.1 | |
378 | */ | |
379 | int32_t reservedField; | |
380 | ||
381 | /** | |
382 | * (public) Returns the current position or the | |
383 | * start or limit index of the iteration range. | |
384 | * | |
385 | * @see UCharIteratorGetIndex | |
386 | * @stable ICU 2.1 | |
387 | */ | |
388 | UCharIteratorGetIndex *getIndex; | |
389 | ||
390 | /** | |
391 | * (public) Moves the current position relative to the start or limit of the | |
392 | * iteration range, or relative to the current position itself. | |
393 | * The movement is expressed in numbers of code units forward | |
394 | * or backward by specifying a positive or negative delta. | |
395 | * | |
396 | * @see UCharIteratorMove | |
397 | * @stable ICU 2.1 | |
398 | */ | |
399 | UCharIteratorMove *move; | |
400 | ||
401 | /** | |
402 | * (public) Check if current() and next() can still | |
403 | * return another code unit. | |
404 | * | |
405 | * @see UCharIteratorHasNext | |
406 | * @stable ICU 2.1 | |
407 | */ | |
408 | UCharIteratorHasNext *hasNext; | |
409 | ||
410 | /** | |
411 | * (public) Check if previous() can still return another code unit. | |
412 | * | |
413 | * @see UCharIteratorHasPrevious | |
414 | * @stable ICU 2.1 | |
415 | */ | |
416 | UCharIteratorHasPrevious *hasPrevious; | |
417 | ||
418 | /** | |
419 | * (public) Return the code unit at the current position, | |
420 | * or U_SENTINEL if there is none (index is at the limit). | |
421 | * | |
422 | * @see UCharIteratorCurrent | |
423 | * @stable ICU 2.1 | |
424 | */ | |
425 | UCharIteratorCurrent *current; | |
426 | ||
427 | /** | |
428 | * (public) Return the code unit at the current index and increment | |
429 | * the index (post-increment, like s[i++]), | |
430 | * or return U_SENTINEL if there is none (index is at the limit). | |
431 | * | |
432 | * @see UCharIteratorNext | |
433 | * @stable ICU 2.1 | |
434 | */ | |
435 | UCharIteratorNext *next; | |
436 | ||
437 | /** | |
438 | * (public) Decrement the index and return the code unit from there | |
439 | * (pre-decrement, like s[--i]), | |
440 | * or return U_SENTINEL if there is none (index is at the start). | |
441 | * | |
442 | * @see UCharIteratorPrevious | |
443 | * @stable ICU 2.1 | |
444 | */ | |
445 | UCharIteratorPrevious *previous; | |
446 | ||
447 | /** | |
448 | * (public) Reserved for future use. Currently NULL. | |
449 | * | |
450 | * @see UCharIteratorReserved | |
451 | * @stable ICU 2.1 | |
452 | */ | |
453 | UCharIteratorReserved *reservedFn; | |
454 | ||
455 | /** | |
456 | * (public) Return the state of the iterator, to be restored later with setState(). | |
457 | * This function pointer is NULL if the iterator does not implement it. | |
458 | * | |
459 | * @see UCharIteratorGetState | |
460 | * @stable ICU 2.6 | |
461 | */ | |
462 | UCharIteratorGetState *getState; | |
463 | ||
464 | /** | |
465 | * (public) Restore the iterator state from the state word from a call | |
466 | * to getState(). | |
467 | * This function pointer is NULL if the iterator does not implement it. | |
468 | * | |
469 | * @see UCharIteratorSetState | |
470 | * @stable ICU 2.6 | |
471 | */ | |
472 | UCharIteratorSetState *setState; | |
473 | }; | |
474 | ||
475 | /** | |
476 | * Helper function for UCharIterator to get the code point | |
477 | * at the current index. | |
478 | * | |
479 | * Return the code point that includes the code unit at the current position, | |
480 | * or U_SENTINEL if there is none (index is at the limit). | |
481 | * If the current code unit is a lead or trail surrogate, | |
482 | * then the following or preceding surrogate is used to form | |
483 | * the code point value. | |
484 | * | |
485 | * @param iter the UCharIterator structure ("this pointer") | |
486 | * @return the current code point, or U_SENTINEL if the index is at the limit | |
487 | * | |
488 | * @see UCharIterator | |
489 | * @see U16_GET | |
490 | * @see UnicodeString::char32At() | |
491 | * @stable ICU 2.1 | |
492 | */ | |
493 | U_STABLE UChar32 U_EXPORT2 | |
494 | uiter_current32(UCharIterator *iter); | |
495 | ||
496 | /** | |
497 | * Helper function for UCharIterator to get the next code point. | |
498 | * | |
499 | * Return the code point at the current index and increment | |
500 | * the index (post-increment, like s[i++]), | |
501 | * or return U_SENTINEL if there is none (index is at the limit). | |
502 | * | |
503 | * @param iter the UCharIterator structure ("this pointer") | |
504 | * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit | |
505 | * | |
506 | * @see UCharIterator | |
507 | * @see U16_NEXT | |
508 | * @stable ICU 2.1 | |
509 | */ | |
510 | U_STABLE UChar32 U_EXPORT2 | |
511 | uiter_next32(UCharIterator *iter); | |
512 | ||
513 | /** | |
514 | * Helper function for UCharIterator to get the previous code point. | |
515 | * | |
516 | * Decrement the index and return the code point from there | |
517 | * (pre-decrement, like s[--i]), | |
518 | * or return U_SENTINEL if there is none (index is at the start). | |
519 | * | |
520 | * @param iter the UCharIterator structure ("this pointer") | |
521 | * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start | |
522 | * | |
523 | * @see UCharIterator | |
524 | * @see U16_PREV | |
525 | * @stable ICU 2.1 | |
526 | */ | |
527 | U_STABLE UChar32 U_EXPORT2 | |
528 | uiter_previous32(UCharIterator *iter); | |
529 | ||
530 | /** | |
531 | * Get the "state" of the iterator in the form of a single 32-bit word. | |
532 | * This is a convenience function that calls iter->getState(iter) | |
533 | * if iter->getState is not NULL; | |
534 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | |
535 | * | |
536 | * Some UCharIterator implementations may not be able to return | |
537 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
538 | * This will be clearly documented for each such iterator (none of the public ones here). | |
539 | * | |
540 | * @param iter the UCharIterator structure ("this pointer") | |
541 | * @return the state word, or UITER_NO_STATE if iter->getState is NULL, an error occurs, or no valid state is available | |
542 | * | |
543 | * @see UCharIterator | |
544 | * @see UCharIteratorGetState | |
545 | * @see UITER_NO_STATE | |
546 | * @stable ICU 2.6 | |
547 | */ | |
548 | U_STABLE uint32_t U_EXPORT2 | |
549 | uiter_getState(const UCharIterator *iter); | |
550 | ||
551 | /** | |
552 | * Restore the "state" of the iterator using a state word from a getState() call. | |
553 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | |
554 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. | |
555 | * | |
556 | * @param iter the UCharIterator structure ("this pointer") | |
557 | * @param state the state word from a getState() call | |
558 | * on a same-type, same-string iterator | |
559 | * @param pErrorCode Must be a valid pointer to an error code value, | |
560 | * which must not indicate a failure before the function call. Set to U_UNSUPPORTED_ERROR if iter->setState is NULL. | |
561 | * | |
562 | * @see UCharIterator | |
563 | * @see UCharIteratorSetState | |
564 | * @stable ICU 2.6 | |
565 | */ | |
566 | U_STABLE void U_EXPORT2 | |
567 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
568 | ||
569 | /** | |
570 | * Set up a UCharIterator to iterate over a string. | |
571 | * | |
572 | * Sets the UCharIterator function pointers for iteration over the string s | |
573 | * with iteration boundaries start=index=0 and length=limit=string length. | |
574 | * The "provider" may set the start, index, and limit values at any time | |
575 | * within the range 0..length. | |
576 | * The length field will be ignored. | |
577 | * | |
578 | * The string pointer s is set into UCharIterator.context without copying | |
579 | * or reallocating the string contents. | |
580 | * | |
581 | * getState() simply returns the current index. | |
582 | * move() will always return the final index. | |
583 | * | |
584 | * @param iter UCharIterator structure to be set for iteration | |
585 | * @param s String to iterate over; the pointer is stored, the contents are not copied | |
586 | * @param length Length of s, or -1 if NUL-terminated | |
587 | * | |
588 | * @see UCharIterator | |
589 | * @stable ICU 2.1 | |
590 | */ | |
591 | U_STABLE void U_EXPORT2 | |
592 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); | |
593 | ||
594 | /** | |
595 | * Set up a UCharIterator to iterate over a UTF-16BE string | |
596 | * (byte vector with a big-endian pair of bytes per UChar). | |
597 | * | |
598 | * Everything works just like with a normal UChar iterator (uiter_setString), | |
599 | * except that UChars are assembled from byte pairs, | |
600 | * and that the length argument here indicates an even number of bytes. | |
601 | * | |
602 | * getState() simply returns the current index. | |
603 | * move() will always return the final index. | |
604 | * | |
605 | * @param iter UCharIterator structure to be set for iteration | |
606 | * @param s UTF-16BE string to iterate over | |
607 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | |
608 | * (NUL means a pair of 0 bytes at an even index from s) | |
609 | * | |
610 | * @see UCharIterator | |
611 | * @see uiter_setString | |
612 | * @stable ICU 2.6 | |
613 | */ | |
614 | U_STABLE void U_EXPORT2 | |
615 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); | |
616 | ||
617 | /** | |
618 | * Set up a UCharIterator to iterate over a UTF-8 string. | |
619 | * | |
620 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | |
621 | * with UTF-8 iteration boundaries 0 and length. | |
622 | * The implementation counts the UTF-16 index on the fly and | |
623 | * lazily evaluates the UTF-16 length of the text. | |
624 | * | |
625 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | |
626 | * When the reservedField is not 0, then it contains a supplementary code point | |
627 | * and the UTF-16 index is between the two corresponding surrogates. | |
628 | * At that point, the UTF-8 index is behind that code point. | |
629 | * | |
630 | * The UTF-8 string pointer s is set into UCharIterator.context without copying | |
631 | * or reallocating the string contents. | |
632 | * | |
633 | * getState() returns a state value consisting of | |
634 | * - the current UTF-8 source byte index (bits 31..1) | |
635 | * - a flag (bit 0) that indicates whether the UChar position is in the middle | |
636 | * of a surrogate pair | |
637 | * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | |
638 | * | |
639 | * getState() cannot also encode the UTF-16 index in the state value. | |
640 | * move(relative to limit or length), or | |
641 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | |
642 | * | |
643 | * @param iter UCharIterator structure to be set for iteration | |
644 | * @param s UTF-8 string to iterate over; the pointer is stored, the contents are not copied | |
645 | * @param length Length of s in bytes, or -1 if NUL-terminated | |
646 | * | |
647 | * @see UCharIterator | |
648 | * @stable ICU 2.6 | |
649 | */ | |
650 | U_STABLE void U_EXPORT2 | |
651 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); | |
652 | ||
653 | #ifdef XP_CPLUSPLUS | |
654 | ||
655 | /** | |
656 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. | |
657 | * | |
658 | * Sets the UCharIterator function pointers for iteration using the | |
659 | * CharacterIterator charIter. | |
660 | * | |
661 | * The CharacterIterator pointer charIter is set into UCharIterator.context | |
662 | * without copying or cloning the CharacterIterator object. | |
663 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. | |
664 | * The iteration index and boundaries are controlled by the CharacterIterator. | |
665 | * | |
666 | * getState() simply returns the current index. | |
667 | * move() will always return the final index. | |
668 | * | |
669 | * @param iter UCharIterator structure to be set for iteration | |
670 | * @param charIter CharacterIterator to wrap; the object is not copied or cloned | |
671 | * | |
672 | * @see UCharIterator | |
673 | * @stable ICU 2.1 | |
674 | */ | |
675 | U_STABLE void U_EXPORT2 | |
676 | uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter); | |
677 | ||
678 | /** | |
679 | * Set up a UCharIterator to iterate over a C++ Replaceable. | |
680 | * | |
681 | * Sets the UCharIterator function pointers for iteration over the | |
682 | * Replaceable rep with iteration boundaries start=index=0 and | |
683 | * length=limit=rep->length(). | |
684 | * The "provider" may set the start, index, and limit values at any time | |
685 | * within the range 0..length=rep->length(). | |
686 | * The length field will be ignored. | |
687 | * | |
688 | * The Replaceable pointer rep is set into UCharIterator.context without copying | |
689 | * or cloning/reallocating the Replaceable object. | |
690 | * | |
691 | * getState() simply returns the current index. | |
692 | * move() will always return the final index. | |
693 | * | |
694 | * @param iter UCharIterator structure to be set for iteration | |
695 | * @param rep Replaceable to iterate over; the object is not copied or cloned | |
696 | * | |
697 | * @see UCharIterator | |
698 | * @stable ICU 2.1 | |
699 | */ | |
700 | U_STABLE void U_EXPORT2 | |
701 | uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep); | |
702 | ||
703 | #endif | |
704 | ||
705 | U_CDECL_END | |
706 | ||
707 | #endif |