]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
4388f060 | 6 | * Copyright (C) 2002-2011 International Business Machines |
b75a7d8f A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: uiter.h | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
b75a7d8f A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2002jan18 | |
16 | * created by: Markus W. Scherer | |
17 | */ | |
18 | ||
19 | #ifndef __UITER_H__ | |
20 | #define __UITER_H__ | |
21 | ||
22 | /** | |
23 | * \file | |
24 | * \brief C API: Unicode Character Iteration | |
25 | * | |
26 | * @see UCharIterator | |
27 | */ | |
28 | ||
29 | #include "unicode/utypes.h" | |
30 | ||
729e4ab9 | 31 | #if U_SHOW_CPLUSPLUS_API |
b75a7d8f A |
32 | U_NAMESPACE_BEGIN |
33 | ||
34 | class CharacterIterator; | |
35 | class Replaceable; | |
36 | ||
37 | U_NAMESPACE_END | |
f3c0d7a5 | 38 | #endif // U_SHOW_CPLUSPLUS_API |
b75a7d8f A |
39 | |
40 | U_CDECL_BEGIN | |
41 | ||
42 | struct UCharIterator; | |
43 | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ | |
44 | ||
45 | /** | |
46 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). | |
47 | * @see UCharIteratorMove | |
48 | * @see UCharIterator | |
49 | * @stable ICU 2.1 | |
50 | */ | |
51 | typedef enum UCharIteratorOrigin { | |
52 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | |
53 | } UCharIteratorOrigin; | |
54 | ||
374ca955 | 55 | /** Constants for UCharIterator. @stable ICU 2.6 */ |
b75a7d8f A |
56 | enum { |
57 | /** | |
58 | * Constant value that may be returned by UCharIteratorMove | |
59 | * indicating that the final UTF-16 index is not known, but that the move succeeded. | |
60 | * This can occur when moving relative to limit or length, or | |
61 | * when moving relative to the current index after a setState() | |
62 | * when the current UTF-16 index is not known. | |
63 | * | |
64 | * It would be very inefficient to have to count from the beginning of the text | |
65 | * just to get the current/limit/length index after moving relative to it. | |
66 | * The actual index can be determined with getIndex(UITER_CURRENT) | |
67 | * which will count the UChars if necessary. | |
68 | * | |
374ca955 | 69 | * @stable ICU 2.6 |
b75a7d8f A |
70 | */ |
71 | UITER_UNKNOWN_INDEX=-2 | |
72 | }; | |
73 | ||
374ca955 | 74 | |
b75a7d8f A |
75 | /** |
76 | * Constant for UCharIterator getState() indicating an error or | |
77 | * an unknown state. | |
78 | * Returned by uiter_getState()/UCharIteratorGetState | |
79 | * when an error occurs. | |
80 | * Also, some UCharIterator implementations may not be able to return | |
81 | * a valid state for each position. This will be clearly documented | |
82 | * for each such iterator (none of the public ones here). | |
83 | * | |
374ca955 | 84 | * @stable ICU 2.6 |
b75a7d8f A |
85 | */ |
86 | #define UITER_NO_STATE ((uint32_t)0xffffffff) | |
87 | ||
88 | /** | |
89 | * Function type declaration for UCharIterator.getIndex(). | |
90 | * | |
91 | * Gets the current position, or the start or limit of the | |
92 | * iteration range. | |
93 | * | |
94 | * This function may perform slowly for UITER_CURRENT after setState() was called, | |
95 | * or for UITER_LENGTH, because an iterator implementation may have to count | |
96 | * UChars if the underlying storage is not UTF-16. | |
97 | * | |
98 | * @param iter the UCharIterator structure ("this pointer") | |
99 | * @param origin get the 0, start, limit, length, or current index | |
100 | * @return the requested index, or U_SENTINEL in an error condition | |
101 | * | |
102 | * @see UCharIteratorOrigin | |
103 | * @see UCharIterator | |
104 | * @stable ICU 2.1 | |
105 | */ | |
106 | typedef int32_t U_CALLCONV | |
107 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | |
108 | ||
109 | /** | |
110 | * Function type declaration for UCharIterator.move(). | |
111 | * | |
112 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | |
113 | * | |
114 | * Moves the current position relative to the start or limit of the | |
115 | * iteration range, or relative to the current position itself. | |
116 | * The movement is expressed in numbers of code units forward | |
117 | * or backward by specifying a positive or negative delta. | |
118 | * Out of bounds movement will be pinned to the start or limit. | |
119 | * | |
120 | * This function may perform slowly for moving relative to UITER_LENGTH | |
121 | * because an iterator implementation may have to count the rest of the | |
122 | * UChars if the native storage is not UTF-16. | |
123 | * | |
124 | * When moving relative to the limit or length, or | |
125 | * relative to the current position after setState() was called, | |
126 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | |
127 | * determination of the actual UTF-16 index. | |
128 | * The actual index can be determined with getIndex(UITER_CURRENT) | |
129 | * which will count the UChars if necessary. | |
130 | * See UITER_UNKNOWN_INDEX for details. | |
131 | * | |
132 | * @param iter the UCharIterator structure ("this pointer") | |
133 | * @param delta can be positive, zero, or negative | |
134 | * @param origin move relative to the 0, start, limit, length, or current index | |
135 | * @return the new index, or U_SENTINEL on an error condition, | |
136 | * or UITER_UNKNOWN_INDEX when the index is not known. | |
137 | * | |
138 | * @see UCharIteratorOrigin | |
139 | * @see UCharIterator | |
140 | * @see UITER_UNKNOWN_INDEX | |
141 | * @stable ICU 2.1 | |
142 | */ | |
143 | typedef int32_t U_CALLCONV | |
144 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); | |
145 | ||
146 | /** | |
147 | * Function type declaration for UCharIterator.hasNext(). | |
148 | * | |
149 | * Check if current() and next() can still | |
150 | * return another code unit. | |
151 | * | |
152 | * @param iter the UCharIterator structure ("this pointer") | |
153 | * @return boolean value for whether current() and next() can still return another code unit | |
154 | * | |
155 | * @see UCharIterator | |
156 | * @stable ICU 2.1 | |
157 | */ | |
158 | typedef UBool U_CALLCONV | |
159 | UCharIteratorHasNext(UCharIterator *iter); | |
160 | ||
161 | /** | |
162 | * Function type declaration for UCharIterator.hasPrevious(). | |
163 | * | |
164 | * Check if previous() can still return another code unit. | |
165 | * | |
166 | * @param iter the UCharIterator structure ("this pointer") | |
167 | * @return boolean value for whether previous() can still return another code unit | |
168 | * | |
169 | * @see UCharIterator | |
170 | * @stable ICU 2.1 | |
171 | */ | |
172 | typedef UBool U_CALLCONV | |
173 | UCharIteratorHasPrevious(UCharIterator *iter); | |
174 | ||
175 | /** | |
176 | * Function type declaration for UCharIterator.current(). | |
177 | * | |
178 | * Return the code unit at the current position, | |
179 | * or U_SENTINEL if there is none (index is at the limit). | |
180 | * | |
181 | * @param iter the UCharIterator structure ("this pointer") | |
182 | * @return the current code unit | |
183 | * | |
184 | * @see UCharIterator | |
185 | * @stable ICU 2.1 | |
186 | */ | |
187 | typedef UChar32 U_CALLCONV | |
188 | UCharIteratorCurrent(UCharIterator *iter); | |
189 | ||
190 | /** | |
191 | * Function type declaration for UCharIterator.next(). | |
192 | * | |
193 | * Return the code unit at the current index and increment | |
194 | * the index (post-increment, like s[i++]), | |
195 | * or return U_SENTINEL if there is none (index is at the limit). | |
196 | * | |
197 | * @param iter the UCharIterator structure ("this pointer") | |
198 | * @return the current code unit (and post-increment the current index) | |
199 | * | |
200 | * @see UCharIterator | |
201 | * @stable ICU 2.1 | |
202 | */ | |
203 | typedef UChar32 U_CALLCONV | |
204 | UCharIteratorNext(UCharIterator *iter); | |
205 | ||
206 | /** | |
207 | * Function type declaration for UCharIterator.previous(). | |
208 | * | |
209 | * Decrement the index and return the code unit from there | |
210 | * (pre-decrement, like s[--i]), | |
211 | * or return U_SENTINEL if there is none (index is at the start). | |
212 | * | |
213 | * @param iter the UCharIterator structure ("this pointer") | |
214 | * @return the previous code unit (after pre-decrementing the current index) | |
215 | * | |
216 | * @see UCharIterator | |
217 | * @stable ICU 2.1 | |
218 | */ | |
219 | typedef UChar32 U_CALLCONV | |
220 | UCharIteratorPrevious(UCharIterator *iter); | |
221 | ||
222 | /** | |
223 | * Function type declaration for UCharIterator.reservedFn(). | |
224 | * Reserved for future use. | |
225 | * | |
226 | * @param iter the UCharIterator structure ("this pointer") | |
227 | * @param something some integer argument | |
228 | * @return some integer | |
229 | * | |
230 | * @see UCharIterator | |
231 | * @stable ICU 2.1 | |
232 | */ | |
233 | typedef int32_t U_CALLCONV | |
234 | UCharIteratorReserved(UCharIterator *iter, int32_t something); | |
235 | ||
236 | /** | |
237 | * Function type declaration for UCharIterator.getState(). | |
238 | * | |
239 | * Get the "state" of the iterator in the form of a single 32-bit word. | |
240 | * It is recommended that the state value be calculated to be as small as | |
241 | * is feasible. For strings with limited lengths, fewer than 32 bits may | |
242 | * be sufficient. | |
243 | * | |
244 | * This is used together with setState()/UCharIteratorSetState | |
245 | * to save and restore the iterator position more efficiently than with | |
246 | * getIndex()/move(). | |
247 | * | |
374ca955 A |
248 | * The iterator state is defined as a uint32_t value because it is designed |
249 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | |
250 | * of the character iterator. | |
251 | * | |
b75a7d8f A |
252 | * With some UCharIterator implementations (e.g., UTF-8), |
253 | * getting and setting the UTF-16 index with existing functions | |
254 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | |
255 | * relatively slow because the iterator has to "walk" from a known index | |
256 | * to the requested one. | |
257 | * This takes more time the farther it needs to go. | |
258 | * | |
259 | * An opaque state value allows an iterator implementation to provide | |
260 | * an internal index (UTF-8: the source byte array index) for | |
261 | * fast, constant-time restoration. | |
262 | * | |
263 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
264 | * the UTF-16 index may not be restored as well, but the iterator can deliver | |
265 | * the correct text contents and move relative to the current position | |
266 | * without performance degradation. | |
267 | * | |
268 | * Some UCharIterator implementations may not be able to return | |
269 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
270 | * This will be clearly documented for each such iterator (none of the public ones here). | |
271 | * | |
272 | * @param iter the UCharIterator structure ("this pointer") | |
273 | * @return the state word | |
274 | * | |
275 | * @see UCharIterator | |
276 | * @see UCharIteratorSetState | |
277 | * @see UITER_NO_STATE | |
374ca955 | 278 | * @stable ICU 2.6 |
b75a7d8f A |
279 | */ |
280 | typedef uint32_t U_CALLCONV | |
281 | UCharIteratorGetState(const UCharIterator *iter); | |
282 | ||
283 | /** | |
284 | * Function type declaration for UCharIterator.setState(). | |
285 | * | |
286 | * Restore the "state" of the iterator using a state word from a getState() call. | |
287 | * The iterator object need not be the same one as for which getState() was called, | |
288 | * but it must be of the same type (set up using the same uiter_setXYZ function) | |
289 | * and it must iterate over the same string | |
290 | * (binary identical regardless of memory address). | |
291 | * For more about the state word see UCharIteratorGetState. | |
292 | * | |
293 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | |
294 | * the UTF-16 index may not be restored as well, but the iterator can deliver | |
295 | * the correct text contents and move relative to the current position | |
296 | * without performance degradation. | |
297 | * | |
298 | * @param iter the UCharIterator structure ("this pointer") | |
299 | * @param state the state word from a getState() call | |
300 | * on a same-type, same-string iterator | |
301 | * @param pErrorCode Must be a valid pointer to an error code value, | |
302 | * which must not indicate a failure before the function call. | |
303 | * | |
304 | * @see UCharIterator | |
305 | * @see UCharIteratorGetState | |
374ca955 | 306 | * @stable ICU 2.6 |
b75a7d8f A |
307 | */ |
308 | typedef void U_CALLCONV | |
309 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | |
310 | ||
311 | ||
312 | /** | |
313 | * C API for code unit iteration. | |
314 | * This can be used as a C wrapper around | |
315 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. | |
316 | * | |
317 | * There are two roles for using UCharIterator: | |
318 | * | |
319 | * A "provider" sets the necessary function pointers and controls the "protected" | |
320 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator | |
321 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. | |
322 | * | |
323 | * Implementations of such C APIs are "callers" of UCharIterator functions; | |
324 | * they only use the "public" function pointers and never access the "protected" | |
325 | * fields directly. | |
326 | * | |
374ca955 A |
327 | * The current() and next() functions only check the current index against the |
328 | * limit, and previous() only checks the current index against the start, | |
329 | * to see if the iterator already reached the end of the iteration range. | |
330 | * | |
331 | * The assumption - in all iterators - is that the index is moved via the API, | |
332 | * which means it won't go out of bounds, or the index is modified by | |
333 | * user code that knows enough about the iterator implementation to set valid | |
334 | * index values. | |
335 | * | |
b75a7d8f A |
336 | * UCharIterator functions return code unit values 0..0xffff, |
337 | * or U_SENTINEL if the iteration bounds are reached. | |
338 | * | |
339 | * @stable ICU 2.1 | |
340 | */ | |
341 | struct UCharIterator { | |
342 | /** | |
343 | * (protected) Pointer to string or wrapped object or similar. | |
344 | * Not used by caller. | |
345 | * @stable ICU 2.1 | |
346 | */ | |
347 | const void *context; | |
348 | ||
349 | /** | |
350 | * (protected) Length of string or similar. | |
351 | * Not used by caller. | |
352 | * @stable ICU 2.1 | |
353 | */ | |
354 | int32_t length; | |
355 | ||
356 | /** | |
357 | * (protected) Start index or similar. | |
358 | * Not used by caller. | |
359 | * @stable ICU 2.1 | |
360 | */ | |
361 | int32_t start; | |
362 | ||
363 | /** | |
364 | * (protected) Current index or similar. | |
365 | * Not used by caller. | |
366 | * @stable ICU 2.1 | |
367 | */ | |
368 | int32_t index; | |
369 | ||
370 | /** | |
371 | * (protected) Limit index or similar. | |
372 | * Not used by caller. | |
373 | * @stable ICU 2.1 | |
374 | */ | |
375 | int32_t limit; | |
376 | ||
377 | /** | |
378 | * (protected) Used by UTF-8 iterators and possibly others. | |
379 | * @stable ICU 2.1 | |
380 | */ | |
381 | int32_t reservedField; | |
382 | ||
383 | /** | |
384 | * (public) Returns the current position or the | |
385 | * start or limit index of the iteration range. | |
386 | * | |
387 | * @see UCharIteratorGetIndex | |
388 | * @stable ICU 2.1 | |
389 | */ | |
390 | UCharIteratorGetIndex *getIndex; | |
391 | ||
392 | /** | |
393 | * (public) Moves the current position relative to the start or limit of the | |
394 | * iteration range, or relative to the current position itself. | |
395 | * The movement is expressed in numbers of code units forward | |
396 | * or backward by specifying a positive or negative delta. | |
397 | * | |
398 | * @see UCharIteratorMove | |
399 | * @stable ICU 2.1 | |
400 | */ | |
401 | UCharIteratorMove *move; | |
402 | ||
403 | /** | |
404 | * (public) Check if current() and next() can still | |
405 | * return another code unit. | |
406 | * | |
407 | * @see UCharIteratorHasNext | |
408 | * @stable ICU 2.1 | |
409 | */ | |
410 | UCharIteratorHasNext *hasNext; | |
411 | ||
412 | /** | |
413 | * (public) Check if previous() can still return another code unit. | |
414 | * | |
415 | * @see UCharIteratorHasPrevious | |
416 | * @stable ICU 2.1 | |
417 | */ | |
418 | UCharIteratorHasPrevious *hasPrevious; | |
419 | ||
420 | /** | |
421 | * (public) Return the code unit at the current position, | |
422 | * or U_SENTINEL if there is none (index is at the limit). | |
423 | * | |
424 | * @see UCharIteratorCurrent | |
425 | * @stable ICU 2.1 | |
426 | */ | |
427 | UCharIteratorCurrent *current; | |
428 | ||
429 | /** | |
430 | * (public) Return the code unit at the current index and increment | |
431 | * the index (post-increment, like s[i++]), | |
432 | * or return U_SENTINEL if there is none (index is at the limit). | |
433 | * | |
434 | * @see UCharIteratorNext | |
435 | * @stable ICU 2.1 | |
436 | */ | |
437 | UCharIteratorNext *next; | |
438 | ||
439 | /** | |
440 | * (public) Decrement the index and return the code unit from there | |
441 | * (pre-decrement, like s[--i]), | |
442 | * or return U_SENTINEL if there is none (index is at the start). | |
443 | * | |
444 | * @see UCharIteratorPrevious | |
445 | * @stable ICU 2.1 | |
446 | */ | |
447 | UCharIteratorPrevious *previous; | |
448 | ||
449 | /** | |
450 | * (public) Reserved for future use. Currently NULL. | |
451 | * | |
452 | * @see UCharIteratorReserved | |
453 | * @stable ICU 2.1 | |
454 | */ | |
455 | UCharIteratorReserved *reservedFn; | |
456 | ||
457 | /** | |
458 | * (public) Return the state of the iterator, to be restored later with setState(). | |
459 | * This function pointer is NULL if the iterator does not implement it. | |
460 | * | |
461 | * @see UCharIteratorGetState | |
374ca955 | 462 | * @stable ICU 2.6 |
b75a7d8f A |
463 | */ |
464 | UCharIteratorGetState *getState; | |
465 | ||
466 | /** | |
467 | * (public) Restore the iterator state from the state word from a call | |
468 | * to getState(). | |
469 | * This function pointer is NULL if the iterator does not implement it. | |
470 | * | |
471 | * @see UCharIteratorSetState | |
374ca955 | 472 | * @stable ICU 2.6 |
b75a7d8f A |
473 | */ |
474 | UCharIteratorSetState *setState; | |
475 | }; | |
476 | ||
477 | /** | |
478 | * Helper function for UCharIterator to get the code point | |
479 | * at the current index. | |
480 | * | |
481 | * Return the code point that includes the code unit at the current position, | |
482 | * or U_SENTINEL if there is none (index is at the limit). | |
483 | * If the current code unit is a lead or trail surrogate, | |
484 | * then the following or preceding surrogate is used to form | |
485 | * the code point value. | |
486 | * | |
487 | * @param iter the UCharIterator structure ("this pointer") | |
488 | * @return the current code point | |
489 | * | |
490 | * @see UCharIterator | |
491 | * @see U16_GET | |
492 | * @see UnicodeString::char32At() | |
493 | * @stable ICU 2.1 | |
494 | */ | |
374ca955 | 495 | U_STABLE UChar32 U_EXPORT2 |
b75a7d8f A |
496 | uiter_current32(UCharIterator *iter); |
497 | ||
498 | /** | |
499 | * Helper function for UCharIterator to get the next code point. | |
500 | * | |
501 | * Return the code point at the current index and increment | |
502 | * the index (post-increment, like s[i++]), | |
503 | * or return U_SENTINEL if there is none (index is at the limit). | |
504 | * | |
505 | * @param iter the UCharIterator structure ("this pointer") | |
506 | * @return the current code point (and post-increment the current index) | |
507 | * | |
508 | * @see UCharIterator | |
509 | * @see U16_NEXT | |
510 | * @stable ICU 2.1 | |
511 | */ | |
374ca955 | 512 | U_STABLE UChar32 U_EXPORT2 |
b75a7d8f A |
513 | uiter_next32(UCharIterator *iter); |
514 | ||
515 | /** | |
516 | * Helper function for UCharIterator to get the previous code point. | |
517 | * | |
518 | * Decrement the index and return the code point from there | |
519 | * (pre-decrement, like s[--i]), | |
520 | * or return U_SENTINEL if there is none (index is at the start). | |
521 | * | |
522 | * @param iter the UCharIterator structure ("this pointer") | |
523 | * @return the previous code point (after pre-decrementing the current index) | |
524 | * | |
525 | * @see UCharIterator | |
526 | * @see U16_PREV | |
527 | * @stable ICU 2.1 | |
528 | */ | |
374ca955 | 529 | U_STABLE UChar32 U_EXPORT2 |
b75a7d8f A |
530 | uiter_previous32(UCharIterator *iter); |
531 | ||
532 | /** | |
533 | * Get the "state" of the iterator in the form of a single 32-bit word. | |
534 | * This is a convenience function that calls iter->getState(iter) | |
535 | * if iter->getState is not NULL; | |
536 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | |
537 | * | |
538 | * Some UCharIterator implementations may not be able to return | |
539 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | |
540 | * This will be clearly documented for each such iterator (none of the public ones here). | |
541 | * | |
542 | * @param iter the UCharIterator structure ("this pointer") | |
543 | * @return the state word | |
544 | * | |
545 | * @see UCharIterator | |
546 | * @see UCharIteratorGetState | |
547 | * @see UITER_NO_STATE | |
374ca955 | 548 | * @stable ICU 2.6 |
b75a7d8f | 549 | */ |
374ca955 | 550 | U_STABLE uint32_t U_EXPORT2 |
b75a7d8f A |
551 | uiter_getState(const UCharIterator *iter); |
552 | ||
553 | /** | |
554 | * Restore the "state" of the iterator using a state word from a getState() call. | |
555 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | |
556 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. | |
557 | * | |
558 | * @param iter the UCharIterator structure ("this pointer") | |
559 | * @param state the state word from a getState() call | |
560 | * on a same-type, same-string iterator | |
561 | * @param pErrorCode Must be a valid pointer to an error code value, | |
562 | * which must not indicate a failure before the function call. | |
563 | * | |
564 | * @see UCharIterator | |
565 | * @see UCharIteratorSetState | |
374ca955 | 566 | * @stable ICU 2.6 |
b75a7d8f | 567 | */ |
374ca955 | 568 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
569 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
570 | ||
571 | /** | |
572 | * Set up a UCharIterator to iterate over a string. | |
573 | * | |
574 | * Sets the UCharIterator function pointers for iteration over the string s | |
575 | * with iteration boundaries start=index=0 and length=limit=string length. | |
576 | * The "provider" may set the start, index, and limit values at any time | |
577 | * within the range 0..length. | |
578 | * The length field will be ignored. | |
579 | * | |
580 | * The string pointer s is set into UCharIterator.context without copying | |
581 | * or reallocating the string contents. | |
582 | * | |
583 | * getState() simply returns the current index. | |
584 | * move() will always return the final index. | |
585 | * | |
586 | * @param iter UCharIterator structure to be set for iteration | |
587 | * @param s String to iterate over | |
588 | * @param length Length of s, or -1 if NUL-terminated | |
589 | * | |
590 | * @see UCharIterator | |
591 | * @stable ICU 2.1 | |
592 | */ | |
374ca955 | 593 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
594 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); |
595 | ||
596 | /** | |
597 | * Set up a UCharIterator to iterate over a UTF-16BE string | |
598 | * (byte vector with a big-endian pair of bytes per UChar). | |
599 | * | |
600 | * Everything works just like with a normal UChar iterator (uiter_setString), | |
601 | * except that UChars are assembled from byte pairs, | |
602 | * and that the length argument here indicates an even number of bytes. | |
603 | * | |
604 | * getState() simply returns the current index. | |
605 | * move() will always return the final index. | |
606 | * | |
607 | * @param iter UCharIterator structure to be set for iteration | |
608 | * @param s UTF-16BE string to iterate over | |
609 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | |
610 | * (NUL means pair of 0 bytes at even index from s) | |
611 | * | |
612 | * @see UCharIterator | |
613 | * @see uiter_setString | |
374ca955 | 614 | * @stable ICU 2.6 |
b75a7d8f | 615 | */ |
374ca955 | 616 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
617 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); |
618 | ||
619 | /** | |
620 | * Set up a UCharIterator to iterate over a UTF-8 string. | |
621 | * | |
622 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | |
623 | * with UTF-8 iteration boundaries 0 and length. | |
624 | * The implementation counts the UTF-16 index on the fly and | |
625 | * lazily evaluates the UTF-16 length of the text. | |
626 | * | |
627 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | |
628 | * When the reservedField is not 0, then it contains a supplementary code point | |
629 | * and the UTF-16 index is between the two corresponding surrogates. | |
630 | * At that point, the UTF-8 index is behind that code point. | |
631 | * | |
632 | * The UTF-8 string pointer s is set into UCharIterator.context without copying | |
633 | * or reallocating the string contents. | |
634 | * | |
635 | * getState() returns a state value consisting of | |
636 | * - the current UTF-8 source byte index (bits 31..1) | |
637 | * - a flag (bit 0) that indicates whether the UChar position is in the middle | |
638 | * of a surrogate pair | |
639 | * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | |
640 | * | |
641 | * getState() cannot also encode the UTF-16 index in the state value. | |
642 | * move(relative to limit or length), or | |
643 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | |
644 | * | |
645 | * @param iter UCharIterator structure to be set for iteration | |
646 | * @param s UTF-8 string to iterate over | |
647 | * @param length Length of s in bytes, or -1 if NUL-terminated | |
648 | * | |
649 | * @see UCharIterator | |
374ca955 | 650 | * @stable ICU 2.6 |
b75a7d8f | 651 | */ |
374ca955 | 652 | U_STABLE void U_EXPORT2 |
b75a7d8f A |
653 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); |
654 | ||
729e4ab9 | 655 | #if U_SHOW_CPLUSPLUS_API |
b75a7d8f A |
656 | |
657 | /** | |
658 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. | |
659 | * | |
660 | * Sets the UCharIterator function pointers for iteration using the | |
661 | * CharacterIterator charIter. | |
662 | * | |
663 | * The CharacterIterator pointer charIter is set into UCharIterator.context | |
664 | * without copying or cloning the CharacterIterator object. | |
665 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. | |
666 | * The iteration index and boundaries are controlled by the CharacterIterator. | |
667 | * | |
668 | * getState() simply returns the current index. | |
669 | * move() will always return the final index. | |
670 | * | |
671 | * @param iter UCharIterator structure to be set for iteration | |
672 | * @param charIter CharacterIterator to wrap | |
673 | * | |
674 | * @see UCharIterator | |
675 | * @stable ICU 2.1 | |
676 | */ | |
374ca955 | 677 | U_STABLE void U_EXPORT2 |
4388f060 | 678 | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); |
b75a7d8f A |
679 | |
680 | /** | |
681 | * Set up a UCharIterator to iterate over a C++ Replaceable. | |
682 | * | |
683 | * Sets the UCharIterator function pointers for iteration over the | |
684 | * Replaceable rep with iteration boundaries start=index=0 and | |
685 | * length=limit=rep->length(). | |
686 | * The "provider" may set the start, index, and limit values at any time | |
687 | * within the range 0..length=rep->length(). | |
688 | * The length field will be ignored. | |
689 | * | |
690 | * The Replaceable pointer rep is set into UCharIterator.context without copying | |
691 | * or cloning/reallocating the Replaceable object. | |
692 | * | |
693 | * getState() simply returns the current index. | |
694 | * move() will always return the final index. | |
695 | * | |
696 | * @param iter UCharIterator structure to be set for iteration | |
697 | * @param rep Replaceable to iterate over | |
698 | * | |
699 | * @see UCharIterator | |
700 | * @stable ICU 2.1 | |
701 | */ | |
374ca955 | 702 | U_STABLE void U_EXPORT2 |
4388f060 | 703 | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); |
b75a7d8f | 704 | |
f3c0d7a5 | 705 | #endif // U_SHOW_CPLUSPLUS_API |
b75a7d8f A |
706 | |
707 | U_CDECL_END | |
708 | ||
709 | #endif |