// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uiter.cpp
*   tab size:   8 (not used)
*
*   created on: 2002jan18
*   created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/chariter.h"
#include "unicode/rep.h"
#include "unicode/uiter.h"
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
/* True if the integer expression n has its lowest bit clear. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * True if the address held by pointer expression p is even.
 * The argument is parenthesized so that an expression such as s+1 is
 * converted to an integer as a whole, after the pointer arithmetic.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
36 /* No-Op UCharIterator implementation for illegal input --------------------- */
38 static int32_t U_CALLCONV
39 noopGetIndex(UCharIterator
* /*iter*/, UCharIteratorOrigin
/*origin*/) {
43 static int32_t U_CALLCONV
44 noopMove(UCharIterator
* /*iter*/, int32_t /*delta*/, UCharIteratorOrigin
/*origin*/) {
48 static UBool U_CALLCONV
49 noopHasNext(UCharIterator
* /*iter*/) {
53 static UChar32 U_CALLCONV
54 noopCurrent(UCharIterator
* /*iter*/) {
58 static uint32_t U_CALLCONV
59 noopGetState(const UCharIterator
* /*iter*/) {
60 return UITER_NO_STATE
;
63 static void U_CALLCONV
64 noopSetState(UCharIterator
* /*iter*/, uint32_t /*state*/, UErrorCode
*pErrorCode
) {
65 *pErrorCode
=U_UNSUPPORTED_ERROR
;
68 static const UCharIterator noopIterator
={
82 /* UCharIterator implementation for simple strings -------------------------- */
85 * This is an implementation of a code unit (UChar) iterator
86 * for UChar * strings.
88 * The UCharIterator.context field holds a pointer to the string.
91 static int32_t U_CALLCONV
92 stringIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
105 /* not a valid origin */
106 /* Should never get here! */
111 static int32_t U_CALLCONV
112 stringIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
120 pos
=iter
->start
+delta
;
123 pos
=iter
->index
+delta
;
126 pos
=iter
->limit
+delta
;
129 pos
=iter
->length
+delta
;
132 return -1; /* Error */
135 if(pos
<iter
->start
) {
137 } else if(pos
>iter
->limit
) {
141 return iter
->index
=pos
;
144 static UBool U_CALLCONV
145 stringIteratorHasNext(UCharIterator
*iter
) {
146 return iter
->index
<iter
->limit
;
149 static UBool U_CALLCONV
150 stringIteratorHasPrevious(UCharIterator
*iter
) {
151 return iter
->index
>iter
->start
;
154 static UChar32 U_CALLCONV
155 stringIteratorCurrent(UCharIterator
*iter
) {
156 if(iter
->index
<iter
->limit
) {
157 return ((const UChar
*)(iter
->context
))[iter
->index
];
163 static UChar32 U_CALLCONV
164 stringIteratorNext(UCharIterator
*iter
) {
165 if(iter
->index
<iter
->limit
) {
166 return ((const UChar
*)(iter
->context
))[iter
->index
++];
172 static UChar32 U_CALLCONV
173 stringIteratorPrevious(UCharIterator
*iter
) {
174 if(iter
->index
>iter
->start
) {
175 return ((const UChar
*)(iter
->context
))[--iter
->index
];
181 static uint32_t U_CALLCONV
182 stringIteratorGetState(const UCharIterator
*iter
) {
183 return (uint32_t)iter
->index
;
186 static void U_CALLCONV
187 stringIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
188 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
190 } else if(iter
==NULL
) {
191 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
192 } else if((int32_t)state
<iter
->start
|| iter
->limit
<(int32_t)state
) {
193 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
195 iter
->index
=(int32_t)state
;
199 static const UCharIterator stringIterator
={
201 stringIteratorGetIndex
,
203 stringIteratorHasNext
,
204 stringIteratorHasPrevious
,
205 stringIteratorCurrent
,
207 stringIteratorPrevious
,
209 stringIteratorGetState
,
210 stringIteratorSetState
213 U_CAPI
void U_EXPORT2
214 uiter_setString(UCharIterator
*iter
, const UChar
*s
, int32_t length
) {
216 if(s
!=0 && length
>=-1) {
217 *iter
=stringIterator
;
222 iter
->length
=u_strlen(s
);
224 iter
->limit
=iter
->length
;
231 /* UCharIterator implementation for UTF-16BE strings ------------------------ */
234 * This is an implementation of a code unit (UChar) iterator
235 * for UTF-16BE strings, i.e., strings in byte-vectors where
236 * each UChar is stored as a big-endian pair of bytes.
238 * The UCharIterator.context field holds a pointer to the string.
239 * Everything works just like with a normal UChar iterator (uiter_setString),
240 * except that UChars are assembled from byte pairs.
243 /* internal helper function */
244 static inline UChar32
245 utf16BEIteratorGet(UCharIterator
*iter
, int32_t index
) {
246 const uint8_t *p
=(const uint8_t *)iter
->context
;
247 return ((UChar
)p
[2*index
]<<8)|(UChar
)p
[2*index
+1];
250 static UChar32 U_CALLCONV
251 utf16BEIteratorCurrent(UCharIterator
*iter
) {
254 if((index
=iter
->index
)<iter
->limit
) {
255 return utf16BEIteratorGet(iter
, index
);
261 static UChar32 U_CALLCONV
262 utf16BEIteratorNext(UCharIterator
*iter
) {
265 if((index
=iter
->index
)<iter
->limit
) {
267 return utf16BEIteratorGet(iter
, index
);
273 static UChar32 U_CALLCONV
274 utf16BEIteratorPrevious(UCharIterator
*iter
) {
277 if((index
=iter
->index
)>iter
->start
) {
279 return utf16BEIteratorGet(iter
, index
);
285 static const UCharIterator utf16BEIterator
={
287 stringIteratorGetIndex
,
289 stringIteratorHasNext
,
290 stringIteratorHasPrevious
,
291 utf16BEIteratorCurrent
,
293 utf16BEIteratorPrevious
,
295 stringIteratorGetState
,
296 stringIteratorSetState
300 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
301 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
305 utf16BE_strlen(const char *s
) {
306 if(IS_POINTER_EVEN(s
)) {
308 * even-aligned, call u_strlen(s)
309 * we are probably on a little-endian machine, but searching for UChar NUL
310 * does not care about endianness
312 return u_strlen((const UChar
*)s
);
314 /* odd-aligned, search for pair of 0 bytes */
317 while(!(*p
==0 && p
[1]==0)) {
320 return (int32_t)((p
-s
)/2);
324 U_CAPI
void U_EXPORT2
325 uiter_setUTF16BE(UCharIterator
*iter
, const char *s
, int32_t length
) {
327 /* allow only even-length strings (the input length counts bytes) */
328 if(s
!=NULL
&& (length
==-1 || (length
>=0 && IS_EVEN(length
)))) {
329 /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
332 if(U_IS_BIG_ENDIAN
&& IS_POINTER_EVEN(s
)) {
333 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
334 uiter_setString(iter
, (const UChar
*)s
, length
);
338 *iter
=utf16BEIterator
;
343 iter
->length
=utf16BE_strlen(s
);
345 iter
->limit
=iter
->length
;
352 /* UCharIterator wrapper around CharacterIterator --------------------------- */
355 * This is wrapper code around a C++ CharacterIterator to
356 * look like a C UCharIterator.
358 * The UCharIterator.context field holds a pointer to the CharacterIterator.
361 static int32_t U_CALLCONV
362 characterIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
367 return ((CharacterIterator
*)(iter
->context
))->startIndex();
369 return ((CharacterIterator
*)(iter
->context
))->getIndex();
371 return ((CharacterIterator
*)(iter
->context
))->endIndex();
373 return ((CharacterIterator
*)(iter
->context
))->getLength();
375 /* not a valid origin */
376 /* Should never get here! */
381 static int32_t U_CALLCONV
382 characterIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
385 ((CharacterIterator
*)(iter
->context
))->setIndex(delta
);
386 return ((CharacterIterator
*)(iter
->context
))->getIndex();
390 return ((CharacterIterator
*)(iter
->context
))->move(delta
, (CharacterIterator::EOrigin
)origin
);
392 ((CharacterIterator
*)(iter
->context
))->setIndex(((CharacterIterator
*)(iter
->context
))->getLength()+delta
);
393 return ((CharacterIterator
*)(iter
->context
))->getIndex();
395 /* not a valid origin */
396 /* Should never get here! */
401 static UBool U_CALLCONV
402 characterIteratorHasNext(UCharIterator
*iter
) {
403 return ((CharacterIterator
*)(iter
->context
))->hasNext();
406 static UBool U_CALLCONV
407 characterIteratorHasPrevious(UCharIterator
*iter
) {
408 return ((CharacterIterator
*)(iter
->context
))->hasPrevious();
411 static UChar32 U_CALLCONV
412 characterIteratorCurrent(UCharIterator
*iter
) {
415 c
=((CharacterIterator
*)(iter
->context
))->current();
416 if(c
!=0xffff || ((CharacterIterator
*)(iter
->context
))->hasNext()) {
423 static UChar32 U_CALLCONV
424 characterIteratorNext(UCharIterator
*iter
) {
425 if(((CharacterIterator
*)(iter
->context
))->hasNext()) {
426 return ((CharacterIterator
*)(iter
->context
))->nextPostInc();
432 static UChar32 U_CALLCONV
433 characterIteratorPrevious(UCharIterator
*iter
) {
434 if(((CharacterIterator
*)(iter
->context
))->hasPrevious()) {
435 return ((CharacterIterator
*)(iter
->context
))->previous();
441 static uint32_t U_CALLCONV
442 characterIteratorGetState(const UCharIterator
*iter
) {
443 return ((CharacterIterator
*)(iter
->context
))->getIndex();
446 static void U_CALLCONV
447 characterIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
448 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
450 } else if(iter
==NULL
|| iter
->context
==NULL
) {
451 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
452 } else if((int32_t)state
<((CharacterIterator
*)(iter
->context
))->startIndex() || ((CharacterIterator
*)(iter
->context
))->endIndex()<(int32_t)state
) {
453 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
455 ((CharacterIterator
*)(iter
->context
))->setIndex((int32_t)state
);
459 static const UCharIterator characterIteratorWrapper
={
461 characterIteratorGetIndex
,
462 characterIteratorMove
,
463 characterIteratorHasNext
,
464 characterIteratorHasPrevious
,
465 characterIteratorCurrent
,
466 characterIteratorNext
,
467 characterIteratorPrevious
,
469 characterIteratorGetState
,
470 characterIteratorSetState
473 U_CAPI
void U_EXPORT2
474 uiter_setCharacterIterator(UCharIterator
*iter
, CharacterIterator
*charIter
) {
477 *iter
=characterIteratorWrapper
;
478 iter
->context
=charIter
;
485 /* UCharIterator wrapper around Replaceable --------------------------------- */
488 * This is an implementation of a code unit (UChar) iterator
489 * based on a Replaceable object.
491 * The UCharIterator.context field holds a pointer to the Replaceable.
492 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
493 * and the iteration index.
496 static UChar32 U_CALLCONV
497 replaceableIteratorCurrent(UCharIterator
*iter
) {
498 if(iter
->index
<iter
->limit
) {
499 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
);
505 static UChar32 U_CALLCONV
506 replaceableIteratorNext(UCharIterator
*iter
) {
507 if(iter
->index
<iter
->limit
) {
508 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
++);
514 static UChar32 U_CALLCONV
515 replaceableIteratorPrevious(UCharIterator
*iter
) {
516 if(iter
->index
>iter
->start
) {
517 return ((Replaceable
*)(iter
->context
))->charAt(--iter
->index
);
523 static const UCharIterator replaceableIterator
={
525 stringIteratorGetIndex
,
527 stringIteratorHasNext
,
528 stringIteratorHasPrevious
,
529 replaceableIteratorCurrent
,
530 replaceableIteratorNext
,
531 replaceableIteratorPrevious
,
533 stringIteratorGetState
,
534 stringIteratorSetState
537 U_CAPI
void U_EXPORT2
538 uiter_setReplaceable(UCharIterator
*iter
, const Replaceable
*rep
) {
541 *iter
=replaceableIterator
;
543 iter
->limit
=iter
->length
=rep
->length();
/* UCharIterator implementation for UTF-8 strings --------------------------- */

/*
 * Possible, probably necessary only for an implementation for arbitrary
 * converters:
 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
 * This would require to turn reservedFn into a close function and
 * to introduce a uiter_close(iter).
 */

#define UITER_CNV_CAPACITY 16

/*
 * Minimal implementation:
 * Maintain a single-UChar buffer for an additional surrogate.
 * The caller must not modify start and limit because they are used internally.
 *
 * Use UCharIterator fields as follows:
 *   context        pointer to UTF-8 string
 *   length         UTF-16 length of the string; -1 until lazy evaluation
 *   start          current UTF-8 index
 *   index          current UTF-16 index; may be -1="unknown" after setState()
 *   limit          UTF-8 length of the string
 *   reservedField  supplementary code point
 *
 * Since UCharIterator delivers 16-bit code units, the iteration can be
 * currently in the middle of the byte sequence for a supplementary code point.
 * In this case, reservedField will contain that code point and start will
 * point to after the corresponding byte sequence. The UTF-16 index will be
 * one less than what it would otherwise be corresponding to the UTF-8 index.
 * Otherwise, reservedField will be 0.
 */

/*
 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
 * Add implementations that do not call strlen() for iteration but check for NUL.
 */
588 static int32_t U_CALLCONV
589 utf8IteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
596 /* the current UTF-16 index is unknown after setState(), count from the beginning */
599 int32_t i
, limit
, index
;
601 s
=(const uint8_t *)iter
->context
;
603 limit
=iter
->start
; /* count up to the UTF-8 index */
605 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
606 index
+=U16_LENGTH(c
);
609 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
611 iter
->length
=index
; /* in case it was <0 or wrong */
613 if(iter
->reservedField
!=0) {
614 --index
; /* we are in the middle of a supplementary code point */
624 int32_t i
, limit
, length
;
626 s
=(const uint8_t *)iter
->context
;
629 * the current UTF-16 index is unknown after setState(),
630 * we must first count from the beginning to here
635 /* count from the beginning to the current index */
637 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
638 length
+=U16_LENGTH(c
);
641 /* assume i==limit==iter->start, set the UTF-16 index */
642 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
643 iter
->index
= iter
->reservedField
!=0 ? length
-1 : length
;
647 if(iter
->reservedField
!=0) {
652 /* count from the current index to the end */
655 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
656 length
+=U16_LENGTH(c
);
662 /* not a valid origin */
663 /* Should never get here! */
668 static int32_t U_CALLCONV
669 utf8IteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
672 int32_t pos
; /* requested UTF-16 index */
673 int32_t i
; /* UTF-8 index */
676 /* calculate the requested UTF-16 index */
682 /* iter->index<0 (unknown) is possible */
686 pos
=iter
->index
+delta
;
689 /* the current UTF-16 index is unknown after setState(), use only delta */
696 if(iter
->length
>=0) {
697 pos
=iter
->length
+delta
;
700 /* pin to the end, avoid counting the length */
702 iter
->start
=iter
->limit
;
703 iter
->reservedField
=0;
705 return UITER_UNKNOWN_INDEX
;
707 /* the current UTF-16 index is unknown, use only delta */
714 return -1; /* Error */
718 /* shortcuts: pinning to the edges of the string */
720 iter
->index
=iter
->start
=iter
->reservedField
=0;
722 } else if(iter
->length
>=0 && pos
>=iter
->length
) {
723 iter
->index
=iter
->length
;
724 iter
->start
=iter
->limit
;
725 iter
->reservedField
=0;
729 /* minimize the number of U8_NEXT/PREV operations */
730 if(iter
->index
<0 || pos
<iter
->index
/2) {
731 /* go forward from the start instead of backward from the current index */
732 iter
->index
=iter
->start
=iter
->reservedField
=0;
733 } else if(iter
->length
>=0 && (iter
->length
-pos
)<(pos
-iter
->index
)) {
735 * if we have the UTF-16 index and length and the new position is
736 * closer to the end than the current index,
737 * then go backward from the end instead of forward from the current index
739 iter
->index
=iter
->length
;
740 iter
->start
=iter
->limit
;
741 iter
->reservedField
=0;
744 delta
=pos
-iter
->index
;
746 return iter
->index
; /* nothing to do */
749 /* move relative to unknown UTF-16 index */
751 return UITER_UNKNOWN_INDEX
; /* nothing to do */
752 } else if(-delta
>=iter
->start
) {
753 /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
754 iter
->index
=iter
->start
=iter
->reservedField
=0;
756 } else if(delta
>=(iter
->limit
-iter
->start
)) {
757 /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
758 iter
->index
=iter
->length
; /* may or may not be <0 (unknown) */
759 iter
->start
=iter
->limit
;
760 iter
->reservedField
=0;
761 return iter
->index
>=0 ? iter
->index
: (int32_t)UITER_UNKNOWN_INDEX
;
767 /* move towards the requested position, pin to the edges of the string */
768 s
=(const uint8_t *)iter
->context
;
769 pos
=iter
->index
; /* could be <0 (unknown) */
773 int32_t limit
=iter
->limit
;
774 if(iter
->reservedField
!=0) {
775 iter
->reservedField
=0;
779 while(delta
>0 && i
<limit
) {
780 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
784 } else if(delta
>=2) {
787 } else /* delta==1 */ {
788 /* stop in the middle of a supplementary code point */
789 iter
->reservedField
=c
;
791 break; /* delta=0; */
795 if(iter
->length
<0 && iter
->index
>=0) {
796 iter
->length
= iter
->reservedField
==0 ? pos
: pos
+1;
797 } else if(iter
->index
<0 && iter
->length
>=0) {
798 iter
->index
= iter
->reservedField
==0 ? iter
->length
: iter
->length
-1;
801 } else /* delta<0 */ {
803 if(iter
->reservedField
!=0) {
804 iter
->reservedField
=0;
805 i
-=4; /* we stayed behind the supplementary code point; go before it now */
809 while(delta
<0 && i
>0) {
810 U8_PREV_OR_FFFD(s
, 0, i
, c
);
814 } else if(delta
<=-2) {
817 } else /* delta==-1 */ {
818 /* stop in the middle of a supplementary code point */
819 i
+=4; /* back to behind this supplementary code point for consistent state */
820 iter
->reservedField
=c
;
822 break; /* delta=0; */
829 return iter
->index
=pos
;
831 /* we started with index<0 (unknown) so pos is bogus */
833 return iter
->index
=i
; /* reached the beginning */
835 /* we still don't know the UTF-16 index */
836 return UITER_UNKNOWN_INDEX
;
841 static UBool U_CALLCONV
842 utf8IteratorHasNext(UCharIterator
*iter
) {
843 return iter
->start
<iter
->limit
|| iter
->reservedField
!=0;
846 static UBool U_CALLCONV
847 utf8IteratorHasPrevious(UCharIterator
*iter
) {
848 return iter
->start
>0;
851 static UChar32 U_CALLCONV
852 utf8IteratorCurrent(UCharIterator
*iter
) {
853 if(iter
->reservedField
!=0) {
854 return U16_TRAIL(iter
->reservedField
);
855 } else if(iter
->start
<iter
->limit
) {
856 const uint8_t *s
=(const uint8_t *)iter
->context
;
858 int32_t i
=iter
->start
;
860 U8_NEXT_OR_FFFD(s
, i
, iter
->limit
, c
);
871 static UChar32 U_CALLCONV
872 utf8IteratorNext(UCharIterator
*iter
) {
875 if(iter
->reservedField
!=0) {
876 UChar trail
=U16_TRAIL(iter
->reservedField
);
877 iter
->reservedField
=0;
878 if((index
=iter
->index
)>=0) {
882 } else if(iter
->start
<iter
->limit
) {
883 const uint8_t *s
=(const uint8_t *)iter
->context
;
886 U8_NEXT_OR_FFFD(s
, iter
->start
, iter
->limit
, c
);
887 if((index
=iter
->index
)>=0) {
889 if(iter
->length
<0 && iter
->start
==iter
->limit
) {
890 iter
->length
= c
<=0xffff ? index
: index
+1;
892 } else if(iter
->start
==iter
->limit
&& iter
->length
>=0) {
893 iter
->index
= c
<=0xffff ? iter
->length
: iter
->length
-1;
898 iter
->reservedField
=c
;
906 static UChar32 U_CALLCONV
907 utf8IteratorPrevious(UCharIterator
*iter
) {
910 if(iter
->reservedField
!=0) {
911 UChar lead
=U16_LEAD(iter
->reservedField
);
912 iter
->reservedField
=0;
913 iter
->start
-=4; /* we stayed behind the supplementary code point; go before it now */
914 if((index
=iter
->index
)>0) {
918 } else if(iter
->start
>0) {
919 const uint8_t *s
=(const uint8_t *)iter
->context
;
922 U8_PREV_OR_FFFD(s
, 0, iter
->start
, c
);
923 if((index
=iter
->index
)>0) {
925 } else if(iter
->start
<=1) {
926 iter
->index
= c
<=0xffff ? iter
->start
: iter
->start
+1;
931 iter
->start
+=4; /* back to behind this supplementary code point for consistent state */
932 iter
->reservedField
=c
;
940 static uint32_t U_CALLCONV
941 utf8IteratorGetState(const UCharIterator
*iter
) {
942 uint32_t state
=(uint32_t)(iter
->start
<<1);
943 if(iter
->reservedField
!=0) {
949 static void U_CALLCONV
950 utf8IteratorSetState(UCharIterator
*iter
,
952 UErrorCode
*pErrorCode
)
954 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
956 } else if(iter
==NULL
) {
957 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
958 } else if(state
==utf8IteratorGetState(iter
)) {
959 /* setting to the current state: no-op */
961 int32_t index
=(int32_t)(state
>>1); /* UTF-8 index */
962 state
&=1; /* 1 if in surrogate pair, must be index>=4 */
964 if((state
==0 ? index
<0 : index
<4) || iter
->limit
<index
) {
965 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
967 iter
->start
=index
; /* restore UTF-8 byte index */
971 iter
->index
=-1; /* unknown UTF-16 index */
974 iter
->reservedField
=0;
976 /* verified index>=4 above */
978 U8_PREV_OR_FFFD((const uint8_t *)iter
->context
, 0, index
, c
);
980 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
982 iter
->reservedField
=c
;
989 static const UCharIterator utf8Iterator
={
991 utf8IteratorGetIndex
,
994 utf8IteratorHasPrevious
,
997 utf8IteratorPrevious
,
999 utf8IteratorGetState
,
1000 utf8IteratorSetState
1003 U_CAPI
void U_EXPORT2
1004 uiter_setUTF8(UCharIterator
*iter
, const char *s
, int32_t length
) {
1006 if(s
!=0 && length
>=-1) {
1012 iter
->limit
=(int32_t)uprv_strlen(s
);
1014 iter
->length
= iter
->limit
<=1 ? iter
->limit
: -1;
1021 /* Helper functions --------------------------------------------------------- */
1023 U_CAPI UChar32 U_EXPORT2
1024 uiter_current32(UCharIterator
*iter
) {
1027 c
=iter
->current(iter
);
1028 if(U16_IS_SURROGATE(c
)) {
1029 if(U16_IS_SURROGATE_LEAD(c
)) {
1031 * go to the next code unit
1032 * we know that we are not at the limit because c!=U_SENTINEL
1034 iter
->move(iter
, 1, UITER_CURRENT
);
1035 if(U16_IS_TRAIL(c2
=iter
->current(iter
))) {
1036 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1039 /* undo index movement */
1040 iter
->move(iter
, -1, UITER_CURRENT
);
1042 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1043 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1046 /* undo index movement */
1047 iter
->move(iter
, 1, UITER_CURRENT
);
1054 U_CAPI UChar32 U_EXPORT2
1055 uiter_next32(UCharIterator
*iter
) {
1059 if(U16_IS_LEAD(c
)) {
1060 if(U16_IS_TRAIL(c2
=iter
->next(iter
))) {
1061 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1063 /* unmatched first surrogate, undo index movement */
1064 iter
->move(iter
, -1, UITER_CURRENT
);
1070 U_CAPI UChar32 U_EXPORT2
1071 uiter_previous32(UCharIterator
*iter
) {
1074 c
=iter
->previous(iter
);
1075 if(U16_IS_TRAIL(c
)) {
1076 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1077 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1079 /* unmatched second surrogate, undo index movement */
1080 iter
->move(iter
, 1, UITER_CURRENT
);
1086 U_CAPI
uint32_t U_EXPORT2
1087 uiter_getState(const UCharIterator
*iter
) {
1088 if(iter
==NULL
|| iter
->getState
==NULL
) {
1089 return UITER_NO_STATE
;
1091 return iter
->getState(iter
);
1095 U_CAPI
void U_EXPORT2
1096 uiter_setState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
1097 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1099 } else if(iter
==NULL
) {
1100 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1101 } else if(iter
->setState
==NULL
) {
1102 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1104 iter
->setState(iter
, state
, pErrorCode
);