2 *******************************************************************************
4 * Copyright (C) 2002-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002jan18
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/ustring.h"
19 #include "unicode/chariter.h"
20 #include "unicode/rep.h"
21 #include "unicode/uiter.h"
22 #include "unicode/utf.h"
23 #include "unicode/utf8.h"
24 #include "unicode/utf16.h"
/* Evaluates to true if the integer expression n has its lowest bit clear. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * Evaluates to true if the address held by pointer expression p is even.
 * The argument is parenthesized before the cast so that an expression such as
 * ptr+1 is converted as a whole; otherwise the cast would bind to ptr alone
 * and the +1 would be added as an integer instead of as pointer arithmetic.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
34 /* No-Op UCharIterator implementation for illegal input --------------------- */
36 static int32_t U_CALLCONV
37 noopGetIndex(UCharIterator
* /*iter*/, UCharIteratorOrigin
/*origin*/) {
41 static int32_t U_CALLCONV
42 noopMove(UCharIterator
* /*iter*/, int32_t /*delta*/, UCharIteratorOrigin
/*origin*/) {
46 static UBool U_CALLCONV
47 noopHasNext(UCharIterator
* /*iter*/) {
51 static UChar32 U_CALLCONV
52 noopCurrent(UCharIterator
* /*iter*/) {
56 static uint32_t U_CALLCONV
57 noopGetState(const UCharIterator
* /*iter*/) {
58 return UITER_NO_STATE
;
61 static void U_CALLCONV
62 noopSetState(UCharIterator
* /*iter*/, uint32_t /*state*/, UErrorCode
*pErrorCode
) {
63 *pErrorCode
=U_UNSUPPORTED_ERROR
;
66 static const UCharIterator noopIterator
={
80 /* UCharIterator implementation for simple strings -------------------------- */
83 * This is an implementation of a code unit (UChar) iterator
84 * for UChar * strings.
86 * The UCharIterator.context field holds a pointer to the string.
89 static int32_t U_CALLCONV
90 stringIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
103 /* not a valid origin */
104 /* Should never get here! */
109 static int32_t U_CALLCONV
110 stringIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
118 pos
=iter
->start
+delta
;
121 pos
=iter
->index
+delta
;
124 pos
=iter
->limit
+delta
;
127 pos
=iter
->length
+delta
;
130 return -1; /* Error */
133 if(pos
<iter
->start
) {
135 } else if(pos
>iter
->limit
) {
139 return iter
->index
=pos
;
142 static UBool U_CALLCONV
143 stringIteratorHasNext(UCharIterator
*iter
) {
144 return iter
->index
<iter
->limit
;
147 static UBool U_CALLCONV
148 stringIteratorHasPrevious(UCharIterator
*iter
) {
149 return iter
->index
>iter
->start
;
152 static UChar32 U_CALLCONV
153 stringIteratorCurrent(UCharIterator
*iter
) {
154 if(iter
->index
<iter
->limit
) {
155 return ((const UChar
*)(iter
->context
))[iter
->index
];
161 static UChar32 U_CALLCONV
162 stringIteratorNext(UCharIterator
*iter
) {
163 if(iter
->index
<iter
->limit
) {
164 return ((const UChar
*)(iter
->context
))[iter
->index
++];
170 static UChar32 U_CALLCONV
171 stringIteratorPrevious(UCharIterator
*iter
) {
172 if(iter
->index
>iter
->start
) {
173 return ((const UChar
*)(iter
->context
))[--iter
->index
];
179 static uint32_t U_CALLCONV
180 stringIteratorGetState(const UCharIterator
*iter
) {
181 return (uint32_t)iter
->index
;
184 static void U_CALLCONV
185 stringIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
186 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
188 } else if(iter
==NULL
) {
189 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
190 } else if((int32_t)state
<iter
->start
|| iter
->limit
<(int32_t)state
) {
191 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
193 iter
->index
=(int32_t)state
;
197 static const UCharIterator stringIterator
={
199 stringIteratorGetIndex
,
201 stringIteratorHasNext
,
202 stringIteratorHasPrevious
,
203 stringIteratorCurrent
,
205 stringIteratorPrevious
,
207 stringIteratorGetState
,
208 stringIteratorSetState
211 U_CAPI
void U_EXPORT2
212 uiter_setString(UCharIterator
*iter
, const UChar
*s
, int32_t length
) {
214 if(s
!=0 && length
>=-1) {
215 *iter
=stringIterator
;
220 iter
->length
=u_strlen(s
);
222 iter
->limit
=iter
->length
;
229 /* UCharIterator implementation for UTF-16BE strings ------------------------ */
232 * This is an implementation of a code unit (UChar) iterator
233 * for UTF-16BE strings, i.e., strings in byte-vectors where
234 * each UChar is stored as a big-endian pair of bytes.
236 * The UCharIterator.context field holds a pointer to the string.
237 * Everything works just like with a normal UChar iterator (uiter_setString),
238 * except that UChars are assembled from byte pairs.
241 /* internal helper function */
242 static inline UChar32
243 utf16BEIteratorGet(UCharIterator
*iter
, int32_t index
) {
244 const uint8_t *p
=(const uint8_t *)iter
->context
;
245 return ((UChar
)p
[2*index
]<<8)|(UChar
)p
[2*index
+1];
248 static UChar32 U_CALLCONV
249 utf16BEIteratorCurrent(UCharIterator
*iter
) {
252 if((index
=iter
->index
)<iter
->limit
) {
253 return utf16BEIteratorGet(iter
, index
);
259 static UChar32 U_CALLCONV
260 utf16BEIteratorNext(UCharIterator
*iter
) {
263 if((index
=iter
->index
)<iter
->limit
) {
265 return utf16BEIteratorGet(iter
, index
);
271 static UChar32 U_CALLCONV
272 utf16BEIteratorPrevious(UCharIterator
*iter
) {
275 if((index
=iter
->index
)>iter
->start
) {
277 return utf16BEIteratorGet(iter
, index
);
283 static const UCharIterator utf16BEIterator
={
285 stringIteratorGetIndex
,
287 stringIteratorHasNext
,
288 stringIteratorHasPrevious
,
289 utf16BEIteratorCurrent
,
291 utf16BEIteratorPrevious
,
293 stringIteratorGetState
,
294 stringIteratorSetState
298 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
299 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
303 utf16BE_strlen(const char *s
) {
304 if(IS_POINTER_EVEN(s
)) {
306 * even-aligned, call u_strlen(s)
307 * we are probably on a little-endian machine, but searching for UChar NUL
308 * does not care about endianness
310 return u_strlen((const UChar
*)s
);
312 /* odd-aligned, search for pair of 0 bytes */
315 while(!(*p
==0 && p
[1]==0)) {
318 return (int32_t)((p
-s
)/2);
322 U_CAPI
void U_EXPORT2
323 uiter_setUTF16BE(UCharIterator
*iter
, const char *s
, int32_t length
) {
325 /* allow only even-length strings (the input length counts bytes) */
326 if(s
!=NULL
&& (length
==-1 || (length
>=0 && IS_EVEN(length
)))) {
327 /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
330 if(U_IS_BIG_ENDIAN
&& IS_POINTER_EVEN(s
)) {
331 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
332 uiter_setString(iter
, (const UChar
*)s
, length
);
336 *iter
=utf16BEIterator
;
341 iter
->length
=utf16BE_strlen(s
);
343 iter
->limit
=iter
->length
;
350 /* UCharIterator wrapper around CharacterIterator --------------------------- */
353 * This is wrapper code around a C++ CharacterIterator to
354 * look like a C UCharIterator.
356 * The UCharIterator.context field holds a pointer to the CharacterIterator.
359 static int32_t U_CALLCONV
360 characterIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
365 return ((CharacterIterator
*)(iter
->context
))->startIndex();
367 return ((CharacterIterator
*)(iter
->context
))->getIndex();
369 return ((CharacterIterator
*)(iter
->context
))->endIndex();
371 return ((CharacterIterator
*)(iter
->context
))->getLength();
373 /* not a valid origin */
374 /* Should never get here! */
379 static int32_t U_CALLCONV
380 characterIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
383 ((CharacterIterator
*)(iter
->context
))->setIndex(delta
);
384 return ((CharacterIterator
*)(iter
->context
))->getIndex();
388 return ((CharacterIterator
*)(iter
->context
))->move(delta
, (CharacterIterator::EOrigin
)origin
);
390 ((CharacterIterator
*)(iter
->context
))->setIndex(((CharacterIterator
*)(iter
->context
))->getLength()+delta
);
391 return ((CharacterIterator
*)(iter
->context
))->getIndex();
393 /* not a valid origin */
394 /* Should never get here! */
399 static UBool U_CALLCONV
400 characterIteratorHasNext(UCharIterator
*iter
) {
401 return ((CharacterIterator
*)(iter
->context
))->hasNext();
404 static UBool U_CALLCONV
405 characterIteratorHasPrevious(UCharIterator
*iter
) {
406 return ((CharacterIterator
*)(iter
->context
))->hasPrevious();
409 static UChar32 U_CALLCONV
410 characterIteratorCurrent(UCharIterator
*iter
) {
413 c
=((CharacterIterator
*)(iter
->context
))->current();
414 if(c
!=0xffff || ((CharacterIterator
*)(iter
->context
))->hasNext()) {
421 static UChar32 U_CALLCONV
422 characterIteratorNext(UCharIterator
*iter
) {
423 if(((CharacterIterator
*)(iter
->context
))->hasNext()) {
424 return ((CharacterIterator
*)(iter
->context
))->nextPostInc();
430 static UChar32 U_CALLCONV
431 characterIteratorPrevious(UCharIterator
*iter
) {
432 if(((CharacterIterator
*)(iter
->context
))->hasPrevious()) {
433 return ((CharacterIterator
*)(iter
->context
))->previous();
439 static uint32_t U_CALLCONV
440 characterIteratorGetState(const UCharIterator
*iter
) {
441 return ((CharacterIterator
*)(iter
->context
))->getIndex();
444 static void U_CALLCONV
445 characterIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
446 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
448 } else if(iter
==NULL
|| iter
->context
==NULL
) {
449 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
450 } else if((int32_t)state
<((CharacterIterator
*)(iter
->context
))->startIndex() || ((CharacterIterator
*)(iter
->context
))->endIndex()<(int32_t)state
) {
451 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
453 ((CharacterIterator
*)(iter
->context
))->setIndex((int32_t)state
);
457 static const UCharIterator characterIteratorWrapper
={
459 characterIteratorGetIndex
,
460 characterIteratorMove
,
461 characterIteratorHasNext
,
462 characterIteratorHasPrevious
,
463 characterIteratorCurrent
,
464 characterIteratorNext
,
465 characterIteratorPrevious
,
467 characterIteratorGetState
,
468 characterIteratorSetState
471 U_CAPI
void U_EXPORT2
472 uiter_setCharacterIterator(UCharIterator
*iter
, CharacterIterator
*charIter
) {
475 *iter
=characterIteratorWrapper
;
476 iter
->context
=charIter
;
483 /* UCharIterator wrapper around Replaceable --------------------------------- */
486 * This is an implementation of a code unit (UChar) iterator
487 * based on a Replaceable object.
489 * The UCharIterator.context field holds a pointer to the Replaceable.
490 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
491 * and the iteration index.
494 static UChar32 U_CALLCONV
495 replaceableIteratorCurrent(UCharIterator
*iter
) {
496 if(iter
->index
<iter
->limit
) {
497 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
);
503 static UChar32 U_CALLCONV
504 replaceableIteratorNext(UCharIterator
*iter
) {
505 if(iter
->index
<iter
->limit
) {
506 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
++);
512 static UChar32 U_CALLCONV
513 replaceableIteratorPrevious(UCharIterator
*iter
) {
514 if(iter
->index
>iter
->start
) {
515 return ((Replaceable
*)(iter
->context
))->charAt(--iter
->index
);
521 static const UCharIterator replaceableIterator
={
523 stringIteratorGetIndex
,
525 stringIteratorHasNext
,
526 stringIteratorHasPrevious
,
527 replaceableIteratorCurrent
,
528 replaceableIteratorNext
,
529 replaceableIteratorPrevious
,
531 stringIteratorGetState
,
532 stringIteratorSetState
535 U_CAPI
void U_EXPORT2
536 uiter_setReplaceable(UCharIterator
*iter
, const Replaceable
*rep
) {
539 *iter
=replaceableIterator
;
541 iter
->limit
=iter
->length
=rep
->length();
548 /* UCharIterator implementation for UTF-8 strings --------------------------- */
551 * Possible, probably necessary only for an implementation for arbitrary
553 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
554 * This would require turning reservedFn into a close function and
555 * introducing a uiter_close(iter).
558 #define UITER_CNV_CAPACITY 16
561 * Minimal implementation:
562 * Maintain a single-UChar buffer for an additional surrogate.
563 * The caller must not modify start and limit because they are used internally.
565 * Use UCharIterator fields as follows:
566 * context pointer to UTF-8 string
567 * length UTF-16 length of the string; -1 until lazy evaluation
568 * start current UTF-8 index
569 * index current UTF-16 index; may be -1="unknown" after setState()
570 * limit UTF-8 length of the string
571 * reservedField supplementary code point
573 * Since UCharIterator delivers 16-bit code units, the iteration can be
574 * currently in the middle of the byte sequence for a supplementary code point.
575 * In this case, reservedField will contain that code point and start will
576 * point to after the corresponding byte sequence. The UTF-16 index will be
577 * one less than what it would otherwise be corresponding to the UTF-8 index.
578 * Otherwise, reservedField will be 0.
582 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
583 * Add implementations that do not call strlen() for iteration but check for NUL.
586 static int32_t U_CALLCONV
587 utf8IteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
594 /* the current UTF-16 index is unknown after setState(), count from the beginning */
597 int32_t i
, limit
, index
;
599 s
=(const uint8_t *)iter
->context
;
601 limit
=iter
->start
; /* count up to the UTF-8 index */
603 U8_NEXT(s
, i
, limit
, c
);
611 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
613 iter
->length
=index
; /* in case it was <0 or wrong */
615 if(iter
->reservedField
!=0) {
616 --index
; /* we are in the middle of a supplementary code point */
626 int32_t i
, limit
, length
;
628 s
=(const uint8_t *)iter
->context
;
631 * the current UTF-16 index is unknown after setState(),
632 * we must first count from the beginning to here
637 /* count from the beginning to the current index */
639 U8_NEXT(s
, i
, limit
, c
);
647 /* assume i==limit==iter->start, set the UTF-16 index */
648 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
649 iter
->index
= iter
->reservedField
!=0 ? length
-1 : length
;
653 if(iter
->reservedField
!=0) {
658 /* count from the current index to the end */
661 U8_NEXT(s
, i
, limit
, c
);
672 /* not a valid origin */
673 /* Should never get here! */
678 static int32_t U_CALLCONV
679 utf8IteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
682 int32_t pos
; /* requested UTF-16 index */
683 int32_t i
; /* UTF-8 index */
686 /* calculate the requested UTF-16 index */
692 /* iter->index<0 (unknown) is possible */
696 pos
=iter
->index
+delta
;
699 /* the current UTF-16 index is unknown after setState(), use only delta */
706 if(iter
->length
>=0) {
707 pos
=iter
->length
+delta
;
710 /* pin to the end, avoid counting the length */
712 iter
->start
=iter
->limit
;
713 iter
->reservedField
=0;
715 return UITER_UNKNOWN_INDEX
;
717 /* the current UTF-16 index is unknown, use only delta */
724 return -1; /* Error */
728 /* shortcuts: pinning to the edges of the string */
730 iter
->index
=iter
->start
=iter
->reservedField
=0;
732 } else if(iter
->length
>=0 && pos
>=iter
->length
) {
733 iter
->index
=iter
->length
;
734 iter
->start
=iter
->limit
;
735 iter
->reservedField
=0;
739 /* minimize the number of U8_NEXT/PREV operations */
740 if(iter
->index
<0 || pos
<iter
->index
/2) {
741 /* go forward from the start instead of backward from the current index */
742 iter
->index
=iter
->start
=iter
->reservedField
=0;
743 } else if(iter
->length
>=0 && (iter
->length
-pos
)<(pos
-iter
->index
)) {
745 * if we have the UTF-16 index and length and the new position is
746 * closer to the end than the current index,
747 * then go backward from the end instead of forward from the current index
749 iter
->index
=iter
->length
;
750 iter
->start
=iter
->limit
;
751 iter
->reservedField
=0;
754 delta
=pos
-iter
->index
;
756 return iter
->index
; /* nothing to do */
759 /* move relative to unknown UTF-16 index */
761 return UITER_UNKNOWN_INDEX
; /* nothing to do */
762 } else if(-delta
>=iter
->start
) {
763 /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
764 iter
->index
=iter
->start
=iter
->reservedField
=0;
766 } else if(delta
>=(iter
->limit
-iter
->start
)) {
767 /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
768 iter
->index
=iter
->length
; /* may or may not be <0 (unknown) */
769 iter
->start
=iter
->limit
;
770 iter
->reservedField
=0;
771 return iter
->index
>=0 ? iter
->index
: (int32_t)UITER_UNKNOWN_INDEX
;
777 /* move towards the requested position, pin to the edges of the string */
778 s
=(const uint8_t *)iter
->context
;
779 pos
=iter
->index
; /* could be <0 (unknown) */
783 int32_t limit
=iter
->limit
;
784 if(iter
->reservedField
!=0) {
785 iter
->reservedField
=0;
789 while(delta
>0 && i
<limit
) {
790 U8_NEXT(s
, i
, limit
, c
);
794 } else if(delta
>=2) {
797 } else /* delta==1 */ {
798 /* stop in the middle of a supplementary code point */
799 iter
->reservedField
=c
;
801 break; /* delta=0; */
805 if(iter
->length
<0 && iter
->index
>=0) {
806 iter
->length
= iter
->reservedField
==0 ? pos
: pos
+1;
807 } else if(iter
->index
<0 && iter
->length
>=0) {
808 iter
->index
= iter
->reservedField
==0 ? iter
->length
: iter
->length
-1;
811 } else /* delta<0 */ {
813 if(iter
->reservedField
!=0) {
814 iter
->reservedField
=0;
815 i
-=4; /* we stayed behind the supplementary code point; go before it now */
819 while(delta
<0 && i
>0) {
824 } else if(delta
<=-2) {
827 } else /* delta==-1 */ {
828 /* stop in the middle of a supplementary code point */
829 i
+=4; /* back to behind this supplementary code point for consistent state */
830 iter
->reservedField
=c
;
832 break; /* delta=0; */
839 return iter
->index
=pos
;
841 /* we started with index<0 (unknown) so pos is bogus */
843 return iter
->index
=i
; /* reached the beginning */
845 /* we still don't know the UTF-16 index */
846 return UITER_UNKNOWN_INDEX
;
851 static UBool U_CALLCONV
852 utf8IteratorHasNext(UCharIterator
*iter
) {
853 return iter
->start
<iter
->limit
|| iter
->reservedField
!=0;
856 static UBool U_CALLCONV
857 utf8IteratorHasPrevious(UCharIterator
*iter
) {
858 return iter
->start
>0;
861 static UChar32 U_CALLCONV
862 utf8IteratorCurrent(UCharIterator
*iter
) {
863 if(iter
->reservedField
!=0) {
864 return U16_TRAIL(iter
->reservedField
);
865 } else if(iter
->start
<iter
->limit
) {
866 const uint8_t *s
=(const uint8_t *)iter
->context
;
868 int32_t i
=iter
->start
;
870 U8_NEXT(s
, i
, iter
->limit
, c
);
873 } else if(c
<=0xffff) {
883 static UChar32 U_CALLCONV
884 utf8IteratorNext(UCharIterator
*iter
) {
887 if(iter
->reservedField
!=0) {
888 UChar trail
=U16_TRAIL(iter
->reservedField
);
889 iter
->reservedField
=0;
890 if((index
=iter
->index
)>=0) {
894 } else if(iter
->start
<iter
->limit
) {
895 const uint8_t *s
=(const uint8_t *)iter
->context
;
898 U8_NEXT(s
, iter
->start
, iter
->limit
, c
);
899 if((index
=iter
->index
)>=0) {
901 if(iter
->length
<0 && iter
->start
==iter
->limit
) {
902 iter
->length
= c
<=0xffff ? index
: index
+1;
904 } else if(iter
->start
==iter
->limit
&& iter
->length
>=0) {
905 iter
->index
= c
<=0xffff ? iter
->length
: iter
->length
-1;
909 } else if(c
<=0xffff) {
912 iter
->reservedField
=c
;
920 static UChar32 U_CALLCONV
921 utf8IteratorPrevious(UCharIterator
*iter
) {
924 if(iter
->reservedField
!=0) {
925 UChar lead
=U16_LEAD(iter
->reservedField
);
926 iter
->reservedField
=0;
927 iter
->start
-=4; /* we stayed behind the supplementary code point; go before it now */
928 if((index
=iter
->index
)>0) {
932 } else if(iter
->start
>0) {
933 const uint8_t *s
=(const uint8_t *)iter
->context
;
936 U8_PREV(s
, 0, iter
->start
, c
);
937 if((index
=iter
->index
)>0) {
939 } else if(iter
->start
<=1) {
940 iter
->index
= c
<=0xffff ? iter
->start
: iter
->start
+1;
944 } else if(c
<=0xffff) {
947 iter
->start
+=4; /* back to behind this supplementary code point for consistent state */
948 iter
->reservedField
=c
;
956 static uint32_t U_CALLCONV
957 utf8IteratorGetState(const UCharIterator
*iter
) {
958 uint32_t state
=(uint32_t)(iter
->start
<<1);
959 if(iter
->reservedField
!=0) {
965 static void U_CALLCONV
966 utf8IteratorSetState(UCharIterator
*iter
,
968 UErrorCode
*pErrorCode
)
970 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
972 } else if(iter
==NULL
) {
973 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
974 } else if(state
==utf8IteratorGetState(iter
)) {
975 /* setting to the current state: no-op */
977 int32_t index
=(int32_t)(state
>>1); /* UTF-8 index */
978 state
&=1; /* 1 if in surrogate pair, must be index>=4 */
980 if((state
==0 ? index
<0 : index
<4) || iter
->limit
<index
) {
981 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
983 iter
->start
=index
; /* restore UTF-8 byte index */
987 iter
->index
=-1; /* unknown UTF-16 index */
990 iter
->reservedField
=0;
992 /* verified index>=4 above */
994 U8_PREV((const uint8_t *)iter
->context
, 0, index
, c
);
996 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
998 iter
->reservedField
=c
;
1005 static const UCharIterator utf8Iterator
={
1007 utf8IteratorGetIndex
,
1009 utf8IteratorHasNext
,
1010 utf8IteratorHasPrevious
,
1011 utf8IteratorCurrent
,
1013 utf8IteratorPrevious
,
1015 utf8IteratorGetState
,
1016 utf8IteratorSetState
1019 U_CAPI
void U_EXPORT2
1020 uiter_setUTF8(UCharIterator
*iter
, const char *s
, int32_t length
) {
1022 if(s
!=0 && length
>=-1) {
1028 iter
->limit
=(int32_t)uprv_strlen(s
);
1030 iter
->length
= iter
->limit
<=1 ? iter
->limit
: -1;
1037 /* Helper functions --------------------------------------------------------- */
1039 U_CAPI UChar32 U_EXPORT2
1040 uiter_current32(UCharIterator
*iter
) {
1043 c
=iter
->current(iter
);
1044 if(U16_IS_SURROGATE(c
)) {
1045 if(U16_IS_SURROGATE_LEAD(c
)) {
1047 * go to the next code unit
1048 * we know that we are not at the limit because c!=U_SENTINEL
1050 iter
->move(iter
, 1, UITER_CURRENT
);
1051 if(U16_IS_TRAIL(c2
=iter
->current(iter
))) {
1052 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1055 /* undo index movement */
1056 iter
->move(iter
, -1, UITER_CURRENT
);
1058 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1059 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1062 /* undo index movement */
1063 iter
->move(iter
, 1, UITER_CURRENT
);
1070 U_CAPI UChar32 U_EXPORT2
1071 uiter_next32(UCharIterator
*iter
) {
1075 if(U16_IS_LEAD(c
)) {
1076 if(U16_IS_TRAIL(c2
=iter
->next(iter
))) {
1077 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1079 /* unmatched first surrogate, undo index movement */
1080 iter
->move(iter
, -1, UITER_CURRENT
);
1086 U_CAPI UChar32 U_EXPORT2
1087 uiter_previous32(UCharIterator
*iter
) {
1090 c
=iter
->previous(iter
);
1091 if(U16_IS_TRAIL(c
)) {
1092 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1093 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1095 /* unmatched second surrogate, undo index movement */
1096 iter
->move(iter
, 1, UITER_CURRENT
);
1102 U_CAPI
uint32_t U_EXPORT2
1103 uiter_getState(const UCharIterator
*iter
) {
1104 if(iter
==NULL
|| iter
->getState
==NULL
) {
1105 return UITER_NO_STATE
;
1107 return iter
->getState(iter
);
1111 U_CAPI
void U_EXPORT2
1112 uiter_setState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
1113 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1115 } else if(iter
==NULL
) {
1116 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1117 } else if(iter
->setState
==NULL
) {
1118 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1120 iter
->setState(iter
, state
, pErrorCode
);