2 *******************************************************************************
4 * Copyright (C) 2002-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002jan18
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/ustring.h"
19 #include "unicode/chariter.h"
20 #include "unicode/rep.h"
21 #include "unicode/uiter.h"
22 #include "unicode/utf.h"
23 #include "unicode/utf8.h"
24 #include "unicode/utf16.h"
/* True when the integer value n is even, i.e. its lowest bit is clear. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * True when the address held by pointer p is even (2-byte aligned).
 * The argument is parenthesized before the cast so that an expression such
 * as `ptr + 1` is converted as a whole; without the parentheses the cast
 * would bind to `ptr` alone and the `+ 1` would be added to the integer.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
34 /* No-Op UCharIterator implementation for illegal input --------------------- */
/*
 * getIndex callback for the no-op iterator.
 * Both parameters are left unnamed, so nothing is read from the iterator
 * or from the requested origin.
 * NOTE(review): presumably returns a constant index; confirm against the body.
 */
36 static int32_t U_CALLCONV
37 noopGetIndex(UCharIterator
* /*iter*/, UCharIteratorOrigin
/*origin*/) {
/*
 * move callback for the no-op iterator.
 * All three parameters are unnamed, so the iterator, delta and origin are ignored.
 * NOTE(review): presumably returns a constant index; confirm against the body.
 */
41 static int32_t U_CALLCONV
42 noopMove(UCharIterator
* /*iter*/, int32_t /*delta*/, UCharIteratorOrigin
/*origin*/) {
/*
 * Boolean query callback for the no-op iterator; the iterator argument is
 * unnamed and therefore ignored.
 * NOTE(review): presumably always reports "no more text"; confirm against the body.
 */
46 static UBool U_CALLCONV
47 noopHasNext(UCharIterator
* /*iter*/) {
/*
 * Code-unit callback for the no-op iterator; the iterator argument is
 * unnamed and therefore ignored.
 * NOTE(review): presumably returns the end-of-text sentinel; confirm against the body.
 */
51 static UChar32 U_CALLCONV
52 noopCurrent(UCharIterator
* /*iter*/) {
/*
 * getState callback for the no-op iterator.
 * Ignores the iterator and always reports UITER_NO_STATE.
 */
56 static uint32_t U_CALLCONV
57 noopGetState(const UCharIterator
* /*iter*/) {
58 return UITER_NO_STATE
;
/*
 * setState callback for the no-op iterator.
 * Ignores the iterator and the state value and reports U_UNSUPPORTED_ERROR
 * through pErrorCode. pErrorCode is dereferenced directly, so it must not be NULL.
 */
61 static void U_CALLCONV
62 noopSetState(UCharIterator
* /*iter*/, uint32_t /*state*/, UErrorCode
*pErrorCode
) {
63 *pErrorCode
=U_UNSUPPORTED_ERROR
;
/*
 * Constant UCharIterator template for the no-op iterator.
 * NOTE(review): presumably initialized with the noop* callbacks above and
 * copied into caller iterators for illegal input; confirm against the initializer.
 */
66 static const UCharIterator noopIterator
={
80 /* UCharIterator implementation for simple strings -------------------------- */
83 * This is an implementation of a code unit (UChar) iterator
84 * for UChar * strings.
86 * The UCharIterator.context field holds a pointer to the string.
/*
 * getIndex callback for the UChar string iterator.
 * iter:   the iterator being queried.
 * origin: selects which index is requested.
 * The trailing comments mark a fallback path for an unrecognized origin.
 * NOTE(review): presumably maps each origin to the matching iterator field
 * (start/index/limit/length); confirm against the switch.
 */
89 static int32_t U_CALLCONV
90 stringIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
103 /* not a valid origin */
104 /* Should never get here! */
/*
 * move callback for the UChar string iterator.
 * Computes a target position as `delta` relative to one of iter->start,
 * iter->index, iter->limit or iter->length, returns -1 for an origin that
 * matches none of them, compares the target against iter->start and
 * iter->limit, and finally stores it in iter->index and returns it.
 * NOTE(review): the comparisons presumably pin the position into
 * [start, limit]; confirm against the branch bodies.
 */
109 static int32_t U_CALLCONV
110 stringIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
/* candidate position relative to the start of the iteration range */
118 pos
=iter
->start
+delta
;
/* candidate position relative to the current index */
121 pos
=iter
->index
+delta
;
/* candidate position relative to the limit of the iteration range */
124 pos
=iter
->limit
+delta
;
/* candidate position relative to the total length */
127 pos
=iter
->length
+delta
;
130 return -1; /* Error */
/* range checks on the candidate position */
133 if(pos
<iter
->start
) {
135 } else if(pos
>iter
->limit
) {
/* commit the new index and report it to the caller */
139 return iter
->index
=pos
;
/*
 * hasNext callback: true while the current index is before the limit.
 */
142 static UBool U_CALLCONV
143 stringIteratorHasNext(UCharIterator
*iter
) {
144 return iter
->index
<iter
->limit
;
/*
 * hasPrevious callback: true while the current index is after the start.
 */
147 static UBool U_CALLCONV
148 stringIteratorHasPrevious(UCharIterator
*iter
) {
149 return iter
->index
>iter
->start
;
/*
 * current callback: when index < limit, returns the UChar at iter->index
 * from the string stored in iter->context, without moving the index.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
152 static UChar32 U_CALLCONV
153 stringIteratorCurrent(UCharIterator
*iter
) {
154 if(iter
->index
<iter
->limit
) {
155 return ((const UChar
*)(iter
->context
))[iter
->index
];
/*
 * next callback: when index < limit, returns the UChar at iter->index and
 * post-increments the index.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
161 static UChar32 U_CALLCONV
162 stringIteratorNext(UCharIterator
*iter
) {
163 if(iter
->index
<iter
->limit
) {
164 return ((const UChar
*)(iter
->context
))[iter
->index
++];
/*
 * previous callback: when index > start, pre-decrements the index and
 * returns the UChar at the new index.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
170 static UChar32 U_CALLCONV
171 stringIteratorPrevious(UCharIterator
*iter
) {
172 if(iter
->index
>iter
->start
) {
173 return ((const UChar
*)(iter
->context
))[--iter
->index
];
/*
 * getState callback: the saved state is simply the current index,
 * converted to uint32_t.
 */
179 static uint32_t U_CALLCONV
180 stringIteratorGetState(const UCharIterator
*iter
) {
181 return (uint32_t)iter
->index
;
/*
 * setState callback: restores an index previously produced by
 * stringIteratorGetState().
 * Checks, in order:
 *   - pErrorCode is NULL or already holds a failure
 *     (NOTE(review): presumably does nothing in that case; confirm);
 *   - iter is NULL                        -> U_ILLEGAL_ARGUMENT_ERROR;
 *   - state (read as int32_t) lies outside [start, limit]
 *                                         -> U_INDEX_OUTOFBOUNDS_ERROR;
 *   - otherwise iter->index is set to the state value.
 */
184 static void U_CALLCONV
185 stringIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
186 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
188 } else if(iter
==NULL
) {
189 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
190 } else if((int32_t)state
<iter
->start
|| iter
->limit
<(int32_t)state
) {
191 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
193 iter
->index
=(int32_t)state
;
/*
 * Constant UCharIterator template for UChar strings; uiter_setString()
 * copies it into the caller's iterator.
 * The listed callbacks are the stringIterator* functions defined above.
 * NOTE(review): the data-field initializers and any further callback slots
 * should be checked against the UCharIterator struct layout.
 */
197 static const UCharIterator stringIterator
={
199 stringIteratorGetIndex
,
201 stringIteratorHasNext
,
202 stringIteratorHasPrevious
,
203 stringIteratorCurrent
,
205 stringIteratorPrevious
,
207 stringIteratorGetState
,
208 stringIteratorSetState
/*
 * Sets up iter to iterate over the UChar string s.
 * iter:   iterator structure to initialize.
 * s:      the string; must be non-null for the string iterator to be used.
 * length: number of UChars, must be >= -1.
 * When the arguments pass the check, the stringIterator template is copied
 * into *iter, the length may be obtained with u_strlen(s), and limit is set
 * equal to length.
 * NOTE(review): u_strlen() is presumably used only when length is -1
 * (NUL-terminated input), and invalid arguments presumably select the
 * no-op iterator; confirm against the full body.
 */
211 U_CAPI
void U_EXPORT2
212 uiter_setString(UCharIterator
*iter
, const UChar
*s
, int32_t length
) {
214 if(s
!=0 && length
>=-1) {
215 *iter
=stringIterator
;
220 iter
->length
=u_strlen(s
);
222 iter
->limit
=iter
->length
;
229 /* UCharIterator implementation for UTF-16BE strings ------------------------ */
232 * This is an implementation of a code unit (UChar) iterator
233 * for UTF-16BE strings, i.e., strings in byte-vectors where
234 * each UChar is stored as a big-endian pair of bytes.
236 * The UCharIterator.context field holds a pointer to the string.
237 * Everything works just like with a normal UChar iterator (uiter_setString),
238 * except that UChars are assembled from byte pairs.
/*
 * Reads one UChar from the UTF-16BE byte string in iter->context.
 * index counts UChars, so the bytes used are at offsets 2*index (high byte)
 * and 2*index+1 (low byte). No bounds check is done here.
 */
241 /* internal helper function */
242 static inline UChar32
243 utf16BEIteratorGet(UCharIterator
*iter
, int32_t index
) {
244 const uint8_t *p
=(const uint8_t *)iter
->context
;
245 return ((UChar
)p
[2*index
]<<8)|(UChar
)p
[2*index
+1];
/*
 * current callback for UTF-16BE: when index < limit, returns the UChar
 * assembled at the current index, without moving.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
248 static UChar32 U_CALLCONV
249 utf16BEIteratorCurrent(UCharIterator
*iter
) {
252 if((index
=iter
->index
)<iter
->limit
) {
253 return utf16BEIteratorGet(iter
, index
);
/*
 * next callback for UTF-16BE: when index < limit, returns the UChar
 * assembled at the index captured in the local `index`.
 * NOTE(review): iter->index is presumably advanced by one before the
 * return; confirm against the statement between the test and the return.
 */
259 static UChar32 U_CALLCONV
260 utf16BEIteratorNext(UCharIterator
*iter
) {
263 if((index
=iter
->index
)<iter
->limit
) {
265 return utf16BEIteratorGet(iter
, index
);
/*
 * previous callback for UTF-16BE: when index > start, returns the UChar
 * assembled at the local `index`.
 * NOTE(review): the index is presumably decremented (both locally and in
 * iter->index) before the read; confirm against the statement between the
 * test and the return.
 */
271 static UChar32 U_CALLCONV
272 utf16BEIteratorPrevious(UCharIterator
*iter
) {
275 if((index
=iter
->index
)>iter
->start
) {
277 return utf16BEIteratorGet(iter
, index
);
/*
 * Constant UCharIterator template for UTF-16BE byte strings.
 * Index bookkeeping and state handling reuse the stringIterator* callbacks;
 * only the callbacks that read code units are UTF-16BE specific.
 * NOTE(review): the data-field initializers and any further callback slots
 * should be checked against the UCharIterator struct layout.
 */
283 static const UCharIterator utf16BEIterator
={
285 stringIteratorGetIndex
,
287 stringIteratorHasNext
,
288 stringIteratorHasPrevious
,
289 utf16BEIteratorCurrent
,
291 utf16BEIteratorPrevious
,
293 stringIteratorGetState
,
294 stringIteratorSetState
298 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
299 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
/*
 * Returns the number of UChars before the terminating UChar NUL.
 * Even-aligned input is handed to u_strlen(); odd-aligned input is scanned
 * for a pair of zero bytes and the byte distance is halved.
 * NOTE(review): the scan presumably advances two bytes per step so that
 * only pairs at even offsets from s are tested; confirm against the loop body.
 */
303 utf16BE_strlen(const char *s
) {
304 if(IS_POINTER_EVEN(s
)) {
306 * even-aligned, call u_strlen(s)
307 * we are probably on a little-endian machine, but searching for UChar NUL
308 * does not care about endianness
310 return u_strlen((const UChar
*)s
);
312 /* odd-aligned, search for pair of 0 bytes */
315 while(!(*p
==0 && p
[1]==0)) {
318 return (int32_t)((p
-s
)/2);
/*
 * Sets up iter to iterate over a UTF-16BE byte string.
 * iter:   iterator structure to initialize.
 * s:      the byte string; must be non-null.
 * length: byte count; accepted values are -1 or a non-negative even number.
 * On a big-endian machine with a 2-aligned string the work is delegated to
 * uiter_setString(); otherwise the utf16BEIterator template is copied into
 * *iter, the length may be obtained with utf16BE_strlen(s), and limit is
 * set equal to length.
 * NOTE(review): utf16BE_strlen() is presumably used only for length -1, and
 * rejected arguments presumably select the no-op iterator; confirm against
 * the full body.
 */
322 U_CAPI
void U_EXPORT2
323 uiter_setUTF16BE(UCharIterator
*iter
, const char *s
, int32_t length
) {
325 /* allow only even-length strings (the input length counts bytes) */
326 if(s
!=NULL
&& (length
==-1 || (length
>=0 && IS_EVEN(length
)))) {
327 /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
330 if(U_IS_BIG_ENDIAN
&& IS_POINTER_EVEN(s
)) {
331 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
332 uiter_setString(iter
, (const UChar
*)s
, length
);
336 *iter
=utf16BEIterator
;
341 iter
->length
=utf16BE_strlen(s
);
343 iter
->limit
=iter
->length
;
350 /* UCharIterator wrapper around CharacterIterator --------------------------- */
353 * This is wrapper code around a C++ CharacterIterator to
354 * look like a C UCharIterator.
356 * The UCharIterator.context field holds a pointer to the CharacterIterator.
/*
 * getIndex callback for the CharacterIterator wrapper.
 * Forwards to the wrapped CharacterIterator held in iter->context:
 * startIndex(), getIndex(), endIndex() or getLength().
 * NOTE(review): which origin selects which call is decided by a switch on
 * `origin`; confirm the mapping against the case labels.
 */
359 static int32_t U_CALLCONV
360 characterIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
365 return ((CharacterIterator
*)(iter
->context
))->startIndex();
367 return ((CharacterIterator
*)(iter
->context
))->getIndex();
369 return ((CharacterIterator
*)(iter
->context
))->endIndex();
371 return ((CharacterIterator
*)(iter
->context
))->getLength();
373 /* not a valid origin */
374 /* Should never get here! */
/*
 * move callback for the CharacterIterator wrapper.
 * Three strategies appear, all acting on the wrapped iterator in iter->context:
 *   - setIndex(delta) followed by getIndex();
 *   - move(delta, origin), with the origin cast to CharacterIterator::EOrigin;
 *   - setIndex(getLength()+delta) followed by getIndex().
 * NOTE(review): the origin that selects each strategy is decided by a
 * switch on `origin`; confirm the mapping against the case labels.
 */
379 static int32_t U_CALLCONV
380 characterIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
383 ((CharacterIterator
*)(iter
->context
))->setIndex(delta
);
384 return ((CharacterIterator
*)(iter
->context
))->getIndex();
388 return ((CharacterIterator
*)(iter
->context
))->move(delta
, (CharacterIterator::EOrigin
)origin
);
390 ((CharacterIterator
*)(iter
->context
))->setIndex(((CharacterIterator
*)(iter
->context
))->getLength()+delta
);
391 return ((CharacterIterator
*)(iter
->context
))->getIndex();
393 /* not a valid origin */
394 /* Should never get here! */
/*
 * hasNext callback: forwards to hasNext() of the wrapped CharacterIterator.
 */
399 static UBool U_CALLCONV
400 characterIteratorHasNext(UCharIterator
*iter
) {
401 return ((CharacterIterator
*)(iter
->context
))->hasNext();
/*
 * hasPrevious callback: forwards to hasPrevious() of the wrapped CharacterIterator.
 */
404 static UBool U_CALLCONV
405 characterIteratorHasPrevious(UCharIterator
*iter
) {
406 return ((CharacterIterator
*)(iter
->context
))->hasPrevious();
/*
 * current callback for the CharacterIterator wrapper.
 * Reads current() from the wrapped iterator. The value 0xffff is ambiguous,
 * so it is accepted as a real code unit only when hasNext() is also true.
 * NOTE(review): presumably returns c in the accepted case and the
 * end-of-text sentinel otherwise; confirm against the branch bodies.
 */
409 static UChar32 U_CALLCONV
410 characterIteratorCurrent(UCharIterator
*iter
) {
413 c
=((CharacterIterator
*)(iter
->context
))->current();
414 if(c
!=0xffff || ((CharacterIterator
*)(iter
->context
))->hasNext()) {
/*
 * next callback: when the wrapped iterator has more text, returns
 * nextPostInc() from it.
 * NOTE(review): the no-more-text path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
421 static UChar32 U_CALLCONV
422 characterIteratorNext(UCharIterator
*iter
) {
423 if(((CharacterIterator
*)(iter
->context
))->hasNext()) {
424 return ((CharacterIterator
*)(iter
->context
))->nextPostInc();
/*
 * previous callback: when the wrapped iterator has preceding text, returns
 * previous() from it.
 * NOTE(review): the no-previous-text path presumably returns the
 * end-of-text sentinel; confirm against the else branch.
 */
430 static UChar32 U_CALLCONV
431 characterIteratorPrevious(UCharIterator
*iter
) {
432 if(((CharacterIterator
*)(iter
->context
))->hasPrevious()) {
433 return ((CharacterIterator
*)(iter
->context
))->previous();
/*
 * getState callback: the saved state is getIndex() of the wrapped
 * CharacterIterator, implicitly converted to uint32_t.
 */
439 static uint32_t U_CALLCONV
440 characterIteratorGetState(const UCharIterator
*iter
) {
441 return ((CharacterIterator
*)(iter
->context
))->getIndex();
/*
 * setState callback for the CharacterIterator wrapper.
 * Checks, in order:
 *   - pErrorCode is NULL or already holds a failure
 *     (NOTE(review): presumably does nothing in that case; confirm);
 *   - iter or iter->context is NULL       -> U_ILLEGAL_ARGUMENT_ERROR;
 *   - state (read as int32_t) lies outside [startIndex(), endIndex()]
 *                                         -> U_INDEX_OUTOFBOUNDS_ERROR;
 *   - otherwise setIndex(state) is called on the wrapped iterator.
 */
444 static void U_CALLCONV
445 characterIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
446 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
448 } else if(iter
==NULL
|| iter
->context
==NULL
) {
449 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
450 } else if((int32_t)state
<((CharacterIterator
*)(iter
->context
))->startIndex() || ((CharacterIterator
*)(iter
->context
))->endIndex()<(int32_t)state
) {
451 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
453 ((CharacterIterator
*)(iter
->context
))->setIndex((int32_t)state
);
/*
 * Constant UCharIterator template whose callbacks all forward to a C++
 * CharacterIterator; uiter_setCharacterIterator() copies it and then fills
 * in the context pointer.
 * NOTE(review): the data-field initializers and any reserved callback slot
 * should be checked against the UCharIterator struct layout.
 */
457 static const UCharIterator characterIteratorWrapper
={
459 characterIteratorGetIndex
,
460 characterIteratorMove
,
461 characterIteratorHasNext
,
462 characterIteratorHasPrevious
,
463 characterIteratorCurrent
,
464 characterIteratorNext
,
465 characterIteratorPrevious
,
467 characterIteratorGetState
,
468 characterIteratorSetState
/*
 * Sets up iter as a wrapper around the C++ CharacterIterator charIter:
 * copies the characterIteratorWrapper template into *iter and stores
 * charIter in iter->context. The pointer is stored, not copied, so charIter
 * must stay valid while iter is in use.
 * NOTE(review): arguments are presumably validated first, with a fallback
 * for a null charIter; confirm against the full body.
 */
471 U_CAPI
void U_EXPORT2
472 uiter_setCharacterIterator(UCharIterator
*iter
, CharacterIterator
*charIter
) {
475 *iter
=characterIteratorWrapper
;
476 iter
->context
=charIter
;
483 /* UCharIterator wrapper around Replaceable --------------------------------- */
486 * This is an implementation of a code unit (UChar) iterator
487 * based on a Replaceable object.
489 * The UCharIterator.context field holds a pointer to the Replaceable.
490 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
491 * and the iteration index.
/*
 * current callback for the Replaceable iterator: when index < limit,
 * returns charAt(index) of the Replaceable in iter->context, without moving.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
494 static UChar32 U_CALLCONV
495 replaceableIteratorCurrent(UCharIterator
*iter
) {
496 if(iter
->index
<iter
->limit
) {
497 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
);
/*
 * next callback for the Replaceable iterator: when index < limit, returns
 * charAt(index) and post-increments the index.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
503 static UChar32 U_CALLCONV
504 replaceableIteratorNext(UCharIterator
*iter
) {
505 if(iter
->index
<iter
->limit
) {
506 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
++);
/*
 * previous callback for the Replaceable iterator: when index > start,
 * pre-decrements the index and returns charAt() at the new index.
 * NOTE(review): the out-of-range path presumably returns the end-of-text
 * sentinel; confirm against the else branch.
 */
512 static UChar32 U_CALLCONV
513 replaceableIteratorPrevious(UCharIterator
*iter
) {
514 if(iter
->index
>iter
->start
) {
515 return ((Replaceable
*)(iter
->context
))->charAt(--iter
->index
);
/*
 * Constant UCharIterator template for Replaceable objects.
 * Index bookkeeping and state handling reuse the stringIterator* callbacks;
 * the three code-unit readers go through Replaceable::charAt().
 * NOTE(review): the data-field initializers and any further callback slots
 * should be checked against the UCharIterator struct layout.
 */
521 static const UCharIterator replaceableIterator
={
523 stringIteratorGetIndex
,
525 stringIteratorHasNext
,
526 stringIteratorHasPrevious
,
527 replaceableIteratorCurrent
,
528 replaceableIteratorNext
,
529 replaceableIteratorPrevious
,
531 stringIteratorGetState
,
532 stringIteratorSetState
/*
 * Sets up iter to iterate over the Replaceable object rep: copies the
 * replaceableIterator template into *iter and sets both limit and length to
 * rep->length(). The length is sampled once here, at setup time.
 * NOTE(review): arguments are presumably validated first and the context
 * pointer set to rep; confirm against the full body.
 */
535 U_CAPI
void U_EXPORT2
536 uiter_setReplaceable(UCharIterator
*iter
, const Replaceable
*rep
) {
539 *iter
=replaceableIterator
;
541 iter
->limit
=iter
->length
=rep
->length();
548 /* UCharIterator implementation for UTF-8 strings --------------------------- */
551 * Possible, probably necessary only for an implementation for arbitrary
553 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
554 * This would require turning reservedFn into a close function and
555 * introducing a uiter_close(iter).
/*
 * NOTE(review): presumably the capacity of the converted-text buffer
 * sketched in the comment above; no use of this macro was found during
 * review, so confirm whether it is still needed.
 */
558 #define UITER_CNV_CAPACITY 16
561 * Minimal implementation:
562 * Maintain a single-UChar buffer for an additional surrogate.
563 * The caller must not modify start and limit because they are used internally.
565 * Use UCharIterator fields as follows:
566 * context pointer to UTF-8 string
567 * length UTF-16 length of the string; -1 until lazy evaluation
568 * start current UTF-8 index
569 * index current UTF-16 index; may be -1="unknown" after setState()
570 * limit UTF-8 length of the string
571 * reservedField supplementary code point
573 * Since UCharIterator delivers 16-bit code units, the iteration can be
574 * currently in the middle of the byte sequence for a supplementary code point.
575 * In this case, reservedField will contain that code point and start will
576 * point to after the corresponding byte sequence. The UTF-16 index will be
577 * one less than what it would otherwise be corresponding to the UTF-8 index.
578 * Otherwise, reservedField will be 0.
582 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
583 * Add implementations that do not call strlen() for iteration but check for NUL.
/*
 * getIndex callback for the UTF-8 iterator; results are UTF-16 indexes.
 * Where the UTF-16 index or length is not yet known, it is derived by
 * decoding the UTF-8 bytes with U8_NEXT_OR_FFFD and adding U16_LENGTH(c)
 * for each code point. Results are cached in iter->index / iter->length,
 * and iter->start is re-synchronized to the decoded byte offset.
 * A non-zero iter->reservedField means the iterator stands between the two
 * surrogates of a supplementary code point, so the UTF-16 index is one less
 * than the count up to iter->start.
 * NOTE(review): the switch on `origin` and the loop headers should be
 * checked to confirm which section serves which origin.
 */
586 static int32_t U_CALLCONV
587 utf8IteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
594 /* the current UTF-16 index is unknown after setState(), count from the beginning */
597 int32_t i
, limit
, index
;
599 s
=(const uint8_t *)iter
->context
;
601 limit
=iter
->start
; /* count up to the UTF-8 index */
603 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
604 index
+=U16_LENGTH(c
);
607 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
609 iter
->length
=index
; /* in case it was <0 or wrong */
611 if(iter
->reservedField
!=0) {
612 --index
; /* we are in the middle of a supplementary code point */
/* second section: computing the UTF-16 length */
622 int32_t i
, limit
, length
;
624 s
=(const uint8_t *)iter
->context
;
627 * the current UTF-16 index is unknown after setState(),
628 * we must first count from the beginning to here
633 /* count from the beginning to the current index */
635 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
636 length
+=U16_LENGTH(c
);
639 /* assume i==limit==iter->start, set the UTF-16 index */
640 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
641 iter
->index
= iter
->reservedField
!=0 ? length
-1 : length
;
645 if(iter
->reservedField
!=0) {
650 /* count from the current index to the end */
653 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
654 length
+=U16_LENGTH(c
);
660 /* not a valid origin */
661 /* Should never get here! */
/*
 * move callback for the UTF-8 iterator; delta and the result are in UTF-16
 * code units, while iter->start tracks the UTF-8 byte offset.
 * Outline:
 *   1. Compute the requested UTF-16 position from origin and delta. When
 *      the UTF-16 index or length is unknown, only a relative delta is
 *      available.
 *   2. Shortcuts: pin to the beginning or end of the string without
 *      decoding, and choose whichever known anchor (start of string,
 *      current index, end of string) is closest to the target.
 *   3. Walk the remaining delta by decoding code points forward
 *      (U8_NEXT_OR_FFFD) or backward (U8_PREV_OR_FFFD). A walk that ends
 *      between the surrogates of a supplementary code point stores that
 *      code point in iter->reservedField and leaves the byte offset after
 *      its byte sequence.
 * Returns the new UTF-16 index, UITER_UNKNOWN_INDEX when it cannot be
 * determined, or -1 for an origin that is not recognized.
 * NOTE(review): several branch bodies and the switch on `origin` should be
 * checked to confirm the exact mapping of the steps above.
 */
666 static int32_t U_CALLCONV
667 utf8IteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
670 int32_t pos
; /* requested UTF-16 index */
671 int32_t i
; /* UTF-8 index */
674 /* calculate the requested UTF-16 index */
680 /* iter->index<0 (unknown) is possible */
684 pos
=iter
->index
+delta
;
687 /* the current UTF-16 index is unknown after setState(), use only delta */
694 if(iter
->length
>=0) {
695 pos
=iter
->length
+delta
;
698 /* pin to the end, avoid counting the length */
700 iter
->start
=iter
->limit
;
701 iter
->reservedField
=0;
703 return UITER_UNKNOWN_INDEX
;
705 /* the current UTF-16 index is unknown, use only delta */
712 return -1; /* Error */
716 /* shortcuts: pinning to the edges of the string */
718 iter
->index
=iter
->start
=iter
->reservedField
=0;
720 } else if(iter
->length
>=0 && pos
>=iter
->length
) {
721 iter
->index
=iter
->length
;
722 iter
->start
=iter
->limit
;
723 iter
->reservedField
=0;
727 /* minimize the number of U8_NEXT/PREV operations */
728 if(iter
->index
<0 || pos
<iter
->index
/2) {
729 /* go forward from the start instead of backward from the current index */
730 iter
->index
=iter
->start
=iter
->reservedField
=0;
731 } else if(iter
->length
>=0 && (iter
->length
-pos
)<(pos
-iter
->index
)) {
733 * if we have the UTF-16 index and length and the new position is
734 * closer to the end than the current index,
735 * then go backward from the end instead of forward from the current index
737 iter
->index
=iter
->length
;
738 iter
->start
=iter
->limit
;
739 iter
->reservedField
=0;
/* from here on delta is relative to the anchor chosen above */
742 delta
=pos
-iter
->index
;
744 return iter
->index
; /* nothing to do */
747 /* move relative to unknown UTF-16 index */
749 return UITER_UNKNOWN_INDEX
; /* nothing to do */
750 } else if(-delta
>=iter
->start
) {
751 /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
752 iter
->index
=iter
->start
=iter
->reservedField
=0;
754 } else if(delta
>=(iter
->limit
-iter
->start
)) {
755 /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
756 iter
->index
=iter
->length
; /* may or may not be <0 (unknown) */
757 iter
->start
=iter
->limit
;
758 iter
->reservedField
=0;
759 return iter
->index
>=0 ? iter
->index
: (int32_t)UITER_UNKNOWN_INDEX
;
765 /* move towards the requested position, pin to the edges of the string */
766 s
=(const uint8_t *)iter
->context
;
767 pos
=iter
->index
; /* could be <0 (unknown) */
/* forward walk */
771 int32_t limit
=iter
->limit
;
772 if(iter
->reservedField
!=0) {
773 iter
->reservedField
=0;
777 while(delta
>0 && i
<limit
) {
778 U8_NEXT_OR_FFFD(s
, i
, limit
, c
);
782 } else if(delta
>=2) {
785 } else /* delta==1 */ {
786 /* stop in the middle of a supplementary code point */
787 iter
->reservedField
=c
;
789 break; /* delta=0; */
/* fill in whichever of length/index was unknown, based on the one that is known */
793 if(iter
->length
<0 && iter
->index
>=0) {
794 iter
->length
= iter
->reservedField
==0 ? pos
: pos
+1;
795 } else if(iter
->index
<0 && iter
->length
>=0) {
796 iter
->index
= iter
->reservedField
==0 ? iter
->length
: iter
->length
-1;
799 } else /* delta<0 */ {
801 if(iter
->reservedField
!=0) {
802 iter
->reservedField
=0;
803 i
-=4; /* we stayed behind the supplementary code point; go before it now */
807 while(delta
<0 && i
>0) {
808 U8_PREV_OR_FFFD(s
, 0, i
, c
);
812 } else if(delta
<=-2) {
815 } else /* delta==-1 */ {
816 /* stop in the middle of a supplementary code point */
817 i
+=4; /* back to behind this supplementary code point for consistent state */
818 iter
->reservedField
=c
;
820 break; /* delta=0; */
827 return iter
->index
=pos
;
829 /* we started with index<0 (unknown) so pos is bogus */
831 return iter
->index
=i
; /* reached the beginning */
833 /* we still don't know the UTF-16 index */
834 return UITER_UNKNOWN_INDEX
;
/*
 * hasNext callback for the UTF-8 iterator: true when bytes remain before
 * the limit, or when a supplementary code point is pending in reservedField
 * (its trail surrogate has not been delivered yet).
 */
839 static UBool U_CALLCONV
840 utf8IteratorHasNext(UCharIterator
*iter
) {
841 return iter
->start
<iter
->limit
|| iter
->reservedField
!=0;
/*
 * hasPrevious callback for the UTF-8 iterator: true when the UTF-8 byte
 * offset (iter->start) is past the beginning of the string.
 */
844 static UBool U_CALLCONV
845 utf8IteratorHasPrevious(UCharIterator
*iter
) {
846 return iter
->start
>0;
/*
 * current callback for the UTF-8 iterator.
 * With a supplementary code point pending in reservedField, the current
 * code unit is its trail surrogate. Otherwise, when bytes remain, the next
 * code point is decoded from a local copy of the byte offset, so the
 * iterator itself does not move.
 * NOTE(review): a decoded supplementary code point presumably yields its
 * lead surrogate, and the end of the string the end-of-text sentinel;
 * confirm against the remaining branches.
 */
849 static UChar32 U_CALLCONV
850 utf8IteratorCurrent(UCharIterator
*iter
) {
851 if(iter
->reservedField
!=0) {
852 return U16_TRAIL(iter
->reservedField
);
853 } else if(iter
->start
<iter
->limit
) {
854 const uint8_t *s
=(const uint8_t *)iter
->context
;
856 int32_t i
=iter
->start
;
858 U8_NEXT_OR_FFFD(s
, i
, iter
->limit
, c
);
/*
 * next callback for the UTF-8 iterator.
 * With a supplementary code point pending in reservedField, its trail
 * surrogate is computed and reservedField is cleared. Otherwise, when bytes
 * remain, one code point is decoded with U8_NEXT_OR_FFFD, which advances
 * iter->start. When the decode reaches the limit, whichever of iter->length
 * and iter->index is still unknown is derived from the other; a
 * supplementary code point (c > 0xffff) accounts for one extra UTF-16 unit.
 * A decoded supplementary code point is stored in reservedField.
 * NOTE(review): presumably returns the lead surrogate first and the trail
 * on the following call, updating iter->index each time; confirm against
 * the return statements.
 */
869 static UChar32 U_CALLCONV
870 utf8IteratorNext(UCharIterator
*iter
) {
873 if(iter
->reservedField
!=0) {
874 UChar trail
=U16_TRAIL(iter
->reservedField
);
875 iter
->reservedField
=0;
876 if((index
=iter
->index
)>=0) {
880 } else if(iter
->start
<iter
->limit
) {
881 const uint8_t *s
=(const uint8_t *)iter
->context
;
884 U8_NEXT_OR_FFFD(s
, iter
->start
, iter
->limit
, c
);
885 if((index
=iter
->index
)>=0) {
887 if(iter
->length
<0 && iter
->start
==iter
->limit
) {
888 iter
->length
= c
<=0xffff ? index
: index
+1;
890 } else if(iter
->start
==iter
->limit
&& iter
->length
>=0) {
891 iter
->index
= c
<=0xffff ? iter
->length
: iter
->length
-1;
896 iter
->reservedField
=c
;
/*
 * previous callback for the UTF-8 iterator.
 * With a supplementary code point pending in reservedField, its lead
 * surrogate is computed, reservedField is cleared and iter->start moves
 * back by 4 bytes, to before that code point's byte sequence. Otherwise,
 * when start > 0, one code point is decoded backward with U8_PREV_OR_FFFD,
 * which moves iter->start back. When a supplementary code point is decoded,
 * iter->start is moved forward by 4 again and the code point is stored in
 * reservedField, matching the "between surrogates" state used elsewhere.
 * NOTE(review): presumably returns the trail surrogate first and the lead
 * on the following call, updating iter->index each time; confirm against
 * the return statements.
 */
904 static UChar32 U_CALLCONV
905 utf8IteratorPrevious(UCharIterator
*iter
) {
908 if(iter
->reservedField
!=0) {
909 UChar lead
=U16_LEAD(iter
->reservedField
);
910 iter
->reservedField
=0;
911 iter
->start
-=4; /* we stayed behind the supplementary code point; go before it now */
912 if((index
=iter
->index
)>0) {
916 } else if(iter
->start
>0) {
917 const uint8_t *s
=(const uint8_t *)iter
->context
;
920 U8_PREV_OR_FFFD(s
, 0, iter
->start
, c
);
921 if((index
=iter
->index
)>0) {
923 } else if(iter
->start
<=1) {
924 iter
->index
= c
<=0xffff ? iter
->start
: iter
->start
+1;
929 iter
->start
+=4; /* back to behind this supplementary code point for consistent state */
930 iter
->reservedField
=c
;
/*
 * getState callback for the UTF-8 iterator.
 * The state holds the UTF-8 byte offset (iter->start) shifted left by one,
 * which leaves bit 0 free; utf8IteratorSetState() reads bit 0 as the
 * "inside a surrogate pair" flag.
 * NOTE(review): bit 0 is presumably set here when reservedField is
 * non-zero; confirm against the branch body.
 */
938 static uint32_t U_CALLCONV
939 utf8IteratorGetState(const UCharIterator
*iter
) {
940 uint32_t state
=(uint32_t)(iter
->start
<<1);
941 if(iter
->reservedField
!=0) {
/*
 * setState callback for the UTF-8 iterator.
 * Checks, in order:
 *   - pErrorCode is NULL or already holds a failure
 *     (NOTE(review): presumably does nothing in that case; confirm);
 *   - iter is NULL -> U_ILLEGAL_ARGUMENT_ERROR;
 *   - state equals the current state -> nothing to change;
 *   - otherwise the state is split into a UTF-8 byte index (state>>1) and a
 *     surrogate flag (state&1). The index must lie in [0, limit], or in
 *     [4, limit] when the flag is set, else U_INDEX_OUTOFBOUNDS_ERROR.
 * On success iter->start takes the byte index and the UTF-16 index becomes
 * unknown (-1). With the flag set, the code point before the index is
 * decoded backward and stored in reservedField.
 * NOTE(review): with the flag set, a decoded code point that is not
 * supplementary presumably produces U_INDEX_OUTOFBOUNDS_ERROR; confirm
 * against the condition guarding that assignment.
 */
947 static void U_CALLCONV
948 utf8IteratorSetState(UCharIterator
*iter
,
950 UErrorCode
*pErrorCode
)
952 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
954 } else if(iter
==NULL
) {
955 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
956 } else if(state
==utf8IteratorGetState(iter
)) {
957 /* setting to the current state: no-op */
959 int32_t index
=(int32_t)(state
>>1); /* UTF-8 index */
960 state
&=1; /* 1 if in surrogate pair, must be index>=4 */
962 if((state
==0 ? index
<0 : index
<4) || iter
->limit
<index
) {
963 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
965 iter
->start
=index
; /* restore UTF-8 byte index */
969 iter
->index
=-1; /* unknown UTF-16 index */
972 iter
->reservedField
=0;
974 /* verified index>=4 above */
976 U8_PREV_OR_FFFD((const uint8_t *)iter
->context
, 0, index
, c
);
978 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
980 iter
->reservedField
=c
;
/*
 * Constant UCharIterator template for UTF-8 strings, wired to the
 * utf8Iterator* callbacks defined above.
 * NOTE(review): the data-field initializers and the remaining callback
 * slots should be checked against the UCharIterator struct layout.
 */
987 static const UCharIterator utf8Iterator
={
989 utf8IteratorGetIndex
,
992 utf8IteratorHasPrevious
,
995 utf8IteratorPrevious
,
997 utf8IteratorGetState
,
/*
 * Sets up iter to iterate over the UTF-8 string s.
 * iter:   iterator structure to initialize.
 * s:      the UTF-8 string; must be non-null.
 * length: byte count, must be >= -1.
 * iter->limit holds the UTF-8 byte length and may be obtained with
 * uprv_strlen(s). The UTF-16 length is known immediately only for strings
 * of at most one byte, where it equals the byte length; otherwise it is set
 * to -1 and computed lazily.
 * NOTE(review): uprv_strlen() is presumably used only for length -1, and
 * rejected arguments presumably select the no-op iterator; confirm against
 * the full body.
 */
1001 U_CAPI
void U_EXPORT2
1002 uiter_setUTF8(UCharIterator
*iter
, const char *s
, int32_t length
) {
1004 if(s
!=0 && length
>=-1) {
1010 iter
->limit
=(int32_t)uprv_strlen(s
);
1012 iter
->length
= iter
->limit
<=1 ? iter
->limit
: -1;
1019 /* Helper functions --------------------------------------------------------- */
/*
 * Returns the code point at the current position without a net change of
 * the iterator position.
 * Reads the current code unit. If it is a lead surrogate, steps forward by
 * one, combines it with a following trail surrogate when there is one, and
 * steps back. If it is a trail surrogate, reads the previous unit, combines
 * it with a preceding lead surrogate when there is one, and steps forward
 * again.
 * NOTE(review): an unpaired surrogate is presumably returned as is, and the
 * second "undo" presumably runs only when previous() actually moved;
 * confirm against the branch conditions.
 */
1021 U_CAPI UChar32 U_EXPORT2
1022 uiter_current32(UCharIterator
*iter
) {
1025 c
=iter
->current(iter
);
1026 if(U16_IS_SURROGATE(c
)) {
1027 if(U16_IS_SURROGATE_LEAD(c
)) {
1029 * go to the next code unit
1030 * we know that we are not at the limit because c!=U_SENTINEL
1032 iter
->move(iter
, 1, UITER_CURRENT
);
1033 if(U16_IS_TRAIL(c2
=iter
->current(iter
))) {
1034 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1037 /* undo index movement */
1038 iter
->move(iter
, -1, UITER_CURRENT
);
1040 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1041 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1044 /* undo index movement */
1045 iter
->move(iter
, 1, UITER_CURRENT
);
/*
 * Returns the next code point and advances past it.
 * When the code unit in c is a lead surrogate, the following unit is read
 * with next(); a trail surrogate completes a supplementary code point,
 * anything else is "un-read" by moving back one unit.
 * NOTE(review): c is presumably obtained from iter->next(iter), and the
 * move back presumably runs only when the second next() actually advanced;
 * confirm against the surrounding statements.
 */
1052 U_CAPI UChar32 U_EXPORT2
1053 uiter_next32(UCharIterator
*iter
) {
1057 if(U16_IS_LEAD(c
)) {
1058 if(U16_IS_TRAIL(c2
=iter
->next(iter
))) {
1059 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1061 /* unmatched first surrogate, undo index movement */
1062 iter
->move(iter
, -1, UITER_CURRENT
);
/*
 * Returns the previous code point and moves before it.
 * Reads the previous code unit. When it is a trail surrogate, the unit
 * before it is read as well; a lead surrogate completes a supplementary
 * code point, anything else is "un-read" by moving forward one unit.
 * NOTE(review): the move forward presumably runs only when the second
 * previous() actually moved; confirm against the branch condition.
 */
1068 U_CAPI UChar32 U_EXPORT2
1069 uiter_previous32(UCharIterator
*iter
) {
1072 c
=iter
->previous(iter
);
1073 if(U16_IS_TRAIL(c
)) {
1074 if(U16_IS_LEAD(c2
=iter
->previous(iter
))) {
1075 c
=U16_GET_SUPPLEMENTARY(c2
, c
);
1077 /* unmatched second surrogate, undo index movement */
1078 iter
->move(iter
, 1, UITER_CURRENT
);
/*
 * Public entry point for saving an iterator state.
 * Returns UITER_NO_STATE when iter is NULL or has no getState callback;
 * otherwise returns whatever the iterator's getState callback reports.
 */
1084 U_CAPI
uint32_t U_EXPORT2
1085 uiter_getState(const UCharIterator
*iter
) {
1086 if(iter
==NULL
|| iter
->getState
==NULL
) {
1087 return UITER_NO_STATE
;
1089 return iter
->getState(iter
);
/*
 * Public entry point for restoring an iterator state.
 * Checks, in order:
 *   - pErrorCode is NULL or already holds a failure
 *     (NOTE(review): presumably does nothing in that case; confirm);
 *   - iter is NULL             -> U_ILLEGAL_ARGUMENT_ERROR;
 *   - iter has no setState     -> U_UNSUPPORTED_ERROR;
 *   - otherwise the call is forwarded to the iterator's setState callback,
 *     which may set its own error code.
 */
1093 U_CAPI
void U_EXPORT2
1094 uiter_setState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
1095 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1097 } else if(iter
==NULL
) {
1098 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1099 } else if(iter
->setState
==NULL
) {
1100 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1102 iter
->setState(iter
, state
, pErrorCode
);