2 *******************************************************************************
4 * Copyright (C) 2002-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002jan18
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/ustring.h"
19 #include "unicode/chariter.h"
20 #include "unicode/rep.h"
21 #include "unicode/uiter.h"
24 #define IS_EVEN(n) (((n)&1)==0)
25 #define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
29 /* No-Op UCharIterator implementation for illegal input --------------------- */
31 static int32_t U_CALLCONV
32 noopGetIndex(UCharIterator
* /*iter*/, UCharIteratorOrigin
/*origin*/) {
36 static int32_t U_CALLCONV
37 noopMove(UCharIterator
* /*iter*/, int32_t /*delta*/, UCharIteratorOrigin
/*origin*/) {
41 static UBool U_CALLCONV
42 noopHasNext(UCharIterator
* /*iter*/) {
46 static UChar32 U_CALLCONV
47 noopCurrent(UCharIterator
* /*iter*/) {
51 static uint32_t U_CALLCONV
52 noopGetState(const UCharIterator
* /*iter*/) {
56 static void U_CALLCONV
57 noopSetState(UCharIterator
* /*iter*/, uint32_t /*state*/, UErrorCode
* /*pErrorCode*/) {
60 static const UCharIterator noopIterator
={
74 /* UCharIterator implementation for simple strings -------------------------- */
77 * This is an implementation of a code unit (UChar) iterator
78 * for UChar * strings.
80 * The UCharIterator.context field holds a pointer to the string.
83 static int32_t U_CALLCONV
84 stringIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
97 /* not a valid origin */
98 /* Should never get here! */
103 static int32_t U_CALLCONV
104 stringIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
112 pos
=iter
->start
+delta
;
115 pos
=iter
->index
+delta
;
118 pos
=iter
->limit
+delta
;
121 pos
=iter
->length
+delta
;
124 return -1; /* Error */
127 if(pos
<iter
->start
) {
129 } else if(pos
>iter
->limit
) {
133 return iter
->index
=pos
;
136 static UBool U_CALLCONV
137 stringIteratorHasNext(UCharIterator
*iter
) {
138 return iter
->index
<iter
->limit
;
141 static UBool U_CALLCONV
142 stringIteratorHasPrevious(UCharIterator
*iter
) {
143 return iter
->index
>iter
->start
;
146 static UChar32 U_CALLCONV
147 stringIteratorCurrent(UCharIterator
*iter
) {
148 if(iter
->index
<iter
->limit
) {
149 return ((const UChar
*)(iter
->context
))[iter
->index
];
155 static UChar32 U_CALLCONV
156 stringIteratorNext(UCharIterator
*iter
) {
157 if(iter
->index
<iter
->limit
) {
158 return ((const UChar
*)(iter
->context
))[iter
->index
++];
164 static UChar32 U_CALLCONV
165 stringIteratorPrevious(UCharIterator
*iter
) {
166 if(iter
->index
>iter
->start
) {
167 return ((const UChar
*)(iter
->context
))[--iter
->index
];
173 static uint32_t U_CALLCONV
174 stringIteratorGetState(const UCharIterator
*iter
) {
175 return (uint32_t)iter
->index
;
178 static void U_CALLCONV
179 stringIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
180 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
182 } else if(iter
==NULL
) {
183 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
184 } else if((int32_t)state
<iter
->start
|| iter
->limit
<(int32_t)state
) {
185 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
187 iter
->index
=(int32_t)state
;
191 static const UCharIterator stringIterator
={
193 stringIteratorGetIndex
,
195 stringIteratorHasNext
,
196 stringIteratorHasPrevious
,
197 stringIteratorCurrent
,
199 stringIteratorPrevious
,
201 stringIteratorGetState
,
202 stringIteratorSetState
205 U_CAPI
void U_EXPORT2
206 uiter_setString(UCharIterator
*iter
, const UChar
*s
, int32_t length
) {
208 if(s
!=0 && length
>=-1) {
209 *iter
=stringIterator
;
214 iter
->length
=u_strlen(s
);
216 iter
->limit
=iter
->length
;
223 /* UCharIterator implementation for UTF-16BE strings ------------------------ */
226 * This is an implementation of a code unit (UChar) iterator
227 * for UTF-16BE strings, i.e., strings in byte-vectors where
228 * each UChar is stored as a big-endian pair of bytes.
230 * The UCharIterator.context field holds a pointer to the string.
231 * Everything works just like with a normal UChar iterator (uiter_setString),
232 * except that UChars are assembled from byte pairs.
235 /* internal helper function */
236 static inline UChar32
237 utf16BEIteratorGet(UCharIterator
*iter
, int32_t index
) {
238 const uint8_t *p
=(const uint8_t *)iter
->context
;
239 return ((UChar
)p
[2*index
]<<8)|(UChar
)p
[2*index
+1];
242 static UChar32 U_CALLCONV
243 utf16BEIteratorCurrent(UCharIterator
*iter
) {
246 if((index
=iter
->index
)<iter
->limit
) {
247 return utf16BEIteratorGet(iter
, index
);
253 static UChar32 U_CALLCONV
254 utf16BEIteratorNext(UCharIterator
*iter
) {
257 if((index
=iter
->index
)<iter
->limit
) {
259 return utf16BEIteratorGet(iter
, index
);
265 static UChar32 U_CALLCONV
266 utf16BEIteratorPrevious(UCharIterator
*iter
) {
269 if((index
=iter
->index
)>iter
->start
) {
271 return utf16BEIteratorGet(iter
, index
);
277 static const UCharIterator utf16BEIterator
={
279 stringIteratorGetIndex
,
281 stringIteratorHasNext
,
282 stringIteratorHasPrevious
,
283 utf16BEIteratorCurrent
,
285 utf16BEIteratorPrevious
,
287 stringIteratorGetState
,
288 stringIteratorSetState
292 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
293 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
297 utf16BE_strlen(const char *s
) {
298 if(IS_POINTER_EVEN(s
)) {
300 * even-aligned, call u_strlen(s)
301 * we are probably on a little-endian machine, but searching for UChar NUL
302 * does not care about endianness
304 return u_strlen((const UChar
*)s
);
306 /* odd-aligned, search for pair of 0 bytes */
309 while(!(*p
==0 && p
[1]==0)) {
312 return (int32_t)((p
-s
)/2);
316 U_CAPI
void U_EXPORT2
317 uiter_setUTF16BE(UCharIterator
*iter
, const char *s
, int32_t length
) {
319 /* allow only even-length strings (the input length counts bytes) */
320 if(s
!=NULL
&& (length
==-1 || (length
>=0 && IS_EVEN(length
)))) {
321 /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
324 if(U_IS_BIG_ENDIAN
&& IS_POINTER_EVEN(s
)) {
325 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
326 uiter_setString(iter
, (const UChar
*)s
, length
);
330 *iter
=utf16BEIterator
;
335 iter
->length
=utf16BE_strlen(s
);
337 iter
->limit
=iter
->length
;
344 /* UCharIterator wrapper around CharacterIterator --------------------------- */
347 * This is wrapper code around a C++ CharacterIterator to
348 * look like a C UCharIterator.
350 * The UCharIterator.context field holds a pointer to the CharacterIterator.
353 static int32_t U_CALLCONV
354 characterIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
359 return ((CharacterIterator
*)(iter
->context
))->startIndex();
361 return ((CharacterIterator
*)(iter
->context
))->getIndex();
363 return ((CharacterIterator
*)(iter
->context
))->endIndex();
365 return ((CharacterIterator
*)(iter
->context
))->getLength();
367 /* not a valid origin */
368 /* Should never get here! */
373 static int32_t U_CALLCONV
374 characterIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
377 ((CharacterIterator
*)(iter
->context
))->setIndex(delta
);
378 return ((CharacterIterator
*)(iter
->context
))->getIndex();
382 return ((CharacterIterator
*)(iter
->context
))->move(delta
, (CharacterIterator::EOrigin
)origin
);
384 ((CharacterIterator
*)(iter
->context
))->setIndex(((CharacterIterator
*)(iter
->context
))->getLength()+delta
);
385 return ((CharacterIterator
*)(iter
->context
))->getIndex();
387 /* not a valid origin */
388 /* Should never get here! */
393 static UBool U_CALLCONV
394 characterIteratorHasNext(UCharIterator
*iter
) {
395 return ((CharacterIterator
*)(iter
->context
))->hasNext();
398 static UBool U_CALLCONV
399 characterIteratorHasPrevious(UCharIterator
*iter
) {
400 return ((CharacterIterator
*)(iter
->context
))->hasPrevious();
403 static UChar32 U_CALLCONV
404 characterIteratorCurrent(UCharIterator
*iter
) {
407 c
=((CharacterIterator
*)(iter
->context
))->current();
408 if(c
!=0xffff || ((CharacterIterator
*)(iter
->context
))->hasNext()) {
415 static UChar32 U_CALLCONV
416 characterIteratorNext(UCharIterator
*iter
) {
417 if(((CharacterIterator
*)(iter
->context
))->hasNext()) {
418 return ((CharacterIterator
*)(iter
->context
))->nextPostInc();
424 static UChar32 U_CALLCONV
425 characterIteratorPrevious(UCharIterator
*iter
) {
426 if(((CharacterIterator
*)(iter
->context
))->hasPrevious()) {
427 return ((CharacterIterator
*)(iter
->context
))->previous();
433 static uint32_t U_CALLCONV
434 characterIteratorGetState(const UCharIterator
*iter
) {
435 return ((CharacterIterator
*)(iter
->context
))->getIndex();
438 static void U_CALLCONV
439 characterIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
440 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
442 } else if(iter
==NULL
|| iter
->context
==NULL
) {
443 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
444 } else if((int32_t)state
<((CharacterIterator
*)(iter
->context
))->startIndex() || ((CharacterIterator
*)(iter
->context
))->endIndex()<(int32_t)state
) {
445 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
447 ((CharacterIterator
*)(iter
->context
))->setIndex((int32_t)state
);
451 static const UCharIterator characterIteratorWrapper
={
453 characterIteratorGetIndex
,
454 characterIteratorMove
,
455 characterIteratorHasNext
,
456 characterIteratorHasPrevious
,
457 characterIteratorCurrent
,
458 characterIteratorNext
,
459 characterIteratorPrevious
,
461 characterIteratorGetState
,
462 characterIteratorSetState
465 U_CAPI
void U_EXPORT2
466 uiter_setCharacterIterator(UCharIterator
*iter
, CharacterIterator
*charIter
) {
469 *iter
=characterIteratorWrapper
;
470 iter
->context
=charIter
;
477 /* UCharIterator wrapper around Replaceable --------------------------------- */
480 * This is an implementation of a code unit (UChar) iterator
481 * based on a Replaceable object.
483 * The UCharIterator.context field holds a pointer to the Replaceable.
484 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
485 * and the iteration index.
488 static UChar32 U_CALLCONV
489 replaceableIteratorCurrent(UCharIterator
*iter
) {
490 if(iter
->index
<iter
->limit
) {
491 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
);
497 static UChar32 U_CALLCONV
498 replaceableIteratorNext(UCharIterator
*iter
) {
499 if(iter
->index
<iter
->limit
) {
500 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
++);
506 static UChar32 U_CALLCONV
507 replaceableIteratorPrevious(UCharIterator
*iter
) {
508 if(iter
->index
>iter
->start
) {
509 return ((Replaceable
*)(iter
->context
))->charAt(--iter
->index
);
515 static const UCharIterator replaceableIterator
={
517 stringIteratorGetIndex
,
519 stringIteratorHasNext
,
520 stringIteratorHasPrevious
,
521 replaceableIteratorCurrent
,
522 replaceableIteratorNext
,
523 replaceableIteratorPrevious
,
525 stringIteratorGetState
,
526 stringIteratorSetState
529 U_CAPI
void U_EXPORT2
530 uiter_setReplaceable(UCharIterator
*iter
, const Replaceable
*rep
) {
533 *iter
=replaceableIterator
;
535 iter
->limit
=iter
->length
=rep
->length();
542 /* UCharIterator implementation for UTF-8 strings --------------------------- */
545 * Possible, probably necessary only for an implementation for arbitrary
547 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
548 * This would require to turn reservedFn into a close function and
549 * to introduce a uiter_close(iter).
552 #define UITER_CNV_CAPACITY 16
555 * Minimal implementation:
556 * Maintain a single-UChar buffer for an additional surrogate.
557 * The caller must not modify start and limit because they are used internally.
559 * Use UCharIterator fields as follows:
560 * context pointer to UTF-8 string
561 * length UTF-16 length of the string; -1 until lazy evaluation
562 * start current UTF-8 index
563 * index current UTF-16 index; may be -1="unknown" after setState()
564 * limit UTF-8 length of the string
565 * reservedField supplementary code point
567 * Since UCharIterator delivers 16-bit code units, the iteration can be
568 * currently in the middle of the byte sequence for a supplementary code point.
569 * In this case, reservedField will contain that code point and start will
570 * point to after the corresponding byte sequence. The UTF-16 index will be
571 * one less than what it would otherwise be corresponding to the UTF-8 index.
572 * Otherwise, reservedField will be 0.
576 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
577 * Add implementations that do not call strlen() for iteration but check for NUL.
580 static int32_t U_CALLCONV
581 utf8IteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
588 /* the current UTF-16 index is unknown after setState(), count from the beginning */
591 int32_t i
, limit
, index
;
593 s
=(const uint8_t *)iter
->context
;
595 limit
=iter
->start
; /* count up to the UTF-8 index */
597 U8_NEXT(s
, i
, limit
, c
);
605 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
607 iter
->length
=index
; /* in case it was <0 or wrong */
609 if(iter
->reservedField
!=0) {
610 --index
; /* we are in the middle of a supplementary code point */
620 int32_t i
, limit
, length
;
622 s
=(const uint8_t *)iter
->context
;
625 * the current UTF-16 index is unknown after setState(),
626 * we must first count from the beginning to here
631 /* count from the beginning to the current index */
633 U8_NEXT(s
, i
, limit
, c
);
641 /* assume i==limit==iter->start, set the UTF-16 index */
642 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
643 iter
->index
= iter
->reservedField
!=0 ? length
-1 : length
;
647 if(iter
->reservedField
!=0) {
652 /* count from the current index to the end */
655 U8_NEXT(s
, i
, limit
, c
);
666 /* not a valid origin */
667 /* Should never get here! */
672 static int32_t U_CALLCONV
673 utf8IteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
676 int32_t pos
; /* requested UTF-16 index */
677 int32_t i
; /* UTF-8 index */
680 /* calculate the requested UTF-16 index */
686 /* iter->index<0 (unknown) is possible */
690 pos
=iter
->index
+delta
;
693 /* the current UTF-16 index is unknown after setState(), use only delta */
700 if(iter
->length
>=0) {
701 pos
=iter
->length
+delta
;
704 /* pin to the end, avoid counting the length */
706 iter
->start
=iter
->limit
;
707 iter
->reservedField
=0;
709 return UITER_UNKNOWN_INDEX
;
711 /* the current UTF-16 index is unknown, use only delta */
718 return -1; /* Error */
722 /* shortcuts: pinning to the edges of the string */
724 iter
->index
=iter
->start
=iter
->reservedField
=0;
726 } else if(iter
->length
>=0 && pos
>=iter
->length
) {
727 iter
->index
=iter
->length
;
728 iter
->start
=iter
->limit
;
729 iter
->reservedField
=0;
733 /* minimize the number of U8_NEXT/PREV operations */
734 if(iter
->index
<0 || pos
<iter
->index
/2) {
735 /* go forward from the start instead of backward from the current index */
736 iter
->index
=iter
->start
=iter
->reservedField
=0;
737 } else if(iter
->length
>=0 && (iter
->length
-pos
)<(pos
-iter
->index
)) {
739 * if we have the UTF-16 index and length and the new position is
740 * closer to the end than the current index,
741 * then go backward from the end instead of forward from the current index
743 iter
->index
=iter
->length
;
744 iter
->start
=iter
->limit
;
745 iter
->reservedField
=0;
748 delta
=pos
-iter
->index
;
750 return iter
->index
; /* nothing to do */
753 /* move relative to unknown UTF-16 index */
755 return UITER_UNKNOWN_INDEX
; /* nothing to do */
756 } else if(-delta
>=iter
->start
) {
757 /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
758 iter
->index
=iter
->start
=iter
->reservedField
=0;
760 } else if(delta
>=(iter
->limit
-iter
->start
)) {
761 /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
762 iter
->index
=iter
->length
; /* may or may not be <0 (unknown) */
763 iter
->start
=iter
->limit
;
764 iter
->reservedField
=0;
765 return iter
->index
>=0 ? iter
->index
: (int32_t)UITER_UNKNOWN_INDEX
;
771 /* move towards the requested position, pin to the edges of the string */
772 s
=(const uint8_t *)iter
->context
;
773 pos
=iter
->index
; /* could be <0 (unknown) */
777 int32_t limit
=iter
->limit
;
778 if(iter
->reservedField
!=0) {
779 iter
->reservedField
=0;
783 while(delta
>0 && i
<limit
) {
784 U8_NEXT(s
, i
, limit
, c
);
788 } else if(delta
>=2) {
791 } else /* delta==1 */ {
792 /* stop in the middle of a supplementary code point */
793 iter
->reservedField
=c
;
795 break; /* delta=0; */
799 if(iter
->length
<0 && iter
->index
>=0) {
800 iter
->length
= iter
->reservedField
==0 ? pos
: pos
+1;
801 } else if(iter
->index
<0 && iter
->length
>=0) {
802 iter
->index
= iter
->reservedField
==0 ? iter
->length
: iter
->length
-1;
805 } else /* delta<0 */ {
807 if(iter
->reservedField
!=0) {
808 iter
->reservedField
=0;
809 i
-=4; /* we stayed behind the supplementary code point; go before it now */
813 while(delta
<0 && i
>0) {
818 } else if(delta
<=-2) {
821 } else /* delta==-1 */ {
822 /* stop in the middle of a supplementary code point */
823 i
+=4; /* back to behind this supplementary code point for consistent state */
824 iter
->reservedField
=c
;
826 break; /* delta=0; */
833 return iter
->index
=pos
;
835 /* we started with index<0 (unknown) so pos is bogus */
837 return iter
->index
=i
; /* reached the beginning */
839 /* we still don't know the UTF-16 index */
840 return UITER_UNKNOWN_INDEX
;
845 static UBool U_CALLCONV
846 utf8IteratorHasNext(UCharIterator
*iter
) {
847 return iter
->start
<iter
->limit
|| iter
->reservedField
!=0;
850 static UBool U_CALLCONV
851 utf8IteratorHasPrevious(UCharIterator
*iter
) {
852 return iter
->start
>0;
855 static UChar32 U_CALLCONV
856 utf8IteratorCurrent(UCharIterator
*iter
) {
857 if(iter
->reservedField
!=0) {
858 return U16_TRAIL(iter
->reservedField
);
859 } else if(iter
->start
<iter
->limit
) {
860 const uint8_t *s
=(const uint8_t *)iter
->context
;
862 int32_t i
=iter
->start
;
864 U8_NEXT(s
, i
, iter
->limit
, c
);
867 } else if(c
<=0xffff) {
877 static UChar32 U_CALLCONV
878 utf8IteratorNext(UCharIterator
*iter
) {
881 if(iter
->reservedField
!=0) {
882 UChar trail
=U16_TRAIL(iter
->reservedField
);
883 iter
->reservedField
=0;
884 if((index
=iter
->index
)>=0) {
888 } else if(iter
->start
<iter
->limit
) {
889 const uint8_t *s
=(const uint8_t *)iter
->context
;
892 U8_NEXT(s
, iter
->start
, iter
->limit
, c
);
893 if((index
=iter
->index
)>=0) {
895 if(iter
->length
<0 && iter
->start
==iter
->limit
) {
896 iter
->length
= c
<=0xffff ? index
: index
+1;
898 } else if(iter
->start
==iter
->limit
&& iter
->length
>=0) {
899 iter
->index
= c
<=0xffff ? iter
->length
: iter
->length
-1;
903 } else if(c
<=0xffff) {
906 iter
->reservedField
=c
;
914 static UChar32 U_CALLCONV
915 utf8IteratorPrevious(UCharIterator
*iter
) {
918 if(iter
->reservedField
!=0) {
919 UChar lead
=U16_LEAD(iter
->reservedField
);
920 iter
->reservedField
=0;
921 iter
->start
-=4; /* we stayed behind the supplementary code point; go before it now */
922 if((index
=iter
->index
)>0) {
926 } else if(iter
->start
>0) {
927 const uint8_t *s
=(const uint8_t *)iter
->context
;
930 U8_PREV(s
, 0, iter
->start
, c
);
931 if((index
=iter
->index
)>0) {
933 } else if(iter
->start
<=1) {
934 iter
->index
= c
<=0xffff ? iter
->start
: iter
->start
+1;
938 } else if(c
<=0xffff) {
941 iter
->start
+=4; /* back to behind this supplementary code point for consistent state */
942 iter
->reservedField
=c
;
950 static uint32_t U_CALLCONV
951 utf8IteratorGetState(const UCharIterator
*iter
) {
952 uint32_t state
=(uint32_t)(iter
->start
<<1);
953 if(iter
->reservedField
!=0) {
959 static void U_CALLCONV
960 utf8IteratorSetState(UCharIterator
*iter
,
962 UErrorCode
*pErrorCode
)
964 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
966 } else if(iter
==NULL
) {
967 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
968 } else if(state
==utf8IteratorGetState(iter
)) {
969 /* setting to the current state: no-op */
971 int32_t index
=(int32_t)(state
>>1); /* UTF-8 index */
972 state
&=1; /* 1 if in surrogate pair, must be index>=4 */
974 if((state
==0 ? index
<0 : index
<4) || iter
->limit
<index
) {
975 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
977 iter
->start
=index
; /* restore UTF-8 byte index */
981 iter
->index
=-1; /* unknown UTF-16 index */
984 iter
->reservedField
=0;
986 /* verified index>=4 above */
988 U8_PREV((const uint8_t *)iter
->context
, 0, index
, c
);
990 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
992 iter
->reservedField
=c
;
999 static const UCharIterator utf8Iterator
={
1001 utf8IteratorGetIndex
,
1003 utf8IteratorHasNext
,
1004 utf8IteratorHasPrevious
,
1005 utf8IteratorCurrent
,
1007 utf8IteratorPrevious
,
1009 utf8IteratorGetState
,
1010 utf8IteratorSetState
1013 U_CAPI
void U_EXPORT2
1014 uiter_setUTF8(UCharIterator
*iter
, const char *s
, int32_t length
) {
1016 if(s
!=0 && length
>=-1) {
1022 iter
->limit
=(int32_t)uprv_strlen(s
);
1024 iter
->length
= iter
->limit
<=1 ? iter
->limit
: -1;
1031 /* Helper functions --------------------------------------------------------- */
1033 U_CAPI UChar32 U_EXPORT2
1034 uiter_current32(UCharIterator
*iter
) {
1037 c
=iter
->current(iter
);
1038 if(UTF_IS_SURROGATE(c
)) {
1039 if(UTF_IS_SURROGATE_FIRST(c
)) {
1041 * go to the next code unit
1042 * we know that we are not at the limit because c!=U_SENTINEL
1044 iter
->move(iter
, 1, UITER_CURRENT
);
1045 if(UTF_IS_SECOND_SURROGATE(c2
=iter
->current(iter
))) {
1046 c
=UTF16_GET_PAIR_VALUE(c
, c2
);
1049 /* undo index movement */
1050 iter
->move(iter
, -1, UITER_CURRENT
);
1052 if(UTF_IS_FIRST_SURROGATE(c2
=iter
->previous(iter
))) {
1053 c
=UTF16_GET_PAIR_VALUE(c2
, c
);
1056 /* undo index movement */
1057 iter
->move(iter
, 1, UITER_CURRENT
);
1064 U_CAPI UChar32 U_EXPORT2
1065 uiter_next32(UCharIterator
*iter
) {
1069 if(UTF_IS_FIRST_SURROGATE(c
)) {
1070 if(UTF_IS_SECOND_SURROGATE(c2
=iter
->next(iter
))) {
1071 c
=UTF16_GET_PAIR_VALUE(c
, c2
);
1073 /* unmatched first surrogate, undo index movement */
1074 iter
->move(iter
, -1, UITER_CURRENT
);
1080 U_CAPI UChar32 U_EXPORT2
1081 uiter_previous32(UCharIterator
*iter
) {
1084 c
=iter
->previous(iter
);
1085 if(UTF_IS_SECOND_SURROGATE(c
)) {
1086 if(UTF_IS_FIRST_SURROGATE(c2
=iter
->previous(iter
))) {
1087 c
=UTF16_GET_PAIR_VALUE(c2
, c
);
1089 /* unmatched second surrogate, undo index movement */
1090 iter
->move(iter
, 1, UITER_CURRENT
);
1096 U_CAPI
uint32_t U_EXPORT2
1097 uiter_getState(const UCharIterator
*iter
) {
1098 if(iter
==NULL
|| iter
->getState
==NULL
) {
1099 return UITER_NO_STATE
;
1101 return iter
->getState(iter
);
1105 U_CAPI
void U_EXPORT2
1106 uiter_setState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
1107 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1109 } else if(iter
==NULL
) {
1110 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1111 } else if(iter
->setState
==NULL
) {
1112 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1114 iter
->setState(iter
, state
, pErrorCode
);