2 *******************************************************************************
4 * Copyright (C) 2002-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2002jan18
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/ustring.h"
19 #include "unicode/chariter.h"
20 #include "unicode/rep.h"
21 #include "unicode/uiter.h"
/* Nonzero if the integer value n is even, i.e. its lowest bit is clear. */
#define IS_EVEN(n) (((n)&1)==0)

/*
 * Nonzero if the address held in pointer p is even (2-byte aligned).
 * The argument is parenthesized before the cast so that an expression
 * argument such as q+1 is evaluated as pointer arithmetic first and
 * only then converted to an integer.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
31 /* No-Op UCharIterator implementation for illegal input --------------------- */
33 static int32_t U_CALLCONV
34 noopGetIndex(UCharIterator
* /*iter*/, UCharIteratorOrigin
/*origin*/) {
38 static int32_t U_CALLCONV
39 noopMove(UCharIterator
* /*iter*/, int32_t /*delta*/, UCharIteratorOrigin
/*origin*/) {
43 static UBool U_CALLCONV
44 noopHasNext(UCharIterator
* /*iter*/) {
48 static UChar32 U_CALLCONV
49 noopCurrent(UCharIterator
* /*iter*/) {
53 static uint32_t U_CALLCONV
54 noopGetState(const UCharIterator
* /*iter*/) {
55 return UITER_NO_STATE
;
58 static void U_CALLCONV
59 noopSetState(UCharIterator
* /*iter*/, uint32_t /*state*/, UErrorCode
*pErrorCode
) {
60 *pErrorCode
=U_UNSUPPORTED_ERROR
;
63 static const UCharIterator noopIterator
={
77 /* UCharIterator implementation for simple strings -------------------------- */
80 * This is an implementation of a code unit (UChar) iterator
81 * for UChar * strings.
83 * The UCharIterator.context field holds a pointer to the string.
86 static int32_t U_CALLCONV
87 stringIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
100 /* not a valid origin */
101 /* Should never get here! */
106 static int32_t U_CALLCONV
107 stringIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
115 pos
=iter
->start
+delta
;
118 pos
=iter
->index
+delta
;
121 pos
=iter
->limit
+delta
;
124 pos
=iter
->length
+delta
;
127 return -1; /* Error */
130 if(pos
<iter
->start
) {
132 } else if(pos
>iter
->limit
) {
136 return iter
->index
=pos
;
139 static UBool U_CALLCONV
140 stringIteratorHasNext(UCharIterator
*iter
) {
141 return iter
->index
<iter
->limit
;
144 static UBool U_CALLCONV
145 stringIteratorHasPrevious(UCharIterator
*iter
) {
146 return iter
->index
>iter
->start
;
149 static UChar32 U_CALLCONV
150 stringIteratorCurrent(UCharIterator
*iter
) {
151 if(iter
->index
<iter
->limit
) {
152 return ((const UChar
*)(iter
->context
))[iter
->index
];
158 static UChar32 U_CALLCONV
159 stringIteratorNext(UCharIterator
*iter
) {
160 if(iter
->index
<iter
->limit
) {
161 return ((const UChar
*)(iter
->context
))[iter
->index
++];
167 static UChar32 U_CALLCONV
168 stringIteratorPrevious(UCharIterator
*iter
) {
169 if(iter
->index
>iter
->start
) {
170 return ((const UChar
*)(iter
->context
))[--iter
->index
];
176 static uint32_t U_CALLCONV
177 stringIteratorGetState(const UCharIterator
*iter
) {
178 return (uint32_t)iter
->index
;
181 static void U_CALLCONV
182 stringIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
183 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
185 } else if(iter
==NULL
) {
186 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
187 } else if((int32_t)state
<iter
->start
|| iter
->limit
<(int32_t)state
) {
188 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
190 iter
->index
=(int32_t)state
;
194 static const UCharIterator stringIterator
={
196 stringIteratorGetIndex
,
198 stringIteratorHasNext
,
199 stringIteratorHasPrevious
,
200 stringIteratorCurrent
,
202 stringIteratorPrevious
,
204 stringIteratorGetState
,
205 stringIteratorSetState
208 U_CAPI
void U_EXPORT2
209 uiter_setString(UCharIterator
*iter
, const UChar
*s
, int32_t length
) {
211 if(s
!=0 && length
>=-1) {
212 *iter
=stringIterator
;
217 iter
->length
=u_strlen(s
);
219 iter
->limit
=iter
->length
;
226 /* UCharIterator implementation for UTF-16BE strings ------------------------ */
229 * This is an implementation of a code unit (UChar) iterator
230 * for UTF-16BE strings, i.e., strings in byte-vectors where
231 * each UChar is stored as a big-endian pair of bytes.
233 * The UCharIterator.context field holds a pointer to the string.
234 * Everything works just like with a normal UChar iterator (uiter_setString),
235 * except that UChars are assembled from byte pairs.
238 /* internal helper function */
239 static inline UChar32
240 utf16BEIteratorGet(UCharIterator
*iter
, int32_t index
) {
241 const uint8_t *p
=(const uint8_t *)iter
->context
;
242 return ((UChar
)p
[2*index
]<<8)|(UChar
)p
[2*index
+1];
245 static UChar32 U_CALLCONV
246 utf16BEIteratorCurrent(UCharIterator
*iter
) {
249 if((index
=iter
->index
)<iter
->limit
) {
250 return utf16BEIteratorGet(iter
, index
);
256 static UChar32 U_CALLCONV
257 utf16BEIteratorNext(UCharIterator
*iter
) {
260 if((index
=iter
->index
)<iter
->limit
) {
262 return utf16BEIteratorGet(iter
, index
);
268 static UChar32 U_CALLCONV
269 utf16BEIteratorPrevious(UCharIterator
*iter
) {
272 if((index
=iter
->index
)>iter
->start
) {
274 return utf16BEIteratorGet(iter
, index
);
280 static const UCharIterator utf16BEIterator
={
282 stringIteratorGetIndex
,
284 stringIteratorHasNext
,
285 stringIteratorHasPrevious
,
286 utf16BEIteratorCurrent
,
288 utf16BEIteratorPrevious
,
290 stringIteratorGetState
,
291 stringIteratorSetState
295 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
296 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
300 utf16BE_strlen(const char *s
) {
301 if(IS_POINTER_EVEN(s
)) {
303 * even-aligned, call u_strlen(s)
304 * we are probably on a little-endian machine, but searching for UChar NUL
305 * does not care about endianness
307 return u_strlen((const UChar
*)s
);
309 /* odd-aligned, search for pair of 0 bytes */
312 while(!(*p
==0 && p
[1]==0)) {
315 return (int32_t)((p
-s
)/2);
319 U_CAPI
void U_EXPORT2
320 uiter_setUTF16BE(UCharIterator
*iter
, const char *s
, int32_t length
) {
322 /* allow only even-length strings (the input length counts bytes) */
323 if(s
!=NULL
&& (length
==-1 || (length
>=0 && IS_EVEN(length
)))) {
324 /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
327 if(U_IS_BIG_ENDIAN
&& IS_POINTER_EVEN(s
)) {
328 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
329 uiter_setString(iter
, (const UChar
*)s
, length
);
333 *iter
=utf16BEIterator
;
338 iter
->length
=utf16BE_strlen(s
);
340 iter
->limit
=iter
->length
;
347 /* UCharIterator wrapper around CharacterIterator --------------------------- */
350 * This is wrapper code around a C++ CharacterIterator to
351 * look like a C UCharIterator.
353 * The UCharIterator.context field holds a pointer to the CharacterIterator.
356 static int32_t U_CALLCONV
357 characterIteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
362 return ((CharacterIterator
*)(iter
->context
))->startIndex();
364 return ((CharacterIterator
*)(iter
->context
))->getIndex();
366 return ((CharacterIterator
*)(iter
->context
))->endIndex();
368 return ((CharacterIterator
*)(iter
->context
))->getLength();
370 /* not a valid origin */
371 /* Should never get here! */
376 static int32_t U_CALLCONV
377 characterIteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
380 ((CharacterIterator
*)(iter
->context
))->setIndex(delta
);
381 return ((CharacterIterator
*)(iter
->context
))->getIndex();
385 return ((CharacterIterator
*)(iter
->context
))->move(delta
, (CharacterIterator::EOrigin
)origin
);
387 ((CharacterIterator
*)(iter
->context
))->setIndex(((CharacterIterator
*)(iter
->context
))->getLength()+delta
);
388 return ((CharacterIterator
*)(iter
->context
))->getIndex();
390 /* not a valid origin */
391 /* Should never get here! */
396 static UBool U_CALLCONV
397 characterIteratorHasNext(UCharIterator
*iter
) {
398 return ((CharacterIterator
*)(iter
->context
))->hasNext();
401 static UBool U_CALLCONV
402 characterIteratorHasPrevious(UCharIterator
*iter
) {
403 return ((CharacterIterator
*)(iter
->context
))->hasPrevious();
406 static UChar32 U_CALLCONV
407 characterIteratorCurrent(UCharIterator
*iter
) {
410 c
=((CharacterIterator
*)(iter
->context
))->current();
411 if(c
!=0xffff || ((CharacterIterator
*)(iter
->context
))->hasNext()) {
418 static UChar32 U_CALLCONV
419 characterIteratorNext(UCharIterator
*iter
) {
420 if(((CharacterIterator
*)(iter
->context
))->hasNext()) {
421 return ((CharacterIterator
*)(iter
->context
))->nextPostInc();
427 static UChar32 U_CALLCONV
428 characterIteratorPrevious(UCharIterator
*iter
) {
429 if(((CharacterIterator
*)(iter
->context
))->hasPrevious()) {
430 return ((CharacterIterator
*)(iter
->context
))->previous();
436 static uint32_t U_CALLCONV
437 characterIteratorGetState(const UCharIterator
*iter
) {
438 return ((CharacterIterator
*)(iter
->context
))->getIndex();
441 static void U_CALLCONV
442 characterIteratorSetState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
443 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
445 } else if(iter
==NULL
|| iter
->context
==NULL
) {
446 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
447 } else if((int32_t)state
<((CharacterIterator
*)(iter
->context
))->startIndex() || ((CharacterIterator
*)(iter
->context
))->endIndex()<(int32_t)state
) {
448 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
450 ((CharacterIterator
*)(iter
->context
))->setIndex((int32_t)state
);
454 static const UCharIterator characterIteratorWrapper
={
456 characterIteratorGetIndex
,
457 characterIteratorMove
,
458 characterIteratorHasNext
,
459 characterIteratorHasPrevious
,
460 characterIteratorCurrent
,
461 characterIteratorNext
,
462 characterIteratorPrevious
,
464 characterIteratorGetState
,
465 characterIteratorSetState
468 U_CAPI
void U_EXPORT2
469 uiter_setCharacterIterator(UCharIterator
*iter
, CharacterIterator
*charIter
) {
472 *iter
=characterIteratorWrapper
;
473 iter
->context
=charIter
;
480 /* UCharIterator wrapper around Replaceable --------------------------------- */
483 * This is an implementation of a code unit (UChar) iterator
484 * based on a Replaceable object.
486 * The UCharIterator.context field holds a pointer to the Replaceable.
487 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
488 * and the iteration index.
491 static UChar32 U_CALLCONV
492 replaceableIteratorCurrent(UCharIterator
*iter
) {
493 if(iter
->index
<iter
->limit
) {
494 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
);
500 static UChar32 U_CALLCONV
501 replaceableIteratorNext(UCharIterator
*iter
) {
502 if(iter
->index
<iter
->limit
) {
503 return ((Replaceable
*)(iter
->context
))->charAt(iter
->index
++);
509 static UChar32 U_CALLCONV
510 replaceableIteratorPrevious(UCharIterator
*iter
) {
511 if(iter
->index
>iter
->start
) {
512 return ((Replaceable
*)(iter
->context
))->charAt(--iter
->index
);
518 static const UCharIterator replaceableIterator
={
520 stringIteratorGetIndex
,
522 stringIteratorHasNext
,
523 stringIteratorHasPrevious
,
524 replaceableIteratorCurrent
,
525 replaceableIteratorNext
,
526 replaceableIteratorPrevious
,
528 stringIteratorGetState
,
529 stringIteratorSetState
532 U_CAPI
void U_EXPORT2
533 uiter_setReplaceable(UCharIterator
*iter
, const Replaceable
*rep
) {
536 *iter
=replaceableIterator
;
538 iter
->limit
=iter
->length
=rep
->length();
545 /* UCharIterator implementation for UTF-8 strings --------------------------- */
548 * Possible, probably necessary only for an implementation for arbitrary
550 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
551 * This would require to turn reservedFn into a close function and
552 * to introduce a uiter_close(iter).
555 #define UITER_CNV_CAPACITY 16
558 * Minimal implementation:
559 * Maintain a single-UChar buffer for an additional surrogate.
560 * The caller must not modify start and limit because they are used internally.
562 * Use UCharIterator fields as follows:
563 * context pointer to UTF-8 string
564 * length UTF-16 length of the string; -1 until lazy evaluation
565 * start current UTF-8 index
566 * index current UTF-16 index; may be -1="unknown" after setState()
567 * limit UTF-8 length of the string
568 * reservedField supplementary code point
570 * Since UCharIterator delivers 16-bit code units, the iteration can be
571 * currently in the middle of the byte sequence for a supplementary code point.
572 * In this case, reservedField will contain that code point and start will
573 * point to after the corresponding byte sequence. The UTF-16 index will be
574 * one less than what it would otherwise be corresponding to the UTF-8 index.
575 * Otherwise, reservedField will be 0.
579 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
580 * Add implementations that do not call strlen() for iteration but check for NUL.
583 static int32_t U_CALLCONV
584 utf8IteratorGetIndex(UCharIterator
*iter
, UCharIteratorOrigin origin
) {
591 /* the current UTF-16 index is unknown after setState(), count from the beginning */
594 int32_t i
, limit
, index
;
596 s
=(const uint8_t *)iter
->context
;
598 limit
=iter
->start
; /* count up to the UTF-8 index */
600 U8_NEXT(s
, i
, limit
, c
);
608 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
610 iter
->length
=index
; /* in case it was <0 or wrong */
612 if(iter
->reservedField
!=0) {
613 --index
; /* we are in the middle of a supplementary code point */
623 int32_t i
, limit
, length
;
625 s
=(const uint8_t *)iter
->context
;
628 * the current UTF-16 index is unknown after setState(),
629 * we must first count from the beginning to here
634 /* count from the beginning to the current index */
636 U8_NEXT(s
, i
, limit
, c
);
644 /* assume i==limit==iter->start, set the UTF-16 index */
645 iter
->start
=i
; /* just in case setState() did not get us to a code point boundary */
646 iter
->index
= iter
->reservedField
!=0 ? length
-1 : length
;
650 if(iter
->reservedField
!=0) {
655 /* count from the current index to the end */
658 U8_NEXT(s
, i
, limit
, c
);
669 /* not a valid origin */
670 /* Should never get here! */
675 static int32_t U_CALLCONV
676 utf8IteratorMove(UCharIterator
*iter
, int32_t delta
, UCharIteratorOrigin origin
) {
679 int32_t pos
; /* requested UTF-16 index */
680 int32_t i
; /* UTF-8 index */
683 /* calculate the requested UTF-16 index */
689 /* iter->index<0 (unknown) is possible */
693 pos
=iter
->index
+delta
;
696 /* the current UTF-16 index is unknown after setState(), use only delta */
703 if(iter
->length
>=0) {
704 pos
=iter
->length
+delta
;
707 /* pin to the end, avoid counting the length */
709 iter
->start
=iter
->limit
;
710 iter
->reservedField
=0;
712 return UITER_UNKNOWN_INDEX
;
714 /* the current UTF-16 index is unknown, use only delta */
721 return -1; /* Error */
725 /* shortcuts: pinning to the edges of the string */
727 iter
->index
=iter
->start
=iter
->reservedField
=0;
729 } else if(iter
->length
>=0 && pos
>=iter
->length
) {
730 iter
->index
=iter
->length
;
731 iter
->start
=iter
->limit
;
732 iter
->reservedField
=0;
736 /* minimize the number of U8_NEXT/PREV operations */
737 if(iter
->index
<0 || pos
<iter
->index
/2) {
738 /* go forward from the start instead of backward from the current index */
739 iter
->index
=iter
->start
=iter
->reservedField
=0;
740 } else if(iter
->length
>=0 && (iter
->length
-pos
)<(pos
-iter
->index
)) {
742 * if we have the UTF-16 index and length and the new position is
743 * closer to the end than the current index,
744 * then go backward from the end instead of forward from the current index
746 iter
->index
=iter
->length
;
747 iter
->start
=iter
->limit
;
748 iter
->reservedField
=0;
751 delta
=pos
-iter
->index
;
753 return iter
->index
; /* nothing to do */
756 /* move relative to unknown UTF-16 index */
758 return UITER_UNKNOWN_INDEX
; /* nothing to do */
759 } else if(-delta
>=iter
->start
) {
760 /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
761 iter
->index
=iter
->start
=iter
->reservedField
=0;
763 } else if(delta
>=(iter
->limit
-iter
->start
)) {
764 /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
765 iter
->index
=iter
->length
; /* may or may not be <0 (unknown) */
766 iter
->start
=iter
->limit
;
767 iter
->reservedField
=0;
768 return iter
->index
>=0 ? iter
->index
: (int32_t)UITER_UNKNOWN_INDEX
;
774 /* move towards the requested position, pin to the edges of the string */
775 s
=(const uint8_t *)iter
->context
;
776 pos
=iter
->index
; /* could be <0 (unknown) */
780 int32_t limit
=iter
->limit
;
781 if(iter
->reservedField
!=0) {
782 iter
->reservedField
=0;
786 while(delta
>0 && i
<limit
) {
787 U8_NEXT(s
, i
, limit
, c
);
791 } else if(delta
>=2) {
794 } else /* delta==1 */ {
795 /* stop in the middle of a supplementary code point */
796 iter
->reservedField
=c
;
798 break; /* delta=0; */
802 if(iter
->length
<0 && iter
->index
>=0) {
803 iter
->length
= iter
->reservedField
==0 ? pos
: pos
+1;
804 } else if(iter
->index
<0 && iter
->length
>=0) {
805 iter
->index
= iter
->reservedField
==0 ? iter
->length
: iter
->length
-1;
808 } else /* delta<0 */ {
810 if(iter
->reservedField
!=0) {
811 iter
->reservedField
=0;
812 i
-=4; /* we stayed behind the supplementary code point; go before it now */
816 while(delta
<0 && i
>0) {
821 } else if(delta
<=-2) {
824 } else /* delta==-1 */ {
825 /* stop in the middle of a supplementary code point */
826 i
+=4; /* back to behind this supplementary code point for consistent state */
827 iter
->reservedField
=c
;
829 break; /* delta=0; */
836 return iter
->index
=pos
;
838 /* we started with index<0 (unknown) so pos is bogus */
840 return iter
->index
=i
; /* reached the beginning */
842 /* we still don't know the UTF-16 index */
843 return UITER_UNKNOWN_INDEX
;
848 static UBool U_CALLCONV
849 utf8IteratorHasNext(UCharIterator
*iter
) {
850 return iter
->start
<iter
->limit
|| iter
->reservedField
!=0;
853 static UBool U_CALLCONV
854 utf8IteratorHasPrevious(UCharIterator
*iter
) {
855 return iter
->start
>0;
858 static UChar32 U_CALLCONV
859 utf8IteratorCurrent(UCharIterator
*iter
) {
860 if(iter
->reservedField
!=0) {
861 return U16_TRAIL(iter
->reservedField
);
862 } else if(iter
->start
<iter
->limit
) {
863 const uint8_t *s
=(const uint8_t *)iter
->context
;
865 int32_t i
=iter
->start
;
867 U8_NEXT(s
, i
, iter
->limit
, c
);
870 } else if(c
<=0xffff) {
880 static UChar32 U_CALLCONV
881 utf8IteratorNext(UCharIterator
*iter
) {
884 if(iter
->reservedField
!=0) {
885 UChar trail
=U16_TRAIL(iter
->reservedField
);
886 iter
->reservedField
=0;
887 if((index
=iter
->index
)>=0) {
891 } else if(iter
->start
<iter
->limit
) {
892 const uint8_t *s
=(const uint8_t *)iter
->context
;
895 U8_NEXT(s
, iter
->start
, iter
->limit
, c
);
896 if((index
=iter
->index
)>=0) {
898 if(iter
->length
<0 && iter
->start
==iter
->limit
) {
899 iter
->length
= c
<=0xffff ? index
: index
+1;
901 } else if(iter
->start
==iter
->limit
&& iter
->length
>=0) {
902 iter
->index
= c
<=0xffff ? iter
->length
: iter
->length
-1;
906 } else if(c
<=0xffff) {
909 iter
->reservedField
=c
;
917 static UChar32 U_CALLCONV
918 utf8IteratorPrevious(UCharIterator
*iter
) {
921 if(iter
->reservedField
!=0) {
922 UChar lead
=U16_LEAD(iter
->reservedField
);
923 iter
->reservedField
=0;
924 iter
->start
-=4; /* we stayed behind the supplementary code point; go before it now */
925 if((index
=iter
->index
)>0) {
929 } else if(iter
->start
>0) {
930 const uint8_t *s
=(const uint8_t *)iter
->context
;
933 U8_PREV(s
, 0, iter
->start
, c
);
934 if((index
=iter
->index
)>0) {
936 } else if(iter
->start
<=1) {
937 iter
->index
= c
<=0xffff ? iter
->start
: iter
->start
+1;
941 } else if(c
<=0xffff) {
944 iter
->start
+=4; /* back to behind this supplementary code point for consistent state */
945 iter
->reservedField
=c
;
953 static uint32_t U_CALLCONV
954 utf8IteratorGetState(const UCharIterator
*iter
) {
955 uint32_t state
=(uint32_t)(iter
->start
<<1);
956 if(iter
->reservedField
!=0) {
962 static void U_CALLCONV
963 utf8IteratorSetState(UCharIterator
*iter
,
965 UErrorCode
*pErrorCode
)
967 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
969 } else if(iter
==NULL
) {
970 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
971 } else if(state
==utf8IteratorGetState(iter
)) {
972 /* setting to the current state: no-op */
974 int32_t index
=(int32_t)(state
>>1); /* UTF-8 index */
975 state
&=1; /* 1 if in surrogate pair, must be index>=4 */
977 if((state
==0 ? index
<0 : index
<4) || iter
->limit
<index
) {
978 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
980 iter
->start
=index
; /* restore UTF-8 byte index */
984 iter
->index
=-1; /* unknown UTF-16 index */
987 iter
->reservedField
=0;
989 /* verified index>=4 above */
991 U8_PREV((const uint8_t *)iter
->context
, 0, index
, c
);
993 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
995 iter
->reservedField
=c
;
1002 static const UCharIterator utf8Iterator
={
1004 utf8IteratorGetIndex
,
1006 utf8IteratorHasNext
,
1007 utf8IteratorHasPrevious
,
1008 utf8IteratorCurrent
,
1010 utf8IteratorPrevious
,
1012 utf8IteratorGetState
,
1013 utf8IteratorSetState
1016 U_CAPI
void U_EXPORT2
1017 uiter_setUTF8(UCharIterator
*iter
, const char *s
, int32_t length
) {
1019 if(s
!=0 && length
>=-1) {
1025 iter
->limit
=(int32_t)uprv_strlen(s
);
1027 iter
->length
= iter
->limit
<=1 ? iter
->limit
: -1;
1034 /* Helper functions --------------------------------------------------------- */
1036 U_CAPI UChar32 U_EXPORT2
1037 uiter_current32(UCharIterator
*iter
) {
1040 c
=iter
->current(iter
);
1041 if(UTF_IS_SURROGATE(c
)) {
1042 if(UTF_IS_SURROGATE_FIRST(c
)) {
1044 * go to the next code unit
1045 * we know that we are not at the limit because c!=U_SENTINEL
1047 iter
->move(iter
, 1, UITER_CURRENT
);
1048 if(UTF_IS_SECOND_SURROGATE(c2
=iter
->current(iter
))) {
1049 c
=UTF16_GET_PAIR_VALUE(c
, c2
);
1052 /* undo index movement */
1053 iter
->move(iter
, -1, UITER_CURRENT
);
1055 if(UTF_IS_FIRST_SURROGATE(c2
=iter
->previous(iter
))) {
1056 c
=UTF16_GET_PAIR_VALUE(c2
, c
);
1059 /* undo index movement */
1060 iter
->move(iter
, 1, UITER_CURRENT
);
1067 U_CAPI UChar32 U_EXPORT2
1068 uiter_next32(UCharIterator
*iter
) {
1072 if(UTF_IS_FIRST_SURROGATE(c
)) {
1073 if(UTF_IS_SECOND_SURROGATE(c2
=iter
->next(iter
))) {
1074 c
=UTF16_GET_PAIR_VALUE(c
, c2
);
1076 /* unmatched first surrogate, undo index movement */
1077 iter
->move(iter
, -1, UITER_CURRENT
);
1083 U_CAPI UChar32 U_EXPORT2
1084 uiter_previous32(UCharIterator
*iter
) {
1087 c
=iter
->previous(iter
);
1088 if(UTF_IS_SECOND_SURROGATE(c
)) {
1089 if(UTF_IS_FIRST_SURROGATE(c2
=iter
->previous(iter
))) {
1090 c
=UTF16_GET_PAIR_VALUE(c2
, c
);
1092 /* unmatched second surrogate, undo index movement */
1093 iter
->move(iter
, 1, UITER_CURRENT
);
1099 U_CAPI
uint32_t U_EXPORT2
1100 uiter_getState(const UCharIterator
*iter
) {
1101 if(iter
==NULL
|| iter
->getState
==NULL
) {
1102 return UITER_NO_STATE
;
1104 return iter
->getState(iter
);
1108 U_CAPI
void U_EXPORT2
1109 uiter_setState(UCharIterator
*iter
, uint32_t state
, UErrorCode
*pErrorCode
) {
1110 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1112 } else if(iter
==NULL
) {
1113 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1114 } else if(iter
->setState
==NULL
) {
1115 *pErrorCode
=U_UNSUPPORTED_ERROR
;
1117 iter
->setState(iter
, state
, pErrorCode
);