+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2002-2003, International Business Machines
+* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uiter.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/chariter.h"
#include "unicode/rep.h"
#include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
#include "cstring.h"
+U_NAMESPACE_USE
+
#define IS_EVEN(n) (((n)&1)==0)
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
static uint32_t U_CALLCONV
noopGetState(const UCharIterator * /*iter*/) {
- return 0;
+ return UITER_NO_STATE; /* named constant in place of the bare 0 removed by this change; the iterator argument is never read */
}
static void U_CALLCONV
-noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode * /*pErrorCode*/) {
+noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
+ *pErrorCode=U_UNSUPPORTED_ERROR; /* always reports failure; iter and state are ignored, and pErrorCode is dereferenced without a NULL check, so it must be non-NULL */
}
static const UCharIterator noopIterator={
* except that UChars are assembled from byte pairs.
*/
+/*
+ * internal helper function: returns the 16-bit unit at unit index "index",
+ * built from the byte pair at offsets 2*index (high byte) and 2*index+1
+ * (low byte) of the byte array that iter->context points to.
+ * Performs no range check itself; the callers visible in this file compare
+ * the index against iter->limit or iter->start before calling.
+ */
+static inline UChar32
+utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
+ const uint8_t *p=(const uint8_t *)iter->context;
+ return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+}
+
static UChar32 U_CALLCONV
utf16BEIteratorCurrent(UCharIterator *iter) {
int32_t index;
if((index=iter->index)<iter->limit) {
- const uint8_t *p=(const uint8_t *)iter->context;
- return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+ return utf16BEIteratorGet(iter, index);
} else {
return U_SENTINEL;
}
int32_t index;
if((index=iter->index)<iter->limit) {
- const uint8_t *p=(const uint8_t *)iter->context;
iter->index=index+1;
- return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+ return utf16BEIteratorGet(iter, index);
} else {
return U_SENTINEL;
}
int32_t index;
if((index=iter->index)>iter->start) {
- const uint8_t *p=(const uint8_t *)iter->context;
iter->index=--index;
- return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+ return utf16BEIteratorGet(iter, index);
} else {
return U_SENTINEL;
}
i=index=0;
limit=iter->start; /* count up to the UTF-8 index */
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++index;
- } else {
- index+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ index+=U16_LENGTH(c);
}
iter->start=i; /* just in case setState() did not get us to a code point boundary */
/* count from the beginning to the current index */
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++length;
- } else {
- length+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
}
/* assume i==limit==iter->start, set the UTF-16 index */
/* count from the current index to the end */
limit=iter->limit;
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++length;
- } else {
- length+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
}
iter->length=length;
}
iter->index=iter->length; /* may or may not be <0 (unknown) */
iter->start=iter->limit;
iter->reservedField=0;
- return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
+ return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
}
}
--delta;
}
while(delta>0 && i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<0xffff) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ if(c<=0xffff) {
++pos;
--delta;
} else if(delta>=2) {
++delta;
}
while(delta<0 && i>0) {
- U8_PREV(s, 0, i, c);
- if(c<0xffff) {
+ U8_PREV_OR_FFFD(s, 0, i, c);
+ if(c<=0xffff) {
--pos;
++delta;
} else if(delta<=-2) {
static UBool U_CALLCONV
utf8IteratorHasNext(UCharIterator *iter) {
- return iter->reservedField!=0 || iter->start<iter->limit;
+ return iter->start<iter->limit || iter->reservedField!=0; /* same two conditions as before, with the start<limit test now evaluated first; reservedField is assigned a supplementary code point in the next() path, so non-zero presumably means a trail surrogate is still pending - TODO confirm */
}
static UBool U_CALLCONV
UChar32 c;
int32_t i=iter->start;
- U8_NEXT(s, i, iter->limit, c);
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ U8_NEXT_OR_FFFD(s, i, iter->limit, c);
+ if(c<=0xffff) {
return c;
} else {
return U16_LEAD(c);
const uint8_t *s=(const uint8_t *)iter->context;
UChar32 c;
- U8_NEXT(s, iter->start, iter->limit, c);
+ U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
if((index=iter->index)>=0) {
iter->index=++index;
if(iter->length<0 && iter->start==iter->limit) {
} else if(iter->start==iter->limit && iter->length>=0) {
iter->index= c<=0xffff ? iter->length : iter->length-1;
}
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ if(c<=0xffff) {
return c;
} else {
iter->reservedField=c;
const uint8_t *s=(const uint8_t *)iter->context;
UChar32 c;
- U8_PREV(s, 0, iter->start, c);
+ U8_PREV_OR_FFFD(s, 0, iter->start, c);
if((index=iter->index)>0) {
iter->index=index-1;
} else if(iter->start<=1) {
iter->index= c<=0xffff ? iter->start : iter->start+1;
}
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ if(c<=0xffff) {
return c;
} else {
iter->start+=4; /* back to behind this supplementary code point for consistent state */
} else {
/* verified index>=4 above */
UChar32 c;
- U8_PREV((const uint8_t *)iter->context, 0, index, c);
+ U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
if(c<=0xffff) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
} else {
if(length>=0) {
iter->limit=length;
} else {
- iter->limit=uprv_strlen(s);
+ iter->limit=(int32_t)uprv_strlen(s);
}
iter->length= iter->limit<=1 ? iter->limit : -1;
} else {
UChar32 c, c2;
c=iter->current(iter);
- if(UTF_IS_SURROGATE(c)) {
- if(UTF_IS_SURROGATE_FIRST(c)) {
+ if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
/*
* go to the next code unit
* we know that we are not at the limit because c!=U_SENTINEL
*/
iter->move(iter, 1, UITER_CURRENT);
- if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
- c=UTF16_GET_PAIR_VALUE(c, c2);
+ if(U16_IS_TRAIL(c2=iter->current(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
}
/* undo index movement */
iter->move(iter, -1, UITER_CURRENT);
} else {
- if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
- c=UTF16_GET_PAIR_VALUE(c2, c);
+ if(U16_IS_LEAD(c2=iter->previous(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
}
if(c2>=0) {
/* undo index movement */
UChar32 c, c2;
c=iter->next(iter);
- if(UTF_IS_FIRST_SURROGATE(c)) {
- if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
- c=UTF16_GET_PAIR_VALUE(c, c2);
+ if(U16_IS_LEAD(c)) {
+ if(U16_IS_TRAIL(c2=iter->next(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
} else if(c2>=0) {
/* unmatched first surrogate, undo index movement */
iter->move(iter, -1, UITER_CURRENT);
UChar32 c, c2;
c=iter->previous(iter);
- if(UTF_IS_SECOND_SURROGATE(c)) {
- if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
- c=UTF16_GET_PAIR_VALUE(c2, c);
+ if(U16_IS_TRAIL(c)) {
+ if(U16_IS_LEAD(c2=iter->previous(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
} else if(c2>=0) {
/* unmatched second surrogate, undo index movement */
iter->move(iter, 1, UITER_CURRENT);