+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2002-2006, International Business Machines
+* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uiter.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/chariter.h"
#include "unicode/rep.h"
#include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
#include "cstring.h"
+U_NAMESPACE_USE
+
#define IS_EVEN(n) (((n)&1)==0)
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
i=index=0;
limit=iter->start; /* count up to the UTF-8 index */
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++index;
- } else {
- index+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ index+=U16_LENGTH(c);
}
iter->start=i; /* just in case setState() did not get us to a code point boundary */
/* count from the beginning to the current index */
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++length;
- } else {
- length+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
}
/* assume i==limit==iter->start, set the UTF-16 index */
/* count from the current index to the end */
limit=iter->limit;
while(i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<=0xffff) {
- ++length;
- } else {
- length+=2;
- }
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ length+=U16_LENGTH(c);
}
iter->length=length;
}
--delta;
}
while(delta>0 && i<limit) {
- U8_NEXT(s, i, limit, c);
- if(c<0xffff) {
+ U8_NEXT_OR_FFFD(s, i, limit, c);
+ if(c<=0xffff) {
++pos;
--delta;
} else if(delta>=2) {
++delta;
}
while(delta<0 && i>0) {
- U8_PREV(s, 0, i, c);
- if(c<0xffff) {
+ U8_PREV_OR_FFFD(s, 0, i, c);
+ if(c<=0xffff) {
--pos;
++delta;
} else if(delta<=-2) {
UChar32 c;
int32_t i=iter->start;
- U8_NEXT(s, i, iter->limit, c);
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ U8_NEXT_OR_FFFD(s, i, iter->limit, c);
+ if(c<=0xffff) {
return c;
} else {
return U16_LEAD(c);
const uint8_t *s=(const uint8_t *)iter->context;
UChar32 c;
- U8_NEXT(s, iter->start, iter->limit, c);
+ U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
if((index=iter->index)>=0) {
iter->index=++index;
if(iter->length<0 && iter->start==iter->limit) {
} else if(iter->start==iter->limit && iter->length>=0) {
iter->index= c<=0xffff ? iter->length : iter->length-1;
}
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ if(c<=0xffff) {
return c;
} else {
iter->reservedField=c;
const uint8_t *s=(const uint8_t *)iter->context;
UChar32 c;
- U8_PREV(s, 0, iter->start, c);
+ U8_PREV_OR_FFFD(s, 0, iter->start, c);
if((index=iter->index)>0) {
iter->index=index-1;
} else if(iter->start<=1) {
iter->index= c<=0xffff ? iter->start : iter->start+1;
}
- if(c<0) {
- return 0xfffd;
- } else if(c<=0xffff) {
+ if(c<=0xffff) {
return c;
} else {
iter->start+=4; /* back to behind this supplementary code point for consistent state */
} else {
/* verified index>=4 above */
UChar32 c;
- U8_PREV((const uint8_t *)iter->context, 0, index, c);
+ U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
if(c<=0xffff) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
} else {
UChar32 c, c2;
c=iter->current(iter);
- if(UTF_IS_SURROGATE(c)) {
- if(UTF_IS_SURROGATE_FIRST(c)) {
+ if(U16_IS_SURROGATE(c)) {
+ if(U16_IS_SURROGATE_LEAD(c)) {
/*
* go to the next code unit
* we know that we are not at the limit because c!=U_SENTINEL
*/
iter->move(iter, 1, UITER_CURRENT);
- if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
- c=UTF16_GET_PAIR_VALUE(c, c2);
+ if(U16_IS_TRAIL(c2=iter->current(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
}
/* undo index movement */
iter->move(iter, -1, UITER_CURRENT);
} else {
- if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
- c=UTF16_GET_PAIR_VALUE(c2, c);
+ if(U16_IS_LEAD(c2=iter->previous(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
}
if(c2>=0) {
/* undo index movement */
UChar32 c, c2;
c=iter->next(iter);
- if(UTF_IS_FIRST_SURROGATE(c)) {
- if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
- c=UTF16_GET_PAIR_VALUE(c, c2);
+ if(U16_IS_LEAD(c)) {
+ if(U16_IS_TRAIL(c2=iter->next(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c, c2);
} else if(c2>=0) {
/* unmatched first surrogate, undo index movement */
iter->move(iter, -1, UITER_CURRENT);
UChar32 c, c2;
c=iter->previous(iter);
- if(UTF_IS_SECOND_SURROGATE(c)) {
- if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
- c=UTF16_GET_PAIR_VALUE(c2, c);
+ if(U16_IS_TRAIL(c)) {
+ if(U16_IS_LEAD(c2=iter->previous(iter))) {
+ c=U16_GET_SUPPLEMENTARY(c2, c);
} else if(c2>=0) {
/* unmatched second surrogate, undo index movement */
iter->move(iter, 1, UITER_CURRENT);