X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..HEAD:/icuSources/common/uiter.cpp diff --git a/icuSources/common/uiter.cpp b/icuSources/common/uiter.cpp index 6ca97508..b9252d81 100644 --- a/icuSources/common/uiter.cpp +++ b/icuSources/common/uiter.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2002-2004, International Business Machines +* Copyright (C) 2002-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: uiter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -19,8 +21,13 @@ #include "unicode/chariter.h" #include "unicode/rep.h" #include "unicode/uiter.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" #include "cstring.h" +U_NAMESPACE_USE + #define IS_EVEN(n) (((n)&1)==0) #define IS_POINTER_EVEN(p) IS_EVEN((size_t)p) @@ -50,11 +57,12 @@ noopCurrent(UCharIterator * /*iter*/) { static uint32_t U_CALLCONV noopGetState(const UCharIterator * /*iter*/) { - return 0; + return UITER_NO_STATE; } static void U_CALLCONV -noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode * /*pErrorCode*/) { +noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) { + *pErrorCode=U_UNSUPPORTED_ERROR; } static const UCharIterator noopIterator={ @@ -594,12 +602,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { i=index=0; limit=iter->start; /* count up to the UTF-8 index */ while(istart=i; /* just in case setState() did not get us to a code point boundary */ @@ -630,12 +634,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { /* count from the beginning to the current index */ while(istart, set the UTF-16 index */ @@ -652,12 +652,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { /* count from the current index to the end */ limit=iter->limit; while(ilength=length; } @@ -781,8 +777,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) --delta; } while(delta>0 && i=2) { @@ -811,8 +807,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) ++delta; } while(delta<0 && i>0) { - U8_PREV(s, 0, i, c); - if(c<0xffff) { + U8_PREV_OR_FFFD(s, 0, i, c); + if(c<=0xffff) { --pos; ++delta; } else if(delta<=-2) { @@ -861,10 +857,8 @@ utf8IteratorCurrent(UCharIterator *iter) { UChar32 c; int32_t i=iter->start; - U8_NEXT(s, i, iter->limit, c); - if(c<0) { - return 0xfffd; - } else if(c<=0xffff) { + U8_NEXT_OR_FFFD(s, i, iter->limit, c); + if(c<=0xffff) { return c; } else { return U16_LEAD(c); @@ -889,7 +883,7 @@ utf8IteratorNext(UCharIterator *iter) { const uint8_t *s=(const uint8_t *)iter->context; UChar32 c; - U8_NEXT(s, iter->start, iter->limit, c); + U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c); if((index=iter->index)>=0) { iter->index=++index; if(iter->length<0 && iter->start==iter->limit) { @@ -898,9 +892,7 @@ utf8IteratorNext(UCharIterator *iter) { } else if(iter->start==iter->limit && iter->length>=0) { iter->index= c<=0xffff ? iter->length : iter->length-1; } - if(c<0) { - return 0xfffd; - } else if(c<=0xffff) { + if(c<=0xffff) { return c; } else { iter->reservedField=c; @@ -927,15 +919,13 @@ utf8IteratorPrevious(UCharIterator *iter) { const uint8_t *s=(const uint8_t *)iter->context; UChar32 c; - U8_PREV(s, 0, iter->start, c); + U8_PREV_OR_FFFD(s, 0, iter->start, c); if((index=iter->index)>0) { iter->index=index-1; } else if(iter->start<=1) { iter->index= c<=0xffff ? iter->start : iter->start+1; } - if(c<0) { - return 0xfffd; - } else if(c<=0xffff) { + if(c<=0xffff) { return c; } else { iter->start+=4; /* back to behind this supplementary code point for consistent state */ @@ -985,7 +975,7 @@ utf8IteratorSetState(UCharIterator *iter, } else { /* verified index>=4 above */ UChar32 c; - U8_PREV((const uint8_t *)iter->context, 0, index, c); + U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c); if(c<=0xffff) { *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; } else { @@ -1035,22 +1025,22 @@ uiter_current32(UCharIterator *iter) { UChar32 c, c2; c=iter->current(iter); - if(UTF_IS_SURROGATE(c)) { - if(UTF_IS_SURROGATE_FIRST(c)) { + if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { /* * go to the next code unit * we know that we are not at the limit because c!=U_SENTINEL */ iter->move(iter, 1, UITER_CURRENT); - if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) { - c=UTF16_GET_PAIR_VALUE(c, c2); + if(U16_IS_TRAIL(c2=iter->current(iter))) { + c=U16_GET_SUPPLEMENTARY(c, c2); } /* undo index movement */ iter->move(iter, -1, UITER_CURRENT); } else { - if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) { - c=UTF16_GET_PAIR_VALUE(c2, c); + if(U16_IS_LEAD(c2=iter->previous(iter))) { + c=U16_GET_SUPPLEMENTARY(c2, c); } if(c2>=0) { /* undo index movement */ @@ -1066,9 +1056,9 @@ uiter_next32(UCharIterator *iter) { UChar32 c, c2; c=iter->next(iter); - if(UTF_IS_FIRST_SURROGATE(c)) { - if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) { - c=UTF16_GET_PAIR_VALUE(c, c2); + if(U16_IS_LEAD(c)) { + if(U16_IS_TRAIL(c2=iter->next(iter))) { + c=U16_GET_SUPPLEMENTARY(c, c2); } else if(c2>=0) { /* unmatched first surrogate, undo index movement */ iter->move(iter, -1, UITER_CURRENT); @@ -1082,9 +1072,9 @@ uiter_previous32(UCharIterator *iter) { UChar32 c, c2; c=iter->previous(iter); - if(UTF_IS_SECOND_SURROGATE(c)) { - if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) { - c=UTF16_GET_PAIR_VALUE(c2, c); + if(U16_IS_TRAIL(c)) { + if(U16_IS_LEAD(c2=iter->previous(iter))) { + c=U16_GET_SUPPLEMENTARY(c2, c); } else if(c2>=0) { /* unmatched second surrogate, undo index movement */ iter->move(iter, 1, UITER_CURRENT);