]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/uiter.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / uiter.cpp
index bec7190c63538ed87d1d76a5e1b09f7a745d044d..b9252d81c2db5cb66011ca96f8c51a364c37a447 100644 (file)
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  uiter.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
@@ -19,6 +21,9 @@
 #include "unicode/chariter.h"
 #include "unicode/rep.h"
 #include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
 #include "cstring.h"
 
 U_NAMESPACE_USE
@@ -597,12 +602,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
             i=index=0;
             limit=iter->start; /* count up to the UTF-8 index */
             while(i<limit) {
-                U8_NEXT(s, i, limit, c);
-                if(c<=0xffff) {
-                    ++index;
-                } else {
-                    index+=2;
-                }
+                U8_NEXT_OR_FFFD(s, i, limit, c);
+                index+=U16_LENGTH(c);
             }
 
             iter->start=i; /* just in case setState() did not get us to a code point boundary */
@@ -633,12 +634,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
 
                 /* count from the beginning to the current index */
                 while(i<limit) {
-                    U8_NEXT(s, i, limit, c);
-                    if(c<=0xffff) {
-                        ++length;
-                    } else {
-                        length+=2;
-                    }
+                    U8_NEXT_OR_FFFD(s, i, limit, c);
+                    length+=U16_LENGTH(c);
                 }
 
                 /* assume i==limit==iter->start, set the UTF-16 index */
@@ -655,12 +652,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
             /* count from the current index to the end */
             limit=iter->limit;
             while(i<limit) {
-                U8_NEXT(s, i, limit, c);
-                if(c<=0xffff) {
-                    ++length;
-                } else {
-                    length+=2;
-                }
+                U8_NEXT_OR_FFFD(s, i, limit, c);
+                length+=U16_LENGTH(c);
             }
             iter->length=length;
         }
@@ -784,8 +777,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
             --delta;
         }
         while(delta>0 && i<limit) {
-            U8_NEXT(s, i, limit, c);
-            if(c<0xffff) {
+            U8_NEXT_OR_FFFD(s, i, limit, c);
+            if(c<=0xffff) {
                 ++pos;
                 --delta;
             } else if(delta>=2) {
@@ -814,8 +807,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
             ++delta;
         }
         while(delta<0 && i>0) {
-            U8_PREV(s, 0, i, c);
-            if(c<0xffff) {
+            U8_PREV_OR_FFFD(s, 0, i, c);
+            if(c<=0xffff) {
                 --pos;
                 ++delta;
             } else if(delta<=-2) {
@@ -864,10 +857,8 @@ utf8IteratorCurrent(UCharIterator *iter) {
         UChar32 c;
         int32_t i=iter->start;
 
-        U8_NEXT(s, i, iter->limit, c);
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        U8_NEXT_OR_FFFD(s, i, iter->limit, c);
+        if(c<=0xffff) {
             return c;
         } else {
             return U16_LEAD(c);
@@ -892,7 +883,7 @@ utf8IteratorNext(UCharIterator *iter) {
         const uint8_t *s=(const uint8_t *)iter->context;
         UChar32 c;
 
-        U8_NEXT(s, iter->start, iter->limit, c);
+        U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
         if((index=iter->index)>=0) {
             iter->index=++index;
             if(iter->length<0 && iter->start==iter->limit) {
@@ -901,9 +892,7 @@ utf8IteratorNext(UCharIterator *iter) {
         } else if(iter->start==iter->limit && iter->length>=0) {
             iter->index= c<=0xffff ? iter->length : iter->length-1;
         }
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        if(c<=0xffff) {
             return c;
         } else {
             iter->reservedField=c;
@@ -930,15 +919,13 @@ utf8IteratorPrevious(UCharIterator *iter) {
         const uint8_t *s=(const uint8_t *)iter->context;
         UChar32 c;
 
-        U8_PREV(s, 0, iter->start, c);
+        U8_PREV_OR_FFFD(s, 0, iter->start, c);
         if((index=iter->index)>0) {
             iter->index=index-1;
         } else if(iter->start<=1) {
             iter->index= c<=0xffff ? iter->start : iter->start+1;
         }
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        if(c<=0xffff) {
             return c;
         } else {
             iter->start+=4; /* back to behind this supplementary code point for consistent state */
@@ -988,7 +975,7 @@ utf8IteratorSetState(UCharIterator *iter,
             } else {
                 /* verified index>=4 above */
                 UChar32 c;
-                U8_PREV((const uint8_t *)iter->context, 0, index, c);
+                U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
                 if(c<=0xffff) {
                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                 } else {
@@ -1038,22 +1025,22 @@ uiter_current32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->current(iter);
-    if(UTF_IS_SURROGATE(c)) {
-        if(UTF_IS_SURROGATE_FIRST(c)) {
+    if(U16_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE_LEAD(c)) {
             /*
              * go to the next code unit
              * we know that we are not at the limit because c!=U_SENTINEL
              */
             iter->move(iter, 1, UITER_CURRENT);
-            if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c, c2);
+            if(U16_IS_TRAIL(c2=iter->current(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c, c2);
             }
 
             /* undo index movement */
             iter->move(iter, -1, UITER_CURRENT);
         } else {
-            if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c2, c);
+            if(U16_IS_LEAD(c2=iter->previous(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c2, c);
             }
             if(c2>=0) {
                 /* undo index movement */
@@ -1069,9 +1056,9 @@ uiter_next32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->next(iter);
-    if(UTF_IS_FIRST_SURROGATE(c)) {
-        if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c, c2);
+    if(U16_IS_LEAD(c)) {
+        if(U16_IS_TRAIL(c2=iter->next(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c, c2);
         } else if(c2>=0) {
             /* unmatched first surrogate, undo index movement */
             iter->move(iter, -1, UITER_CURRENT);
@@ -1085,9 +1072,9 @@ uiter_previous32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->previous(iter);
-    if(UTF_IS_SECOND_SURROGATE(c)) {
-        if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c2, c);
+    if(U16_IS_TRAIL(c)) {
+        if(U16_IS_LEAD(c2=iter->previous(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c2, c);
         } else if(c2>=0) {
             /* unmatched second surrogate, undo index movement */
             iter->move(iter, 1, UITER_CURRENT);