]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/uiter.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / uiter.cpp
index 923ef2e06f14085553443de4a5c95ac9a4feee6f..b9252d81c2db5cb66011ca96f8c51a364c37a447 100644 (file)
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2003, International Business Machines
+*   Copyright (C) 2002-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  uiter.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 #include "unicode/chariter.h"
 #include "unicode/rep.h"
 #include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
 #include "cstring.h"
 
+U_NAMESPACE_USE
+
 #define IS_EVEN(n) (((n)&1)==0)
 #define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
 
@@ -50,11 +57,12 @@ noopCurrent(UCharIterator * /*iter*/) {
 
 static uint32_t U_CALLCONV
 noopGetState(const UCharIterator * /*iter*/) {
-    return 0;
+    return UITER_NO_STATE;
 }
 
 static void U_CALLCONV
-noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode * /*pErrorCode*/) {
+noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
+    *pErrorCode=U_UNSUPPORTED_ERROR;
 }
 
 static const UCharIterator noopIterator={
@@ -232,13 +240,19 @@ uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
  * except that UChars are assembled from byte pairs.
  */
 
+/* internal helper function */
+static inline UChar32
+utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
+    const uint8_t *p=(const uint8_t *)iter->context;
+    return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+}
+
 static UChar32 U_CALLCONV
 utf16BEIteratorCurrent(UCharIterator *iter) {
     int32_t index;
 
     if((index=iter->index)<iter->limit) {
-        const uint8_t *p=(const uint8_t *)iter->context;
-        return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+        return utf16BEIteratorGet(iter, index);
     } else {
         return U_SENTINEL;
     }
@@ -249,9 +263,8 @@ utf16BEIteratorNext(UCharIterator *iter) {
     int32_t index;
 
     if((index=iter->index)<iter->limit) {
-        const uint8_t *p=(const uint8_t *)iter->context;
         iter->index=index+1;
-        return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+        return utf16BEIteratorGet(iter, index);
     } else {
         return U_SENTINEL;
     }
@@ -262,9 +275,8 @@ utf16BEIteratorPrevious(UCharIterator *iter) {
     int32_t index;
 
     if((index=iter->index)>iter->start) {
-        const uint8_t *p=(const uint8_t *)iter->context;
         iter->index=--index;
-        return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
+        return utf16BEIteratorGet(iter, index);
     } else {
         return U_SENTINEL;
     }
@@ -590,12 +602,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
             i=index=0;
             limit=iter->start; /* count up to the UTF-8 index */
             while(i<limit) {
-                U8_NEXT(s, i, limit, c);
-                if(c<=0xffff) {
-                    ++index;
-                } else {
-                    index+=2;
-                }
+                U8_NEXT_OR_FFFD(s, i, limit, c);
+                index+=U16_LENGTH(c);
             }
 
             iter->start=i; /* just in case setState() did not get us to a code point boundary */
@@ -626,12 +634,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
 
                 /* count from the beginning to the current index */
                 while(i<limit) {
-                    U8_NEXT(s, i, limit, c);
-                    if(c<=0xffff) {
-                        ++length;
-                    } else {
-                        length+=2;
-                    }
+                    U8_NEXT_OR_FFFD(s, i, limit, c);
+                    length+=U16_LENGTH(c);
                 }
 
                 /* assume i==limit==iter->start, set the UTF-16 index */
@@ -648,12 +652,8 @@ utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
             /* count from the current index to the end */
             limit=iter->limit;
             while(i<limit) {
-                U8_NEXT(s, i, limit, c);
-                if(c<=0xffff) {
-                    ++length;
-                } else {
-                    length+=2;
-                }
+                U8_NEXT_OR_FFFD(s, i, limit, c);
+                length+=U16_LENGTH(c);
             }
             iter->length=length;
         }
@@ -758,7 +758,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
             iter->index=iter->length; /* may or may not be <0 (unknown) */
             iter->start=iter->limit;
             iter->reservedField=0;
-            return iter->index>=0 ? iter->index : UITER_UNKNOWN_INDEX;
+            return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
         }
     }
 
@@ -777,8 +777,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
             --delta;
         }
         while(delta>0 && i<limit) {
-            U8_NEXT(s, i, limit, c);
-            if(c<0xffff) {
+            U8_NEXT_OR_FFFD(s, i, limit, c);
+            if(c<=0xffff) {
                 ++pos;
                 --delta;
             } else if(delta>=2) {
@@ -807,8 +807,8 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
             ++delta;
         }
         while(delta<0 && i>0) {
-            U8_PREV(s, 0, i, c);
-            if(c<0xffff) {
+            U8_PREV_OR_FFFD(s, 0, i, c);
+            if(c<=0xffff) {
                 --pos;
                 ++delta;
             } else if(delta<=-2) {
@@ -840,7 +840,7 @@ utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin)
 
 static UBool U_CALLCONV
 utf8IteratorHasNext(UCharIterator *iter) {
-    return iter->reservedField!=0 || iter->start<iter->limit;
+    return iter->start<iter->limit || iter->reservedField!=0;
 }
 
 static UBool U_CALLCONV
@@ -857,10 +857,8 @@ utf8IteratorCurrent(UCharIterator *iter) {
         UChar32 c;
         int32_t i=iter->start;
 
-        U8_NEXT(s, i, iter->limit, c);
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        U8_NEXT_OR_FFFD(s, i, iter->limit, c);
+        if(c<=0xffff) {
             return c;
         } else {
             return U16_LEAD(c);
@@ -885,7 +883,7 @@ utf8IteratorNext(UCharIterator *iter) {
         const uint8_t *s=(const uint8_t *)iter->context;
         UChar32 c;
 
-        U8_NEXT(s, iter->start, iter->limit, c);
+        U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
         if((index=iter->index)>=0) {
             iter->index=++index;
             if(iter->length<0 && iter->start==iter->limit) {
@@ -894,9 +892,7 @@ utf8IteratorNext(UCharIterator *iter) {
         } else if(iter->start==iter->limit && iter->length>=0) {
             iter->index= c<=0xffff ? iter->length : iter->length-1;
         }
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        if(c<=0xffff) {
             return c;
         } else {
             iter->reservedField=c;
@@ -923,15 +919,13 @@ utf8IteratorPrevious(UCharIterator *iter) {
         const uint8_t *s=(const uint8_t *)iter->context;
         UChar32 c;
 
-        U8_PREV(s, 0, iter->start, c);
+        U8_PREV_OR_FFFD(s, 0, iter->start, c);
         if((index=iter->index)>0) {
             iter->index=index-1;
         } else if(iter->start<=1) {
             iter->index= c<=0xffff ? iter->start : iter->start+1;
         }
-        if(c<0) {
-            return 0xfffd;
-        } else if(c<=0xffff) {
+        if(c<=0xffff) {
             return c;
         } else {
             iter->start+=4; /* back to behind this supplementary code point for consistent state */
@@ -981,7 +975,7 @@ utf8IteratorSetState(UCharIterator *iter,
             } else {
                 /* verified index>=4 above */
                 UChar32 c;
-                U8_PREV((const uint8_t *)iter->context, 0, index, c);
+                U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
                 if(c<=0xffff) {
                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                 } else {
@@ -1015,7 +1009,7 @@ uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
             if(length>=0) {
                 iter->limit=length;
             } else {
-                iter->limit=uprv_strlen(s);
+                iter->limit=(int32_t)uprv_strlen(s);
             }
             iter->length= iter->limit<=1 ? iter->limit : -1;
         } else {
@@ -1031,22 +1025,22 @@ uiter_current32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->current(iter);
-    if(UTF_IS_SURROGATE(c)) {
-        if(UTF_IS_SURROGATE_FIRST(c)) {
+    if(U16_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE_LEAD(c)) {
             /*
              * go to the next code unit
              * we know that we are not at the limit because c!=U_SENTINEL
              */
             iter->move(iter, 1, UITER_CURRENT);
-            if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c, c2);
+            if(U16_IS_TRAIL(c2=iter->current(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c, c2);
             }
 
             /* undo index movement */
             iter->move(iter, -1, UITER_CURRENT);
         } else {
-            if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c2, c);
+            if(U16_IS_LEAD(c2=iter->previous(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c2, c);
             }
             if(c2>=0) {
                 /* undo index movement */
@@ -1062,9 +1056,9 @@ uiter_next32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->next(iter);
-    if(UTF_IS_FIRST_SURROGATE(c)) {
-        if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c, c2);
+    if(U16_IS_LEAD(c)) {
+        if(U16_IS_TRAIL(c2=iter->next(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c, c2);
         } else if(c2>=0) {
             /* unmatched first surrogate, undo index movement */
             iter->move(iter, -1, UITER_CURRENT);
@@ -1078,9 +1072,9 @@ uiter_previous32(UCharIterator *iter) {
     UChar32 c, c2;
 
     c=iter->previous(iter);
-    if(UTF_IS_SECOND_SURROGATE(c)) {
-        if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c2, c);
+    if(U16_IS_TRAIL(c)) {
+        if(U16_IS_LEAD(c2=iter->previous(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c2, c);
         } else if(c2>=0) {
             /* unmatched second surrogate, undo index movement */
             iter->move(iter, 1, UITER_CURRENT);