]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/unisetspan.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / unisetspan.cpp
index 0e43dfc3f7568920fe69b24e2a1fab41a037954a..0a8893472f958b3fbed6f969e735508e097c829b 100644 (file)
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 ******************************************************************************
 *
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2007, International Business Machines
+*   Copyright (C) 2007-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 *   file name:  unisetspan.cpp
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 *   file name:  unisetspan.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   tab size:   8 (not used)
 *   indentation:4
 *
@@ -17,6 +19,8 @@
 #include "unicode/utypes.h"
 #include "unicode/uniset.h"
 #include "unicode/ustring.h"
 #include "unicode/utypes.h"
 #include "unicode/uniset.h"
 #include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
 #include "cmemory.h"
 #include "uvector.h"
 #include "unisetspan.h"
 #include "cmemory.h"
 #include "uvector.h"
 #include "unisetspan.h"
@@ -498,23 +502,23 @@ spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) {
 static inline int32_t
 spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
     UChar32 c=*s;
 static inline int32_t
 spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
     UChar32 c=*s;
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
         return set.contains(c) ? 1 : -1;
     }
         return set.contains(c) ? 1 : -1;
     }
-    // Take advantage of non-ASCII fastpaths in U8_NEXT().
+    // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
     int32_t i=0;
     int32_t i=0;
-    U8_NEXT(s, i, length, c);
+    U8_NEXT_OR_FFFD(s, i, length, c);
     return set.contains(c) ? i : -i;
 }
 
 static inline int32_t
 spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
     UChar32 c=s[length-1];
     return set.contains(c) ? i : -i;
 }
 
 static inline int32_t
 spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
     UChar32 c=s[length-1];
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
         return set.contains(c) ? 1 : -1;
     }
     int32_t i=length-1;
         return set.contains(c) ? 1 : -1;
     }
     int32_t i=length-1;
-    c=utf8_prevCharSafeBody(s, 0, &i, c, -1);
+    c=utf8_prevCharSafeBody(s, 0, &i, c, -3);
     length-=i;
     return set.contains(c) ? length : -length;
 }
     length-=i;
     return set.contains(c) ? length : -length;
 }
@@ -1002,11 +1006,9 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                     // Try to match if the increment is not listed already.
                     // Match at code point boundaries. (The UTF-8 strings were converted
                     // from UTF-16 and are guaranteed to be well-formed.)
                     // Try to match if the increment is not listed already.
                     // Match at code point boundaries. (The UTF-8 strings were converted
                     // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        !offsets.containsOffset(inc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            !offsets.containsOffset(inc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                         if(inc==rest) {
                             return length;  // Reached the end of the string.
                         }
                         if(inc==rest) {
                             return length;  // Reached the end of the string.
                         }
@@ -1048,11 +1050,10 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                     // Try to match if the string is longer or starts earlier.
                     // Match at code point boundaries. (The UTF-8 strings were converted
                     // from UTF-16 and are guaranteed to be well-formed.)
                     // Try to match if the string is longer or starts earlier.
                     // Match at code point boundaries. (The UTF-8 strings were converted
                     // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            (overlap>maxOverlap ||
+                                /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                         maxInc=inc;  // Longest match from earliest start.
                         maxOverlap=overlap;
                         break;
                         maxInc=inc;  // Longest match from earliest start.
                         maxOverlap=overlap;
                         break;