ICU-64260.0.1.tar.gz

[apple/icu.git] / icuSources / common / unisetspan.cpp
diff --git a/icuSources/common/unisetspan.cpp b/icuSources/common/unisetspan.cpp

index 0e43dfc3f7568920fe69b24e2a1fab41a037954a..0a8893472f958b3fbed6f969e735508e097c829b 100644 (file)
--- a/icuSources/common/unisetspan.cpp
+++ b/icuSources/common/unisetspan.cpp
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2007, International Business Machines
+*   Copyright (C) 2007-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
  *   file name:  unisetspan.cpp
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
  *   tab size:   8 (not used)
  *   indentation:4
  *
@@ -17,6 +19,8 @@
  #include "unicode/utypes.h"
  #include "unicode/uniset.h"
  #include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "uvector.h"
  #include "unisetspan.h"
@@ -498,23 +502,23 @@ spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) {
  static inline int32_t
  spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
      UChar32 c=*s;
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
          return set.contains(c) ? 1 : -1;
      }
-    // Take advantage of non-ASCII fastpaths in U8_NEXT().
+    // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
      int32_t i=0;
-    U8_NEXT(s, i, length, c);
+    U8_NEXT_OR_FFFD(s, i, length, c);
      return set.contains(c) ? i : -i;
  }
  
  static inline int32_t
  spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
      UChar32 c=s[length-1];
-    if((int8_t)c>=0) {
+    if(U8_IS_SINGLE(c)) {
          return set.contains(c) ? 1 : -1;
      }
      int32_t i=length-1;
-    c=utf8_prevCharSafeBody(s, 0, &i, c, -1);
+    c=utf8_prevCharSafeBody(s, 0, &i, c, -3);
      length-=i;
      return set.contains(c) ? length : -length;
  }
@@ -1002,11 +1006,9 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                      // Try to match if the increment is not listed already.
                      // Match at code point boundaries. (The UTF-8 strings were converted
                      // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        !offsets.containsOffset(inc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            !offsets.containsOffset(inc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                          if(inc==rest) {
                              return length;  // Reached the end of the string.
                          }
@@ -1048,11 +1050,10 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa
                      // Try to match if the string is longer or starts earlier.
                      // Match at code point boundaries. (The UTF-8 strings were converted
                      // from UTF-16 and are guaranteed to be well-formed.)
-                    if( !U8_IS_TRAIL(s[pos-overlap]) &&
-                        (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
-                        matches8(s+pos-overlap, s8, length8)
-                        
-                    ) {
+                    if(!U8_IS_TRAIL(s[pos-overlap]) &&
+                            (overlap>maxOverlap ||
+                                /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+                            matches8(s+pos-overlap, s8, length8)) {
                          maxInc=inc;  // Longest match from earliest start.
                          maxOverlap=overlap;
                          break;