X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..a01113dcd0f39d5da295ef82785beff9ed86fe38:/icuSources/common/unisetspan.cpp?ds=sidebyside diff --git a/icuSources/common/unisetspan.cpp b/icuSources/common/unisetspan.cpp index 0e43dfc3..0a889347 100644 --- a/icuSources/common/unisetspan.cpp +++ b/icuSources/common/unisetspan.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * -* Copyright (C) 2007, International Business Machines +* Copyright (C) 2007-2012, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: unisetspan.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -17,6 +19,8 @@ #include "unicode/utypes.h" #include "unicode/uniset.h" #include "unicode/ustring.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" #include "cmemory.h" #include "uvector.h" #include "unisetspan.h" @@ -498,23 +502,23 @@ spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) { static inline int32_t spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { UChar32 c=*s; - if((int8_t)c>=0) { + if(U8_IS_SINGLE(c)) { return set.contains(c) ? 1 : -1; } - // Take advantage of non-ASCII fastpaths in U8_NEXT(). + // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD(). int32_t i=0; - U8_NEXT(s, i, length, c); + U8_NEXT_OR_FFFD(s, i, length, c); return set.contains(c) ? i : -i; } static inline int32_t spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { UChar32 c=s[length-1]; - if((int8_t)c>=0) { + if(U8_IS_SINGLE(c)) { return set.contains(c) ? 1 : -1; } int32_t i=length-1; - c=utf8_prevCharSafeBody(s, 0, &i, c, -1); + c=utf8_prevCharSafeBody(s, 0, &i, c, -3); length-=i; return set.contains(c) ? length : -length; } @@ -1002,11 +1006,9 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa // Try to match if the increment is not listed already. // Match at code point boundaries. (The UTF-8 strings were converted // from UTF-16 and are guaranteed to be well-formed.) - if( !U8_IS_TRAIL(s[pos-overlap]) && - !offsets.containsOffset(inc) && - matches8(s+pos-overlap, s8, length8) - - ) { + if(!U8_IS_TRAIL(s[pos-overlap]) && + !offsets.containsOffset(inc) && + matches8(s+pos-overlap, s8, length8)) { if(inc==rest) { return length; // Reached the end of the string. } @@ -1048,11 +1050,10 @@ int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpa // Try to match if the string is longer or starts earlier. // Match at code point boundaries. (The UTF-8 strings were converted // from UTF-16 and are guaranteed to be well-formed.) - if( !U8_IS_TRAIL(s[pos-overlap]) && - (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) && - matches8(s+pos-overlap, s8, length8) - - ) { + if(!U8_IS_TRAIL(s[pos-overlap]) && + (overlap>maxOverlap || + /* redundant overlap==maxOverlap && */ inc>maxInc) && + matches8(s+pos-overlap, s8, length8)) { maxInc=inc; // Longest match from earliest start. maxOverlap=overlap; break;