+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 2007, International Business Machines
+* Copyright (C) 2007-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: unisetspan.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
#include "cmemory.h"
#include "uvector.h"
#include "unisetspan.h"
// Tests the one code point that starts at s[0] against the set.
// Returns the byte length of that code point: positive when the set contains
// it, negated when it does not.
// NOTE(review): s[0] is read unconditionally, so this assumes length>0 --
// confirm against the callers.
static inline int32_t
spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
UChar32 c=*s;
- if((int8_t)c>=0) {
+ if(U8_IS_SINGLE(c)) {
// The first byte is a complete one-byte sequence, so the length is 1.
return set.contains(c) ? 1 : -1;
}
- // Take advantage of non-ASCII fastpaths in U8_NEXT().
+ // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD().
int32_t i=0;
- U8_NEXT(s, i, length, c);
+ U8_NEXT_OR_FFFD(s, i, length, c);
// i started at 0 and was advanced by the macro, so it is the byte count used
// for the result.
// NOTE(review): with the _OR_FFFD variant an ill-formed sequence presumably
// yields U+FFFD in c instead of a negative value -- confirm in utf8.h.
return set.contains(c) ? i : -i;
}
// Backward counterpart of spanOneUTF8(): tests the one code point that ends
// at s[length-1] against the set.
// Returns the byte length of that code point: positive when the set contains
// it, negated when it does not.
// NOTE(review): s[length-1] is read unconditionally, so this assumes length>0
// -- confirm against the callers.
static inline int32_t
spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) {
UChar32 c=s[length-1];
- if((int8_t)c>=0) {
+ if(U8_IS_SINGLE(c)) {
// The last byte is a complete one-byte sequence, so the length is 1.
return set.contains(c) ? 1 : -1;
}
int32_t i=length-1;
// The helper receives &i and may move i back toward 0 (the start limit passed
// as the second argument) while decoding the multi-byte sequence.
// NOTE(review): the last argument selects the error behavior; -3 presumably
// requests U+FFFD for ill-formed input (previously -1) -- confirm against the
// helper's documentation.
- c=utf8_prevCharSafeBody(s, 0, &i, c, -1);
+ c=utf8_prevCharSafeBody(s, 0, &i, c, -3);
// Distance from the index left in i to the end of the input: the number of
// bytes reported for this code point.
length-=i;
return set.contains(c) ? length : -length;
}
// Try to match if the increment is not listed already.
// Match at code point boundaries. (The UTF-8 strings were converted
// from UTF-16 and are guaranteed to be well-formed.)
- if( !U8_IS_TRAIL(s[pos-overlap]) &&
- !offsets.containsOffset(inc) &&
- matches8(s+pos-overlap, s8, length8)
-
- ) {
+ if(!U8_IS_TRAIL(s[pos-overlap]) &&
+ !offsets.containsOffset(inc) &&
+ matches8(s+pos-overlap, s8, length8)) {
if(inc==rest) {
return length; // Reached the end of the string.
}
// Try to match if the string is longer or starts earlier.
// Match at code point boundaries. (The UTF-8 strings were converted
// from UTF-16 and are guaranteed to be well-formed.)
- if( !U8_IS_TRAIL(s[pos-overlap]) &&
- (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) &&
- matches8(s+pos-overlap, s8, length8)
-
- ) {
+ if(!U8_IS_TRAIL(s[pos-overlap]) &&
+ (overlap>maxOverlap ||
+ /* redundant overlap==maxOverlap && */ inc>maxInc) &&
+ matches8(s+pos-overlap, s8, length8)) {
maxInc=inc; // Longest match from earliest start.
maxOverlap=overlap;
break;