/*
*******************************************************************************
*
-* Copyright (C) 2002-2004, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
-* file name: uset.c
+* file name: uset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
* created on: 2002mar07
* created by: Markus W. Scherer
*
-* The serialized structure, the array of range limits, is
-* the same as in UnicodeSet, except that the HIGH value is not stored.
-*
* There are functions to efficiently serialize a USet into an array of uint16_t
* and functions to use such a serialized form efficiently without
* instantiating a new USet.
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
+U_NAMESPACE_USE
+
// C API constructor added by this patch: allocates a UnicodeSet with no
// constructor arguments and hands it back as an opaque USet* handle.
// The caller receives ownership of the heap object created by `new`.
+U_CAPI USet* U_EXPORT2
+uset_openEmpty() {
+ return (USet*) new UnicodeSet();
+}
+
// C API constructor: allocates a UnicodeSet built from (start, end) and
// returns it as an opaque USet* handle.
U_CAPI USet* U_EXPORT2
uset_open(UChar32 start, UChar32 end) {
return (USet*) new UnicodeSet(start, end);
// NOTE(review): `set` is not a parameter of uset_open and this statement
// follows the return; it looks like the tail of a different function
// (presumably uset_close) joined here at a hunk boundary -- confirm against
// the full file.
delete (UnicodeSet*) set;
}
+// Returns the result of UnicodeSet::clone() on the set behind the opaque
+// handle, cast back to USet*.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set) {
+ return (USet*) (((const UnicodeSet*) set)->UnicodeSet::clone());
+}
+
+// Returns UnicodeSet::isFrozen() for the set behind the opaque handle.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set) {
+ return ((const UnicodeSet*) set)->UnicodeSet::isFrozen();
+}
+
// C API wrapper added by this patch: calls UnicodeSet::freeze() on the set
// behind the opaque handle and discards whatever freeze() returns.
// The class-qualified call is not dispatched virtually.
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set) {
+ ((UnicodeSet*) set)->UnicodeSet::freeze();
+}
+
+// Returns the result of UnicodeSet::cloneAsThawed() on the set behind the
+// opaque handle, cast back to USet*.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {
+ return (USet*) (((const UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
+}
+
// C API wrapper: forwards (start, end) to UnicodeSet::set on the set behind
// the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_set(USet* set,
UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->set(start, end);
+ ((UnicodeSet*) set)->UnicodeSet::set(start, end);
}
// C API wrapper: passes the UnicodeSet behind `additionalSet` (by const
// reference) to UnicodeSet::addAll on the set behind `set`.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_addAll(USet* set, const USet *additionalSet) {
- ((UnicodeSet*) set)->addAll(*((const UnicodeSet*)additionalSet));
+ ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
}
// C API wrapper: forwards the single code point `c` to UnicodeSet::add on the
// set behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_add(USet* set, UChar32 c) {
- ((UnicodeSet*) set)->add(c);
+ ((UnicodeSet*) set)->UnicodeSet::add(c);
}
// C API wrapper: forwards (start, end) to the two-argument UnicodeSet::add
// overload on the set behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_addRange(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->add(start, end);
+ ((UnicodeSet*) set)->UnicodeSet::add(start, end);
}
// C API wrapper: wraps (str, strLen) in a UnicodeString and passes it to the
// UnicodeString overload of UnicodeSet::add on the set behind the handle.
// The patch drops the explicit u_strlen() step for strLen == -1 and builds the
// string with the three-argument constructor, whose first argument is true for
// any negative strLen.
// NOTE(review): the removed comment below warns against this aliasing
// constructor form ("it will stay aliased, even after copying"), and the added
// code uses that form again -- confirm that UnicodeSet::add stores an
// independent copy of the string.
U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen) {
- // WRONG! Do not alias, it will stay aliased, even after
- // copying. TODO: do we need a copy ctor that unaliases
- //UnicodeString s(strLen==-1, str, strLen);
- // We promised -1 for zero terminated
- if(strLen == -1) {
- strLen = u_strlen(str);
- }
+ // UnicodeString handles -1 for strLen
+ UnicodeString s(strLen<0, str, strLen);
+ ((UnicodeSet*) set)->UnicodeSet::add(s);
+}
+
// C API wrapper added by this patch: builds a UnicodeString from (str, strLen)
// with the two-argument constructor and passes it to the UnicodeString
// overload of UnicodeSet::addAll on the set behind the handle.
// In the diff, the constructor line and the closing brace are unchanged lines
// carried over from the previous uset_addString body.
+U_CAPI void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
+ // UnicodeString handles -1 for strLen
UnicodeString s(str, strLen);
- ((UnicodeSet*) set)->add(s);
+ ((UnicodeSet*) set)->UnicodeSet::addAll(s);
}
// C API wrapper: forwards the single code point `c` to UnicodeSet::remove on
// the set behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_remove(USet* set, UChar32 c) {
- ((UnicodeSet*) set)->remove(c);
+ ((UnicodeSet*) set)->UnicodeSet::remove(c);
}
// C API wrapper: forwards (start, end) to the two-argument UnicodeSet::remove
// overload on the set behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_removeRange(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->remove(start, end);
+ ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
}
// C API wrapper: wraps (str, strLen) in a UnicodeString and passes it to the
// UnicodeString overload of UnicodeSet::remove on the set behind the handle.
// The string is built with the three-argument constructor, whose first
// argument is true only when strLen == -1.
// NOTE(review): uset_addString in this patch tests strLen<0 instead of
// strLen==-1 -- confirm whether other negative lengths should be treated the
// same way here.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen) {
UnicodeString s(strLen==-1, str, strLen);
- ((UnicodeSet*) set)->remove(s);
+ ((UnicodeSet*) set)->UnicodeSet::remove(s);
}
// C API wrapper: passes the UnicodeSet behind `remove` (by const reference) to
// UnicodeSet::removeAll on the set behind `set`.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_removeAll(USet* set, const USet* remove) {
- ((UnicodeSet*) set)->removeAll(*(const UnicodeSet*)remove);
+ ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
}
// C API wrapper: forwards (start, end) to UnicodeSet::retain on the set behind
// the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end) {
- ((UnicodeSet*) set)->retain(start, end);
+ ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
}
// C API wrapper: passes the UnicodeSet behind `retain` (by const reference) to
// UnicodeSet::retainAll on the set behind `set`.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_retainAll(USet* set, const USet* retain) {
- ((UnicodeSet*) set)->retainAll(*(const UnicodeSet*)retain);
+ ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
}
// C API wrapper: calls UnicodeSet::compact() on the set behind the opaque
// handle and discards whatever compact() returns.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_compact(USet* set) {
- ((UnicodeSet*) set)->compact();
+ ((UnicodeSet*) set)->UnicodeSet::compact();
}
// C API wrapper: calls the no-argument UnicodeSet::complement() on the set
// behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_complement(USet* set) {
- ((UnicodeSet*) set)->complement();
+ ((UnicodeSet*) set)->UnicodeSet::complement();
}
// C API wrapper: passes the UnicodeSet behind `complement` (by const
// reference) to UnicodeSet::complementAll on the set behind `set`.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI void U_EXPORT2
uset_complementAll(USet* set, const USet* complement) {
- ((UnicodeSet*) set)->complementAll(*(const UnicodeSet*)complement);
+ ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
}
// C API wrapper: calls UnicodeSet::clear() on the set behind the opaque handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
// The closing brace is a '+' line because the patch inserts a new function
// directly after this one.
U_CAPI void U_EXPORT2
uset_clear(USet* set) {
- ((UnicodeSet*) set)->clear();
+ ((UnicodeSet*) set)->UnicodeSet::clear();
+}
+
// C API wrapper added by this patch: calls UnicodeSet::removeAllStrings() on
// the set behind the opaque handle.
// The class-qualified call is not dispatched virtually.
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set) {
+ ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
}
// C API wrapper: returns UnicodeSet::isEmpty() for the set behind the opaque
// handle, which is accessed through a const UnicodeSet pointer.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet* set) {
- return ((const UnicodeSet*) set)->isEmpty();
+ return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
}
// C API wrapper: returns UnicodeSet::contains(c) for the set behind the opaque
// handle, which is accessed through a const UnicodeSet pointer.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI UBool U_EXPORT2
uset_contains(const USet* set, UChar32 c) {
- return ((const UnicodeSet*) set)->contains(c);
+ return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
}
// C API wrapper: returns the result of the two-argument
// UnicodeSet::contains(start, end) overload for the set behind the handle.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI UBool U_EXPORT2
uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
- return ((const UnicodeSet*) set)->contains(start, end);
+ return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
}
// C API wrapper: wraps (str, strLen) in a UnicodeString and returns the result
// of the UnicodeString overload of UnicodeSet::contains for the set behind the
// handle.
// The string is built with the three-argument constructor, whose first
// argument is true only when strLen == -1.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI UBool U_EXPORT2
uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
UnicodeString s(strLen==-1, str, strLen);
- return ((const UnicodeSet*) set)->contains(s);
+ return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
}
// C API wrapper: returns UnicodeSet::containsAll for the set behind `set1`,
// given the UnicodeSet behind `set2` by const reference.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
// The closing brace is a '+' line because the patch inserts a new function
// directly after this one.
U_CAPI UBool U_EXPORT2
uset_containsAll(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->containsAll(* (const UnicodeSet*) set2);
+ return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
+}
+
// C API wrapper added by this patch: wraps (str, strLen) in a UnicodeString
// and returns the result of the UnicodeString overload of
// UnicodeSet::containsAll for the set behind the handle.
// The first constructor argument is true only when strLen == -1.
// The class-qualified call is not dispatched virtually.
+U_CAPI UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
+ // Create a string alias, since nothing is being added to the set.
+ UnicodeString s(strLen==-1, str, strLen);
+ return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
}
// C API wrapper: returns UnicodeSet::containsNone for the set behind `set1`,
// given the UnicodeSet behind `set2` by const reference.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI UBool U_EXPORT2
uset_containsNone(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->containsNone(* (const UnicodeSet*) set2);
+ return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
}
// C API wrapper: returns UnicodeSet::containsSome for the set behind `set1`,
// given the UnicodeSet behind `set2` by const reference.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
// The closing brace is a '+' line because the patch inserts new functions
// directly after this one.
U_CAPI UBool U_EXPORT2
uset_containsSome(const USet* set1, const USet* set2) {
- return ((const UnicodeSet*) set1)->containsSome(* (const UnicodeSet*) set2);
+ return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
+}
+
+// Returns UnicodeSet::span(s, length, spanCondition) for the set behind the
+// opaque handle; the arguments are forwarded unchanged.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+ return ((const UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
+}
+
+// Returns UnicodeSet::spanBack(s, length, spanCondition) for the set behind
+// the opaque handle; the arguments are forwarded unchanged.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+ return ((const UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
+}
+
+// Returns UnicodeSet::spanUTF8(s, length, spanCondition) for the set behind
+// the opaque handle; `s` is a char buffer and is forwarded unchanged.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+ return ((const UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
+}
+
+// Returns UnicodeSet::spanBackUTF8(s, length, spanCondition) for the set
+// behind the opaque handle; `s` is a char buffer and is forwarded unchanged.
+// The handle is cast to const UnicodeSet* so the const qualifier of `set` is
+// preserved, matching read-only wrappers such as uset_isEmpty and uset_size.
+// The class-qualified call is not dispatched virtually.
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+ return ((const UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
}
U_CAPI UBool U_EXPORT2
// C API wrapper: returns UnicodeSet::indexOf(c) for the set behind the opaque
// handle.
// The added line casts to const UnicodeSet* so the const qualifier of `set` is
// preserved, matching read-only wrappers such as uset_isEmpty and uset_size;
// the removed line is left as the pre-patch text.
// The class-qualified call is not dispatched virtually.
U_CAPI int32_t U_EXPORT2
uset_indexOf(const USet* set, UChar32 c) {
- return ((UnicodeSet*) set)->indexOf(c);
+ return ((const UnicodeSet*) set)->UnicodeSet::indexOf(c);
}
// C API wrapper: returns UnicodeSet::charAt(index) for the set behind the
// opaque handle.
// The added line casts to const UnicodeSet* so the const qualifier of `set` is
// preserved, matching read-only wrappers such as uset_isEmpty and uset_size;
// the removed line is left as the pre-patch text.
// The class-qualified call is not dispatched virtually.
U_CAPI UChar32 U_EXPORT2
uset_charAt(const USet* set, int32_t index) {
- return ((UnicodeSet*) set)->charAt(index);
+ return ((const UnicodeSet*) set)->UnicodeSet::charAt(index);
}
// C API wrapper: returns UnicodeSet::size() for the set behind the opaque
// handle, which is accessed through a const UnicodeSet pointer.
// The patch replaces the unqualified call with a class-qualified one; a
// qualified call names UnicodeSet's own member and is not dispatched virtually.
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set) {
- return ((const UnicodeSet*) set)->size();
+ return ((const UnicodeSet*) set)->UnicodeSet::size();
}
U_NAMESPACE_BEGIN
return 0;
}
- return ((const UnicodeSet*) set)->serialize(dest, destCapacity,* ec);
+ return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
}
U_CAPI UBool U_EXPORT2
array=set->array;
if(c<=0xffff) {
/* find c in the BMP part */
- int32_t i, bmpLength=set->bmpLength;
- for(i=0; i<bmpLength && (uint16_t)c>=array[i]; ++i) {}
- return (UBool)(i&1);
+ int32_t lo = 0;
+ int32_t hi = set->bmpLength-1;
+ if (c < array[0]) {
+ hi = 0;
+ } else if (c < array[hi]) {
+ for(;;) {
+ int32_t i = (lo + hi) >> 1;
+ if (i == lo) {
+ break; // Done!
+ } else if (c < array[i]) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ } else {
+ hi += 1;
+ }
+ return (UBool)(hi&1);
} else {
/* find c in the supplementary part */
- int32_t i, length=set->length;
uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
- for(i=set->bmpLength;
- i<length && (high>array[i] || (high==array[i] && low>=array[i+1]));
- i+=2) {}
-
+ int32_t base = set->bmpLength;
+ int32_t lo = 0;
+ int32_t hi = set->length - 2 - base;
+ if (high < array[base] || (high==array[base] && low<array[base+1])) {
+ hi = 0;
+ } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
+ for (;;) {
+ int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
+ int32_t iabs = i + base;
+ if (i == lo) {
+ break; // Done!
+ } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ } else {
+ hi += 2;
+ }
/* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
- return (UBool)(((i+set->bmpLength)&2)!=0);
+ return (UBool)(((hi+(base<<1))&2)!=0);
}
}
if(rangeIndex<bmpLength) {
*pStart=array[rangeIndex++];
if(rangeIndex<bmpLength) {
- *pEnd=array[rangeIndex];
+ *pEnd=array[rangeIndex]-1;
} else if(rangeIndex<length) {
- *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+ *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
- *pEnd=0x110000;
+ *pEnd=0x10ffff;
}
- --*pEnd;
return TRUE;
} else {
rangeIndex-=bmpLength;
*pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
rangeIndex+=2;
if(rangeIndex<length) {
- *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+ *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
- *pEnd=0x110000;
+ *pEnd=0x10ffff;
}
- --*pEnd;
return TRUE;
} else {
return FALSE;
// ---
// #define USET_GROW_DELTA 20
//
-// static U_INLINE int32_t
+// static int32_t
// findChar(const UChar32* array, int32_t length, UChar32 c) {
// int32_t i;
//