/*
*******************************************************************************
*
-* Copyright (C) 2002-2006, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
-* file name: uset.c
+* file name: uset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
* created on: 2002mar07
* created by: Markus W. Scherer
*
-* The serialized structure, the array of range limits, is
-* the same as in UnicodeSet, except that the HIGH value is not stored.
-*
* There are functions to efficiently serialize a USet into an array of uint16_t
* and functions to use such a serialized form efficiently without
* instantiating a new USet.
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
+U_NAMESPACE_USE
+
+// Allocates a default-constructed UnicodeSet with new and returns it
+// through the opaque USet handle type.
+U_CAPI USet* U_EXPORT2
+uset_openEmpty() {
+    UnicodeSet *freshSet = new UnicodeSet();
+    return (USet*) freshSet;
+}
+
U_CAPI USet* U_EXPORT2
uset_open(UChar32 start, UChar32 end) {
return (USet*) new UnicodeSet(start, end);
delete (UnicodeSet*) set;
}
+// Returns the result of UnicodeSet::clone() on the given set, cast to the
+// opaque USet handle type.
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set) {
+    // The handle is only read here, so keep its const qualifier in the cast
+    // (same form as the other read-only wrappers in this file).
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::clone());
+}
+
+// Reports UnicodeSet::isFrozen() for the set behind the opaque handle.
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set) {
+    // Pure query: cast to a const UnicodeSet rather than discarding const.
+    return ((const UnicodeSet*) set)->UnicodeSet::isFrozen();
+}
+
+// Calls UnicodeSet::freeze() on the set behind the opaque handle;
+// whatever freeze() returns is discarded.
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set) {
+    UnicodeSet *target = (UnicodeSet*) set;
+    target->UnicodeSet::freeze();
+}
+
+// Returns the result of UnicodeSet::cloneAsThawed() on the given set, cast
+// to the opaque USet handle type.
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {
+    // The source set is only read, so keep its const qualifier in the cast.
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
+}
+
U_CAPI void U_EXPORT2
uset_set(USet* set,
UChar32 start, UChar32 end) {
U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen) {
- // WRONG! Do not alias, it will stay aliased, even after
- // copying. TODO: do we need a copy ctor that unaliases
- //UnicodeString s(strLen==-1, str, strLen);
-
// UnicodeString handles -1 for strLen
- UnicodeString s(str, strLen);
+ // Three-argument constructor: s aliases the caller's buffer instead of
+ // copying it; the first argument is true when strLen is negative
+ // (presumably meaning "str is NUL-terminated").
+ // NOTE(review): an earlier comment here warned that an alias stays
+ // aliased even after copying -- confirm that UnicodeSet::add() keeps its
+ // own copy of the text, since str may not outlive this call.
+ UnicodeString s(strLen<0, str, strLen);
((UnicodeSet*) set)->UnicodeSet::add(s);
}
((UnicodeSet*) set)->UnicodeSet::clear();
}
+// Calls UnicodeSet::removeAllStrings() on the set behind the opaque handle;
+// the call's return value is not used.
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set) {
+    UnicodeSet *target = (UnicodeSet*) set;
+    target->UnicodeSet::removeAllStrings();
+}
+
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet* set) {
return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
}
+// Forwards s, length and spanCondition unchanged to UnicodeSet::span() on
+// the set behind the opaque handle and returns that call's result.
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    // The set is only read: cast to const UnicodeSet instead of dropping const.
+    return ((const UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
+}
+
+// Forwards s, length and spanCondition unchanged to UnicodeSet::spanBack()
+// on the set behind the opaque handle and returns that call's result.
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
+    // The set is only read: cast to const UnicodeSet instead of dropping const.
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
+}
+
+// Forwards s, length and spanCondition unchanged to UnicodeSet::spanUTF8()
+// on the set behind the opaque handle and returns that call's result.
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    // The set is only read: cast to const UnicodeSet instead of dropping const.
+    return ((const UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
+}
+
+// Forwards s, length and spanCondition unchanged to
+// UnicodeSet::spanBackUTF8() on the set behind the opaque handle and
+// returns that call's result.
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
+    // The set is only read: cast to const UnicodeSet instead of dropping const.
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
+}
+
U_CAPI UBool U_EXPORT2
uset_equals(const USet* set1, const USet* set2) {
return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
// return TRUE;
//}
-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
- if(U_FAILURE(*ec)) {
- return NULL;
- }
- // create a set with the Pattern_White_Space characters,
- // without a pattern for fewer code dependencies
- UnicodeSet *set=new UnicodeSet(9, 0xd);
- set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
- return (USet *)set;
-}
-
/*
* Serialize a USet into 16-bit units.
* Store BMP code points as themselves with one 16-bit unit each.
if(rangeIndex<bmpLength) {
*pStart=array[rangeIndex++];
if(rangeIndex<bmpLength) {
- *pEnd=array[rangeIndex];
+ *pEnd=array[rangeIndex]-1;
} else if(rangeIndex<length) {
- *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+ *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
- *pEnd=0x110000;
+ *pEnd=0x10ffff;
}
- --*pEnd;
return TRUE;
} else {
rangeIndex-=bmpLength;
*pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
rangeIndex+=2;
if(rangeIndex<length) {
- *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+ *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
} else {
- *pEnd=0x110000;
+ *pEnd=0x10ffff;
}
- --*pEnd;
return TRUE;
} else {
return FALSE;
// ---
// #define USET_GROW_DELTA 20
//
-// static U_INLINE int32_t
+// static int32_t
// findChar(const UChar32* array, int32_t length, UChar32 c) {
// int32_t i;
//