ICU-551.51.4.tar.gz

[apple/icu.git] / icuSources / common / uset.cpp
diff --git a/icuSources/common/uset.cpp b/icuSources/common/uset.cpp

index 8cd252213080dfdc5a78cb2d22d7865595b9f576..5648a115d496226bac15b0c97e424f4b5147275b 100644 (file)
--- a/icuSources/common/uset.cpp
+++ b/icuSources/common/uset.cpp
@@ -1,11 +1,11 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
-*   file name:  uset.c
+*   file name:  uset.cpp
  *   encoding:   US-ASCII
  *   tab size:   8 (not used)
  *   indentation:4
@@ -13,9 +13,6 @@
  *   created on: 2002mar07
  *   created by: Markus W. Scherer
  *
-*   The serialized structure, the array of range limits, is
-*   the same as in UnicodeSet, except that the HIGH value is not stored.
-*
  *   There are functions to efficiently serialize a USet into an array of uint16_t
  *   and functions to use such a serialized form efficiently without
  *   instantiating a new USet.
@@ -29,6 +26,13 @@
  #include "unicode/ustring.h"
  #include "unicode/parsepos.h"
  
+U_NAMESPACE_USE
+
+U_CAPI USet* U_EXPORT2
+uset_openEmpty() {  // hands back a default-constructed UnicodeSet as an opaque USet
+    return reinterpret_cast<USet*>(new UnicodeSet());
+}
+
  U_CAPI USet* U_EXPORT2
  uset_open(UChar32 start, UChar32 end) {
      return (USet*) new UnicodeSet(start, end);
@@ -39,6 +43,26 @@ uset_close(USet* set) {
      delete (UnicodeSet*) set;
  }
  
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set) {  // forwards to UnicodeSet::clone(); set is only read
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::clone());
+}
+
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set) {  // read-only query, so the const qualifier is kept
+    return ((const UnicodeSet*) set)->UnicodeSet::isFrozen();
+}
+
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set) {  // forwards to UnicodeSet::freeze() on the wrapped set
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::freeze();
+}
+
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {  // forwards to UnicodeSet::cloneAsThawed(); set is only read
+    return (USet*) (((const UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
+}
+
  U_CAPI void U_EXPORT2
  uset_set(USet* set,
       UChar32 start, UChar32 end) {
@@ -62,12 +86,8 @@ uset_addRange(USet* set, UChar32 start, UChar32 end) {
  
  U_CAPI void U_EXPORT2
  uset_addString(USet* set, const UChar* str, int32_t strLen) {
-    // WRONG! Do not alias, it will stay aliased, even after 
-    // copying. TODO: do we need a copy ctor that unaliases
-    //UnicodeString s(strLen==-1, str, strLen);
-
      // UnicodeString handles -1 for strLen
-    UnicodeString s(str, strLen);
+    UnicodeString s(strLen<0, str, strLen);  // aliasing ctor: wraps str in place; a negative strLen marks str as NUL-terminated
      ((UnicodeSet*) set)->UnicodeSet::add(s);
  }
  
@@ -129,6 +149,11 @@ uset_clear(USet* set) {
      ((UnicodeSet*) set)->UnicodeSet::clear();
  }
  
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set) {  // forwards to UnicodeSet::removeAllStrings()
+    reinterpret_cast<UnicodeSet*>(set)->UnicodeSet::removeAllStrings();
+}
+
  U_CAPI UBool U_EXPORT2
  uset_isEmpty(const USet* set) {
      return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
@@ -172,6 +197,26 @@ uset_containsSome(const USet* set1, const USet* set2) {
      return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
  }
  
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {  // forwards to UnicodeSet::span(); set is only read
+    return ((const UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {  // forwards to UnicodeSet::spanBack(); set is only read
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {  // forwards to UnicodeSet::spanUTF8(); set is only read
+    return ((const UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {  // forwards to UnicodeSet::spanBackUTF8(); set is only read
+    return ((const UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
+}
+
  U_CAPI UBool U_EXPORT2
  uset_equals(const USet* set1, const USet* set2) {
      return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
@@ -265,18 +310,6 @@ uset_getItem(const USet* uset, int32_t itemIndex,
  //    return TRUE;
  //}
  
-U_CAPI USet* U_EXPORT2
-uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
-    if(U_FAILURE(*ec)) {
-        return NULL;
-    }
-    // create a set with the Pattern_White_Space characters,
-    // without a pattern for fewer code dependencies
-    UnicodeSet *set=new UnicodeSet(9, 0xd);
-    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
-    return (USet *)set;
-}
-
  /*
   * Serialize a USet into 16-bit units.
   * Store BMP code points as themselves with one 16-bit unit each.
@@ -456,13 +489,12 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
      if(rangeIndex<bmpLength) {
          *pStart=array[rangeIndex++];
          if(rangeIndex<bmpLength) {
-            *pEnd=array[rangeIndex];
+            *pEnd=array[rangeIndex]-1;
          } else if(rangeIndex<length) {
-            *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+            *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
          } else {
-            *pEnd=0x110000;
+            *pEnd=0x10ffff;
          }
-        --*pEnd;
          return TRUE;
      } else {
          rangeIndex-=bmpLength;
@@ -473,11 +505,10 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
              *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
              rangeIndex+=2;
              if(rangeIndex<length) {
-                *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+                *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
              } else {
-                *pEnd=0x110000;
+                *pEnd=0x10ffff;
              }
-            --*pEnd;
              return TRUE;
          } else {
              return FALSE;
@@ -495,7 +526,7 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
  // ---
  // #define USET_GROW_DELTA 20
  // 
-// static U_INLINE int32_t
+// static int32_t
  // findChar(const UChar32* array, int32_t length, UChar32 c) {
  //     int32_t i;
  //