ICU-551.51.4.tar.gz

[apple/icu.git] / icuSources / common / uset.cpp
diff --git a/icuSources/common/uset.cpp b/icuSources/common/uset.cpp

index cd5f323ae15160c896393a2bbafd92f498c5971f..5648a115d496226bac15b0c97e424f4b5147275b 100644 (file)
--- a/icuSources/common/uset.cpp
+++ b/icuSources/common/uset.cpp
@@ -1,11 +1,11 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2004, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
-*   file name:  uset.c
+*   file name:  uset.cpp
  *   encoding:   US-ASCII
  *   tab size:   8 (not used)
  *   indentation:4
@@ -13,9 +13,6 @@
  *   created on: 2002mar07
  *   created by: Markus W. Scherer
  *
-*   The serialized structure, the array of range limits, is
-*   the same as in UnicodeSet, except that the HIGH value is not stored.
-*
  *   There are functions to efficiently serialize a USet into an array of uint16_t
  *   and functions to use such a serialized form efficiently without
  *   instantiating a new USet.
@@ -29,6 +26,13 @@
  #include "unicode/ustring.h"
  #include "unicode/parsepos.h"
  
+U_NAMESPACE_USE
+
+U_CAPI USet* U_EXPORT2
+uset_openEmpty() {  // C entry point that takes no arguments and returns a newly allocated set handle
+    return (USet*) new UnicodeSet();  // default-constructed UnicodeSet on the heap, exposed as an opaque USet*; uset_close() deletes it
+}
+
  U_CAPI USet* U_EXPORT2
  uset_open(UChar32 start, UChar32 end) {
      return (USet*) new UnicodeSet(start, end);
@@ -39,125 +43,178 @@ uset_close(USet* set) {
      delete (UnicodeSet*) set;
  }
  
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set) {  // returns whatever UnicodeSet::clone() yields for the object behind `set`
+    return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());  // C-style cast also drops const; class-qualified call, so no virtual dispatch takes place
+}
+
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set) {  // reports the result of UnicodeSet::isFrozen() for the wrapped object
+    return ((UnicodeSet*) set)->UnicodeSet::isFrozen();  // cast discards the const on `set`; class-qualified call, so no virtual dispatch takes place
+}
+
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set) {  // forwards to UnicodeSet::freeze() on the wrapped object
+    ((UnicodeSet*) set)->UnicodeSet::freeze();  // any value returned by freeze() is discarded; class-qualified call, so no virtual dispatch takes place
+}
+
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set) {  // returns whatever UnicodeSet::cloneAsThawed() yields for the object behind `set`
+    return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());  // C-style cast also drops const; result is re-exposed as an opaque USet*
+}
+
  U_CAPI void U_EXPORT2
  uset_set(USet* set,
       UChar32 start, UChar32 end) {
-    ((UnicodeSet*) set)->set(start, end);
+    ((UnicodeSet*) set)->UnicodeSet::set(start, end);  // same forwarding as before, now class-qualified so no virtual dispatch takes place
  }
  
  U_CAPI void U_EXPORT2
  uset_addAll(USet* set, const USet *additionalSet) {
-    ((UnicodeSet*) set)->addAll(*((const UnicodeSet*)additionalSet));
+    ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));  // additionalSet is dereferenced and passed by const reference; neither pointer is null-checked
  }
  
  U_CAPI void U_EXPORT2
  uset_add(USet* set, UChar32 c) {
-    ((UnicodeSet*) set)->add(c);
+    ((UnicodeSet*) set)->UnicodeSet::add(c);  // single-code-point overload of UnicodeSet::add, called with class qualification
  }
  
  U_CAPI void U_EXPORT2
  uset_addRange(USet* set, UChar32 start, UChar32 end) {
-    ((UnicodeSet*) set)->add(start, end);    
+    ((UnicodeSet*) set)->UnicodeSet::add(start, end);    // two-argument (start, end) overload of UnicodeSet::add, called with class qualification
  }
  
  U_CAPI void U_EXPORT2
  uset_addString(USet* set, const UChar* str, int32_t strLen) {
-  // WRONG! Do not alias, it will stay aliased, even after 
-  // copying. TODO: do we need a copy ctor that unaliases
-    //UnicodeString s(strLen==-1, str, strLen);
-  // We promised -1 for zero terminated
-    if(strLen == -1) {
-      strLen = u_strlen(str);
-    }
+    // UnicodeString handles -1 for strLen, so the explicit u_strlen() call is no longer needed here
+    UnicodeString s(strLen<0, str, strLen);  // NOTE(review): this reinstates the three-argument (aliasing) constructor the removed comment warned against; presumably safe because the set keeps its own copy - confirm
+    ((UnicodeSet*) set)->UnicodeSet::add(s);  // string overload of UnicodeSet::add, called with class qualification
+}
+
+U_CAPI void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {  // wraps str in a UnicodeString and forwards it to UnicodeSet::addAll
+    // UnicodeString handles -1 for strLen
      UnicodeString s(str, strLen);
-    ((UnicodeSet*) set)->add(s);
+    ((UnicodeSet*) set)->UnicodeSet::addAll(s);  // UnicodeString overload of addAll; presumably adds each code point of s, per this function's name - confirm against UnicodeSet docs
  }
  
  U_CAPI void U_EXPORT2
  uset_remove(USet* set, UChar32 c) {
-    ((UnicodeSet*) set)->remove(c);
+    ((UnicodeSet*) set)->UnicodeSet::remove(c);  // single-code-point overload of UnicodeSet::remove; class-qualified, so no virtual dispatch takes place
  }
  
  U_CAPI void U_EXPORT2
  uset_removeRange(USet* set, UChar32 start, UChar32 end) {
-    ((UnicodeSet*) set)->remove(start, end);
+    ((UnicodeSet*) set)->UnicodeSet::remove(start, end);  // two-argument (start, end) overload of UnicodeSet::remove, called with class qualification
  }
  
  U_CAPI void U_EXPORT2
  uset_removeString(USet* set, const UChar* str, int32_t strLen) {
      UnicodeString s(strLen==-1, str, strLen);
-    ((UnicodeSet*) set)->remove(s);
+    ((UnicodeSet*) set)->UnicodeSet::remove(s);  // string overload of UnicodeSet::remove applied to the temporary s built from str/strLen
  }
  
  U_CAPI void U_EXPORT2
  uset_removeAll(USet* set, const USet* remove) {
-    ((UnicodeSet*) set)->removeAll(*(const UnicodeSet*)remove);
+    ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);  // the parameter named `remove` is dereferenced and passed by const reference
  }
  
  U_CAPI void U_EXPORT2
  uset_retain(USet* set, UChar32 start, UChar32 end) {
-    ((UnicodeSet*) set)->retain(start, end);
+    ((UnicodeSet*) set)->UnicodeSet::retain(start, end);  // forwards the (start, end) pair to UnicodeSet::retain with class qualification
  }
  
  U_CAPI void U_EXPORT2
  uset_retainAll(USet* set, const USet* retain) {
-    ((UnicodeSet*) set)->retainAll(*(const UnicodeSet*)retain);
+    ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);  // the parameter named `retain` is dereferenced and passed by const reference
  }
  
  U_CAPI void U_EXPORT2
  uset_compact(USet* set) {
-    ((UnicodeSet*) set)->compact();
+    ((UnicodeSet*) set)->UnicodeSet::compact();  // forwards to UnicodeSet::compact(); any return value is discarded
  }
  
  U_CAPI void U_EXPORT2
  uset_complement(USet* set) {
-    ((UnicodeSet*) set)->complement();
+    ((UnicodeSet*) set)->UnicodeSet::complement();  // zero-argument overload of UnicodeSet::complement, called with class qualification
  }
  
  U_CAPI void U_EXPORT2
  uset_complementAll(USet* set, const USet* complement) {
-    ((UnicodeSet*) set)->complementAll(*(const UnicodeSet*)complement);
+    ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);  // the parameter named `complement` is dereferenced and passed by const reference
  }
  
  U_CAPI void U_EXPORT2
  uset_clear(USet* set) {
-    ((UnicodeSet*) set)->clear();
+    ((UnicodeSet*) set)->UnicodeSet::clear();  // forwards to UnicodeSet::clear(); class-qualified, so no virtual dispatch takes place
+}
+
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set) {  // new C wrapper introduced by this patch for UnicodeSet::removeAllStrings()
+    ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();  // any return value is discarded; class-qualified, so no virtual dispatch takes place
  }
  
  U_CAPI UBool U_EXPORT2
  uset_isEmpty(const USet* set) {
-    return ((const UnicodeSet*) set)->isEmpty();
+    return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();  // const-preserving cast; returns UnicodeSet::isEmpty() unchanged
  }
  
  U_CAPI UBool U_EXPORT2
  uset_contains(const USet* set, UChar32 c) {
-    return ((const UnicodeSet*) set)->contains(c);
+    return ((const UnicodeSet*) set)->UnicodeSet::contains(c);  // single-code-point overload of UnicodeSet::contains; result returned as-is
  }
  
  U_CAPI UBool U_EXPORT2
  uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
-    return ((const UnicodeSet*) set)->contains(start, end);
+    return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);  // two-argument (start, end) overload of UnicodeSet::contains; result returned as-is
  }
  
  U_CAPI UBool U_EXPORT2
  uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
      UnicodeString s(strLen==-1, str, strLen);
-    return ((const UnicodeSet*) set)->contains(s);
+    return ((const UnicodeSet*) set)->UnicodeSet::contains(s);  // string overload of UnicodeSet::contains applied to the temporary s built from str/strLen
  }
  
  U_CAPI UBool U_EXPORT2
  uset_containsAll(const USet* set1, const USet* set2) {
-    return ((const UnicodeSet*) set1)->containsAll(* (const UnicodeSet*) set2);
+    return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);  // set-argument overload: set2 is dereferenced and passed by const reference
+}
+
+U_CAPI UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {  // returns UnicodeSet::containsAll() for a UnicodeString built over str
+    // Create a string alias, since nothing is being added to the set.
+    UnicodeString s(strLen==-1, str, strLen);  // NOTE(review): first argument is true only for exactly -1, whereas uset_addString tests strLen<0 - confirm other negative lengths behave as intended
+    return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);  // UnicodeString overload of containsAll, called with class qualification
  }
  
  U_CAPI UBool U_EXPORT2
  uset_containsNone(const USet* set1, const USet* set2) {
-    return ((const UnicodeSet*) set1)->containsNone(* (const UnicodeSet*) set2);
+    return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);  // set2 is dereferenced and passed by const reference; result returned as-is
  }
  
  U_CAPI UBool U_EXPORT2
  uset_containsSome(const USet* set1, const USet* set2) {
-    return ((const UnicodeSet*) set1)->containsSome(* (const UnicodeSet*) set2);
+    return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);  // set2 is dereferenced and passed by const reference; result returned as-is
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {  // UTF-16 (UChar) input; arguments are passed through unchanged
+    return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);  // C-style cast discards the const on `set`; returns UnicodeSet::span()'s int32_t result as-is
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {  // UTF-16 (UChar) input; arguments are passed through unchanged
+    return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);  // C-style cast discards the const on `set`; returns UnicodeSet::spanBack()'s int32_t result as-is
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {  // char-based input variant; arguments are passed through unchanged
+    return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);  // C-style cast discards the const on `set`; returns UnicodeSet::spanUTF8()'s int32_t result as-is
+}
+
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {  // char-based input variant; arguments are passed through unchanged
+    return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);  // C-style cast discards the const on `set`; returns UnicodeSet::spanBackUTF8()'s int32_t result as-is
  }
  
  U_CAPI UBool U_EXPORT2
@@ -167,17 +224,17 @@ uset_equals(const USet* set1, const USet* set2) {
  
  U_CAPI int32_t U_EXPORT2
  uset_indexOf(const USet* set, UChar32 c) {
-    return ((UnicodeSet*) set)->indexOf(c);
+    return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);  // cast discards the const on `set` (unchanged by this patch); returns UnicodeSet::indexOf(c) as-is
  }
  
  U_CAPI UChar32 U_EXPORT2
  uset_charAt(const USet* set, int32_t index) {
-    return ((UnicodeSet*) set)->charAt(index);
+    return ((UnicodeSet*) set)->UnicodeSet::charAt(index);  // cast discards the const on `set` (unchanged by this patch); returns UnicodeSet::charAt(index) as-is
  }
  
  U_CAPI int32_t U_EXPORT2
  uset_size(const USet* set) {
-    return ((const UnicodeSet*) set)->size();
+    return ((const UnicodeSet*) set)->UnicodeSet::size();  // const-preserving cast; returns UnicodeSet::size() unchanged
  }
  
  U_NAMESPACE_BEGIN
@@ -277,7 +334,7 @@ uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode
          return 0;
      }
  
-    return ((const UnicodeSet*) set)->serialize(dest, destCapacity,* ec);
+    return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
  }
  
  U_CAPI UBool U_EXPORT2
@@ -358,19 +415,50 @@ uset_serializedContains(const USerializedSet* set, UChar32 c) {
      array=set->array;
      if(c<=0xffff) {
          /* find c in the BMP part */
-        int32_t i, bmpLength=set->bmpLength;
-        for(i=0; i<bmpLength && (uint16_t)c>=array[i]; ++i) {}
-        return (UBool)(i&1);
+        int32_t lo = 0;
+        int32_t hi = set->bmpLength-1;
+        if (c < array[0]) {
+            hi = 0;
+        } else if (c < array[hi]) {
+            for(;;) {
+                int32_t i = (lo + hi) >> 1;
+                if (i == lo) {
+                    break;  // Done!
+                } else if (c < array[i]) {
+                    hi = i;
+                } else {
+                    lo = i;
+                }
+            }
+        } else {
+            hi += 1;
+        }
+        return (UBool)(hi&1);
      } else {
          /* find c in the supplementary part */
-        int32_t i, length=set->length;
          uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
-        for(i=set->bmpLength;
-            i<length && (high>array[i] || (high==array[i] && low>=array[i+1]));
-            i+=2) {}
-
+        int32_t base = set->bmpLength;
+        int32_t lo = 0;
+        int32_t hi = set->length - 2 - base;
+        if (high < array[base] || (high==array[base] && low<array[base+1])) {
+            hi = 0;
+        } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
+            for (;;) {
+                int32_t i = ((lo + hi) >> 1) & ~1;  // Guarantee even result
+                int32_t iabs = i + base;
+                if (i == lo) {
+                    break;  // Done!
+                } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
+                    hi = i;
+                } else {
+                    lo = i;
+                }
+            }
+        } else {
+            hi += 2;
+        }
          /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
-        return (UBool)(((i+set->bmpLength)&2)!=0);
+        return (UBool)(((hi+(base<<1))&2)!=0);
      }
  }
  
@@ -401,13 +489,12 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
      if(rangeIndex<bmpLength) {
          *pStart=array[rangeIndex++];
          if(rangeIndex<bmpLength) {
-            *pEnd=array[rangeIndex];
+            *pEnd=array[rangeIndex]-1;
          } else if(rangeIndex<length) {
-            *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+            *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
          } else {
-            *pEnd=0x110000;
+            *pEnd=0x10ffff;
          }
-        --*pEnd;
          return TRUE;
      } else {
          rangeIndex-=bmpLength;
@@ -418,11 +505,10 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
              *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
              rangeIndex+=2;
              if(rangeIndex<length) {
-                *pEnd=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
+                *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
              } else {
-                *pEnd=0x110000;
+                *pEnd=0x10ffff;
              }
-            --*pEnd;
              return TRUE;
          } else {
              return FALSE;
@@ -440,7 +526,7 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
  // ---
  // #define USET_GROW_DELTA 20
  // 
-// static U_INLINE int32_t
+// static int32_t
  // findChar(const UChar32* array, int32_t length, UChar32 c) {
  //     int32_t i;
  //