/*
*******************************************************************************
*
-* Copyright (C) 2002-2008, International Business Machines
+* Copyright (C) 2002-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/utypes.h"
#include "unicode/uchar.h"
+#include "unicode/localpointer.h"
#ifndef UCNV_H
struct USet;
* of each existing element in the set.
* @stable ICU 3.2
*/
- USET_ADD_CASE_MAPPINGS = 4,
-
- /**
- * Enough for any single-code point set
- * @internal
- */
- USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+ USET_ADD_CASE_MAPPINGS = 4
};
/**
*
* The functionality is straightforward for sets with only single code points,
* without strings (which is the common case):
- * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE
- * work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
* - span() and spanBack() partition any string the same way when
* alternating between span(USET_SPAN_NOT_CONTAINED) and
* span(either "contained" condition).
* (strings with unpaired surrogates which cannot be converted to UTF-8)
* are ignored.
*
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
typedef enum USetSpanCondition {
/**
- * Continue a span() while there is no set element at the current position.
+ * Continues a span() while there is no set element at the current position.
+ * Increments by one code point at a time.
* Stops before the first set element (character or string).
* (For code points only, this is like while contains(current)==FALSE).
*
* it returned consists only of characters that are not in the set,
* and none of its strings overlap with the span.
*
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
USET_SPAN_NOT_CONTAINED = 0,
/**
- * Continue a span() while there is a set element at the current position.
+ * Spans the longest substring that is a concatenation of set elements (characters or strings).
* (For characters only, this is like while contains(current)==TRUE).
*
* When span() returns, the substring between where it started and the position
* it returned consists only of set elements (characters or strings) that are in the set.
*
- * If a set contains strings, then the span will be the longest substring
- * matching any of the possible concatenations of set elements (characters or strings).
- * (There must be a single, non-overlapping concatenation of characters or strings.)
- * This is equivalent to a POSIX regular expression for (OR of each set element)*.
+ * If a set contains strings, then the span will be the longest substring for which there
+ * exists at least one non-overlapping concatenation of set elements (characters or strings).
+ * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
+ * (In contrast, Java/ICU/Perl regular expressions stop at the first matching alternative of an OR.)
*
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
USET_SPAN_CONTAINED = 1,
/**
- * Continue a span() while there is a set element at the current position.
+ * Continues a span() while there is a set element at the current position.
+ * Increments by the longest matching element at each position.
* (For characters only, this is like while contains(current)==TRUE).
*
* When span() returns, the substring between where it started and the position
* Use this span condition together with other longest-match algorithms,
* such as ICU converters (ucnv_getUnicodeSet()).
*
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
USET_SPAN_SIMPLE = 2,
/**
* One more than the last span condition.
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
USET_SPAN_CONDITION_COUNT
} USetSpanCondition;
+enum {
+ /**
+ * Capacity of USerializedSet::staticArray.
+ * Large enough for any set that consists of a single code point.
+ * The value also pads USerializedSet to a convenient sizeof().
+ * @stable ICU 2.4
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
/**
* A serialized form of a Unicode set. Limited manipulations are
* possible directly on a serialized set. See below.
* USet API
*********************************************************************/
+/**
+ * Creates an empty USet object, that is, a set that contains no
+ * characters and no strings.
+ * Equivalent to uset_open(1, 0).
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @see uset_close
+ * @stable ICU 4.2
+ */
+U_STABLE USet* U_EXPORT2
+uset_openEmpty(void);
+
/**
* Creates a USet object that contains the range of characters
* start..end, inclusive. If <code>start > end</code>
- * then an empty set is created.
+ * then an empty set is created (same as using uset_openEmpty()).
* @param start first character of the range, inclusive
* @param end last character of the range, inclusive
* @return a newly created USet. The caller must call uset_close() on
U_STABLE void U_EXPORT2
uset_close(USet* set);
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * This class is declared only when U_SHOW_CPLUSPLUS_API is nonzero.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
/**
* Returns a copy of this object.
* If this set is frozen, then the clone will be frozen as well.
* @param set the original set
* @return the newly allocated copy of the set
* @see uset_cloneAsThawed
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
-U_DRAFT USet * U_EXPORT2
+U_STABLE USet * U_EXPORT2
uset_clone(const USet *set);
/**
* @return TRUE/FALSE for whether the set has been frozen
* @see uset_freeze
* @see uset_cloneAsThawed
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
-U_DRAFT UBool U_EXPORT2
+U_STABLE UBool U_EXPORT2
uset_isFrozen(const USet *set);
/**
* @return the same set, now frozen
* @see uset_isFrozen
* @see uset_cloneAsThawed
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
uset_freeze(USet *set);
/**
* @see uset_freeze
* @see uset_isFrozen
* @see uset_clone
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
-U_DRAFT USet * U_EXPORT2
+U_STABLE USet * U_EXPORT2
uset_cloneAsThawed(const USet *set);
/**
U_STABLE void U_EXPORT2
uset_clear(USet* set);
+/**
+ * Closes this set over the given attribute. For the attribute
+ * USET_CASE, this set is modified so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param set the set to close over; left unchanged if it is frozen
+ *
+ * @param attributes bitmask of the attributes to close over.
+ * Currently only the USET_CASE bit is supported. Any undefined bits
+ * are ignored.
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes);
+
+/**
+ * Removes all strings from this set.
+ *
+ * @param set the set from which the strings are removed
+ * @stable ICU 4.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAllStrings(USet* set);
+
/**
* Returns TRUE if the given USet contains no characters and no
* strings.
* out of range, return (UChar32)-1. The inverse of this method is
* <code>indexOf()</code>.
* @param set the set
- * @param index an index from 0..size()-1 to obtain the char for
+ * @param charIndex an index from 0..size()-1 to obtain the char for
* @return the character at the given index, or (UChar32)-1.
* @stable ICU 3.2
*/
U_STABLE UChar32 U_EXPORT2
-uset_charAt(const USet* set, int32_t index);
+uset_charAt(const USet* set, int32_t charIndex);
/**
* Returns the number of characters and strings contained in the given
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
/**
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
/**
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
/**
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
/**