X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..2ca993e82fb37b597a3c73ecd1586a139a6579c5:/icuSources/common/unicode/uset.h diff --git a/icuSources/common/unicode/uset.h b/icuSources/common/unicode/uset.h index 2bbfd7a5..eb3c9e6a 100644 --- a/icuSources/common/unicode/uset.h +++ b/icuSources/common/unicode/uset.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2008, International Business Machines +* Copyright (C) 2002-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -29,6 +29,7 @@ #include "unicode/utypes.h" #include "unicode/uchar.h" +#include "unicode/localpointer.h" #ifndef UCNV_H struct USet; @@ -88,13 +89,7 @@ enum { * of each existing element in the set. * @stable ICU 3.2 */ - USET_ADD_CASE_MAPPINGS = 4, - - /** - * Enough for any single-code point set - * @internal - */ - USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 + USET_ADD_CASE_MAPPINGS = 4 }; /** @@ -103,8 +98,8 @@ enum { * * The functionality is straightforward for sets with only single code points, * without strings (which is the common case): - * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE - * work the same. + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. * - span() and spanBack() partition any string the same way when * alternating between span(USET_SPAN_NOT_CONTAINED) and * span(either "contained" condition). @@ -150,11 +145,12 @@ enum { * (strings with unpaired surrogates which cannot be converted to UTF-8) * are ignored. * - * @stable ICU 4.0 + * @stable ICU 3.8 */ typedef enum USetSpanCondition { /** - * Continue a span() while there is no set element at the current position. + * Continues a span() while there is no set element at the current position. + * Increments by one code point at a time. * Stops before the first set element (character or string). * (For code points only, this is like while contains(current)==FALSE). * @@ -162,26 +158,27 @@ typedef enum USetSpanCondition { * it returned consists only of characters that are not in the set, * and none of its strings overlap with the span. * - * @stable ICU 4.0 + * @stable ICU 3.8 */ USET_SPAN_NOT_CONTAINED = 0, /** - * Continue a span() while there is a set element at the current position. + * Spans the longest substring that is a concatenation of set elements (characters or strings). * (For characters only, this is like while contains(current)==TRUE). * * When span() returns, the substring between where it started and the position * it returned consists only of set elements (characters or strings) that are in the set. * - * If a set contains strings, then the span will be the longest substring - * matching any of the possible concatenations of set elements (characters or strings). - * (There must be a single, non-overlapping concatenation of characters or strings.) - * This is equivalent to a POSIX regular expression for (OR of each set element)*. + * If a set contains strings, then the span will be the longest substring for which there + * exists at least one non-overlapping concatenation of set elements (characters or strings). + * This is equivalent to a POSIX regular expression for (OR of each set element)*. + * (Java/ICU/Perl regex stops at the first match of an OR.) * - * @stable ICU 4.0 + * @stable ICU 3.8 */ USET_SPAN_CONTAINED = 1, /** - * Continue a span() while there is a set element at the current position. + * Continues a span() while there is a set element at the current position. + * Increments by the longest matching element at each position. * (For characters only, this is like while contains(current)==TRUE). * * When span() returns, the substring between where it started and the position @@ -196,16 +193,26 @@ typedef enum USetSpanCondition { * Use this span condition together with other longest-match algorithms, * such as ICU converters (ucnv_getUnicodeSet()). * - * @stable ICU 4.0 + * @stable ICU 3.8 */ USET_SPAN_SIMPLE = 2, /** * One more than the last span condition. - * @stable ICU 4.0 + * @stable ICU 3.8 */ USET_SPAN_CONDITION_COUNT } USetSpanCondition; +enum { + /** + * Capacity of USerializedSet::staticArray. + * Enough for any single-code point set. + * Also provides padding for nice sizeof(USerializedSet). + * @stable ICU 2.4 + */ + USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 +}; + /** * A serialized form of a Unicode set. Limited manipulations are * possible directly on a serialized set. See below. @@ -238,10 +245,20 @@ typedef struct USerializedSet { * USet API *********************************************************************/ +/** + * Create an empty USet object. + * Equivalent to uset_open(1, 0). + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 4.2 + */ +U_STABLE USet* U_EXPORT2 +uset_openEmpty(void); + /** * Creates a USet object that contains the range of characters * start..end, inclusive. If start > end - * then an empty set is created. + * then an empty set is created (same as using uset_openEmpty()). * @param start first character of the range, inclusive * @param end last character of the range, inclusive * @return a newly created USet. The caller must call uset_close() on @@ -289,6 +306,25 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength, U_STABLE void U_EXPORT2 uset_close(USet* set); +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSetPointer + * "Smart pointer" class, closes a USet via uset_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); + +U_NAMESPACE_END + +#endif + /** * Returns a copy of this object. * If this set is frozen, then the clone will be frozen as well. @@ -296,9 +332,9 @@ uset_close(USet* set); * @param set the original set * @return the newly allocated copy of the set * @see uset_cloneAsThawed - * @stable ICU 4.0 + * @stable ICU 3.8 */ -U_DRAFT USet * U_EXPORT2 +U_STABLE USet * U_EXPORT2 uset_clone(const USet *set); /** @@ -308,9 +344,9 @@ uset_clone(const USet *set); * @return TRUE/FALSE for whether the set has been frozen * @see uset_freeze * @see uset_cloneAsThawed - * @stable ICU 4.0 + * @stable ICU 3.8 */ -U_DRAFT UBool U_EXPORT2 +U_STABLE UBool U_EXPORT2 uset_isFrozen(const USet *set); /** @@ -325,9 +361,9 @@ uset_isFrozen(const USet *set); * @return the same set, now frozen * @see uset_isFrozen * @see uset_cloneAsThawed - * @stable ICU 4.0 + * @stable ICU 3.8 */ -U_DRAFT void U_EXPORT2 +U_STABLE void U_EXPORT2 uset_freeze(USet *set); /** @@ -338,9 +374,9 @@ uset_freeze(USet *set); * @see uset_freeze * @see uset_isFrozen * @see uset_clone - * @stable ICU 4.0 + * @stable ICU 3.8 */ -U_DRAFT USet * U_EXPORT2 +U_STABLE USet * U_EXPORT2 uset_cloneAsThawed(const USet *set); /** @@ -673,6 +709,44 @@ uset_complementAll(USet* set, const USet* complement); U_STABLE void U_EXPORT2 uset_clear(USet* set); +/** + * Close this set over the given attribute. For the attribute + * USET_CASE, the result is to modify this set so that: + * + * 1. For each character or string 'a' in this set, all strings or + * characters 'b' such that foldCase(a) == foldCase(b) are added + * to this set. + * + * 2. For each string 'e' in the resulting set, if e != + * foldCase(e), 'e' will be removed. + * + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] + * + * (Here foldCase(x) refers to the operation u_strFoldCase, and a + * == b denotes that the contents are the same, not pointer + * comparison.) + * + * A frozen set will not be modified. + * + * @param set the set + * + * @param attributes bitmask for attributes to close over. + * Currently only the USET_CASE bit is supported. Any undefined bits + * are ignored. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes); + +/** + * Remove all strings from this set. + * + * @param set the set + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_removeAllStrings(USet* set); + /** * Returns TRUE if the given USet contains no characters and no * strings. @@ -736,12 +810,12 @@ uset_indexOf(const USet* set, UChar32 c); * out of range, return (UChar32)-1. The inverse of this method is * indexOf(). * @param set the set - * @param index an index from 0..size()-1 to obtain the char for + * @param charIndex an index from 0..size()-1 to obtain the char for * @return the character at the given index, or (UChar32)-1. * @stable ICU 3.2 */ U_STABLE UChar32 U_EXPORT2 -uset_charAt(const USet* set, int32_t index); +uset_charAt(const USet* set, int32_t charIndex); /** * Returns the number of characters and strings contained in the given @@ -851,10 +925,10 @@ uset_containsSome(const USet* set1, const USet* set2); * @param spanCondition specifies the containment condition * @return the length of the initial substring according to the spanCondition; * 0 if the start of the string does not fit the spanCondition - * @stable ICU 4.0 + * @stable ICU 3.8 * @see USetSpanCondition */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); /** @@ -872,10 +946,10 @@ uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spa * @param spanCondition specifies the containment condition * @return the start of the trailing substring according to the spanCondition; * the string length if the end of the string does not fit the spanCondition - * @stable ICU 4.0 + * @stable ICU 3.8 * @see USetSpanCondition */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); /** @@ -894,10 +968,10 @@ uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition * @param spanCondition specifies the containment condition * @return the length of the initial substring according to the spanCondition; * 0 if the start of the string does not fit the spanCondition - * @stable ICU 4.0 + * @stable ICU 3.8 * @see USetSpanCondition */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); /** @@ -915,10 +989,10 @@ uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition * @param spanCondition specifies the containment condition * @return the start of the trailing substring according to the spanCondition; * the string length if the end of the string does not fit the spanCondition - * @stable ICU 4.0 + * @stable ICU 3.8 * @see USetSpanCondition */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); /**