/*
***************************************************************************
-* Copyright (C) 1999-2008, International Business Machines Corporation
+* Copyright (C) 1999-2013, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
* Date Name Description
U_NAMESPACE_BEGIN
+// Forward Declarations.
+void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */
+
class BMPSet;
class ParsePosition;
+class RBBIRuleScanner;
class SymbolTable;
class UnicodeSetStringSpan;
class UVector;
*
* @return TRUE if the set is valid, FALSE otherwise
* @see setToBogus()
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
inline UBool isBogus(void) const;
* take a UErrorCode for simplicity.
*
* @see isBogus()
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
void setToBogus();
UnicodeSet(const UnicodeString& pattern,
UErrorCode& status);
+#ifndef U_HIDE_INTERNAL_API
/**
* Constructs a set from the given pattern. See the class
* description for the syntax of the pattern language.
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
/**
* Constructs a set from the given pattern. See the class description
*/
virtual int32_t hashCode(void) const;
+ /**
+ * Get a UnicodeSet pointer from a USet.
+ * The pointer is only reinterpreted; no object is copied or created.
+ *
+ * @param uset a USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static UnicodeSet *fromUSet(USet *uset);
+
+ /**
+ * Get a const UnicodeSet pointer from a const USet.
+ * The pointer is only reinterpreted; no object is copied or created.
+ *
+ * @param uset a const USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static const UnicodeSet *fromUSet(const USet *uset);
+
+ /**
+ * Produce a USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet.
+ * The this pointer is only reinterpreted; no object is copied or created.
+ *
+ * @return a USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline USet *toUSet();
+
+
+ /**
+ * Produce a const USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet.
+ * The this pointer is only reinterpreted; no object is copied or created.
+ *
+ * @return a const USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline const USet * toUSet() const;
+
+
//----------------------------------------------------------------
// Freezable API
//----------------------------------------------------------------
* @return TRUE/FALSE for whether the set has been frozen
* @see freeze
* @see cloneAsThawed
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
inline UBool isFrozen() const;
* @return this set.
* @see isFrozen
* @see cloneAsThawed
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
UnicodeFunctor *freeze();
* @return the mutable clone
* @see freeze
* @see isFrozen
- * @stable ICU 4.0
+ * @stable ICU 3.8
*/
UnicodeFunctor *cloneAsThawed() const;
/**
* Modifies this set to represent the set specified by the given
- * pattern, optionally ignoring white space. See the class
- * description for the syntax of the pattern language.
+ * pattern, ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
* A frozen set will not be modified.
* @param pattern a string specifying what characters are in the set
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
UnicodeSet& applyPattern(const UnicodeString& pattern,
UErrorCode& status);
+#ifndef U_HIDE_INTERNAL_API
/**
* Modifies this set to represent the set specified by the given
- * pattern, optionally ignoring white space. See the class
- * description for the syntax of the pattern language.
+ * pattern, optionally ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
* A frozen set will not be modified.
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
/**
* Parses the given pattern, starting at the given position. The
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+ /**
+ * Returns the end of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
+ * after pinning start to 0<=start<=s.length().
+ * @param s the string
+ * @param start the start index in the string for the span operation;
+ * a negative value is treated as 0 and a value greater than s.length()
+ * is treated as s.length()
+ * @param spanCondition specifies the containment condition
+ * @return the exclusive end of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
+
/**
* Returns the start of the trailing substring of the input string which
* consists only of characters and strings that are contained in this set
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+ /**
+ * Returns the start of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
+ * after pinning limit to 0<=limit<=s.length().
+ * @param s the string
+ * @param limit the exclusive-end index in the string for the span operation
+ * (use s.length() or INT32_MAX for spanning back from the end of the string);
+ * a negative value is treated as 0 and a value greater than s.length()
+ * is treated as s.length()
+ * @param spanCondition specifies the containment condition
+ * @return the start of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
+
/**
* Returns the length of the initial substring of the input string which
* consists only of characters and strings that are contained in this set
* @param spanCondition specifies the containment condition
* @return the length of the initial substring according to the spanCondition;
* 0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
* @param spanCondition specifies the containment condition
* @return the start of the trailing substring according to the spanCondition;
* the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
+ * @stable ICU 3.8
* @see USetSpanCondition
*/
int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
* @param limit the limit offset for matching, either last+1 in
* the forward direction, or last-1 in the reverse direction,
* where last is the index of the last character to match.
+ * @param s the string to be matched against text
* @return If part of s matches up to the limit, return |limit -
* start|. If all of s matches before reaching the limit, return
* s.length(). If there is a mismatch between s and text, return
/**
* @return a code point IF the string consists of a single one.
* otherwise returns -1.
- * @param string to test
+ * @param s string to test
*/
static int32_t getSingleCP(const UnicodeString& s);
* Currently only the USET_CASE bit is supported. Any undefined bits
* are ignored.
* @return a reference to this set.
- * @internal
+ * @stable ICU 4.2
*/
UnicodeSet& closeOver(int32_t attribute);
* Remove all strings from this set.
*
* @return a reference to this set.
- * @internal
+ * @stable ICU 4.2
*/
virtual UnicodeSet &removeAllStrings();
virtual UBool matchesIndexValue(uint8_t v) const;
private:
+ friend class RBBIRuleScanner;
//----------------------------------------------------------------
// Implementation: Clone as thawed (see ICU4J Freezable)
// Implementation: Pattern parsing
//----------------------------------------------------------------
+ void applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec);
//----------------------------------------------------------------
* \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
* \\N{name} - white space not allowed within "\\N"
*
- * Other than the above restrictions, white space is ignored. Case
- * is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
+ * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
+ * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
* and trailing space is deleted, and internal runs of whitespace
* are collapsed to a single space.
*
* On return, the position after the last character parsed, that is,
* the locations marked '%'. If the parse fails, ppos is returned
* unchanged.
+ * @param ec status
* @return a reference to this.
*/
UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
UnicodeString& rebuiltPat,
UErrorCode& ec);
+ friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
/**
friend class UnicodeSetIterator;
};
+
+
inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
return !operator==(o);
}
return (UBool)(fFlags & kIsBogus);
}
+/**
+ * Views a mutable C-API USet pointer as a UnicodeSet pointer.
+ * Pure pointer reinterpretation: nothing is copied or allocated.
+ */
+inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
+    UnicodeSet *const asSet = reinterpret_cast<UnicodeSet *>(uset);
+    return asSet;
+}
+
+/**
+ * Views a const C-API USet pointer as a const UnicodeSet pointer.
+ * Pure pointer reinterpretation: nothing is copied or allocated.
+ */
+inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
+    const UnicodeSet *const asSet = reinterpret_cast<const UnicodeSet *>(uset);
+    return asSet;
+}
+
+/**
+ * Exposes this set through the C-API USet pointer type.
+ * Pure pointer reinterpretation of this: nothing is copied or allocated.
+ */
+inline USet *UnicodeSet::toUSet() {
+    USet *const handle = reinterpret_cast<USet *>(this);
+    return handle;
+}
+
+/**
+ * Exposes this const set through the const C-API USet pointer type.
+ * Pure pointer reinterpretation of this: nothing is copied or allocated.
+ */
+inline const USet *UnicodeSet::toUSet() const {
+    const USet *const handle = reinterpret_cast<const USet *>(this);
+    return handle;
+}
+
+/**
+ * UnicodeString overload of span(): pins start into [0, s.length()],
+ * spans the rest of the string's buffer from there, and returns the
+ * resulting exclusive end index measured from the beginning of s.
+ */
+inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
+    const int32_t textLength = s.length();
+    // An out-of-range start index is pinned to the nearest bound, not rejected.
+    const int32_t begin = (start < 0) ? 0 : ((start > textLength) ? textLength : start);
+    return begin + span(s.getBuffer() + begin, textLength - begin, spanCondition);
+}
+
+/**
+ * UnicodeString overload of spanBack(): pins limit into [0, s.length()]
+ * and spans backwards over the string's buffer from that exclusive end.
+ */
+inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
+    const int32_t textLength = s.length();
+    // An out-of-range limit is pinned to the nearest bound, not rejected.
+    const int32_t end = (limit < 0) ? 0 : ((limit > textLength) ? textLength : limit);
+    return spanBack(s.getBuffer(), end, spanCondition);
+}
+
U_NAMESPACE_END
#endif