X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..f59164e3d128c7675a4d3934206346a3384e53a5:/icuSources/common/unicode/brkiter.h diff --git a/icuSources/common/unicode/brkiter.h b/icuSources/common/unicode/brkiter.h index bdd3cc70..7296ac96 100644 --- a/icuSources/common/unicode/brkiter.h +++ b/icuSources/common/unicode/brkiter.h @@ -1,6 +1,6 @@ /* ******************************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2016, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************** * @@ -182,6 +182,11 @@ public: * Utext that was passed as a parameter, but that the underlying text itself * must not be altered while being referenced by the break iterator. * + * All index positions returned by break iterator functions are + * native indices from the UText. For example, when breaking UTF-8 + * encoded text, the break positions returned by next(), previous(), etc. + * will be UTF-8 string indices, not UTF-16 positions. + * * @param text The UText used to change the text. * @param status receives any error codes. * @stable ICU 3.4 @@ -208,19 +213,21 @@ public: }; /** - * Return the index of the first character in the text being scanned. + * Sets the current iteration position to the beginning of the text, position zero. + * @return The offset of the beginning of the text, zero. * @stable ICU 2.0 */ virtual int32_t first(void) = 0; /** - * Return the index immediately BEYOND the last character in the text being scanned. + * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. + * @return The index immediately BEYOND the last character in the text being scanned. * @stable ICU 2.0 */ virtual int32_t last(void) = 0; /** - * Return the boundary preceding the current boundary. 
+ * Set the iterator position to the boundary preceding the current boundary. * @return The character index of the previous text boundary or DONE if all * boundaries have been returned. * @stable ICU 2.0 @@ -228,7 +235,7 @@ public: virtual int32_t previous(void) = 0; /** - * Return the boundary following the current boundary. + * Advance the iterator to the boundary following the current boundary. * @return The character index of the next text boundary or DONE if all * boundaries have been returned. * @stable ICU 2.0 @@ -243,7 +250,7 @@ public: virtual int32_t current(void) const = 0; /** - * Return the first boundary following the specified offset. + * Advance the iterator to the first boundary following the specified offset. * The value returned is always greater than the offset or * the value BreakIterator.DONE * @param offset the offset to begin scanning. @@ -253,7 +260,7 @@ public: virtual int32_t following(int32_t offset) = 0; /** - * Return the first boundary preceding the specified offset. + * Set the iterator position to the first boundary preceding the specified offset. * The value returned is always smaller than the offset or * the value BreakIterator.DONE * @param offset the offset to begin scanning. @@ -273,16 +280,62 @@ public: virtual UBool isBoundary(int32_t offset) = 0; /** - * Return the nth boundary from the current boundary - * @param n which boundary to return. A value of 0 + * Set the iterator position to the nth boundary from the current boundary + * @param n the number of boundaries to move by. A value of 0 * does nothing. Negative values move to previous boundaries * and positive values move to later boundaries. - * @return The index of the nth boundary from the current position, or + * @return The new iterator position, or * DONE if there are fewer than |n| boundaries in the specfied direction. 
* @stable ICU 2.0 */ virtual int32_t next(int32_t n) = 0; + /** + * For RuleBasedBreakIterators, return the status tag from the + * break rule that determined the most recently + * returned break position. + *

+ * For break iterator types that do not support a rule status, + * a default value of 0 is returned. + *

+ * @return the status from the break rule that determined the most recently + * returned break position. + * @see RuleBasedBreakIterator::getRuleStatus() + * @see UWordBreak + * @stable ICU 52 + */ + virtual int32_t getRuleStatus() const; + + /** + * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) + * that determined the most recently returned break position. + *

+ * For break iterator types that do not support rule status, + * no values are returned. + *

+ * The returned status value(s) are stored into an array provided by the caller. + * The values are stored in sorted (ascending) order. + * If the capacity of the output array is insufficient to hold the data, + * the output will be truncated to the available length, and a + * U_BUFFER_OVERFLOW_ERROR will be signaled. + *

+ * @see RuleBasedBreakIterator::getRuleStatusVec + * + * @param fillInVec an array to be filled in with the status values. + * @param capacity the length of the supplied vector. A length of zero causes + * the function to return the number of status values, in the + * normal way, without attempting to store any values. + * @param status receives error codes. + * @return The number of rule status values from rules that determined + * the most recent boundary returned by the break iterator. + * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value + * is the total number of status values that were available, + * not the reduced number that were actually returned. + * @see getRuleStatus + * @stable ICU 52 + */ + virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); + /** * Create BreakIterator for word-breaks using the given locale. * Returns an instance of a BreakIterator implementing word breaks. @@ -433,6 +486,8 @@ public: UnicodeString& name); /** + * Deprecated functionality. Use clone() instead. + * * Thread safe client-buffer-based cloning operation * Do NOT call delete on a safeclone, since 'new' is not used to create it. * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. @@ -447,26 +502,33 @@ public: * necessary. * @return pointer to the new clone * - * @stable ICU 2.0 + * @deprecated ICU 52. Use clone() instead. */ virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) = 0; +#ifndef U_HIDE_DEPRECATED_API + /** * Determine whether the BreakIterator was created in user memory by * createBufferClone(), and thus should not be deleted. Such objects * must be closed by an explicit call to the destructor (not delete). - * @stable ICU 2.0 + * @deprecated ICU 52. Always delete the BreakIterator. 
*/ inline UBool isBufferClone(void); +#endif /* U_HIDE_DEPRECATED_API */ + #if !UCONFIG_NO_SERVICE /** * Register a new break iterator of the indicated kind, to use in the given locale. * The break iterator will be adopted. Clones of the iterator will be returned * if a request for a break iterator of the given kind matches or falls back to * this locale. + * Because ICU may choose to cache BreakIterators internally, this must + * be called at application startup, prior to any calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. * @param toAdopt the BreakIterator instance to be adopted * @param locale the Locale for which this instance is to be registered * @param kind the type of iterator for which this instance is to be registered @@ -483,6 +545,9 @@ public: * Unregister a previously-registered BreakIterator using the key returned from the * register call. Key becomes invalid after a successful call and should not be used again. * The BreakIterator corresponding to the key will be deleted. + * Because ICU may choose to cache BreakIterators internally, this should + * be called during application shutdown, after all calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. * @param key the registry key returned by a previous call to registerInstance * @param status the in/out status code, no special meanings are assigned * @return TRUE if the iterator for the key was successfully unregistered @@ -506,6 +571,7 @@ public: */ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; +#ifndef U_HIDE_INTERNAL_API /** Get the locale for this break iterator object. You can choose between valid and actual locale. 
* @param type type of the locale we're looking for (valid or actual) * @param status error code for the operation @@ -513,6 +579,34 @@ public: * @internal */ const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Set the subject text string upon which the break iterator is operating + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the break iterator implementation never copies the underlying text + * of a string being processed, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized, + * system-level code. One example use case is with garbage collection that moves + * the text in memory. + * + * @param input The new (moved) text string. + * @param status Receives errors detected by this function. + * @return *this + * + * @stable ICU 49 + */ + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; private: static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); @@ -523,12 +617,17 @@ public: friend class ICUBreakIteratorService; protected: + // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API + // or else the compiler will create a public ones. 
/** @internal */ BreakIterator(); /** @internal */ - UBool fBufferClone; + BreakIterator (const BreakIterator &other) : UObject(other) {} +#ifndef U_HIDE_INTERNAL_API /** @internal */ - BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {} + BreakIterator (const Locale& valid, const Locale& actual); +#endif /* U_HIDE_INTERNAL_API */ + UBool fKeepAll; private: @@ -541,17 +640,26 @@ private: * It's provided to make the compiler happy. Do not call. */ BreakIterator& operator=(const BreakIterator&); + void setKeepAll(UBool keepAll); }; +inline void BreakIterator::setKeepAll(UBool keepAll) +{ + fKeepAll = keepAll; +} + +#ifndef U_HIDE_DEPRECATED_API + inline UBool BreakIterator::isBufferClone() { - return fBufferClone; + return FALSE; } +#endif /* U_HIDE_DEPRECATED_API */ + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif // _BRKITER //eof -