X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/08b89b0a244153b9f5bbb2f49c55ab0f7298122e..b331163bffd790ced0e88b73f44f86d49ccc48a5:/icuSources/common/unicode/ubrk.h diff --git a/icuSources/common/unicode/ubrk.h b/icuSources/common/unicode/ubrk.h index e9c0ef62..dfa2fe71 100644 --- a/icuSources/common/unicode/ubrk.h +++ b/icuSources/common/unicode/ubrk.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1996-2013, International Business Machines Corporation and others. +* Copyright (C) 1996-2015, International Business Machines Corporation and others. * All Rights Reserved. ****************************************************************************** */ @@ -45,10 +45,22 @@ * when line-wrapping. The mechanism correctly handles punctuation and * hyphenated words. *

+ * Note: The locale keyword "lb" can be used to modify line break + * behavior according to the CSS level 3 line-break options, see + * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: + * "ja@lb=strict", "zh@lb=loose". + *

* Sentence boundary analysis allows selection with correct * interpretation of periods within numbers and abbreviations, and * trailing punctuation marks such as quotation marks and parentheses. *

+ * Note: The locale keyword "ss" can be used to enable use of + * segmentation suppression data (preventing breaks in English after + * abbreviations such as "Mr." or "Est.", for example), as follows: + * "en@ss=standard". However, use of the suppression data is + * currently supported only for ubrk_next; ubrk_previous, ubrk_following, + * and ubrk_preceding will ignore the suppression data. + *

* Word boundary analysis is used by search and replace functions, as * well as within text editing applications that allow the user to * select words with a double click. Word selection provides correct @@ -202,7 +214,9 @@ typedef enum USentenceBreakTag { * and sentence breaks in text. * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, * UBRK_LINE, UBRK_SENTENCE - * @param locale The locale specifying the text-breaking conventions. + * @param locale The locale specifying the text-breaking conventions. Note that + * locale keys such as "lb" and "ss" may be used to modify text break behavior, + * see general discussion of BreakIterator C API. * @param text The text to be iterated over. * @param textLength The number of characters in text, or -1 if null-terminated. * @param status A UErrorCode to receive any errors. @@ -378,10 +392,9 @@ U_STABLE int32_t U_EXPORT2 ubrk_previous(UBreakIterator *bi); /** - * Set the iterator position to the index of the first character in the text being scanned. - * This is not always the same as index 0 of the text. + * Set the iterator position to zero, the start of the text being scanned. * @param bi The break iterator to use. - * @return The character index of the first character in the text being scanned. + * @return The new iterator position (zero). * @see ubrk_last * @stable ICU 2.0 */