]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************** | |
b331163b | 5 | * Copyright (C) 1997-2015, International Business Machines |
57a6839d A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************** | |
8 | */ | |
9 | ||
10 | #ifndef FILTEREDBRK_H | |
11 | #define FILTEREDBRK_H | |
12 | ||
b331163b | 13 | #include "unicode/utypes.h" |
57a6839d A |
14 | #include "unicode/brkiter.h" |
15 | ||
b331163b | 16 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
2ca993e8 | 17 | |
f3c0d7a5 | 18 | #if U_SHOW_CPLUSPLUS_API |
57a6839d A |
19 | U_NAMESPACE_BEGIN |
20 | ||
21 | /** | |
22 | * \file | |
23 | * \brief C++ API: FilteredBreakIteratorBuilder | |
24 | */ | |
25 | ||
26 | /** | |
27 | * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator | |
28 | * by constructing a new BreakIterator which suppresses certain segment boundaries. | |
29 | * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . | |
30 | * For example, a typical English Sentence Break Iterator would break on the space | |
31 | * in the string "Mr. Smith" (resulting in two segments), | |
32 | * but with "Mr." as an exception, a filtered break iterator | |
33 | * would consider the string "Mr. Smith" to be a single segment. | |
34 | * | |
f3c0d7a5 | 35 | * @stable ICU 56 |
57a6839d | 36 | */ |
b331163b | 37 | class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
57a6839d A |
38 | public: |
39 | /** | |
40 | * Destructor. | |
f3c0d7a5 | 41 | * @stable ICU 56 |
57a6839d A |
42 | */ |
43 | virtual ~FilteredBreakIteratorBuilder(); | |
44 | ||
45 | /** | |
46 | * Construct a FilteredBreakIteratorBuilder based on rules in a locale. | |
47 | * The rules are taken from CLDR exception data for the locale, | |
48 | * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions | |
49 | * This is the equivalent of calling createInstance(UErrorCode&) | |
50 | * and then repeatedly calling suppressBreakAfter(...) with the contents | |
51 | * of the CLDR exception data. | |
52 | * @param where the locale. | |
53 | * @param status The error code. | |
54 | * @return the new builder | |
f3c0d7a5 | 55 | * @stable ICU 56 |
57a6839d A |
56 | */ |
57 | static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); | |
58 | ||
59 | /** | |
60 | * Construct an empty FilteredBreakIteratorBuilder. | |
61 | * In this state, it will not suppress any segment boundaries. | |
62 | * @param status The error code. | |
63 | * @return the new builder | |
f3c0d7a5 | 64 | * @stable ICU 56 |
57a6839d A |
65 | */ |
66 | static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); | |
67 | ||
68 | /** | |
69 | * Suppress a certain string from being the end of a segment. | |
70 | * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned | |
71 | * by the iterator. | |
72 | * @param string the string to suppress, such as "Mr." | |
73 | * @param status error code | |
74 | * @return TRUE if the string was not present and now added, | |
75 | * FALSE if the call was a no-op because the string was already being suppressed. | |
f3c0d7a5 | 76 | * @stable ICU 56 |
57a6839d A |
77 | */ |
78 | virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
79 | ||
80 | /** | |
81 | * Stop suppressing a certain string from being the end of the segment. | |
82 | * This function does not create any new segment boundaries, but only serves to un-do | |
83 | * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of | |
84 | * locale data which may be suppressing certain strings. | |
85 | * @param string the exception to remove | |
86 | * @param status error code | |
87 | * @return TRUE if the string was present and now removed, | |
88 | * FALSE if the call was a no-op because the string was not being suppressed. | |
f3c0d7a5 | 89 | * @stable ICU 56 |
57a6839d A |
90 | */ |
91 | virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
92 | ||
93 | /** | |
94 | * Wrap (adopt) an existing break iterator in a new filtered instance. | |
95 | * The resulting BreakIterator is owned by the caller. | |
96 | * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed. | |
97 | * Note that the adoptBreakIterator is adopted by the new BreakIterator | |
98 | * and should no longer be used by the caller. | |
99 | * The FilteredBreakIteratorBuilder may be reused. | |
100 | * @param adoptBreakIterator the break iterator to adopt | |
101 | * @param status error code | |
102 | * @return the new BreakIterator, owned by the caller. | |
f3c0d7a5 | 103 | * @stable ICU 56 |
57a6839d A |
104 | */ |
105 | virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; | |
106 | ||
107 | protected: | |
108 | /** | |
109 | * Default constructor, for subclass use. | |
f3c0d7a5 | 110 | * @stable ICU 56 |
57a6839d A |
111 | */ |
112 | FilteredBreakIteratorBuilder(); | |
113 | }; | |
114 | ||
115 | ||
116 | U_NAMESPACE_END | |
f3c0d7a5 | 117 | #endif // U_SHOW_CPLUSPLUS_API |
2ca993e8 | 118 | |
b331163b | 119 | #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
57a6839d A |
120 | |
121 | #endif // #ifndef FILTEREDBRK_H |