]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************** | |
b331163b | 5 | * Copyright (C) 1997-2015, International Business Machines |
57a6839d A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************** | |
8 | */ | |
9 | ||
10 | #ifndef FILTEREDBRK_H | |
11 | #define FILTEREDBRK_H | |
12 | ||
b331163b | 13 | #include "unicode/utypes.h" |
57a6839d A |
14 | #include "unicode/brkiter.h" |
15 | ||
b331163b | 16 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
2ca993e8 | 17 | |
f3c0d7a5 | 18 | #if U_SHOW_CPLUSPLUS_API |
57a6839d A |
19 | U_NAMESPACE_BEGIN |
20 | ||
21 | /** | |
22 | * \file | |
23 | * \brief C++ API: FilteredBreakIteratorBuilder | |
24 | */ | |
25 | ||
26 | /** | |
27 | * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator | |
28 | * by constructing a new BreakIterator which suppresses certain segment boundaries. | |
29 | * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . | |
30 | * For example, a typical English Sentence Break Iterator would break on the space | |
31 | * in the string "Mr. Smith" (resulting in two segments), | |
32 | * but with "Mr." as an exception, a filtered break iterator | |
33 | * would consider the string "Mr. Smith" to be a single segment. | |
34 | * | |
f3c0d7a5 | 35 | * @stable ICU 56 |
57a6839d | 36 | */ |
b331163b | 37 | class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
57a6839d A |
38 | public: |
39 | /** | |
40 | * Destructor. | |
f3c0d7a5 | 41 | * @stable ICU 56 |
57a6839d A |
42 | */ |
43 | virtual ~FilteredBreakIteratorBuilder(); | |
44 | ||
45 | /** | |
46 | * Construct a FilteredBreakIteratorBuilder based on rules in a locale. | |
47 | * The rules are taken from CLDR exception data for the locale, | |
48 | * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions | |
49 | * This is the equivalent of calling createEmptyInstance(UErrorCode&) | |
50 | * and then repeatedly calling suppressBreakAfter(...) with the contents | |
51 | * of the CLDR exception data. | |
52 | * @param where the locale. | |
53 | * @param status The error code. | |
54 | * @return the new builder | |
f3c0d7a5 | 55 | * @stable ICU 56 |
57a6839d A |
56 | */ |
57 | static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); | |
58 | ||
0f5d89e8 A |
59 | #ifndef U_HIDE_DEPRECATED_API |
60 | /** | |
61 | * This function has been deprecated in favor of createEmptyInstance, which has | |
62 | * identical behavior. | |
63 | * @param status The error code. | |
64 | * @return the new builder | |
65 | * @deprecated ICU 60 use createEmptyInstance instead | |
66 | * @see createEmptyInstance() | |
67 | */ | |
68 | static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); | |
69 | #endif /* U_HIDE_DEPRECATED_API */ | |
70 | ||
71 | #ifndef U_HIDE_DRAFT_API | |
57a6839d A |
72 | /** |
73 | * Construct an empty FilteredBreakIteratorBuilder. | |
74 | * In this state, it will not suppress any segment boundaries. | |
75 | * @param status The error code. | |
76 | * @return the new builder | |
0f5d89e8 | 77 | * @draft ICU 60 |
57a6839d | 78 | */ |
0f5d89e8 A |
79 | static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); |
80 | #endif /* U_HIDE_DRAFT_API */ | |
57a6839d A |
81 | |
82 | /** | |
83 | * Suppress a certain string from being the end of a segment. | |
84 | * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned | |
85 | * by the iterator. | |
86 | * @param string the string to suppress, such as "Mr." | |
87 | * @param status error code | |
88 | * @return returns TRUE if the string was not present and now added, | |
89 | * FALSE if the call was a no-op because the string was already being suppressed. | |
f3c0d7a5 | 90 | * @stable ICU 56 |
57a6839d A |
91 | */ |
92 | virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
93 | ||
94 | /** | |
95 | * Stop suppressing a certain string from being the end of the segment. | |
96 | * This function does not create any new segment boundaries, but only serves to un-do | |
97 | * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of | |
98 | * locale data which may be suppressing certain strings. | |
99 | * @param string the exception to remove | |
100 | * @param status error code | |
101 | * @return returns TRUE if the string was present and now removed, | |
102 | * FALSE if the call was a no-op because the string was not being suppressed. | |
f3c0d7a5 | 103 | * @stable ICU 56 |
57a6839d A |
104 | */ |
105 | virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
106 | ||
0f5d89e8 A |
107 | /** |
108 | * This function has been deprecated in favor of wrapIteratorWithFilter(). | |
109 | * The behavior is identical. | |
110 | * @param adoptBreakIterator the break iterator to adopt | |
111 | * @param status error code | |
112 | * @return the new BreakIterator, owned by the caller. | |
113 | * @deprecated ICU 60 use wrapIteratorWithFilter() instead | |
114 | * @see wrapIteratorWithFilter() | |
115 | */ | |
116 | virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; | |
117 | ||
118 | #ifndef U_HIDE_DRAFT_API | |
57a6839d A |
119 | /** |
120 | * Wrap (adopt) an existing break iterator in a new filtered instance. | |
121 | * The resulting BreakIterator is owned by the caller. | |
122 | * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed. | |
123 | * Note that the adoptBreakIterator is adopted by the new BreakIterator | |
124 | * and should no longer be used by the caller. | |
125 | * The FilteredBreakIteratorBuilder may be reused. | |
0f5d89e8 | 126 | * This function is an alias for build() |
57a6839d A |
127 | * @param adoptBreakIterator the break iterator to adopt |
128 | * @param status error code | |
129 | * @return the new BreakIterator, owned by the caller. | |
0f5d89e8 | 130 | * @draft ICU 60 |
57a6839d | 131 | */ |
0f5d89e8 A |
132 | inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { |
133 | return build(adoptBreakIterator, status); | |
134 | } | |
135 | #endif /* U_HIDE_DRAFT_API */ | |
57a6839d A |
136 | |
137 | protected: | |
138 | /** | |
139 | * Default constructor, for subclass use only. | |
f3c0d7a5 | 140 | * @stable ICU 56 |
57a6839d A |
141 | */ |
142 | FilteredBreakIteratorBuilder(); | |
143 | }; | |
144 | ||
145 | ||
146 | U_NAMESPACE_END | |
f3c0d7a5 | 147 | #endif // U_SHOW_CPLUSPLUS_API |
2ca993e8 | 148 | |
b331163b | 149 | #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
57a6839d A |
150 | |
151 | #endif // #ifndef FILTEREDBRK_H |