Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************** | |
b331163b | 3 | * Copyright (C) 1997-2015, International Business Machines |
57a6839d A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************** | |
6 | */ | |
7 | ||
8 | #ifndef FILTEREDBRK_H | |
9 | #define FILTEREDBRK_H | |
10 | ||
b331163b | 11 | #include "unicode/utypes.h" |
57a6839d A |
12 | #include "unicode/brkiter.h" |
13 | ||
b331163b | 14 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
2ca993e8 A |
15 | |
16 | #ifndef U_HIDE_DRAFT_API | |
57a6839d A |
17 | |
18 | U_NAMESPACE_BEGIN | |
19 | ||
20 | /** | |
21 | * \file | |
22 | * \brief C++ API: FilteredBreakIteratorBuilder | |
23 | */ | |
24 | ||
25 | /** | |
26 | * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator | |
27 | * by constructing a new BreakIterator which suppresses certain segment boundaries. | |
28 | * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . | |
29 | * For example, a typical English Sentence Break Iterator would break on the space | |
30 | * in the string "Mr. Smith" (resulting in two segments), | |
31 | * but with "Mr." as an exception, a filtered break iterator | |
32 | * would consider the string "Mr. Smith" to be a single segment. | |
33 | * | |
2ca993e8 | 34 | * @draft ICU 56 |
57a6839d | 35 | */ |
b331163b | 36 | class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { |
57a6839d A |
37 | public: |
38 | /** | |
39 | * Destructor. | |
2ca993e8 | 40 | * @draft ICU 56 |
57a6839d A |
41 | */ |
42 | virtual ~FilteredBreakIteratorBuilder(); | |
43 | ||
44 | /** | |
45 | * Construct a FilteredBreakIteratorBuilder based on rules in a locale. | |
46 | * The rules are taken from CLDR exception data for the locale, | |
47 | * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions | |
48 | * This is the equivalent of calling createInstance(UErrorCode&) | |
49 | * and then repeatedly calling suppressBreakAfter(...) with the contents | |
50 | * of the CLDR exception data. | |
51 | * @param where the locale. | |
52 | * @param status The error code. | |
53 | * @return the new builder | |
2ca993e8 | 54 | * @draft ICU 56 |
57a6839d A |
55 | */ |
56 | static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); | |
57 | ||
58 | /** | |
59 | * Construct an empty FilteredBreakIteratorBuilder. | |
60 | * In this state, it will not suppress any segment boundaries. | |
61 | * @param status The error code. | |
62 | * @return the new builder | |
2ca993e8 | 63 | * @draft ICU 56 |
57a6839d A |
64 | */ |
65 | static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); | |
66 | ||
67 | /** | |
68 | * Suppress a certain string from being the end of a segment. | |
69 | * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned | |
70 | * by the iterator. | |
71 | * @param string the string to suppress, such as "Mr." | |
72 | * @param status error code | |
73 | * @return returns TRUE if the string was not present and now added, | |
74 | * FALSE if the call was a no-op because the string was already being suppressed. | |
2ca993e8 | 75 | * @draft ICU 56 |
57a6839d A |
76 | */ |
77 | virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
78 | ||
79 | /** | |
80 | * Stop suppressing a certain string from being the end of the segment. | |
81 | * This function does not create any new segment boundaries, but only serves to un-do | |
82 | * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of | |
83 | * locale data which may be suppressing certain strings. | |
84 | * @param string the exception to remove | |
85 | * @param status error code | |
86 | * @return returns TRUE if the string was present and now removed, | |
87 | * FALSE if the call was a no-op because the string was not being suppressed. | |
2ca993e8 | 88 | * @draft ICU 56 |
57a6839d A |
89 | */ |
90 | virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; | |
91 | ||
92 | /** | |
93 | * Wrap (adopt) an existing break iterator in a new filtered instance. | |
94 | * The resulting BreakIterator is owned by the caller. | |
95 | * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed. | |
96 | * Note that the adoptBreakIterator is adopted by the new BreakIterator | |
97 | * and should no longer be used by the caller. | |
98 | * The FilteredBreakIteratorBuilder may be reused. | |
99 | * @param adoptBreakIterator the break iterator to adopt | |
100 | * @param status error code | |
101 | * @return the new BreakIterator, owned by the caller. | |
2ca993e8 | 102 | * @draft ICU 56 |
57a6839d A |
103 | */ |
104 | virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; | |
105 | ||
106 | protected: | |
107 | /** | |
108 | * Constructor, for subclass use. | |
2ca993e8 | 109 | * @draft ICU 56 |
57a6839d A |
110 | */ |
111 | FilteredBreakIteratorBuilder(); | |
112 | }; | |
113 | ||
114 | ||
115 | U_NAMESPACE_END | |
116 | ||
2ca993e8 A |
117 | #endif /* U_HIDE_DRAFT_API */ |
118 | ||
b331163b | 119 | #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
57a6839d A |
120 | |
121 | #endif // #ifndef FILTEREDBRK_H |