]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
73c04bcf | 3 | /** |
46f4442e | 4 | ************************************************************************************ |
51004dcb | 5 | * Copyright (C) 2006-2012, International Business Machines Corporation and others. * |
46f4442e A |
6 | * All Rights Reserved. * |
7 | ************************************************************************************ | |
73c04bcf A |
8 | */ |
9 | ||
10 | #ifndef BRKENG_H | |
11 | #define BRKENG_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | #include "unicode/uobject.h" | |
15 | #include "unicode/utext.h" | |
16 | #include "unicode/uscript.h" | |
17 | ||
18 | U_NAMESPACE_BEGIN | |
19 | ||
20 | class UnicodeSet; | |
21 | class UStack; | |
0f5d89e8 | 22 | class UVector32; |
51004dcb | 23 | class DictionaryMatcher; |
73c04bcf A |
24 | |
25 | /******************************************************************* | |
26 | * LanguageBreakEngine | |
27 | */ | |
28 | ||
29 | /** | |
30 | * <p>A LanguageBreakEngine implements language-specific knowledge for | |
31 | * finding text boundaries within a run of characters belonging to a | |
32 | * specific set. The boundaries will be of a specific kind, e.g. word, | |
33 | * line, etc.</p> | |
34 | * | |
35 | * <p>LanguageBreakEngines should normally be implemented so that they can | |
36 | * be shared between threads without locking.</p> | |
37 | */ | |
38 | class LanguageBreakEngine : public UMemory { | |
39 | public: | |
40 | ||
41 | /** | |
42 | * <p>Default constructor.</p> | |
43 | * | |
44 | */ | |
45 | LanguageBreakEngine(); | |
46 | ||
47 | /** | |
48 | * <p>Virtual destructor.</p> | |
49 | */ | |
50 | virtual ~LanguageBreakEngine(); | |
51 | ||
52 | /** | |
53 | * <p>Indicate whether this engine handles a run of text that begins | |
54 | * with a particular character.</p> | |
55 | * | |
56 | * @param c A character which begins a run that the engine might handle | |
73c04bcf A |
57 | * @return TRUE if this engine handles the particular character. The |
58 | * character is the only input; there is no break-type argument. | |
59 | */ | |
0f5d89e8 | 60 | virtual UBool handles(UChar32 c) const = 0; |
73c04bcf A |
61 | |
62 | /** | |
63 | * <p>Find any breaks within a run in the supplied text.</p> | |
64 | * | |
65 | * @param text A UText representing the text. The | |
66 | * iterator is left at the end of the run of characters which the engine | |
67 | * is capable of handling. | |
68 | * @param startPos The start of the run within the supplied text. | |
69 | * @param endPos The end of the run within the supplied text. | |
0f5d89e8 | 70 | * @param foundBreaks A UVector32 (vector of int32_t) that receives the breaks. |
73c04bcf A |
71 | * @return The number of breaks found. |
72 | */ | |
73 | virtual int32_t findBreaks( UText *text, | |
74 | int32_t startPos, | |
75 | int32_t endPos, | |
0f5d89e8 | 76 | UVector32 &foundBreaks ) const = 0; |
73c04bcf A |
77 | |
78 | }; | |
79 | ||
80 | /******************************************************************* | |
81 | * LanguageBreakFactory | |
82 | */ | |
83 | ||
84 | /** | |
85 | * <p>A LanguageBreakFactory finds and returns a LanguageBreakEngine | |
86 | * that can determine breaks for characters in a specific set, if | |
87 | * such an object can be found.</p> | |
88 | * | |
89 | * <p>If a LanguageBreakFactory is to be shared between threads, | |
90 | * appropriate synchronization must be used; there is none internal | |
91 | * to the factory.</p> | |
92 | * | |
93 | * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can | |
94 | * normally be shared between threads without synchronization, unless | |
95 | * the specific subclass of LanguageBreakFactory indicates otherwise.</p> | |
96 | * | |
97 | * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine | |
98 | * it returns when it itself is deleted, unless the specific subclass of | |
99 | * LanguageBreakFactory indicates otherwise. Consequently, the factory must | |
100 | * not be deleted until the LanguageBreakEngines it has returned are no | |
101 | * longer needed.</p> | |
102 | */ | |
103 | class LanguageBreakFactory : public UMemory { | |
104 | public: | |
105 | ||
106 | /** | |
107 | * <p>Default constructor.</p> | |
108 | * | |
109 | */ | |
110 | LanguageBreakFactory(); | |
111 | ||
112 | /** | |
113 | * <p>Virtual destructor.</p> | |
114 | */ | |
115 | virtual ~LanguageBreakFactory(); | |
116 | ||
117 | /** | |
118 | * <p>Find and return a LanguageBreakEngine that can find breaks | |
119 | * for the set of characters to which the supplied | |
120 | * character belongs. It is up to the set of available engines to | |
121 | * determine what the sets of characters are.</p> | |
122 | * | |
123 | * @param c A character that begins a run for which a LanguageBreakEngine is | |
124 | * sought. | |
73c04bcf A |
125 | * @return A LanguageBreakEngine with the desired characteristics, or 0. |
126 | */ | |
0f5d89e8 | 127 | virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; |
73c04bcf A |
128 | |
129 | }; | |
130 | ||
131 | /******************************************************************* | |
132 | * UnhandledEngine | |
133 | */ | |
134 | ||
135 | /** | |
136 | * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that | |
137 | * handles characters that no other LanguageBreakEngine is available to | |
138 | * handle. It is told the character via handleCharacter(); at its | |
139 | * discretion it may handle more than the specified character (e.g., | |
140 | * the entire script to which that character belongs).</p> | |
141 | * | |
142 | * <p>UnhandledEngines may not be shared between threads without | |
143 | * external synchronization.</p> | |
144 | */ | |
145 | ||
146 | class UnhandledEngine : public LanguageBreakEngine { | |
147 | private: | |
148 | ||
149 | /** | |
0f5d89e8 | 150 | * The set of characters handled. |
73c04bcf A |
151 | * @internal |
152 | */ | |
153 | ||
0f5d89e8 | 154 | UnicodeSet *fHandled; |
73c04bcf A |
155 | |
156 | public: | |
157 | ||
158 | /** | |
159 | * <p>Constructor. Declared explicit so that a UErrorCode is never | |
160 | * implicitly converted into an engine. @param status ICU error code (in/out). | |
161 | */ | |
162 | explicit UnhandledEngine(UErrorCode &status); | |
163 | ||
164 | /** | |
165 | * <p>Virtual destructor.</p> | |
166 | */ | |
167 | virtual ~UnhandledEngine(); | |
168 | ||
169 | /** | |
170 | * <p>Indicate whether this engine handles a run of text that begins | |
171 | * with a particular character.</p> | |
172 | * | |
173 | * @param c A character which begins a run that the engine might handle | |
73c04bcf A |
174 | * @return TRUE if this engine handles the particular character. The |
175 | * character is the only input; there is no break-type argument. | |
176 | */ | |
0f5d89e8 | 177 | virtual UBool handles(UChar32 c) const; |
73c04bcf A |
178 | |
179 | /** | |
180 | * <p>Find any breaks within a run in the supplied text.</p> | |
181 | * | |
182 | * @param text A UText representing the text. The | |
183 | * iterator is left at the end of the run of characters which the engine | |
184 | * is capable of handling. | |
185 | * @param startPos The start of the run within the supplied text. | |
186 | * @param endPos The end of the run within the supplied text. | |
73c04bcf A |
187 | * @param foundBreaks A UVector32 to receive the breaks found, if any |
188 | * @return The number of breaks found. | |
189 | */ | |
190 | virtual int32_t findBreaks( UText *text, | |
191 | int32_t startPos, | |
192 | int32_t endPos, | |
0f5d89e8 | 193 | UVector32 &foundBreaks ) const; |
73c04bcf A |
194 | |
195 | /** | |
196 | * <p>Tell the engine to handle a particular character.</p> | |
197 | * | |
198 | * @param c A character which the engine should handle | |
73c04bcf | 199 | */ |
0f5d89e8 | 200 | virtual void handleCharacter(UChar32 c); |
73c04bcf A |
201 | |
202 | }; | |
203 | ||
204 | /******************************************************************* | |
205 | * ICULanguageBreakFactory | |
206 | */ | |
207 | ||
208 | /** | |
209 | * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for | |
210 | * ICU. It creates dictionary-based LanguageBreakEngines from dictionary | |
211 | * data in the ICU data file.</p> | |
212 | */ | |
213 | class ICULanguageBreakFactory : public LanguageBreakFactory { | |
214 | private: | |
215 | ||
216 | /** | |
217 | * The stack of break engines created by this factory | |
218 | * @internal | |
219 | */ | |
220 | ||
221 | UStack *fEngines; | |
222 | ||
223 | public: | |
224 | ||
225 | /** | |
226 | * <p>Standard constructor. Declared explicit so that a UErrorCode is never | |
227 | * implicitly converted into a factory. @param status ICU error code (in/out). | |
228 | */ | |
229 | explicit ICULanguageBreakFactory(UErrorCode &status); | |
230 | ||
231 | /** | |
232 | * <p>Virtual destructor.</p> | |
233 | */ | |
234 | virtual ~ICULanguageBreakFactory(); | |
235 | ||
236 | /** | |
237 | * <p>Find and return a LanguageBreakEngine that can find breaks | |
238 | * for the set of characters to which the supplied | |
239 | * character belongs. It is up to the set of available engines to | |
240 | * determine what the sets of characters are.</p> | |
241 | * | |
242 | * @param c A character that begins a run for which a LanguageBreakEngine is | |
243 | * sought. | |
73c04bcf A |
244 | * @return A LanguageBreakEngine with the desired characteristics, or 0. |
245 | */ | |
0f5d89e8 | 246 | virtual const LanguageBreakEngine *getEngineFor(UChar32 c); |
73c04bcf | 247 | |
51004dcb | 248 | protected: |
73c04bcf A |
249 | /** |
250 | * <p>Create a LanguageBreakEngine for the set of characters to which | |
251 | * the supplied character belongs.</p> | |
252 | * | |
253 | * @param c A character that begins a run for which a LanguageBreakEngine is | |
254 | * sought. | |
73c04bcf A |
255 | * @return A LanguageBreakEngine with the desired characteristics, or 0. |
256 | */ | |
0f5d89e8 | 257 | virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); |
73c04bcf | 258 | |
51004dcb A |
259 | /** |
260 | * <p>Create a DictionaryMatcher for the specified script.</p> | |
261 | * @param script An ISO 15924 script code that identifies the dictionary to be | |
262 | * created. | |
51004dcb A |
263 | * @return A DictionaryMatcher with the desired characteristics, or NULL. |
264 | */ | |
0f5d89e8 | 265 | virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); |
73c04bcf A |
266 | }; |
267 | ||
268 | U_NAMESPACE_END | |
269 | ||
270 | /* BRKENG_H */ | |
271 | #endif |