]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/brkeng.h
2 ************************************************************************************
3 * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 ************************************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/uobject.h"
13 #include "unicode/utext.h"
14 #include "unicode/uscript.h"
20 class DictionaryMatcher
;
22 /*******************************************************************
27 * <p>LanguageBreakEngines implement language-specific knowledge for
28 * finding text boundaries within a run of characters belonging to a
29 * specific set. The boundaries will be of a specific kind, e.g. word,
32 * <p>LanguageBreakEngines should normally be implemented so as to
33 * be shared between threads without locking.</p>
35 class LanguageBreakEngine
: public UMemory
{
39 * <p>Default constructor.</p>
42 LanguageBreakEngine();
45 * <p>Virtual destructor.</p>
47 virtual ~LanguageBreakEngine();
50 * <p>Indicate whether this engine handles a particular character for
51 * a particular kind of break.</p>
53 * @param c A character which begins a run that the engine might handle
54 * @param breakType The type of text break which the caller wants to determine
55 * @return TRUE if this engine handles the particular character and break
58 virtual UBool
handles(UChar32 c
, int32_t breakType
) const = 0;
61 * <p>Find any breaks within a run in the supplied text.</p>
63 * @param text A UText representing the text. The
64 * iterator is left at the end of the run of characters which the engine
65 * is capable of handling.
66 * @param startPos The start of the run within the supplied text.
67 * @param endPos The end of the run within the supplied text.
68 * @param reverse Whether the caller is looking for breaks in a reverse
70 * @param breakType The type of break desired, or -1.
71 * @param foundBreaks A UStack that receives the breaks found, if any
72 * @return The number of breaks found.
74 virtual int32_t findBreaks( UText
*text
,
79 UStack
&foundBreaks
) const = 0;
83 /*******************************************************************
84 * LanguageBreakFactory
88 * <p>A LanguageBreakFactory finds and returns a LanguageBreakEngine
89 * that can determine breaks for characters in a specific set, if
90 * such an object can be found.</p>
92 * <p>If a LanguageBreakFactory is to be shared between threads,
93 * appropriate synchronization must be used; there is none internal
96 * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
97 * normally be shared between threads without synchronization, unless
98 * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
100 * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
101 * it returns when it itself is deleted, unless the specific subclass of
102 * LanguageBreakFactory indicates otherwise. Naturally, the factory should
103 * not be deleted until the LanguageBreakEngines it has returned are no
106 class LanguageBreakFactory
: public UMemory
{
110 * <p>Default constructor.</p>
113 LanguageBreakFactory();
116 * <p>Virtual destructor.</p>
118 virtual ~LanguageBreakFactory();
121 * <p>Find and return a LanguageBreakEngine that can find the desired
122 * kind of break for the set of characters to which the supplied
123 * character belongs. It is up to the set of available engines to
124 * determine what the sets of characters are.</p>
126 * @param c A character that begins a run for which a LanguageBreakEngine is
128 * @param breakType The kind of text break for which a LanguageBreakEngine is
130 * @return A LanguageBreakEngine with the desired characteristics, or 0.
132 virtual const LanguageBreakEngine
*getEngineFor(UChar32 c
, int32_t breakType
) = 0;
136 /*******************************************************************
141 * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
142 * handles characters that no other LanguageBreakEngine is available to
143 * handle. It is told the character and the type of break; at its
144 * discretion it may handle more than the specified character (e.g.,
145 * the entire script to which that character belongs).</p>
147 * <p>UnhandledEngines may not be shared between threads without
148 * external synchronization.</p>
151 class UnhandledEngine
: public LanguageBreakEngine
{
155 * The sets of characters handled, for each break type
159 UnicodeSet
*fHandled
[4];
164 * <p>Standard constructor.</p>
167 UnhandledEngine(UErrorCode
&status
);
170 * <p>Virtual destructor.</p>
172 virtual ~UnhandledEngine();
175 * <p>Indicate whether this engine handles a particular character for
176 * a particular kind of break.</p>
178 * @param c A character which begins a run that the engine might handle
179 * @param breakType The type of text break which the caller wants to determine
180 * @return TRUE if this engine handles the particular character and break
183 virtual UBool
handles(UChar32 c
, int32_t breakType
) const;
186 * <p>Find any breaks within a run in the supplied text.</p>
188 * @param text A UText representing the text. The
189 * iterator is left at the end of the run of characters which the engine
190 * is capable of handling.
191 * @param startPos The start of the run within the supplied text.
192 * @param endPos The end of the run within the supplied text.
193 * @param reverse Whether the caller is looking for breaks in a reverse
195 * @param breakType The type of break desired, or -1.
196 * @param foundBreaks A UStack that receives the breaks found, if any
197 * @return The number of breaks found.
199 virtual int32_t findBreaks( UText
*text
,
204 UStack
&foundBreaks
) const;
207 * <p>Tell the engine to handle a particular character and break type.</p>
209 * @param c A character which the engine should handle
210 * @param breakType The type of text break for which the engine should handle c
212 virtual void handleCharacter(UChar32 c
, int32_t breakType
);
216 /*******************************************************************
217 * ICULanguageBreakFactory
221 * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
222 * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
223 * data in the ICU data file.</p>
225 class ICULanguageBreakFactory
: public LanguageBreakFactory
{
229 * The stack of break engines created by this factory
238 * <p>Standard constructor.</p>
241 ICULanguageBreakFactory(UErrorCode
&status
);
244 * <p>Virtual destructor.</p>
246 virtual ~ICULanguageBreakFactory();
249 * <p>Find and return a LanguageBreakEngine that can find the desired
250 * kind of break for the set of characters to which the supplied
251 * character belongs. It is up to the set of available engines to
252 * determine what the sets of characters are.</p>
254 * @param c A character that begins a run for which a LanguageBreakEngine is
256 * @param breakType The kind of text break for which a LanguageBreakEngine is
258 * @return A LanguageBreakEngine with the desired characteristics, or 0.
260 virtual const LanguageBreakEngine
*getEngineFor(UChar32 c
, int32_t breakType
);
264 * <p>Create a LanguageBreakEngine for the set of characters to which
265 * the supplied character belongs, for the specified break type.</p>
267 * @param c A character that begins a run for which a LanguageBreakEngine is
269 * @param breakType The kind of text break for which a LanguageBreakEngine is
271 * @return A LanguageBreakEngine with the desired characteristics, or 0.
273 virtual const LanguageBreakEngine
*loadEngineFor(UChar32 c
, int32_t breakType
);
276 * <p>Create a DictionaryMatcher for the specified script and break type.</p>
277 * @param script An ISO 15924 script code that identifies the dictionary to be
279 * @param breakType The kind of text break for which a dictionary is
281 * @return A DictionaryMatcher with the desired characteristics, or NULL.
283 virtual DictionaryMatcher
*loadDictionaryMatcherFor(UScriptCode script
, int32_t breakType
);