]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/dictbe.h
2 *******************************************************************************
3 * Copyright (C) 2006,2011, International Business Machines Corporation *
4 * and others. All Rights Reserved. *
5 *******************************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/uniset.h"
13 #include "unicode/utext.h"
19 class TrieWordDictionary
;
21 /*******************************************************************
22 * DictionaryBreakEngine
26 * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
27 * dictionary to determine language-specific breaks.</p>
29 * <p>After it is constructed a DictionaryBreakEngine may be shared between
30 * threads without synchronization.</p>
32 class DictionaryBreakEngine
: public LanguageBreakEngine
{
35 * The set of characters handled by this engine
42 * The set of break types handled by this engine
49 * <p>Default constructor.</p>
52 DictionaryBreakEngine();
57 * <p>Constructor setting the break types handled.</p>
59 * @param breakTypes A bitmap of types handled by the engine.
61 DictionaryBreakEngine( uint32_t breakTypes
);
64 * <p>Virtual destructor.</p>
66 virtual ~DictionaryBreakEngine();
69 * <p>Indicate whether this engine handles a particular character for
70 * a particular kind of break.</p>
72 * @param c A character which begins a run that the engine might handle
73 * @param breakType The type of text break which the caller wants to determine
74 * @return TRUE if this engine handles the particular character and break type.
77 virtual UBool
handles( UChar32 c
, int32_t breakType
) const;
80 * <p>Find any breaks within a run in the supplied text.</p>
82 * @param text A UText representing the text. The
83 * iterator is left at the end of the run of characters which the engine
84 * is capable of handling.
85 * @param startPos The start of the run within the supplied text.
86 * @param endPos The end of the run within the supplied text.
87 * @param reverse Whether the caller is looking for breaks in a reverse direction.
89 * @param breakType The type of break desired, or -1.
90 * @param foundBreaks A UStack that receives the breaks found, if any
91 * @return The number of breaks found.
93 virtual int32_t findBreaks( UText
*text
,
98 UStack
&foundBreaks
) const;
103 * <p>Set the character set handled by this engine.</p>
105 * @param set A UnicodeSet of the set of characters handled by the engine
107 virtual void setCharacters( const UnicodeSet
&set
);
110 * <p>Set the break types handled by this engine.</p>
112 * @param breakTypes A bitmap of types handled by the engine.
114 // virtual void setBreakTypes( uint32_t breakTypes );
117 * <p>Divide up a range of known dictionary characters.</p>
119 * @param text A UText representing the text
120 * @param rangeStart The start of the range of dictionary characters
121 * @param rangeEnd The end of the range of dictionary characters
122 * @param foundBreaks Output UStack that receives the int32_t break positions
123 * @return The number of breaks found
125 virtual int32_t divideUpDictionaryRange( UText
*text
,
128 UStack
&foundBreaks
) const = 0;
132 /*******************************************************************
137 * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
138 * TrieWordDictionary and heuristics to determine Thai-specific breaks.</p>
140 * <p>After it is constructed a ThaiBreakEngine may be shared between
141 * threads without synchronization.</p>
143 class ThaiBreakEngine
: public DictionaryBreakEngine
{
146 * The set of characters handled by this engine
150 UnicodeSet fThaiWordSet
;
151 UnicodeSet fEndWordSet
;
152 UnicodeSet fBeginWordSet
;
153 UnicodeSet fSuffixSet
;
155 const TrieWordDictionary
*fDictionary
;
160 * <p>Constructor that adopts a dictionary.</p>
162 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the engine is deleted.
165 ThaiBreakEngine(const TrieWordDictionary
*adoptDictionary
, UErrorCode
&status
);
168 * <p>Virtual destructor.</p>
170 virtual ~ThaiBreakEngine();
174 * <p>Divide up a range of known dictionary characters.</p>
176 * @param text A UText representing the text
177 * @param rangeStart The start of the range of dictionary characters
178 * @param rangeEnd The end of the range of dictionary characters
179 * @param foundBreaks Output UStack that receives the int32_t break positions
180 * @return The number of breaks found
182 virtual int32_t divideUpDictionaryRange( UText
*text
,
185 UStack
&foundBreaks
) const;
190 /*******************************************************************
195 * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
196 * TrieWordDictionary and heuristics to determine Khmer-specific breaks.</p>
198 * <p>After it is constructed a KhmerBreakEngine may be shared between
199 * threads without synchronization.</p>
201 class KhmerBreakEngine
: public DictionaryBreakEngine
{
204 * The set of characters handled by this engine
208 UnicodeSet fKhmerWordSet
;
209 UnicodeSet fEndWordSet
;
210 UnicodeSet fBeginWordSet
;
212 const TrieWordDictionary
*fDictionary
;
217 * <p>Constructor that adopts a dictionary.</p>
219 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the engine is deleted.
222 KhmerBreakEngine(const TrieWordDictionary
*adoptDictionary
, UErrorCode
&status
);
225 * <p>Virtual destructor.</p>
227 virtual ~KhmerBreakEngine();
231 * <p>Divide up a range of known dictionary characters.</p>
233 * @param text A UText representing the text
234 * @param rangeStart The start of the range of dictionary characters
235 * @param rangeEnd The end of the range of dictionary characters
236 * @param foundBreaks Output UStack that receives the int32_t break positions
237 * @return The number of breaks found
239 virtual int32_t divideUpDictionaryRange( UText
*text
,
242 UStack
&foundBreaks
) const;