]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /** |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2006, International Business Machines Corporation and others. * | |
4 | * All Rights Reserved. * | |
5 | ******************************************************************************* | |
6 | */ | |
7 | ||
8 | #ifndef TRIEDICT_H | |
9 | #define TRIEDICT_H | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | #include "unicode/uobject.h" | |
13 | #include "unicode/utext.h" | |
14 | ||
15 | struct UEnumeration; | |
16 | struct UDataSwapper; | |
17 | struct UDataMemory; | |
18 | ||
19 | /** | |
20 | * <p>UDataSwapFn function for use in swapping a compact dictionary.</p> | |
21 | * | |
22 | * @param ds Pointer to UDataSwapper containing global data about the | |
23 | * transformation and function pointers for handling primitive | |
24 | * types. | |
25 | * @param inData Pointer to the input data to be transformed or examined. | |
26 | * @param length Length of the data, counting bytes. May be -1 for preflighting. | |
27 | * If length>=0, then transform the data. | |
28 | * If length==-1, then only determine the length of the data. | |
29 | * The length cannot be determined from the data itself for all | |
30 | * types of data (e.g., not for simple arrays of integers). | |
31 | * @param outData Pointer to the output data buffer. | |
32 | * If length>=0 (transformation), then the output buffer must | |
33 | * have a capacity of at least length bytes. | |
34 | * If length==-1, then outData will not be used and can be NULL. | |
35 | * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must | |
36 | * fulfill U_SUCCESS on input. | |
37 | * @return The actual length of the data. | |
38 | * | |
39 | * @see UDataSwapper | |
40 | */ | |
41 | ||
42 | U_CAPI int32_t U_EXPORT2 | |
43 | triedict_swap(const UDataSwapper *ds, | |
44 | const void *inData, int32_t length, void *outData, | |
45 | UErrorCode *pErrorCode); | |
46 | ||
47 | U_NAMESPACE_BEGIN | |
48 | ||
49 | class StringEnumeration; | |
50 | struct CompactTrieHeader; | |
51 | ||
52 | /******************************************************************* | |
53 | * TrieWordDictionary | |
54 | */ | |
55 | ||
56 | /** | |
57 | * <p>TrieWordDictionary is an abstract class that represents a word | |
58 | * dictionary based on a trie. The base protocol is read-only. | |
59 | * Subclasses may allow writing.</p> | |
60 | */ | |
61 | class U_COMMON_API TrieWordDictionary : public UMemory { | |
62 | public: | |
63 | ||
64 | /** | |
65 | * <p>Default constructor.</p> | |
66 | * | |
67 | */ | |
68 | TrieWordDictionary(); | |
69 | ||
70 | /** | |
71 | * <p>Virtual destructor.</p> | |
72 | */ | |
73 | virtual ~TrieWordDictionary(); | |
74 | ||
75 | /** | |
76 | * <p>Find dictionary words that match the text.</p> | |
77 | * | |
78 | * @param text A UText representing the text. The | |
79 | * iterator is left after the longest prefix match in the dictionary. | |
80 | * (Note: matches() takes no separate start parameter; presumably matching begins at the current position of text -- TODO confirm.) | |
81 | * @param maxLength The maximum number of code units to match. | |
82 | * @param lengths An array that is filled with the lengths of words that matched. | |
83 | * @param count Filled with the number of elements output in lengths. | |
84 | * @param limit The size of the lengths array; this limits the number of words output. | |
85 | * @return The number of characters in text that were matched. | |
86 | */ | |
87 | virtual int32_t matches( UText *text, | |
88 | int32_t maxLength, | |
89 | int32_t *lengths, | |
90 | int &count, | |
91 | int limit ) const = 0; | |
92 | ||
93 | /** | |
94 | * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> | |
95 | * | |
96 | * @param status A status code recording the success of the call. | |
97 | * @return A StringEnumeration that will iterate through the whole dictionary. | |
98 | * The caller is responsible for closing it. The order is unspecified. | |
99 | */ | |
100 | virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; | |
101 | ||
102 | }; | |
103 | ||
104 | /******************************************************************* | |
105 | * MutableTrieDictionary | |
106 | */ | |
107 | ||
108 | /** | |
109 | * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be | |
110 | * added.</p> | |
111 | */ | |
112 | ||
113 | struct TernaryNode; // Forward declaration | |
114 | ||
115 | class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary { | |
116 | private: | |
117 | /** | |
118 | * The root node of the trie | |
119 | * @internal | |
120 | */ | |
121 | ||
122 | TernaryNode *fTrie; | |
123 | ||
124 | /** | |
125 | * A UText for internal use | |
126 | * @internal | |
127 | */ | |
128 | ||
129 | UText *fIter; | |
130 | ||
131 | friend class CompactTrieDictionary; // For fast conversion | |
132 | ||
133 | public: | |
134 | ||
135 | /** | |
136 | * <p>Constructor.</p> | |
137 | * | |
138 | * @param median A UChar around which to balance the trie. Ideally, it should | |
139 | * begin at least one word that is near the median of the set in the dictionary. | |
140 | * @param status A status code recording the success of the call. | |
141 | */ | |
142 | MutableTrieDictionary( UChar median, UErrorCode &status ); | |
143 | ||
144 | /** | |
145 | * <p>Virtual destructor.</p> | |
146 | */ | |
147 | virtual ~MutableTrieDictionary(); | |
148 | ||
149 | /** | |
150 | * <p>Find dictionary words that match the text.</p> | |
151 | * | |
152 | * @param text A UText representing the text. The | |
153 | * iterator is left after the longest prefix match in the dictionary. | |
154 | * @param maxLength The maximum number of code units to match. | |
155 | * @param lengths An array that is filled with the lengths of words that matched. | |
156 | * @param count Filled with the number of elements output in lengths. | |
157 | * @param limit The size of the lengths array; this limits the number of words output. | |
158 | * @return The number of characters in text that were matched. | |
159 | */ | |
160 | virtual int32_t matches( UText *text, | |
161 | int32_t maxLength, | |
162 | int32_t *lengths, | |
163 | int &count, | |
164 | int limit ) const; | |
165 | ||
166 | /** | |
167 | * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> | |
168 | * | |
169 | * @param status A status code recording the success of the call. | |
170 | * @return A StringEnumeration that will iterate through the whole dictionary. | |
171 | * The caller is responsible for closing it. The order is unspecified. | |
172 | */ | |
173 | virtual StringEnumeration *openWords( UErrorCode &status ) const; | |
174 | ||
175 | /** | |
176 | * <p>Add one word to the dictionary.</p> | |
177 | * | |
178 | * @param word A UChar buffer containing the word. | |
179 | * @param length The length of the word. | |
180 | * @param status The resultant status | |
181 | */ | |
182 | virtual void addWord( const UChar *word, | |
183 | int32_t length, | |
184 | UErrorCode &status); | |
185 | ||
186 | #if 0 | |
187 | /** | |
188 | * <p>Add all strings from a UEnumeration to the dictionary.</p> | |
189 | * | |
190 | * @param words A UEnumeration that will return the desired words. | |
191 | * @param status The resultant status | |
192 | */ | |
193 | virtual void addWords( UEnumeration *words, UErrorCode &status ); | |
194 | #endif | |
195 | ||
196 | protected: | |
197 | /** | |
198 | * <p>Search the dictionary for matches.</p> | |
199 | * | |
200 | * @param text A UText representing the text. The | |
201 | * iterator is left after the longest prefix match in the dictionary. | |
202 | * @param maxLength The maximum number of code units to match. | |
203 | * @param lengths An array that is filled with the lengths of words that matched. | |
204 | * @param count Filled with the number of elements output in lengths. | |
205 | * @param limit The size of the lengths array; this limits the number of words output. | |
206 | * @param parent The parent of the current node | |
207 | * @param pMatched The returned parent node matched the input | |
208 | * @return The number of characters in text that were matched. | |
209 | */ | |
210 | virtual int32_t search( UText *text, | |
211 | int32_t maxLength, | |
212 | int32_t *lengths, | |
213 | int &count, | |
214 | int limit, | |
215 | TernaryNode *&parent, | |
216 | UBool &pMatched ) const; | |
217 | ||
218 | private: | |
219 | /** | |
220 | * <p>Private constructor. The root node is not allocated.</p> | |
221 | * | |
222 | * @param status A status code recording the success of the call. | |
223 | */ | |
224 | MutableTrieDictionary( UErrorCode &status ); | |
225 | }; | |
226 | ||
227 | /******************************************************************* | |
228 | * CompactTrieDictionary | |
229 | */ | |
230 | ||
231 | /** | |
232 | * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted | |
233 | * to save space.</p> | |
234 | */ | |
235 | class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { | |
236 | private: | |
237 | /** | |
238 | * Pointer to the header of the compact trie data | |
239 | */ | |
240 | ||
241 | const CompactTrieHeader *fData; | |
242 | ||
243 | /** | |
244 | * A UBool indicating whether or not we own the fData. | |
245 | */ | |
246 | ||
247 | UBool fOwnData; | |
248 | ||
249 | UDataMemory *fUData; | |
250 | public: | |
251 | /** | |
252 | * <p>Construct a dictionary from a UDataMemory.</p> | |
253 | * | |
254 | * @param dataObj A pointer to a UDataMemory, which is adopted | |
255 | * @param status A status code giving the result of the constructor | |
256 | */ | |
257 | CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); | |
258 | ||
259 | /** | |
260 | * <p>Construct a dictionary from raw saved data.</p> | |
261 | * | |
262 | * @param dataObj A pointer to the raw data, which is still owned by the caller | |
263 | * @param status A status code giving the result of the constructor | |
264 | */ | |
265 | CompactTrieDictionary(const void *dataObj, UErrorCode &status); | |
266 | ||
267 | /** | |
268 | * <p>Construct a dictionary from a MutableTrieDictionary.</p> | |
269 | * | |
270 | * @param dict The dictionary to use as input. | |
271 | * @param status A status code recording the success of the call. | |
272 | */ | |
273 | CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status ); | |
274 | ||
275 | /** | |
276 | * <p>Virtual destructor.</p> | |
277 | */ | |
278 | virtual ~CompactTrieDictionary(); | |
279 | ||
280 | /** | |
281 | * <p>Find dictionary words that match the text.</p> | |
282 | * | |
283 | * @param text A UText representing the text. The | |
284 | * iterator is left after the longest prefix match in the dictionary. | |
285 | * @param rangeEnd The maximum number of code units to match (named maxLength in the TrieWordDictionary declaration). | |
286 | * @param lengths An array that is filled with the lengths of words that matched. | |
287 | * @param count Filled with the number of elements output in lengths. | |
288 | * @param limit The size of the lengths array; this limits the number of words output. | |
289 | * @return The number of characters in text that were matched. | |
290 | */ | |
291 | virtual int32_t matches( UText *text, | |
292 | int32_t rangeEnd, | |
293 | int32_t *lengths, | |
294 | int &count, | |
295 | int limit ) const; | |
296 | ||
297 | /** | |
298 | * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> | |
299 | * | |
300 | * @param status A status code recording the success of the call. | |
301 | * @return A StringEnumeration that will iterate through the whole dictionary. | |
302 | * The caller is responsible for closing it. The order is unspecified. | |
303 | */ | |
304 | virtual StringEnumeration *openWords( UErrorCode &status ) const; | |
305 | ||
306 | /** | |
307 | * <p>Return the size of the compact data.</p> | |
308 | * | |
309 | * @return The size of the dictionary's compact data. | |
310 | */ | |
311 | virtual uint32_t dataSize() const; | |
312 | ||
313 | /** | |
314 | * <p>Return a void * pointer to the compact data, platform-endian.</p> | |
315 | * | |
316 | * @return The data for the compact dictionary, suitable for passing to the | |
317 | * constructor. | |
318 | */ | |
319 | virtual const void *data() const; | |
320 | ||
321 | /** | |
322 | * <p>Return a MutableTrieDictionary clone of this dictionary.</p> | |
323 | * | |
324 | * @param status A status code recording the success of the call. | |
325 | * @return A MutableTrieDictionary with the same data as this dictionary | |
326 | */ | |
327 | virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const; | |
328 | ||
329 | private: | |
330 | ||
331 | /** | |
332 | * <p>Convert a MutableTrieDictionary into a compact data blob.</p> | |
333 | * | |
334 | * @param dict The dictionary to convert. | |
335 | * @param status A status code recording the success of the call. | |
336 | * @return A single data blob starting with a CompactTrieHeader. | |
337 | */ | |
338 | static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict, | |
339 | UErrorCode &status ); | |
340 | ||
341 | }; | |
342 | ||
343 | U_NAMESPACE_END | |
344 | ||
345 | /* TRIEDICT_H */ | |
346 | #endif |