]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/triedict.h
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / triedict.h
CommitLineData
73c04bcf
A
1/**
2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
6 */
7
8#ifndef TRIEDICT_H
9#define TRIEDICT_H
10
11#include "unicode/utypes.h"
12#include "unicode/uobject.h"
13#include "unicode/utext.h"
14
15struct UEnumeration;
16struct UDataSwapper;
17struct UDataMemory;
18
19 /**
20 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
21 *
22 * @param ds Pointer to UDataSwapper containing global data about the
23 * transformation and function pointers for handling primitive
24 * types.
25 * @param inData Pointer to the input data to be transformed or examined.
26 * @param length Length of the data, counting bytes. May be -1 for preflighting.
27 * If length>=0, then transform the data.
28 * If length==-1, then only determine the length of the data.
29 * The length cannot be determined from the data itself for all
30 * types of data (e.g., not for simple arrays of integers).
31 * @param outData Pointer to the output data buffer.
32 * If length>=0 (transformation), then the output buffer must
33 * have a capacity of at least length.
34 * If length==-1, then outData will not be used and can be NULL.
35 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
36 * fulfill U_SUCCESS on input.
37 * @return The actual length of the data.
38 *
39 * @see UDataSwapper
40 */
41
42U_CAPI int32_t U_EXPORT2
43triedict_swap(const UDataSwapper *ds,
44 const void *inData, int32_t length, void *outData,
45 UErrorCode *pErrorCode);
46
47U_NAMESPACE_BEGIN
48
49class StringEnumeration;
50struct CompactTrieHeader;
51
52/*******************************************************************
53 * TrieWordDictionary
54 */
55
56/**
57 * <p>TrieWordDictionary is an abstract class that represents a word
58 * dictionary based on a trie. The base protocol is read-only.
59 * Subclasses may allow writing.</p>
60 */
61class U_COMMON_API TrieWordDictionary : public UMemory {
62 public:
63
64 /**
65 * <p>Default constructor.</p>
66 *
67 */
68 TrieWordDictionary();
69
70 /**
71 * <p>Virtual destructor.</p>
72 */
73 virtual ~TrieWordDictionary();
74
75 /**
76 * <p>Find dictionary words that match the text.</p>
77 *
78 * @param text A UText representing the text. The
79 * iterator is left after the longest prefix match in the dictionary.
80 * @param start The current position in text.
81 * @param maxLength The maximum number of code units to match.
82 * @param lengths An array that is filled with the lengths of words that matched.
83 * @param count Filled with the number of elements output in lengths.
84 * @param limit The size of the lengths array; this limits the number of words output.
85 * @return The number of characters in text that were matched.
86 */
87 virtual int32_t matches( UText *text,
88 int32_t maxLength,
89 int32_t *lengths,
90 int &count,
91 int limit ) const = 0;
92
93 /**
94 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
95 *
96 * @param status A status code recording the success of the call.
97 * @return A StringEnumeration that will iterate through the whole dictionary.
98 * The caller is responsible for closing it. The order is unspecified.
99 */
100 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
101
102};
103
104/*******************************************************************
105 * MutableTrieDictionary
106 */
107
108/**
109 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
110 * added.</p>
111 */
112
113struct TernaryNode; // Forwards declaration
114
115class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
116 private:
117 /**
118 * The root node of the trie
119 * @internal
120 */
121
122 TernaryNode *fTrie;
123
124 /**
125 * A UText for internal use
126 * @internal
127 */
128
129 UText *fIter;
130
131 friend class CompactTrieDictionary; // For fast conversion
132
133 public:
134
135 /**
136 * <p>Constructor.</p>
137 *
138 * @param median A UChar around which to balance the trie. Ideally, it should
139 * begin at least one word that is near the median of the set in the dictionary
140 * @param status A status code recording the success of the call.
141 */
142 MutableTrieDictionary( UChar median, UErrorCode &status );
143
144 /**
145 * <p>Virtual destructor.</p>
146 */
147 virtual ~MutableTrieDictionary();
148
149 /**
150 * <p>Find dictionary words that match the text.</p>
151 *
152 * @param text A UText representing the text. The
153 * iterator is left after the longest prefix match in the dictionary.
154 * @param maxLength The maximum number of code units to match.
155 * @param lengths An array that is filled with the lengths of words that matched.
156 * @param count Filled with the number of elements output in lengths.
157 * @param limit The size of the lengths array; this limits the number of words output.
158 * @return The number of characters in text that were matched.
159 */
160 virtual int32_t matches( UText *text,
161 int32_t maxLength,
162 int32_t *lengths,
163 int &count,
164 int limit ) const;
165
166 /**
167 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
168 *
169 * @param status A status code recording the success of the call.
170 * @return A StringEnumeration that will iterate through the whole dictionary.
171 * The caller is responsible for closing it. The order is unspecified.
172 */
173 virtual StringEnumeration *openWords( UErrorCode &status ) const;
174
175 /**
176 * <p>Add one word to the dictionary.</p>
177 *
178 * @param word A UChar buffer containing the word.
179 * @param length The length of the word.
180 * @param status The resultant status
181 */
182 virtual void addWord( const UChar *word,
183 int32_t length,
184 UErrorCode &status);
185
186#if 0
187 /**
188 * <p>Add all strings from a UEnumeration to the dictionary.</p>
189 *
190 * @param words A UEnumeration that will return the desired words.
191 * @param status The resultant status
192 */
193 virtual void addWords( UEnumeration *words, UErrorCode &status );
194#endif
195
196protected:
197 /**
198 * <p>Search the dictionary for matches.</p>
199 *
200 * @param text A UText representing the text. The
201 * iterator is left after the longest prefix match in the dictionary.
202 * @param maxLength The maximum number of code units to match.
203 * @param lengths An array that is filled with the lengths of words that matched.
204 * @param count Filled with the number of elements output in lengths.
205 * @param limit The size of the lengths array; this limits the number of words output.
206 * @param parent The parent of the current node
207 * @param pMatched The returned parent node matched the input
208 * @return The number of characters in text that were matched.
209 */
210 virtual int32_t search( UText *text,
211 int32_t maxLength,
212 int32_t *lengths,
213 int &count,
214 int limit,
215 TernaryNode *&parent,
216 UBool &pMatched ) const;
217
218private:
219 /**
220 * <p>Private constructor. The root node it not allocated.</p>
221 *
222 * @param status A status code recording the success of the call.
223 */
224 MutableTrieDictionary( UErrorCode &status );
225};
226
227/*******************************************************************
228 * CompactTrieDictionary
229 */
230
231/**
232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
233 * to save space.</p>
234 */
235class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
236 private:
237 /**
238 * The root node of the trie
239 */
240
241 const CompactTrieHeader *fData;
242
243 /**
244 * A UBool indicating whether or not we own the fData.
245 */
246
247 UBool fOwnData;
248
249 UDataMemory *fUData;
250 public:
251 /**
252 * <p>Construct a dictionary from a UDataMemory.</p>
253 *
254 * @param data A pointer to a UDataMemory, which is adopted
255 * @param status A status code giving the result of the constructor
256 */
257 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
258
259 /**
260 * <p>Construct a dictionary from raw saved data.</p>
261 *
262 * @param data A pointer to the raw data, which is still owned by the caller
263 * @param status A status code giving the result of the constructor
264 */
265 CompactTrieDictionary(const void *dataObj, UErrorCode &status);
266
267 /**
268 * <p>Construct a dictionary from a MutableTrieDictionary.</p>
269 *
270 * @param dict The dictionary to use as input.
271 * @param status A status code recording the success of the call.
272 */
273 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
274
275 /**
276 * <p>Virtual destructor.</p>
277 */
278 virtual ~CompactTrieDictionary();
279
280 /**
281 * <p>Find dictionary words that match the text.</p>
282 *
283 * @param text A UText representing the text. The
284 * iterator is left after the longest prefix match in the dictionary.
285 * @param maxLength The maximum number of code units to match.
286 * @param lengths An array that is filled with the lengths of words that matched.
287 * @param count Filled with the number of elements output in lengths.
288 * @param limit The size of the lengths array; this limits the number of words output.
289 * @return The number of characters in text that were matched.
290 */
291 virtual int32_t matches( UText *text,
292 int32_t rangeEnd,
293 int32_t *lengths,
294 int &count,
295 int limit ) const;
296
297 /**
298 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
299 *
300 * @param status A status code recording the success of the call.
301 * @return A StringEnumeration that will iterate through the whole dictionary.
302 * The caller is responsible for closing it. The order is unspecified.
303 */
304 virtual StringEnumeration *openWords( UErrorCode &status ) const;
305
306 /**
307 * <p>Return the size of the compact data.</p>
308 *
309 * @return The size of the dictionary's compact data.
310 */
311 virtual uint32_t dataSize() const;
312
313 /**
314 * <p>Return a void * pointer to the compact data, platform-endian.</p>
315 *
316 * @return The data for the compact dictionary, suitable for passing to the
317 * constructor.
318 */
319 virtual const void *data() const;
320
321 /**
322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
323 *
324 * @param status A status code recording the success of the call.
325 * @return A MutableTrieDictionary with the same data as this dictionary
326 */
327 virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
328
329 private:
330
331 /**
332 * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
333 *
334 * @param dict The dictionary to convert.
335 * @param status A status code recording the success of the call.
336 * @return A single data blob starting with a CompactTrieHeader.
337 */
338 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
339 UErrorCode &status );
340
341};
342
343U_NAMESPACE_END
344
345 /* TRIEDICT_H */
346#endif