]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/colldata.h
ICU-511.27.tar.gz
[apple/icu.git] / icuSources / test / intltest / colldata.h
CommitLineData
729e4ab9
A
1/*
2 ******************************************************************************
51004dcb 3 * Copyright (C) 1996-2012, International Business Machines *
729e4ab9
A
4 * Corporation and others. All Rights Reserved. *
5 ******************************************************************************
6 */
7
8/**
9 * \file
51004dcb 10 * \brief Originally, added as C++ API for Collation data used to compute minLengthInChars
729e4ab9
A
11 * \internal
12 */
51004dcb
A
13
14/*
15 * Note: This module was incldued in ICU 4.0.1 as @internal technology preview for supporting
16 * Boyer-Moore string search API. For now, only SSearchTest depends on this module. I temporaly
17 * moved the module from i18n directory to intltest, because we have no plan to publish this
18 * as public API. (2012-12-18 yoshito)
19 */
20
729e4ab9
A
21#ifndef COLL_DATA_H
22#define COLL_DATA_H
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_COLLATION
27
729e4ab9 28#include "unicode/ucol.h"
51004dcb 29#include "unicode/unistr.h"
729e4ab9
A
30
31 /**
32 * The size of the internal CE buffer in a <code>CEList</code> object
729e4ab9
A
33 */
34#define CELIST_BUFFER_SIZE 4
35
36/**
37 * \def INSTRUMENT_CELIST
38 * Define this to enable the <code>CEList</code> objects to collect
39 * statistics.
729e4ab9 40 */
729e4ab9
A
41
42 /**
43 * The size of the initial list in a <code>StringList</code> object.
729e4ab9
A
44 */
45#define STRING_LIST_BUFFER_SIZE 16
46
51004dcb 47U_NAMESPACE_USE
729e4ab9
A
48
49 /**
50 * This object holds a list of CEs generated from a particular
51 * <code>UnicodeString</code>
52 *
729e4ab9 53 */
51004dcb 54class CEList
729e4ab9
A
55{
56public:
57 /**
58 * Construct a <code>CEList</code> object.
59 *
60 * @param coll - the Collator used to collect the CEs.
61 * @param string - the string for which to collect the CEs.
62 * @param status - will be set if any errors occur.
63 *
64 * Note: if on return, status is set to an error code,
65 * the only safe thing to do with this object is to call
66 * the destructor.
729e4ab9
A
67 */
68 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
69
70 /**
71 * The destructor.
729e4ab9
A
72 */
73 ~CEList();
74
75 /**
76 * Return the number of CEs in the list.
77 *
78 * @return the number of CEs in the list.
729e4ab9
A
79 */
80 int32_t size() const;
81
82 /**
83 * Get a particular CE from the list.
84 *
85 * @param index - the index of the CE to return
86 *
87 * @return the CE, or <code>0</code> if <code>index</code> is out of range
729e4ab9
A
88 */
89 uint32_t get(int32_t index) const;
90
91 /**
92 * Check if the CEs in another <code>CEList</code> match the
93 * suffix of this list starting at a give offset.
94 *
95 * @param offset - the offset of the suffix
96 * @param other - the other <code>CEList</code>
97 *
98 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
729e4ab9
A
99 */
100 UBool matchesAt(int32_t offset, const CEList *other) const;
101
102 /**
103 * The index operator.
104 *
105 * @param index - the index
106 *
107 * @return a reference to the given CE in the list
729e4ab9
A
108 */
109 uint32_t &operator[](int32_t index) const;
110
729e4ab9
A
111private:
112 void add(uint32_t ce, UErrorCode &status);
113
114 uint32_t ceBuffer[CELIST_BUFFER_SIZE];
115 uint32_t *ces;
116 int32_t listMax;
117 int32_t listSize;
729e4ab9
A
118};
119
120/**
121 * StringList
122 *
123 * This object holds a list of <code>UnicodeString</code> objects.
729e4ab9 124 */
51004dcb 125class StringList
729e4ab9
A
126{
127public:
128 /**
129 * Construct an empty <code>StringList</code>
130 *
131 * @param status - will be set if any errors occur.
132 *
133 * Note: if on return, status is set to an error code,
134 * the only safe thing to do with this object is to call
135 * the destructor.
729e4ab9
A
136 */
137 StringList(UErrorCode &status);
138
139 /**
140 * The destructor.
729e4ab9
A
141 */
142 ~StringList();
143
144 /**
145 * Add a string to the list.
146 *
147 * @param string - the string to add
148 * @param status - will be set if any errors occur.
729e4ab9
A
149 */
150 void add(const UnicodeString *string, UErrorCode &status);
151
152 /**
153 * Add an array of Unicode code points to the list.
154 *
155 * @param chars - the address of the array of code points
156 * @param count - the number of code points in the array
157 * @param status - will be set if any errors occur.
729e4ab9
A
158 */
159 void add(const UChar *chars, int32_t count, UErrorCode &status);
160
161 /**
162 * Get a particular string from the list.
163 *
164 * @param index - the index of the string
165 *
166 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
167 * if <code>index</code> is out of bounds.
729e4ab9
A
168 */
169 const UnicodeString *get(int32_t index) const;
170
171 /**
172 * Get the number of stings in the list.
173 *
174 * @return the number of strings in the list.
729e4ab9
A
175 */
176 int32_t size() const;
177
729e4ab9
A
178private:
179 UnicodeString *strings;
180 int32_t listMax;
181 int32_t listSize;
729e4ab9 182};
51004dcb 183
729e4ab9
A
184
185/*
186 * Forward references to internal classes.
187 */
188class StringToCEsMap;
189class CEToStringsMap;
729e4ab9
A
190
191/**
192 * CollData
193 *
194 * This class holds the Collator-specific data needed to
195 * compute the length of the shortest string that can
196 * generate a partcular list of CEs.
197 *
198 * <code>CollData</code> objects are quite expensive to compute. Because
199 * of this, they are cached. When you call <code>CollData::open</code> it
200 * returns a reference counted cached object. When you call <code>CollData::close</code>
201 * the reference count on the object is decremented but the object is not deleted.
202 *
203 * If you do not need to reuse any unreferenced objects in the cache, you can call
204 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
205 * objects, you can call <code>CollData::freeCollDataCache</code>
729e4ab9 206 */
51004dcb 207class CollData
729e4ab9
A
208{
209public:
210 /**
211 * Construct a <code>CollData</code> object.
212 *
213 * @param collator - the collator
214 * @param status - will be set if any errors occur.
729e4ab9 215 */
51004dcb 216 CollData(UCollator *collator, UErrorCode &status);
729e4ab9
A
217
218 /**
51004dcb 219 * The destructor.
729e4ab9 220 */
51004dcb 221 ~CollData();
729e4ab9
A
222
223 /**
224 * Get the <code>UCollator</code> object used to create this object.
225 * The object returned may not be the exact object that was used to
226 * create this object, but it will have the same behavior.
729e4ab9
A
227 */
228 UCollator *getCollator() const;
229
230 /**
231 * Get a list of all the strings which generate a list
232 * of CEs starting with a given CE.
233 *
234 * @param ce - the CE
235 *
236 * return a <code>StringList</code> object containing all
237 * the stirngs, or <code>NULL</code> if there are
238 * no such strings.
729e4ab9
A
239 */
240 const StringList *getStringList(int32_t ce) const;
241
242 /**
243 * Get a list of the CEs generated by a partcular stirng.
244 *
245 * @param string - the string
246 *
247 * @return a <code>CEList</code> object containt the CEs. You
248 * must call <code>freeCEList</code> when you are finished
249 * using the <code>CEList</code>/
729e4ab9
A
250 */
251 const CEList *getCEList(const UnicodeString *string) const;
252
253 /**
254 * Release a <code>CEList</code> returned by <code>getCEList</code>.
255 *
256 * @param list - the <code>CEList</code> to free.
729e4ab9
A
257 */
258 void freeCEList(const CEList *list);
259
260 /**
261 * Return the length of the shortest string that will generate
262 * the given list of CEs.
263 *
264 * @param ces - the CEs
265 * @param offset - the offset of the first CE in the list to use.
266 *
267 * @return the length of the shortest string.
729e4ab9
A
268 */
269 int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
270
271
272 /**
273 * Return the length of the shortest string that will generate
274 * the given list of CEs.
275 *
276 * Note: the algorithm used to do this computation is recursive. To
277 * limit the amount of recursion, a "history" list is used to record
278 * the best answer starting at a particular offset in the list of CEs.
279 * If the same offset is visited again during the recursion, the answer
280 * in the history list is used.
281 *
282 * @param ces - the CEs
283 * @param offset - the offset of the first CE in the list to use.
284 * @param history - the history list. Must be at least as long as
285 * the number of cEs in the <code>CEList</code>
286 *
287 * @return the length of the shortest string.
729e4ab9
A
288 */
289 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
290
729e4ab9 291private:
729e4ab9 292 UCollator *coll;
729e4ab9
A
293 CEToStringsMap *ceToCharsStartingWith;
294
729e4ab9
A
295 uint32_t minHan;
296 uint32_t maxHan;
297
298 uint32_t jamoLimits[4];
299};
729e4ab9
A
300
301#endif // #if !UCONFIG_NO_COLLATION
302#endif // #ifndef COLL_DATA_H