]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/colldata.h
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / i18n / unicode / colldata.h
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 ******************************************************************************
6 */
7
8 /**
9 * \file
10 * \brief C++ API: Collation data used to compute minLengthInChars.
11 * \internal
12 */
13
14 #ifndef COLL_DATA_H
15 #define COLL_DATA_H
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_COLLATION
20
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
23
24 U_NAMESPACE_BEGIN
25
26 #ifndef U_HIDE_INTERNAL_API
27 /**
28 * The size of the internal buffer for the Collator's short description string. It is the length, in chars, of the <code>keyBuffer</code> array in <code>CollData</code>.
29 * @internal ICU 4.0.1 technology preview
30 */
31 #define KEY_BUFFER_SIZE 64
32
33 /**
34 * The size of the internal CE buffer in a <code>CEList</code> object, i.e. the number of <code>uint32_t</code> entries in its <code>ceBuffer</code> array.
35 * @internal ICU 4.0.1 technology preview
36 */
37 #define CELIST_BUFFER_SIZE 4
38
39 /**
40 * \def INSTRUMENT_CELIST
41 * Define this to enable the <code>CEList</code> objects to collect
42 * statistics.
43 * @internal ICU 4.0.1 technology preview
44 */
45 //#define INSTRUMENT_CELIST
46
47 /**
48 * The size of the initial list in a <code>StringList</code> object (presumably a count of <code>UnicodeString</code> entries - confirm in the implementation file).
49 * @internal ICU 4.0.1 technology preview
50 */
51 #define STRING_LIST_BUFFER_SIZE 16
52
53 /**
54 * \def INSTRUMENT_STRING_LIST
55 * Define this to enable the <code>StringList</code> objects to
56 * collect statistics.
57 * @internal ICU 4.0.1 technology preview
58 */
59 //#define INSTRUMENT_STRING_LIST
60
61 /**
62 * This object holds a list of CEs generated from a particular
63 * <code>UnicodeString</code> using a particular collator.
64 *
65 * @internal ICU 4.0.1 technology preview
66 */
67 class U_I18N_API CEList : public UObject
68 {
69 public:
70 /**
71 * Construct a <code>CEList</code> object.
72 *
73 * @param coll - the Collator used to collect the CEs.
74 * @param string - the string for which to collect the CEs.
75 * @param status - will be set if any errors occur.
76 *
77 * Note: if on return, status is set to an error code,
78 * the only safe thing to do with this object is to call
79 * the destructor.
80 *
81 * @internal ICU 4.0.1 technology preview
82 */
83 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
84
85 /**
86 * The destructor.
87 * @internal ICU 4.0.1 technology preview
88 */
89 ~CEList();
90
91 /**
92 * Return the number of CEs in the list.
93 *
94 * @return the number of CEs in the list.
95 *
96 * @internal ICU 4.0.1 technology preview
97 */
98 int32_t size() const;
99
100 /**
101 * Get a particular CE from the list.
102 *
103 * @param index - the index of the CE to return
104 *
105 * @return the CE, or <code>0</code> if <code>index</code> is out of range
106 *
107 * @internal ICU 4.0.1 technology preview
108 */
109 uint32_t get(int32_t index) const;
110
111 /**
112 * Check if the CEs in another <code>CEList</code> match the
113 * suffix of this list starting at a given offset.
114 *
115 * @param offset - the offset in this list at which the suffix starts
116 * @param other - the other <code>CEList</code>
117 *
118 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
119 *
120 * @internal ICU 4.0.1 technology preview
121 */
122 UBool matchesAt(int32_t offset, const CEList *other) const;
123
124 /**
125 * The index operator. Unlike <code>get</code>, no out-of-range result is documented for this operator.
126 *
127 * @param index - the index
128 *
129 * @return a reference to the given CE in the list. The reference is non-const even though the operator is const, so the CE can be modified through it.
130 *
131 * @internal ICU 4.0.1 technology preview
132 */
133 uint32_t &operator[](int32_t index) const;
134
135 /**
136 * UObject glue...
137 * @internal ICU 4.0.1 technology preview
138 */
139 virtual UClassID getDynamicClassID() const;
140 /**
141 * UObject glue...
142 * @internal ICU 4.0.1 technology preview
143 */
144 static UClassID getStaticClassID();
145
146 private:
147 void add(uint32_t ce, UErrorCode &status); // presumably appends ce to the list - confirm in the implementation file
148
149 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // fixed array of CELIST_BUFFER_SIZE CEs embedded in the object
150 uint32_t *ces; // presumably the storage currently in use (ceBuffer or a larger allocation) - TODO confirm
151 int32_t listMax; // presumably the capacity of ces - TODO confirm
152 int32_t listSize; // presumably the number of CEs currently stored - TODO confirm
153
154 #ifdef INSTRUMENT_CELIST
155 static int32_t _active; // statistics member, declared only when INSTRUMENT_CELIST is defined
156 static int32_t _histogram[10]; // statistics member, declared only when INSTRUMENT_CELIST is defined
157 #endif
158 };
159
160 /**
161 * StringList
162 *
163 * This object holds a list of <code>UnicodeString</code> objects.
164 *
165 * @internal ICU 4.0.1 technology preview
166 */
167 class U_I18N_API StringList : public UObject
168 {
169 public:
170 /**
171 * Construct an empty <code>StringList</code>
172 *
173 * @param status - will be set if any errors occur.
174 *
175 * Note: if on return, status is set to an error code,
176 * the only safe thing to do with this object is to call
177 * the destructor.
178 *
179 * @internal ICU 4.0.1 technology preview
180 */
181 StringList(UErrorCode &status);
182
183 /**
184 * The destructor.
185 *
186 * @internal ICU 4.0.1 technology preview
187 */
188 ~StringList();
189
190 /**
191 * Add a string to the list.
192 *
193 * @param string - the string to add
194 * @param status - will be set if any errors occur.
195 *
196 * @internal ICU 4.0.1 technology preview
197 */
198 void add(const UnicodeString *string, UErrorCode &status);
199
200 /**
201 * Add an array of Unicode code points to the list. NOTE(review): the array type is <code>UChar</code>, so "code points" presumably means UTF-16 code units - confirm.
202 *
203 * @param chars - the address of the array of code points
204 * @param count - the number of code points in the array
205 * @param status - will be set if any errors occur.
206 *
207 * @internal ICU 4.0.1 technology preview
208 */
209 void add(const UChar *chars, int32_t count, UErrorCode &status);
210
211 /**
212 * Get a particular string from the list.
213 *
214 * @param index - the index of the string
215 *
216 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
217 * if <code>index</code> is out of bounds.
218 *
219 * @internal ICU 4.0.1 technology preview
220 */
221 const UnicodeString *get(int32_t index) const;
222
223 /**
224 * Get the number of strings in the list.
225 *
226 * @return the number of strings in the list.
227 *
228 * @internal ICU 4.0.1 technology preview
229 */
230 int32_t size() const;
231
232 /**
233 * the UObject glue...
234 * @internal ICU 4.0.1 technology preview
235 */
236 virtual UClassID getDynamicClassID() const;
237 /**
238 * the UObject glue...
239 * @internal ICU 4.0.1 technology preview
240 */
241 static UClassID getStaticClassID();
242
243 private:
244 UnicodeString *strings; // presumably the array holding the list's strings - TODO confirm in the implementation file
245 int32_t listMax; // presumably the capacity of strings - TODO confirm
246 int32_t listSize; // presumably the number of strings currently stored - TODO confirm
247
248 #ifdef INSTRUMENT_STRING_LIST
249 static int32_t _lists; // statistics member, declared only when INSTRUMENT_STRING_LIST is defined
250 static int32_t _strings; // statistics member, declared only when INSTRUMENT_STRING_LIST is defined
251 static int32_t _histogram[101]; // statistics member, declared only when INSTRUMENT_STRING_LIST is defined
252 #endif
253 };
254 #endif /* U_HIDE_INTERNAL_API */
255
256 /*
257 * Forward references to internal classes.
258 */
259 class StringToCEsMap;
260 class CEToStringsMap;
261 class CollDataCache;
262
263 #ifndef U_HIDE_INTERNAL_API
264 /**
265 * CollData
266 *
267 * This class holds the Collator-specific data needed to
268 * compute the length of the shortest string that can
269 * generate a particular list of CEs.
270 *
271 * <code>CollData</code> objects are quite expensive to compute. Because
272 * of this, they are cached. When you call <code>CollData::open</code> it
273 * returns a reference counted cached object. When you call <code>CollData::close</code>
274 * the reference count on the object is decremented but the object is not deleted.
275 *
276 * If you do not need to reuse any unreferenced objects in the cache, you can call
277 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
278 * objects, you can call <code>CollData::freeCollDataCache</code>.
279 *
280 * @internal ICU 4.0.1 technology preview
281 */
282 class U_I18N_API CollData : public UObject
283 {
284 public:
285 /**
286 * Get a <code>CollData</code> object for a collator. The object returned is a reference counted cached object.
287 *
288 * @param collator - the collator
289 * @param status - will be set if any errors occur.
290 *
291 * @return the <code>CollData</code> object. You must call
292 * <code>close</code> when you are done using the object.
293 *
294 * Note: if on return, status is set to an error code,
295 * the only safe thing to do with this object is to call
296 * <code>CollData::close</code>.
297 *
298 * @internal ICU 4.0.1 technology preview
299 */
300 static CollData *open(UCollator *collator, UErrorCode &status);
301
302 /**
303 * Release a <code>CollData</code> object. This decrements the object's reference count; the object itself is not deleted.
304 *
305 * @param collData - the object
306 *
307 * @internal ICU 4.0.1 technology preview
308 */
309 static void close(CollData *collData);
310
311 /**
312 * Get the <code>UCollator</code> object used to create this object.
313 * The object returned may not be the exact object that was used to
314 * create this object, but it will have the same behavior.
315 * @internal ICU 4.0.1 technology preview
316 */
317 UCollator *getCollator() const;
318
319 /**
320 * Get a list of all the strings which generate a list
321 * of CEs starting with a given CE.
322 *
323 * @param ce - the CE. NOTE(review): declared <code>int32_t</code> here although <code>CEList</code> exposes CEs as <code>uint32_t</code> - confirm the signedness is intended.
324 *
325 * @return a <code>StringList</code> object containing all
326 * the strings, or <code>NULL</code> if there are
327 * no such strings.
328 *
329 * @internal ICU 4.0.1 technology preview
330 */
331 const StringList *getStringList(int32_t ce) const;
332
333 /**
334 * Get a list of the CEs generated by a particular string.
335 *
336 * @param string - the string
337 *
338 * @return a <code>CEList</code> object containing the CEs. You
339 * must call <code>freeCEList</code> when you are finished
340 * using the <code>CEList</code>.
341 *
342 * @internal ICU 4.0.1 technology preview
343 */
344 const CEList *getCEList(const UnicodeString *string) const;
345
346 /**
347 * Release a <code>CEList</code> returned by <code>getCEList</code>.
348 *
349 * @param list - the <code>CEList</code> to free.
350 *
351 * @internal ICU 4.0.1 technology preview
352 */
353 void freeCEList(const CEList *list);
354
355 /**
356 * Return the length of the shortest string that will generate
357 * the given list of CEs.
358 *
359 * @param ces - the CEs
360 * @param offset - the offset of the first CE in the list to use.
361 *
362 * @return the length of the shortest string.
363 *
364 * @internal ICU 4.0.1 technology preview
365 */
366 int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
367
368
369 /**
370 * Return the length of the shortest string that will generate
371 * the given list of CEs.
372 *
373 * Note: the algorithm used to do this computation is recursive. To
374 * limit the amount of recursion, a "history" list is used to record
375 * the best answer starting at a particular offset in the list of CEs.
376 * If the same offset is visited again during the recursion, the answer
377 * in the history list is used.
378 *
379 * @param ces - the CEs
380 * @param offset - the offset of the first CE in the list to use.
381 * @param history - the history list. Must be at least as long as
382 * the number of CEs in the <code>CEList</code>
383 *
384 * @return the length of the shortest string.
385 *
386 * @internal ICU 4.0.1 technology preview
387 */
388 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
389
390 /**
391 * UObject glue...
392 * @internal ICU 4.0.1 technology preview
393 */
394 virtual UClassID getDynamicClassID() const;
395 /**
396 * UObject glue...
397 * @internal ICU 4.0.1 technology preview
398 */
399 static UClassID getStaticClassID();
400
401 /**
402 * <code>CollData</code> objects are expensive to compute, and so
403 * may be cached. This routine will free the cached objects and delete
404 * the cache.
405 *
406 * WARNING: Don't call this until you have called <code>close</code>
407 * for each <code>CollData</code> object that you have used. Also,
408 * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
409 * at the same time.
410 *
411 * @internal ICU 4.0.1 technology preview
412 */
413 static void freeCollDataCache();
414
415 /**
416 * <code>CollData</code> objects are expensive to compute, and so
417 * may be cached. This routine will remove any unused <code>CollData</code>
418 * objects from the cache.
419 *
420 * @internal ICU 4.0.1 technology preview
421 */
422 static void flushCollDataCache();
423
424 private:
425 friend class CollDataCache;
426 friend class CollDataCacheEntry;
427
428 CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); // private: callers obtain objects through open()
429 ~CollData(); // private: callers release objects through close() and cannot delete them directly
430
431 CollData(); // private default constructor - presumably declared only to prevent default construction; TODO confirm
432
433 static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
434
435 static CollDataCache *getCollDataCache();
436
437 UCollator *coll;
438 StringToCEsMap *charsToCEList;
439 CEToStringsMap *ceToCharsStartingWith;
440
441 char keyBuffer[KEY_BUFFER_SIZE]; // fixed array of KEY_BUFFER_SIZE chars embedded in the object
442 char *key; // presumably the cache key (keyBuffer or a larger allocation) - TODO confirm
443
444 static CollDataCache *collDataCache; // static member: one pointer for the whole class, not per object
445
446 uint32_t minHan;
447 uint32_t maxHan;
448
449 uint32_t jamoLimits[4];
450 };
451 #endif /* U_HIDE_INTERNAL_API */
452
453 U_NAMESPACE_END
454
455 #endif // #if !UCONFIG_NO_COLLATION
456 #endif // #ifndef COLL_DATA_H