]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/colldata.h
2 ******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 ******************************************************************************
10 * \brief C++ API: Collation data used to compute minLengthInChars.
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
27 * The size of the internal buffer for the Collator's short description string.
28 * @internal ICU 4.0.1 technology preview
30 #define KEY_BUFFER_SIZE 64
33 * The size of the internal CE buffer in a <code>CEList</code> object
34 * @internal ICU 4.0.1 technology preview
36 #define CELIST_BUFFER_SIZE 4
39 * \def INSTRUMENT_CELIST
40 * Define this to enable the <code>CEList</code> objects to collect
42 * @internal ICU 4.0.1 technology preview
44 //#define INSTRUMENT_CELIST
47 * The size of the initial list in a <code>StringList</code> object.
48 * @internal ICU 4.0.1 technology preview
50 #define STRING_LIST_BUFFER_SIZE 16
53 * \def INSTRUMENT_STRING_LIST
54 * Define this to enable the <code>StringList</code> objects to
56 * @internal ICU 4.0.1 technology preview
58 //#define INSTRUMENT_STRING_LIST
61 * This object holds a list of CEs generated from a particular
62 * <code>UnicodeString</code>
64 * @internal ICU 4.0.1 technology preview
66 class U_I18N_API CEList
: public UObject
70 * Construct a <code>CEList</code> object.
72 * @param coll - the Collator used to collect the CEs.
73 * @param string - the string for which to collect the CEs.
74 * @param status - will be set if any errors occur.
76 * Note: if on return, status is set to an error code,
77 * the only safe thing to do with this object is to call
80 * @internal ICU 4.0.1 technology preview
82 CEList(UCollator
*coll
, const UnicodeString
&string
, UErrorCode
&status
);
86 * @internal ICU 4.0.1 technology preview
91 * Return the number of CEs in the list.
93 * @return the number of CEs in the list.
95 * @internal ICU 4.0.1 technology preview
100 * Get a particular CE from the list.
102 * @param index - the index of the CE to return
104 * @return the CE, or <code>0</code> if <code>index</code> is out of range
106 * @internal ICU 4.0.1 technology preview
108 uint32_t get(int32_t index
) const;
111 * Check if the CEs in another <code>CEList</code> match the
112 * suffix of this list starting at a given offset.
114 * @param offset - the offset of the suffix
115 * @param other - the other <code>CEList</code>
117 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
119 * @internal ICU 4.0.1 technology preview
121 UBool
matchesAt(int32_t offset
, const CEList
*other
) const;
124 * The index operator.
126 * @param index - the index
128 * @return a reference to the given CE in the list
130 * @internal ICU 4.0.1 technology preview
132 uint32_t &operator[](int32_t index
) const;
136 * @internal ICU 4.0.1 technology preview
138 virtual UClassID
getDynamicClassID() const;
141 * @internal ICU 4.0.1 technology preview
143 static UClassID
getStaticClassID();
146 void add(uint32_t ce
, UErrorCode
&status
);
148 uint32_t ceBuffer
[CELIST_BUFFER_SIZE
];
153 #ifdef INSTRUMENT_CELIST
154 static int32_t _active
;
155 static int32_t _histogram
[10];
162 * This object holds a list of <code>UnicodeString</code> objects.
164 * @internal ICU 4.0.1 technology preview
166 class U_I18N_API StringList
: public UObject
170 * Construct an empty <code>StringList</code>
172 * @param status - will be set if any errors occur.
174 * Note: if on return, status is set to an error code,
175 * the only safe thing to do with this object is to call
178 * @internal ICU 4.0.1 technology preview
180 StringList(UErrorCode
&status
);
185 * @internal ICU 4.0.1 technology preview
190 * Add a string to the list.
192 * @param string - the string to add
193 * @param status - will be set if any errors occur.
195 * @internal ICU 4.0.1 technology preview
197 void add(const UnicodeString
*string
, UErrorCode
&status
);
200 * Add an array of Unicode code points to the list.
202 * @param chars - the address of the array of code points
203 * @param count - the number of code points in the array
204 * @param status - will be set if any errors occur.
206 * @internal ICU 4.0.1 technology preview
208 void add(const UChar
*chars
, int32_t count
, UErrorCode
&status
);
211 * Get a particular string from the list.
213 * @param index - the index of the string
215 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
216 * if <code>index</code> is out of bounds.
218 * @internal ICU 4.0.1 technology preview
220 const UnicodeString
*get(int32_t index
) const;
223 * Get the number of strings in the list.
225 * @return the number of strings in the list.
227 * @internal ICU 4.0.1 technology preview
229 int32_t size() const;
232 * the UObject glue...
233 * @internal ICU 4.0.1 technology preview
235 virtual UClassID
getDynamicClassID() const;
237 * the UObject glue...
238 * @internal ICU 4.0.1 technology preview
240 static UClassID
getStaticClassID();
243 UnicodeString
*strings
;
247 #ifdef INSTRUMENT_STRING_LIST
248 static int32_t _lists
;
249 static int32_t _strings
;
250 static int32_t _histogram
[101];
255 * Forward references to internal classes.
257 class StringToCEsMap
;
258 class CEToStringsMap
;
264 * This class holds the Collator-specific data needed to
265 * compute the length of the shortest string that can
266 * generate a particular list of CEs.
268 * <code>CollData</code> objects are quite expensive to compute. Because
269 * of this, they are cached. When you call <code>CollData::open</code> it
270 * returns a reference counted cached object. When you call <code>CollData::close</code>
271 * the reference count on the object is decremented but the object is not deleted.
273 * If you do not need to reuse any unreferenced objects in the cache, you can call
274 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
275 * objects, you can call <code>CollData::freeCollDataCache</code>
277 * @internal ICU 4.0.1 technology preview
279 class U_I18N_API CollData
: public UObject
283 * Construct a <code>CollData</code> object.
285 * @param collator - the collator
286 * @param status - will be set if any errors occur.
288 * @return the <code>CollData</code> object. You must call
289 * <code>close</code> when you are done using the object.
291 * Note: if on return, status is set to an error code,
292 * the only safe thing to do with this object is to call
293 * <code>CollData::close</code>.
295 * @internal ICU 4.0.1 technology preview
297 static CollData
*open(UCollator
*collator
, UErrorCode
&status
);
300 * Release a <code>CollData</code> object.
302 * @param collData - the object
304 * @internal ICU 4.0.1 technology preview
306 static void close(CollData
*collData
);
309 * Get the <code>UCollator</code> object used to create this object.
310 * The object returned may not be the exact object that was used to
311 * create this object, but it will have the same behavior.
312 * @internal ICU 4.0.1 technology preview
314 UCollator
*getCollator() const;
317 * Get a list of all the strings which generate a list
318 * of CEs starting with a given CE.
322 * @return a <code>StringList</code> object containing all
323 * the strings, or <code>NULL</code> if there are
326 * @internal ICU 4.0.1 technology preview.
328 const StringList
*getStringList(int32_t ce
) const;
331 * Get a list of the CEs generated by a particular string.
333 * @param string - the string
335 * @return a <code>CEList</code> object containing the CEs. You
336 * must call <code>freeCEList</code> when you are finished
337 * using the <code>CEList</code>.
339 * @internal ICU 4.0.1 technology preview.
341 const CEList
*getCEList(const UnicodeString
*string
) const;
344 * Release a <code>CEList</code> returned by <code>getCEList</code>.
346 * @param list - the <code>CEList</code> to free.
348 * @internal ICU 4.0.1 technology preview
350 void freeCEList(const CEList
*list
);
353 * Return the length of the shortest string that will generate
354 * the given list of CEs.
356 * @param ces - the CEs
357 * @param offset - the offset of the first CE in the list to use.
359 * @return the length of the shortest string.
361 * @internal ICU 4.0.1 technology preview
363 int32_t minLengthInChars(const CEList
*ces
, int32_t offset
) const;
367 * Return the length of the shortest string that will generate
368 * the given list of CEs.
370 * Note: the algorithm used to do this computation is recursive. To
371 * limit the amount of recursion, a "history" list is used to record
372 * the best answer starting at a particular offset in the list of CEs.
373 * If the same offset is visited again during the recursion, the answer
374 * in the history list is used.
376 * @param ces - the CEs
377 * @param offset - the offset of the first CE in the list to use.
378 * @param history - the history list. Must be at least as long as
379 * the number of CEs in the <code>CEList</code>
381 * @return the length of the shortest string.
383 * @internal ICU 4.0.1 technology preview
385 int32_t minLengthInChars(const CEList
*ces
, int32_t offset
, int32_t *history
) const;
389 * @internal ICU 4.0.1 technology preview
391 virtual UClassID
getDynamicClassID() const;
394 * @internal ICU 4.0.1 technology preview
396 static UClassID
getStaticClassID();
399 * <code>CollData</code> objects are expensive to compute, and so
400 * may be cached. This routine will free the cached objects and delete
403 * WARNING: Don't call this until you have called <code>close</code>
404 * for each <code>CollData</code> object that you have used. Also,
405 * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
408 * @internal ICU 4.0.1 technology preview
410 static void freeCollDataCache();
413 * <code>CollData</code> objects are expensive to compute, and so
414 * may be cached. This routine will remove any unused <code>CollData</code>
415 * objects from the cache.
417 * @internal ICU 4.0.1 technology preview
419 static void flushCollDataCache();
422 friend class CollDataCache
;
423 friend class CollDataCacheEntry
;
425 CollData(UCollator
*collator
, char *cacheKey
, int32_t cachekeyLength
, UErrorCode
&status
);
430 static char *getCollatorKey(UCollator
*collator
, char *buffer
, int32_t bufferLength
);
432 static CollDataCache
*getCollDataCache();
435 StringToCEsMap
*charsToCEList
;
436 CEToStringsMap
*ceToCharsStartingWith
;
438 char keyBuffer
[KEY_BUFFER_SIZE
];
441 static CollDataCache
*collDataCache
;
446 uint32_t jamoLimits
[4];
451 #endif // #if !UCONFIG_NO_COLLATION
452 #endif // #ifndef COLL_DATA_H