]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/colldata.h
2 ******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines *
4 * Corporation and others. All Rights Reserved. *
5 ******************************************************************************
10 * \brief C++ API: Collation data used to compute minLengthInChars.
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
26 #ifndef U_HIDE_INTERNAL_API
28 * The size of the internal buffer for the Collator's short description string.
29 * @internal ICU 4.0.1 technology preview
31 #define KEY_BUFFER_SIZE 64
34 * The size of the internal CE buffer in a <code>CEList</code> object
35 * @internal ICU 4.0.1 technology preview
37 #define CELIST_BUFFER_SIZE 4
40 * \def INSTRUMENT_CELIST
41 * Define this to enable the <code>CEList</code> objects to collect
43 * @internal ICU 4.0.1 technology preview
45 //#define INSTRUMENT_CELIST
48 * The size of the initial list in a <code>StringList</code> object.
49 * @internal ICU 4.0.1 technology preview
51 #define STRING_LIST_BUFFER_SIZE 16
54 * \def INSTRUMENT_STRING_LIST
55 * Define this to enable the <code>StringList</code> objects to
57 * @internal ICU 4.0.1 technology preview
59 //#define INSTRUMENT_STRING_LIST
62 * This object holds a list of CEs generated from a particular
63 * <code>UnicodeString</code>
65 * @internal ICU 4.0.1 technology preview
67 class U_I18N_API CEList
: public UObject
71 * Construct a <code>CEList</code> object.
73 * @param coll - the Collator used to collect the CEs.
74 * @param string - the string for which to collect the CEs.
75 * @param status - will be set if any errors occur.
77 * Note: if on return, status is set to an error code,
78 * the only safe thing to do with this object is to call
81 * @internal ICU 4.0.1 technology preview
83 CEList(UCollator
*coll
, const UnicodeString
&string
, UErrorCode
&status
);
87 * @internal ICU 4.0.1 technology preview
92 * Return the number of CEs in the list.
94 * @return the number of CEs in the list.
96 * @internal ICU 4.0.1 technology preview
101 * Get a particular CE from the list.
103 * @param index - the index of the CE to return
105 * @return the CE, or <code>0</code> if <code>index</code> is out of range
107 * @internal ICU 4.0.1 technology preview
109 uint32_t get(int32_t index
) const;
112 * Check if the CEs in another <code>CEList</code> match the
113 * suffix of this list starting at a given offset.
115 * @param offset - the offset of the suffix
116 * @param other - the other <code>CEList</code>
118 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
120 * @internal ICU 4.0.1 technology preview
122 UBool
matchesAt(int32_t offset
, const CEList
*other
) const;
125 * The index operator.
127 * @param index - the index
129 * @return a reference to the given CE in the list
131 * @internal ICU 4.0.1 technology preview
133 uint32_t &operator[](int32_t index
) const;
137 * @internal ICU 4.0.1 technology preview
139 virtual UClassID
getDynamicClassID() const;
142 * @internal ICU 4.0.1 technology preview
144 static UClassID
getStaticClassID();
147 void add(uint32_t ce
, UErrorCode
&status
);
149 uint32_t ceBuffer
[CELIST_BUFFER_SIZE
];
154 #ifdef INSTRUMENT_CELIST
155 static int32_t _active
;
156 static int32_t _histogram
[10];
163 * This object holds a list of <code>UnicodeString</code> objects.
165 * @internal ICU 4.0.1 technology preview
167 class U_I18N_API StringList
: public UObject
171 * Construct an empty <code>StringList</code>
173 * @param status - will be set if any errors occur.
175 * Note: if on return, status is set to an error code,
176 * the only safe thing to do with this object is to call
179 * @internal ICU 4.0.1 technology preview
181 StringList(UErrorCode
&status
);
186 * @internal ICU 4.0.1 technology preview
191 * Add a string to the list.
193 * @param string - the string to add
194 * @param status - will be set if any errors occur.
196 * @internal ICU 4.0.1 technology preview
198 void add(const UnicodeString
*string
, UErrorCode
&status
);
201 * Add an array of UTF-16 code units (<code>UChar</code>s) to the list as a single string.
203 * @param chars - the address of the array of code units
204 * @param count - the number of code units in the array
205 * @param status - will be set if any errors occur.
207 * @internal ICU 4.0.1 technology preview
209 void add(const UChar
*chars
, int32_t count
, UErrorCode
&status
);
212 * Get a particular string from the list.
214 * @param index - the index of the string
216 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
217 * if <code>index</code> is out of bounds.
219 * @internal ICU 4.0.1 technology preview
221 const UnicodeString
*get(int32_t index
) const;
224 * Get the number of strings in the list.
226 * @return the number of strings in the list.
228 * @internal ICU 4.0.1 technology preview
230 int32_t size() const;
233 * the UObject glue...
234 * @internal ICU 4.0.1 technology preview
236 virtual UClassID
getDynamicClassID() const;
238 * the UObject glue...
239 * @internal ICU 4.0.1 technology preview
241 static UClassID
getStaticClassID();
244 UnicodeString
*strings
;
248 #ifdef INSTRUMENT_STRING_LIST
249 static int32_t _lists
;
250 static int32_t _strings
;
251 static int32_t _histogram
[101];
254 #endif /* U_HIDE_INTERNAL_API */
257 * Forward references to internal classes.
259 class StringToCEsMap
;
260 class CEToStringsMap
;
263 #ifndef U_HIDE_INTERNAL_API
267 * This class holds the Collator-specific data needed to
268 * compute the length of the shortest string that can
269 * generate a particular list of CEs.
271 * <code>CollData</code> objects are quite expensive to compute. Because
272 * of this, they are cached. When you call <code>CollData::open</code> it
273 * returns a reference counted cached object. When you call <code>CollData::close</code>
274 * the reference count on the object is decremented but the object is not deleted.
276 * If you do not need to reuse any unreferenced objects in the cache, you can call
277 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
278 * objects, you can call <code>CollData::freeCollDataCache</code>
280 * @internal ICU 4.0.1 technology preview
282 class U_I18N_API CollData
: public UObject
286 * Construct a <code>CollData</code> object.
288 * @param collator - the collator
289 * @param status - will be set if any errors occur.
291 * @return the <code>CollData</code> object. You must call
292 * <code>close</code> when you are done using the object.
294 * Note: if on return, status is set to an error code,
295 * the only safe thing to do with this object is to call
296 * <code>CollData::close</code>.
298 * @internal ICU 4.0.1 technology preview
300 static CollData
*open(UCollator
*collator
, UErrorCode
&status
);
303 * Release a <code>CollData</code> object.
305 * @param collData - the object
307 * @internal ICU 4.0.1 technology preview
309 static void close(CollData
*collData
);
312 * Get the <code>UCollator</code> object used to create this object.
313 * The object returned may not be the exact object that was used to
314 * create this object, but it will have the same behavior.
315 * @internal ICU 4.0.1 technology preview
317 UCollator
*getCollator() const;
320 * Get a list of all the strings which generate a list
321 * of CEs starting with a given CE.
325 * @return a <code>StringList</code> object containing all
326 * the strings, or <code>NULL</code> if there are
329 * @internal ICU 4.0.1 technology preview.
331 const StringList
*getStringList(int32_t ce
) const;
334 * Get a list of the CEs generated by a particular string.
336 * @param string - the string
338 * @return a <code>CEList</code> object containing the CEs. You
339 * must call <code>freeCEList</code> when you are finished
340 * using the <code>CEList</code>.
342 * @internal ICU 4.0.1 technology preview.
344 const CEList
*getCEList(const UnicodeString
*string
) const;
347 * Release a <code>CEList</code> returned by <code>getCEList</code>.
349 * @param list - the <code>CEList</code> to free.
351 * @internal ICU 4.0.1 technology preview
353 void freeCEList(const CEList
*list
);
356 * Return the length of the shortest string that will generate
357 * the given list of CEs.
359 * @param ces - the CEs
360 * @param offset - the offset of the first CE in the list to use.
362 * @return the length of the shortest string.
364 * @internal ICU 4.0.1 technology preview
366 int32_t minLengthInChars(const CEList
*ces
, int32_t offset
) const;
370 * Return the length of the shortest string that will generate
371 * the given list of CEs.
373 * Note: the algorithm used to do this computation is recursive. To
374 * limit the amount of recursion, a "history" list is used to record
375 * the best answer starting at a particular offset in the list of CEs.
376 * If the same offset is visited again during the recursion, the answer
377 * in the history list is used.
379 * @param ces - the CEs
380 * @param offset - the offset of the first CE in the list to use.
381 * @param history - the history list. Must be at least as long as
382 * the number of CEs in the <code>CEList</code>
384 * @return the length of the shortest string.
386 * @internal ICU 4.0.1 technology preview
388 int32_t minLengthInChars(const CEList
*ces
, int32_t offset
, int32_t *history
) const;
392 * @internal ICU 4.0.1 technology preview
394 virtual UClassID
getDynamicClassID() const;
397 * @internal ICU 4.0.1 technology preview
399 static UClassID
getStaticClassID();
402 * <code>CollData</code> objects are expensive to compute, and so
403 * may be cached. This routine will free the cached objects and delete
406 * WARNING: Don't call this until you have called <code>close</code>
407 * for each <code>CollData</code> object that you have used. Also,
408 * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
411 * @internal ICU 4.0.1 technology preview
413 static void freeCollDataCache();
416 * <code>CollData</code> objects are expensive to compute, and so
417 * may be cached. This routine will remove any unused <code>CollData</code>
418 * objects from the cache.
420 * @internal ICU 4.0.1 technology preview
422 static void flushCollDataCache();
425 friend class CollDataCache
;
426 friend class CollDataCacheEntry
;
428 CollData(UCollator
*collator
, char *cacheKey
, int32_t cachekeyLength
, UErrorCode
&status
);
433 static char *getCollatorKey(UCollator
*collator
, char *buffer
, int32_t bufferLength
);
435 static CollDataCache
*getCollDataCache();
438 StringToCEsMap
*charsToCEList
;
439 CEToStringsMap
*ceToCharsStartingWith
;
441 char keyBuffer
[KEY_BUFFER_SIZE
];
444 static CollDataCache
*collDataCache
;
449 uint32_t jamoLimits
[4];
451 #endif /* U_HIDE_INTERNAL_API */
455 #endif // #if !UCONFIG_NO_COLLATION
456 #endif // #ifndef COLL_DATA_H