/* Source: git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/ucasemap.h */
/*
*******************************************************************************
*
*   Copyright (C) 2005-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucasemap.h
*   tab size:   8 (not used)
*
*   created on: 2005may06
*   created by: Markus W. Scherer
*
*   Case mapping service object and functions using it.
*/
19 #ifndef __UCASEMAP_H__
20 #define __UCASEMAP_H__
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
/**
 * \file
 * \brief C API: Unicode case mapping functions using a UCaseMap service object.
 *
 * The service object takes care of memory allocations, data loading, and setup
 * for the attributes, as usual.
 *
 * Currently, the functionality provided here does not overlap with uchar.h
 * and ustring.h, except for ucasemap_toTitle().
 *
 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
 */
/**
 * UCaseMap is an opaque service object for newer ICU case mapping functions.
 * Older functions did not use a service object.
 */
typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
47 * Open a UCaseMap service object for a locale and a set of options.
48 * The locale ID and options are preprocessed so that functions using the
49 * service object need not process them in each call.
51 * @param locale ICU locale ID, used for language-dependent
52 * upper-/lower-/title-casing according to the Unicode standard.
53 * Usual semantics: ""=root, NULL=default locale, etc.
54 * @param options Options bit set, used for case folding and string comparisons.
55 * Same flags as for u_foldCase(), u_strFoldCase(),
56 * u_strCaseCompare(), etc.
57 * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
58 * @param pErrorCode Must be a valid pointer to an error code value,
59 * which must not indicate a failure before the function call.
60 * @return Pointer to a UCaseMap service object, if successful.
62 * @see U_FOLD_CASE_DEFAULT
63 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
64 * @see U_TITLECASE_NO_LOWERCASE
65 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
68 U_STABLE UCaseMap
* U_EXPORT2
69 ucasemap_open(const char *locale
, uint32_t options
, UErrorCode
*pErrorCode
);
72 * Close a UCaseMap service object.
73 * @param csm Object to be closed.
76 U_STABLE
void U_EXPORT2
77 ucasemap_close(UCaseMap
*csm
);
80 * Get the locale ID that is used for language-dependent case mappings.
81 * @param csm UCaseMap service object.
85 U_STABLE
const char * U_EXPORT2
86 ucasemap_getLocale(const UCaseMap
*csm
);
89 * Get the options bit set that is used for case folding and string comparisons.
90 * @param csm UCaseMap service object.
91 * @return options bit set
94 U_STABLE
uint32_t U_EXPORT2
95 ucasemap_getOptions(const UCaseMap
*csm
);
98 * Set the locale ID that is used for language-dependent case mappings.
100 * @param csm UCaseMap service object.
101 * @param locale Locale ID, see ucasemap_open().
102 * @param pErrorCode Must be a valid pointer to an error code value,
103 * which must not indicate a failure before the function call.
108 U_STABLE
void U_EXPORT2
109 ucasemap_setLocale(UCaseMap
*csm
, const char *locale
, UErrorCode
*pErrorCode
);
112 * Set the options bit set that is used for case folding and string comparisons.
114 * @param csm UCaseMap service object.
115 * @param options Options bit set, see ucasemap_open().
116 * @param pErrorCode Must be a valid pointer to an error code value,
117 * which must not indicate a failure before the function call.
122 U_STABLE
void U_EXPORT2
123 ucasemap_setOptions(UCaseMap
*csm
, uint32_t options
, UErrorCode
*pErrorCode
);
/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the first cased character
 * of a word and lowercase all other characters.
 * With this option, the other characters will not be modified.
 *
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @see UnicodeString::toTitle
 */
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it by looking for the next cased character, and titlecase that one.
 * Other characters are lowercased.
 *
 * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
 *
 * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
 * cased character F. If F exists, map F to default_title(F); then map each
 * subsequent character C to default_lower(C).
 *
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @see UnicodeString::toTitle
 * @see U_TITLECASE_NO_LOWERCASE
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
166 #if !UCONFIG_NO_BREAK_ITERATION
169 * Get the break iterator that is used for titlecasing.
170 * Do not modify the returned break iterator.
171 * @param csm UCaseMap service object.
172 * @return titlecasing break iterator
175 U_DRAFT
const UBreakIterator
* U_EXPORT2
176 ucasemap_getBreakIterator(const UCaseMap
*csm
);
179 * Set the break iterator that is used for titlecasing.
180 * The UCaseMap service object releases a previously set break iterator
181 * and "adopts" this new one, taking ownership of it.
182 * It will be released in a subsequent call to ucasemap_setBreakIterator()
183 * or ucasemap_close().
185 * Break iterator operations are not thread-safe. Therefore, titlecasing
186 * functions use non-const UCaseMap objects. It is not possible to titlecase
187 * strings concurrently using the same UCaseMap.
189 * @param csm UCaseMap service object.
190 * @param iterToAdopt Break iterator to be adopted for titlecasing.
191 * @param pErrorCode Must be a valid pointer to an error code value,
192 * which must not indicate a failure before the function call.
194 * @see ucasemap_toTitle
195 * @see ucasemap_utf8ToTitle
198 U_DRAFT
void U_EXPORT2
199 ucasemap_setBreakIterator(UCaseMap
*csm
, UBreakIterator
*iterToAdopt
, UErrorCode
*pErrorCode
);
202 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
203 * except that it takes ucasemap_setOptions() into account and has performance
204 * advantages from being able to use a UCaseMap object for multiple case mapping
205 * operations, saving setup time.
207 * Casing is locale-dependent and context-sensitive.
208 * Titlecasing uses a break iterator to find the first characters of words
209 * that are to be titlecased. It titlecases those characters and lowercases
210 * all others. (This can be modified with ucasemap_setOptions().)
212 * The titlecase break iterator can be provided to customize for arbitrary
213 * styles, using rules and dictionaries beyond the standard iterators.
214 * It may be more efficient to always provide an iterator to avoid
215 * opening and closing one for each string.
216 * The standard titlecase iterator for the root locale implements the
217 * algorithm of Unicode TR 21.
219 * This function uses only the setText(), first() and next() methods of the
220 * provided break iterator.
222 * The result may be longer or shorter than the original.
223 * The source string and the destination buffer must not overlap.
225 * @param csm UCaseMap service object.
226 * @param dest A buffer for the result string. The result will be NUL-terminated if
227 * the buffer is large enough.
228 * The contents is undefined in case of failure.
229 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
230 * dest may be NULL and the function will only return the length of the result
231 * without writing any of the result string.
232 * @param src The original string.
233 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
234 * @param pErrorCode Must be a valid pointer to an error code value,
235 * which must not indicate a failure before the function call.
236 * @return The length of the result string, if successful - or in case of a buffer overflow,
237 * in which case it will be greater than destCapacity.
242 U_DRAFT
int32_t U_EXPORT2
243 ucasemap_toTitle(UCaseMap
*csm
,
244 UChar
*dest
, int32_t destCapacity
,
245 const UChar
*src
, int32_t srcLength
,
246 UErrorCode
*pErrorCode
);
251 * Lowercase the characters in a UTF-8 string.
252 * Casing is locale-dependent and context-sensitive.
253 * The result may be longer or shorter than the original.
254 * The source string and the destination buffer must not overlap.
256 * @param csm UCaseMap service object.
257 * @param dest A buffer for the result string. The result will be NUL-terminated if
258 * the buffer is large enough.
259 * The contents is undefined in case of failure.
260 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
261 * dest may be NULL and the function will only return the length of the result
262 * without writing any of the result string.
263 * @param src The original string.
264 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
265 * @param pErrorCode Must be a valid pointer to an error code value,
266 * which must not indicate a failure before the function call.
267 * @return The length of the result string, if successful - or in case of a buffer overflow,
268 * in which case it will be greater than destCapacity.
273 U_STABLE
int32_t U_EXPORT2
274 ucasemap_utf8ToLower(const UCaseMap
*csm
,
275 char *dest
, int32_t destCapacity
,
276 const char *src
, int32_t srcLength
,
277 UErrorCode
*pErrorCode
);
280 * Uppercase the characters in a UTF-8 string.
281 * Casing is locale-dependent and context-sensitive.
282 * The result may be longer or shorter than the original.
283 * The source string and the destination buffer must not overlap.
285 * @param csm UCaseMap service object.
286 * @param dest A buffer for the result string. The result will be NUL-terminated if
287 * the buffer is large enough.
288 * The contents is undefined in case of failure.
289 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
290 * dest may be NULL and the function will only return the length of the result
291 * without writing any of the result string.
292 * @param src The original string.
293 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
294 * @param pErrorCode Must be a valid pointer to an error code value,
295 * which must not indicate a failure before the function call.
296 * @return The length of the result string, if successful - or in case of a buffer overflow,
297 * in which case it will be greater than destCapacity.
302 U_STABLE
int32_t U_EXPORT2
303 ucasemap_utf8ToUpper(const UCaseMap
*csm
,
304 char *dest
, int32_t destCapacity
,
305 const char *src
, int32_t srcLength
,
306 UErrorCode
*pErrorCode
);
308 #if !UCONFIG_NO_BREAK_ITERATION
311 * Titlecase a UTF-8 string.
312 * Casing is locale-dependent and context-sensitive.
313 * Titlecasing uses a break iterator to find the first characters of words
314 * that are to be titlecased. It titlecases those characters and lowercases
315 * all others. (This can be modified with ucasemap_setOptions().)
317 * The titlecase break iterator can be provided to customize for arbitrary
318 * styles, using rules and dictionaries beyond the standard iterators.
319 * It may be more efficient to always provide an iterator to avoid
320 * opening and closing one for each string.
321 * The standard titlecase iterator for the root locale implements the
322 * algorithm of Unicode TR 21.
324 * This function uses only the setText(), first() and next() methods of the
325 * provided break iterator.
327 * The result may be longer or shorter than the original.
328 * The source string and the destination buffer must not overlap.
330 * @param csm UCaseMap service object.
331 * @param dest A buffer for the result string. The result will be NUL-terminated if
332 * the buffer is large enough.
333 * The contents is undefined in case of failure.
334 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
335 * dest may be NULL and the function will only return the length of the result
336 * without writing any of the result string.
337 * @param src The original string.
338 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
339 * @param pErrorCode Must be a valid pointer to an error code value,
340 * which must not indicate a failure before the function call.
341 * @return The length of the result string, if successful - or in case of a buffer overflow,
342 * in which case it will be greater than destCapacity.
345 * @see U_TITLECASE_NO_LOWERCASE
346 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
349 U_DRAFT
int32_t U_EXPORT2
350 ucasemap_utf8ToTitle(UCaseMap
*csm
,
351 char *dest
, int32_t destCapacity
,
352 const char *src
, int32_t srcLength
,
353 UErrorCode
*pErrorCode
);
358 * Case-fold the characters in a UTF-8 string.
359 * Case-folding is locale-independent and not context-sensitive,
360 * but there is an option for whether to include or exclude mappings for dotted I
361 * and dotless i that are marked with 'I' in CaseFolding.txt.
362 * The result may be longer or shorter than the original.
363 * The source string and the destination buffer must not overlap.
365 * @param csm UCaseMap service object.
366 * @param dest A buffer for the result string. The result will be NUL-terminated if
367 * the buffer is large enough.
368 * The contents is undefined in case of failure.
369 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
370 * dest may be NULL and the function will only return the length of the result
371 * without writing any of the result string.
372 * @param src The original string.
373 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
374 * @param pErrorCode Must be a valid pointer to an error code value,
375 * which must not indicate a failure before the function call.
376 * @return The length of the result string, if successful - or in case of a buffer overflow,
377 * in which case it will be greater than destCapacity.
380 * @see ucasemap_setOptions
381 * @see U_FOLD_CASE_DEFAULT
382 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
385 U_DRAFT
int32_t U_EXPORT2
386 ucasemap_utf8FoldCase(const UCaseMap
*csm
,
387 char *dest
, int32_t destCapacity
,
388 const char *src
, int32_t srcLength
,
389 UErrorCode
*pErrorCode
);