]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/casemap.h
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / casemap.h
CommitLineData
f3c0d7a5
A
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// casemap.h
// created: 2017jan12 Markus W. Scherer
6
7#ifndef __CASEMAP_H__
8#define __CASEMAP_H__
9
10#include "unicode/utypes.h"
11#include "unicode/uobject.h"
12
/**
 * \file
 * \brief C++ API: Low-level C++ case mapping functions.
 */
17
18#if U_SHOW_CPLUSPLUS_API
19U_NAMESPACE_BEGIN
20
21#ifndef U_HIDE_DRAFT_API
22
23class BreakIterator;
24class Edits;
25
26/**
27 * Low-level C++ case mapping functions.
28 *
29 * @draft ICU 59
30 */
31class U_COMMON_API CaseMap U_FINAL : public UMemory {
32public:
33 /**
34 * Lowercases a UTF-16 string and optionally records edits.
35 * Casing is locale-dependent and context-sensitive.
36 * The result may be longer or shorter than the original.
37 * The source string and the destination buffer must not overlap.
38 *
39 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
40 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
41 * @param src The original string.
42 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
43 * @param dest A buffer for the result string. The result will be NUL-terminated if
44 * the buffer is large enough.
45 * The contents is undefined in case of failure.
46 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
47 * dest may be NULL and the function will only return the length of the result
48 * without writing any of the result string.
49 * @param edits Records edits for index mapping, working with styled text,
50 * and getting only changes (if any).
51 * The Edits contents is undefined if any error occurs.
52 * This function calls edits->reset() first. edits can be NULL.
53 * @param errorCode Reference to an in/out error code value
54 * which must not indicate a failure before the function call.
55 * @return The length of the result string, if successful.
56 * When the result would be longer than destCapacity,
57 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
58 *
59 * @see u_strToLower
60 * @draft ICU 59
61 */
62 static int32_t toLower(
63 const char *locale, uint32_t options,
64 const char16_t *src, int32_t srcLength,
65 char16_t *dest, int32_t destCapacity, Edits *edits,
66 UErrorCode &errorCode);
67
68 /**
69 * Uppercases a UTF-16 string and optionally records edits.
70 * Casing is locale-dependent and context-sensitive.
71 * The result may be longer or shorter than the original.
72 * The source string and the destination buffer must not overlap.
73 *
74 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
75 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
76 * @param src The original string.
77 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
78 * @param dest A buffer for the result string. The result will be NUL-terminated if
79 * the buffer is large enough.
80 * The contents is undefined in case of failure.
81 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
82 * dest may be NULL and the function will only return the length of the result
83 * without writing any of the result string.
84 * @param edits Records edits for index mapping, working with styled text,
85 * and getting only changes (if any).
86 * The Edits contents is undefined if any error occurs.
87 * This function calls edits->reset() first. edits can be NULL.
88 * @param errorCode Reference to an in/out error code value
89 * which must not indicate a failure before the function call.
90 * @return The length of the result string, if successful.
91 * When the result would be longer than destCapacity,
92 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
93 *
94 * @see u_strToUpper
95 * @draft ICU 59
96 */
97 static int32_t toUpper(
98 const char *locale, uint32_t options,
99 const char16_t *src, int32_t srcLength,
100 char16_t *dest, int32_t destCapacity, Edits *edits,
101 UErrorCode &errorCode);
102
103#if !UCONFIG_NO_BREAK_ITERATION
104
105 /**
106 * Titlecases a UTF-16 string and optionally records edits.
107 * Casing is locale-dependent and context-sensitive.
108 * The result may be longer or shorter than the original.
109 * The source string and the destination buffer must not overlap.
110 *
111 * Titlecasing uses a break iterator to find the first characters of words
112 * that are to be titlecased. It titlecases those characters and lowercases
113 * all others. (This can be modified with options bits.)
114 *
115 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
116 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
117 * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
118 * @param iter A break iterator to find the first characters of words that are to be titlecased.
119 * It is set to the source string (setText())
120 * and used one or more times for iteration (first() and next()).
121 * If NULL, then a word break iterator for the locale is used
122 * (or something equivalent).
123 * @param src The original string.
124 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
125 * @param dest A buffer for the result string. The result will be NUL-terminated if
126 * the buffer is large enough.
127 * The contents is undefined in case of failure.
128 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
129 * dest may be NULL and the function will only return the length of the result
130 * without writing any of the result string.
131 * @param edits Records edits for index mapping, working with styled text,
132 * and getting only changes (if any).
133 * The Edits contents is undefined if any error occurs.
134 * This function calls edits->reset() first. edits can be NULL.
135 * @param errorCode Reference to an in/out error code value
136 * which must not indicate a failure before the function call.
137 * @return The length of the result string, if successful.
138 * When the result would be longer than destCapacity,
139 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
140 *
141 * @see u_strToTitle
142 * @see ucasemap_toTitle
143 * @draft ICU 59
144 */
145 static int32_t toTitle(
146 const char *locale, uint32_t options, BreakIterator *iter,
147 const char16_t *src, int32_t srcLength,
148 char16_t *dest, int32_t destCapacity, Edits *edits,
149 UErrorCode &errorCode);
150
151#endif // UCONFIG_NO_BREAK_ITERATION
152
153 /**
154 * Case-folds a UTF-16 string and optionally records edits.
155 *
156 * Case folding is locale-independent and not context-sensitive,
157 * but there is an option for whether to include or exclude mappings for dotted I
158 * and dotless i that are marked with 'T' in CaseFolding.txt.
159 *
160 * The result may be longer or shorter than the original.
161 * The source string and the destination buffer must not overlap.
162 *
163 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
164 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
165 * @param src The original string.
166 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
167 * @param dest A buffer for the result string. The result will be NUL-terminated if
168 * the buffer is large enough.
169 * The contents is undefined in case of failure.
170 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
171 * dest may be NULL and the function will only return the length of the result
172 * without writing any of the result string.
173 * @param edits Records edits for index mapping, working with styled text,
174 * and getting only changes (if any).
175 * The Edits contents is undefined if any error occurs.
176 * This function calls edits->reset() first. edits can be NULL.
177 * @param errorCode Reference to an in/out error code value
178 * which must not indicate a failure before the function call.
179 * @return The length of the result string, if successful.
180 * When the result would be longer than destCapacity,
181 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
182 *
183 * @see u_strFoldCase
184 * @draft ICU 59
185 */
186 static int32_t fold(
187 uint32_t options,
188 const char16_t *src, int32_t srcLength,
189 char16_t *dest, int32_t destCapacity, Edits *edits,
190 UErrorCode &errorCode);
191
192 /**
193 * Lowercases a UTF-8 string and optionally records edits.
194 * Casing is locale-dependent and context-sensitive.
195 * The result may be longer or shorter than the original.
196 * The source string and the destination buffer must not overlap.
197 *
198 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
199 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
200 * @param src The original string.
201 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
202 * @param dest A buffer for the result string. The result will be NUL-terminated if
203 * the buffer is large enough.
204 * The contents is undefined in case of failure.
205 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
206 * dest may be NULL and the function will only return the length of the result
207 * without writing any of the result string.
208 * @param edits Records edits for index mapping, working with styled text,
209 * and getting only changes (if any).
210 * The Edits contents is undefined if any error occurs.
211 * This function calls edits->reset() first. edits can be NULL.
212 * @param errorCode Reference to an in/out error code value
213 * which must not indicate a failure before the function call.
214 * @return The length of the result string, if successful.
215 * When the result would be longer than destCapacity,
216 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
217 *
218 * @see ucasemap_utf8ToLower
219 * @draft ICU 59
220 */
221 static int32_t utf8ToLower(
222 const char *locale, uint32_t options,
223 const char *src, int32_t srcLength,
224 char *dest, int32_t destCapacity, Edits *edits,
225 UErrorCode &errorCode);
226
227 /**
228 * Uppercases a UTF-8 string and optionally records edits.
229 * Casing is locale-dependent and context-sensitive.
230 * The result may be longer or shorter than the original.
231 * The source string and the destination buffer must not overlap.
232 *
233 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
234 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
235 * @param src The original string.
236 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
237 * @param dest A buffer for the result string. The result will be NUL-terminated if
238 * the buffer is large enough.
239 * The contents is undefined in case of failure.
240 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
241 * dest may be NULL and the function will only return the length of the result
242 * without writing any of the result string.
243 * @param edits Records edits for index mapping, working with styled text,
244 * and getting only changes (if any).
245 * The Edits contents is undefined if any error occurs.
246 * This function calls edits->reset() first. edits can be NULL.
247 * @param errorCode Reference to an in/out error code value
248 * which must not indicate a failure before the function call.
249 * @return The length of the result string, if successful.
250 * When the result would be longer than destCapacity,
251 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
252 *
253 * @see ucasemap_utf8ToUpper
254 * @draft ICU 59
255 */
256 static int32_t utf8ToUpper(
257 const char *locale, uint32_t options,
258 const char *src, int32_t srcLength,
259 char *dest, int32_t destCapacity, Edits *edits,
260 UErrorCode &errorCode);
261
262#if !UCONFIG_NO_BREAK_ITERATION
263
264 /**
265 * Titlecases a UTF-8 string and optionally records edits.
266 * Casing is locale-dependent and context-sensitive.
267 * The result may be longer or shorter than the original.
268 * The source string and the destination buffer must not overlap.
269 *
270 * Titlecasing uses a break iterator to find the first characters of words
271 * that are to be titlecased. It titlecases those characters and lowercases
272 * all others. (This can be modified with options bits.)
273 *
274 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
275 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
276 * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
277 * @param iter A break iterator to find the first characters of words that are to be titlecased.
278 * It is set to the source string (setText())
279 * and used one or more times for iteration (first() and next()).
280 * If NULL, then a word break iterator for the locale is used
281 * (or something equivalent).
282 * @param src The original string.
283 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
284 * @param dest A buffer for the result string. The result will be NUL-terminated if
285 * the buffer is large enough.
286 * The contents is undefined in case of failure.
287 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
288 * dest may be NULL and the function will only return the length of the result
289 * without writing any of the result string.
290 * @param edits Records edits for index mapping, working with styled text,
291 * and getting only changes (if any).
292 * The Edits contents is undefined if any error occurs.
293 * This function calls edits->reset() first. edits can be NULL.
294 * @param errorCode Reference to an in/out error code value
295 * which must not indicate a failure before the function call.
296 * @return The length of the result string, if successful.
297 * When the result would be longer than destCapacity,
298 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
299 *
300 * @see ucasemap_utf8ToTitle
301 * @draft ICU 59
302 */
303 static int32_t utf8ToTitle(
304 const char *locale, uint32_t options, BreakIterator *iter,
305 const char *src, int32_t srcLength,
306 char *dest, int32_t destCapacity, Edits *edits,
307 UErrorCode &errorCode);
308
309#endif // UCONFIG_NO_BREAK_ITERATION
310
311 /**
312 * Case-folds a UTF-8 string and optionally records edits.
313 *
314 * Case folding is locale-independent and not context-sensitive,
315 * but there is an option for whether to include or exclude mappings for dotted I
316 * and dotless i that are marked with 'T' in CaseFolding.txt.
317 *
318 * The result may be longer or shorter than the original.
319 * The source string and the destination buffer must not overlap.
320 *
321 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
322 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
323 * @param src The original string.
324 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
325 * @param dest A buffer for the result string. The result will be NUL-terminated if
326 * the buffer is large enough.
327 * The contents is undefined in case of failure.
328 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
329 * dest may be NULL and the function will only return the length of the result
330 * without writing any of the result string.
331 * @param edits Records edits for index mapping, working with styled text,
332 * and getting only changes (if any).
333 * The Edits contents is undefined if any error occurs.
334 * This function calls edits->reset() first. edits can be NULL.
335 * @param errorCode Reference to an in/out error code value
336 * which must not indicate a failure before the function call.
337 * @return The length of the result string, if successful.
338 * When the result would be longer than destCapacity,
339 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
340 *
341 * @see ucasemap_utf8FoldCase
342 * @draft ICU 59
343 */
344 static int32_t utf8Fold(
345 uint32_t options,
346 const char *src, int32_t srcLength,
347 char *dest, int32_t destCapacity, Edits *edits,
348 UErrorCode &errorCode);
349
350private:
351 CaseMap() = delete;
352 CaseMap(const CaseMap &other) = delete;
353 CaseMap &operator=(const CaseMap &other) = delete;
354};
355
356#endif // U_HIDE_DRAFT_API
357
358U_NAMESPACE_END
359#endif // U_SHOW_CPLUSPLUS_API
360
361#endif // __CASEMAP_H__