]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/casemap.h
e5ec8e8dae5f5192aeac59a39f855b7e5651e354
[apple/icu.git] / icuSources / common / unicode / casemap.h
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // casemap.h
5 // created: 2017jan12 Markus W. Scherer
6
7 #ifndef __CASEMAP_H__
8 #define __CASEMAP_H__
9
10 #include "unicode/utypes.h"
11 #include "unicode/stringpiece.h"
12 #include "unicode/uobject.h"
13
14 /**
15 * \file
16 * \brief C++ API: Low-level C++ case mapping functions.
17 */
18
19 #if U_SHOW_CPLUSPLUS_API
20 U_NAMESPACE_BEGIN
21
22 class BreakIterator;
23 class ByteSink;
24 class Edits;
25
26 /**
27 * Low-level C++ case mapping functions for UTF-16 and UTF-8 strings; all functions are static.
28 *
29 * @stable ICU 59
30 */
31 class U_COMMON_API CaseMap U_FINAL : public UMemory {
32 public:
33 /**
34 * Lowercases a UTF-16 string and optionally records edits.
35 * Casing is locale-dependent and context-sensitive.
36 * The result may be longer or shorter than the original.
37 * The source string and the destination buffer must not overlap.
38 *
39 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
40 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
41 * @param src The original string.
42 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
43 * @param dest A buffer for the result string. The result will be NUL-terminated if
44 * the buffer is large enough.
45 * The contents are undefined in case of failure.
46 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
47 * dest may be NULL and the function will only return the length of the result
48 * without writing any of the result string.
49 * @param edits Records edits for index mapping, working with styled text,
50 * and getting only changes (if any).
51 * The Edits contents are undefined if any error occurs.
52 * This function calls edits->reset() first unless
53 * options includes U_EDITS_NO_RESET. edits can be NULL.
54 * @param errorCode Reference to an in/out error code value
55 * which must not indicate a failure before the function call.
56 * @return The length of the result string, if successful.
57 * When the result would be longer than destCapacity,
58 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
59 *
60 * @see u_strToLower
61 * @stable ICU 59
62 */
63 static int32_t toLower(
64 const char *locale, uint32_t options,
65 const char16_t *src, int32_t srcLength,
66 char16_t *dest, int32_t destCapacity, Edits *edits,
67 UErrorCode &errorCode);
68
69 /**
70 * Uppercases a UTF-16 string and optionally records edits.
71 * Casing is locale-dependent and context-sensitive.
72 * The result may be longer or shorter than the original.
73 * The source string and the destination buffer must not overlap.
74 *
75 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
76 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
77 * @param src The original string.
78 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
79 * @param dest A buffer for the result string. The result will be NUL-terminated if
80 * the buffer is large enough.
81 * The contents are undefined in case of failure.
82 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
83 * dest may be NULL and the function will only return the length of the result
84 * without writing any of the result string.
85 * @param edits Records edits for index mapping, working with styled text,
86 * and getting only changes (if any).
87 * The Edits contents are undefined if any error occurs.
88 * This function calls edits->reset() first unless
89 * options includes U_EDITS_NO_RESET. edits can be NULL.
90 * @param errorCode Reference to an in/out error code value
91 * which must not indicate a failure before the function call.
92 * @return The length of the result string, if successful.
93 * When the result would be longer than destCapacity,
94 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
95 *
96 * @see u_strToUpper
97 * @stable ICU 59
98 */
99 static int32_t toUpper(
100 const char *locale, uint32_t options,
101 const char16_t *src, int32_t srcLength,
102 char16_t *dest, int32_t destCapacity, Edits *edits,
103 UErrorCode &errorCode);
104
105 #if !UCONFIG_NO_BREAK_ITERATION
106
107 /**
108 * Titlecases a UTF-16 string and optionally records edits.
109 * Casing is locale-dependent and context-sensitive.
110 * The result may be longer or shorter than the original.
111 * The source string and the destination buffer must not overlap.
112 *
113 * Titlecasing uses a break iterator to find the first characters of words
114 * that are to be titlecased. It titlecases those characters and lowercases
115 * all others. (This can be modified with options bits.)
116 *
117 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
118 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
119 * U_TITLECASE_NO_LOWERCASE,
120 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
121 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
122 * @param iter A break iterator to find the first characters of words that are to be titlecased.
123 * It is set to the source string (setText())
124 * and used one or more times for iteration (first() and next()).
125 * If NULL, then a word break iterator for the locale is used
126 * (or something equivalent).
127 * @param src The original string.
128 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
129 * @param dest A buffer for the result string. The result will be NUL-terminated if
130 * the buffer is large enough.
131 * The contents are undefined in case of failure.
132 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
133 * dest may be NULL and the function will only return the length of the result
134 * without writing any of the result string.
135 * @param edits Records edits for index mapping, working with styled text,
136 * and getting only changes (if any).
137 * The Edits contents are undefined if any error occurs.
138 * This function calls edits->reset() first unless
139 * options includes U_EDITS_NO_RESET. edits can be NULL.
140 * @param errorCode Reference to an in/out error code value
141 * which must not indicate a failure before the function call.
142 * @return The length of the result string, if successful.
143 * When the result would be longer than destCapacity,
144 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
145 *
146 * @see u_strToTitle
147 * @see ucasemap_toTitle
148 * @stable ICU 59
149 */
150 static int32_t toTitle(
151 const char *locale, uint32_t options, BreakIterator *iter,
152 const char16_t *src, int32_t srcLength,
153 char16_t *dest, int32_t destCapacity, Edits *edits,
154 UErrorCode &errorCode);
155
156 #endif // !UCONFIG_NO_BREAK_ITERATION
157
158 /**
159 * Case-folds a UTF-16 string and optionally records edits.
160 *
161 * Case folding is locale-independent and not context-sensitive,
162 * but there is an option for whether to include or exclude mappings for dotted I
163 * and dotless i that are marked with 'T' in CaseFolding.txt.
164 *
165 * The result may be longer or shorter than the original.
166 * The source string and the destination buffer must not overlap.
167 *
168 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
169 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
170 * @param src The original string.
171 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
172 * @param dest A buffer for the result string. The result will be NUL-terminated if
173 * the buffer is large enough.
174 * The contents are undefined in case of failure.
175 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
176 * dest may be NULL and the function will only return the length of the result
177 * without writing any of the result string.
178 * @param edits Records edits for index mapping, working with styled text,
179 * and getting only changes (if any).
180 * The Edits contents are undefined if any error occurs.
181 * This function calls edits->reset() first unless
182 * options includes U_EDITS_NO_RESET. edits can be NULL.
183 * @param errorCode Reference to an in/out error code value
184 * which must not indicate a failure before the function call.
185 * @return The length of the result string, if successful.
186 * When the result would be longer than destCapacity,
187 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
188 *
189 * @see u_strFoldCase
190 * @stable ICU 59
191 */
192 static int32_t fold(
193 uint32_t options,
194 const char16_t *src, int32_t srcLength,
195 char16_t *dest, int32_t destCapacity, Edits *edits,
196 UErrorCode &errorCode);
197
198 /**
199 * Lowercases a UTF-8 string, writes the result to a ByteSink, and optionally records edits.
200 * Casing is locale-dependent and context-sensitive.
201 * The result may be longer or shorter than the original.
202 *
203 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
204 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
205 * @param src The original string.
206 * @param sink A ByteSink to which the result string is written.
207 * sink.Flush() is called at the end.
208 * @param edits Records edits for index mapping, working with styled text,
209 * and getting only changes (if any).
210 * The Edits contents are undefined if any error occurs.
211 * This function calls edits->reset() first unless
212 * options includes U_EDITS_NO_RESET. edits can be NULL.
213 * @param errorCode Reference to an in/out error code value
214 * which must not indicate a failure before the function call.
215 *
216 * @see ucasemap_utf8ToLower
217 * @stable ICU 60
218 */
219 static void utf8ToLower(
220 const char *locale, uint32_t options,
221 StringPiece src, ByteSink &sink, Edits *edits,
222 UErrorCode &errorCode);
223
224 /**
225 * Uppercases a UTF-8 string, writes the result to a ByteSink, and optionally records edits.
226 * Casing is locale-dependent and context-sensitive.
227 * The result may be longer or shorter than the original.
228 *
229 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
230 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
231 * @param src The original string.
232 * @param sink A ByteSink to which the result string is written.
233 * sink.Flush() is called at the end.
234 * @param edits Records edits for index mapping, working with styled text,
235 * and getting only changes (if any).
236 * The Edits contents are undefined if any error occurs.
237 * This function calls edits->reset() first unless
238 * options includes U_EDITS_NO_RESET. edits can be NULL.
239 * @param errorCode Reference to an in/out error code value
240 * which must not indicate a failure before the function call.
241 *
242 * @see ucasemap_utf8ToUpper
243 * @stable ICU 60
244 */
245 static void utf8ToUpper(
246 const char *locale, uint32_t options,
247 StringPiece src, ByteSink &sink, Edits *edits,
248 UErrorCode &errorCode);
249
250 #if !UCONFIG_NO_BREAK_ITERATION
251
252 /**
253 * Titlecases a UTF-8 string, writes the result to a ByteSink, and optionally records edits.
254 * Casing is locale-dependent and context-sensitive.
255 * The result may be longer or shorter than the original.
256 *
257 * Titlecasing uses a break iterator to find the first characters of words
258 * that are to be titlecased. It titlecases those characters and lowercases
259 * all others. (This can be modified with options bits.)
260 *
261 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
262 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
263 * U_TITLECASE_NO_LOWERCASE,
264 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
265 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
266 * @param iter A break iterator to find the first characters of words that are to be titlecased.
267 * It is set to the source string (setUText())
268 * and used one or more times for iteration (first() and next()).
269 * If NULL, then a word break iterator for the locale is used
270 * (or something equivalent).
271 * @param src The original string.
272 * @param sink A ByteSink to which the result string is written.
273 * sink.Flush() is called at the end.
274 * @param edits Records edits for index mapping, working with styled text,
275 * and getting only changes (if any).
276 * The Edits contents are undefined if any error occurs.
277 * This function calls edits->reset() first unless
278 * options includes U_EDITS_NO_RESET. edits can be NULL.
279 * @param errorCode Reference to an in/out error code value
280 * which must not indicate a failure before the function call.
281 *
282 * @see ucasemap_utf8ToTitle
283 * @stable ICU 60
284 */
285 static void utf8ToTitle(
286 const char *locale, uint32_t options, BreakIterator *iter,
287 StringPiece src, ByteSink &sink, Edits *edits,
288 UErrorCode &errorCode);
289
290 #endif // !UCONFIG_NO_BREAK_ITERATION
291
292 /**
293 * Case-folds a UTF-8 string, writes the result to a ByteSink, and optionally records edits.
294 *
295 * Case folding is locale-independent and not context-sensitive,
296 * but there is an option for whether to include or exclude mappings for dotted I
297 * and dotless i that are marked with 'T' in CaseFolding.txt.
298 *
299 * The result may be longer or shorter than the original.
300 *
301 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
302 * @param src The original string.
303 * @param sink A ByteSink to which the result string is written.
304 * sink.Flush() is called at the end.
305 * @param edits Records edits for index mapping, working with styled text,
306 * and getting only changes (if any).
307 * The Edits contents are undefined if any error occurs.
308 * This function calls edits->reset() first unless
309 * options includes U_EDITS_NO_RESET. edits can be NULL.
310 * @param errorCode Reference to an in/out error code value
311 * which must not indicate a failure before the function call.
312 *
313 * @see ucasemap_utf8FoldCase
314 * @stable ICU 60
315 */
316 static void utf8Fold(
317 uint32_t options,
318 StringPiece src, ByteSink &sink, Edits *edits,
319 UErrorCode &errorCode);
320
321 /**
322 * Lowercases a UTF-8 string into a caller-provided buffer and optionally records edits.
323 * Casing is locale-dependent and context-sensitive.
324 * The result may be longer or shorter than the original.
325 * The source string and the destination buffer must not overlap.
326 *
327 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
328 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
329 * @param src The original string.
330 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
331 * @param dest A buffer for the result string. The result will be NUL-terminated if
332 * the buffer is large enough.
333 * The contents are undefined in case of failure.
334 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
335 * dest may be NULL and the function will only return the length of the result
336 * without writing any of the result string.
337 * @param edits Records edits for index mapping, working with styled text,
338 * and getting only changes (if any).
339 * The Edits contents are undefined if any error occurs.
340 * This function calls edits->reset() first unless
341 * options includes U_EDITS_NO_RESET. edits can be NULL.
342 * @param errorCode Reference to an in/out error code value
343 * which must not indicate a failure before the function call.
344 * @return The length of the result string, if successful.
345 * When the result would be longer than destCapacity,
346 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
347 *
348 * @see ucasemap_utf8ToLower
349 * @stable ICU 59
350 */
351 static int32_t utf8ToLower(
352 const char *locale, uint32_t options,
353 const char *src, int32_t srcLength,
354 char *dest, int32_t destCapacity, Edits *edits,
355 UErrorCode &errorCode);
356
357 /**
358 * Uppercases a UTF-8 string into a caller-provided buffer and optionally records edits.
359 * Casing is locale-dependent and context-sensitive.
360 * The result may be longer or shorter than the original.
361 * The source string and the destination buffer must not overlap.
362 *
363 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
364 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
365 * @param src The original string.
366 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
367 * @param dest A buffer for the result string. The result will be NUL-terminated if
368 * the buffer is large enough.
369 * The contents are undefined in case of failure.
370 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
371 * dest may be NULL and the function will only return the length of the result
372 * without writing any of the result string.
373 * @param edits Records edits for index mapping, working with styled text,
374 * and getting only changes (if any).
375 * The Edits contents are undefined if any error occurs.
376 * This function calls edits->reset() first unless
377 * options includes U_EDITS_NO_RESET. edits can be NULL.
378 * @param errorCode Reference to an in/out error code value
379 * which must not indicate a failure before the function call.
380 * @return The length of the result string, if successful.
381 * When the result would be longer than destCapacity,
382 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
383 *
384 * @see ucasemap_utf8ToUpper
385 * @stable ICU 59
386 */
387 static int32_t utf8ToUpper(
388 const char *locale, uint32_t options,
389 const char *src, int32_t srcLength,
390 char *dest, int32_t destCapacity, Edits *edits,
391 UErrorCode &errorCode);
392
393 #if !UCONFIG_NO_BREAK_ITERATION
394
395 /**
396 * Titlecases a UTF-8 string into a caller-provided buffer and optionally records edits.
397 * Casing is locale-dependent and context-sensitive.
398 * The result may be longer or shorter than the original.
399 * The source string and the destination buffer must not overlap.
400 *
401 * Titlecasing uses a break iterator to find the first characters of words
402 * that are to be titlecased. It titlecases those characters and lowercases
403 * all others. (This can be modified with options bits.)
404 *
405 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
406 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
407 * U_TITLECASE_NO_LOWERCASE,
408 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
409 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
410 * @param iter A break iterator to find the first characters of words that are to be titlecased.
411 * It is set to the source string (setUText())
412 * and used one or more times for iteration (first() and next()).
413 * If NULL, then a word break iterator for the locale is used
414 * (or something equivalent).
415 * @param src The original string.
416 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
417 * @param dest A buffer for the result string. The result will be NUL-terminated if
418 * the buffer is large enough.
419 * The contents are undefined in case of failure.
420 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
421 * dest may be NULL and the function will only return the length of the result
422 * without writing any of the result string.
423 * @param edits Records edits for index mapping, working with styled text,
424 * and getting only changes (if any).
425 * The Edits contents are undefined if any error occurs.
426 * This function calls edits->reset() first unless
427 * options includes U_EDITS_NO_RESET. edits can be NULL.
428 * @param errorCode Reference to an in/out error code value
429 * which must not indicate a failure before the function call.
430 * @return The length of the result string, if successful.
431 * When the result would be longer than destCapacity,
432 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
433 *
434 * @see ucasemap_utf8ToTitle
435 * @stable ICU 59
436 */
437 static int32_t utf8ToTitle(
438 const char *locale, uint32_t options, BreakIterator *iter,
439 const char *src, int32_t srcLength,
440 char *dest, int32_t destCapacity, Edits *edits,
441 UErrorCode &errorCode);
442
443 #endif // !UCONFIG_NO_BREAK_ITERATION
444
445 /**
446 * Case-folds a UTF-8 string into a caller-provided buffer and optionally records edits.
447 *
448 * Case folding is locale-independent and not context-sensitive,
449 * but there is an option for whether to include or exclude mappings for dotted I
450 * and dotless i that are marked with 'T' in CaseFolding.txt.
451 *
452 * The result may be longer or shorter than the original.
453 * The source string and the destination buffer must not overlap.
454 *
455 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
456 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
457 * @param src The original string.
458 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
459 * @param dest A buffer for the result string. The result will be NUL-terminated if
460 * the buffer is large enough.
461 * The contents are undefined in case of failure.
462 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
463 * dest may be NULL and the function will only return the length of the result
464 * without writing any of the result string.
465 * @param edits Records edits for index mapping, working with styled text,
466 * and getting only changes (if any).
467 * The Edits contents are undefined if any error occurs.
468 * This function calls edits->reset() first unless
469 * options includes U_EDITS_NO_RESET. edits can be NULL.
470 * @param errorCode Reference to an in/out error code value
471 * which must not indicate a failure before the function call.
472 * @return The length of the result string, if successful.
473 * When the result would be longer than destCapacity,
474 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
475 *
476 * @see ucasemap_utf8FoldCase
477 * @stable ICU 59
478 */
479 static int32_t utf8Fold(
480 uint32_t options,
481 const char *src, int32_t srcLength,
482 char *dest, int32_t destCapacity, Edits *edits,
483 UErrorCode &errorCode);
484
485 private:
486 CaseMap() = delete; // static-only class: no instances can be created
487 CaseMap(const CaseMap &other) = delete; // not copyable
488 CaseMap &operator=(const CaseMap &other) = delete; // not assignable
489 };
490
491 U_NAMESPACE_END
492 #endif // U_SHOW_CPLUSPLUS_API
493
494 #endif // __CASEMAP_H__