git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/casemap.h
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / casemap.h
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // casemap.h
5 // created: 2017jan12 Markus W. Scherer
6
7 #ifndef __CASEMAP_H__
8 #define __CASEMAP_H__
9
10 #include "unicode/utypes.h"
11 #include "unicode/stringpiece.h"
12 #include "unicode/uobject.h"
13
14 /**
15 * \file
16 * \brief C++ API: Low-level C++ case mapping functions.
17 */
18
19 #if U_SHOW_CPLUSPLUS_API
20 U_NAMESPACE_BEGIN
21
22 class BreakIterator;
23 class ByteSink;
24 class Edits;
25
26 /**
27 * Low-level C++ case mapping functions.
28 *
29 * @stable ICU 59
30 */
31 class U_COMMON_API CaseMap U_FINAL : public UMemory {
32 public:
33 /**
34 * Lowercases a UTF-16 string and optionally records edits.
35 * Casing is locale-dependent and context-sensitive.
36 * The result may be longer or shorter than the original.
37 * The source string and the destination buffer must not overlap.
38 *
39 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
40 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
41 * @param src The original UTF-16 string.
42 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
43 * @param dest A buffer for the result string. The result will be NUL-terminated if
44 * the buffer is large enough.
45 * The contents are undefined in case of failure.
46 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
47 * dest may be NULL and the function will only return the length of the result
48 * without writing any of the result string.
49 * @param edits Records edits for index mapping, working with styled text,
50 * and getting only changes (if any).
51 * The Edits contents are undefined if any error occurs.
52 * This function calls edits->reset() first unless
53 * options includes U_EDITS_NO_RESET. edits can be NULL.
54 * @param errorCode Reference to an in/out error code value
55 * which must not indicate a failure before the function call.
56 * @return The length of the result string, if successful.
57 * When the result would be longer than destCapacity,
58 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
59 *
60 * @see u_strToLower
61 * @stable ICU 59
62 */
63 static int32_t toLower(
64 const char *locale, uint32_t options,
65 const char16_t *src, int32_t srcLength,
66 char16_t *dest, int32_t destCapacity, Edits *edits,
67 UErrorCode &errorCode);
68
69 /**
70 * Uppercases a UTF-16 string and optionally records edits.
71 * Casing is locale-dependent and context-sensitive.
72 * The result may be longer or shorter than the original.
73 * The source string and the destination buffer must not overlap.
74 *
75 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
76 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
77 * @param src The original UTF-16 string.
78 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
79 * @param dest A buffer for the result string. The result will be NUL-terminated if
80 * the buffer is large enough.
81 * The contents are undefined in case of failure.
82 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
83 * dest may be NULL and the function will only return the length of the result
84 * without writing any of the result string.
85 * @param edits Records edits for index mapping, working with styled text,
86 * and getting only changes (if any).
87 * The Edits contents are undefined if any error occurs.
88 * This function calls edits->reset() first unless
89 * options includes U_EDITS_NO_RESET. edits can be NULL.
90 * @param errorCode Reference to an in/out error code value
91 * which must not indicate a failure before the function call.
92 * @return The length of the result string, if successful.
93 * When the result would be longer than destCapacity,
94 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
95 *
96 * @see u_strToUpper
97 * @stable ICU 59
98 */
99 static int32_t toUpper(
100 const char *locale, uint32_t options,
101 const char16_t *src, int32_t srcLength,
102 char16_t *dest, int32_t destCapacity, Edits *edits,
103 UErrorCode &errorCode);
104
105 #if !UCONFIG_NO_BREAK_ITERATION
106
107 /**
108 * Titlecases a UTF-16 string and optionally records edits.
109 * Casing is locale-dependent and context-sensitive.
110 * The result may be longer or shorter than the original.
111 * The source string and the destination buffer must not overlap.
112 *
113 * Titlecasing uses a break iterator to find the first characters of words
114 * that are to be titlecased. It titlecases those characters and lowercases
115 * all others. (This can be modified with options bits.)
116 *
117 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
118 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
119 * U_TITLECASE_NO_LOWERCASE,
120 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
121 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
122 * @param iter A break iterator to find the first characters of words that are to be titlecased.
123 * It is set to the source string (setText())
124 * and used one or more times for iteration (first() and next()).
125 * If NULL, then a word break iterator for the locale is used
126 * (or something equivalent).
127 * @param src The original UTF-16 string.
128 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
129 * @param dest A buffer for the result string. The result will be NUL-terminated if
130 * the buffer is large enough.
131 * The contents are undefined in case of failure.
132 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
133 * dest may be NULL and the function will only return the length of the result
134 * without writing any of the result string.
135 * @param edits Records edits for index mapping, working with styled text,
136 * and getting only changes (if any).
137 * The Edits contents are undefined if any error occurs.
138 * This function calls edits->reset() first unless
139 * options includes U_EDITS_NO_RESET. edits can be NULL.
140 * @param errorCode Reference to an in/out error code value
141 * which must not indicate a failure before the function call.
142 * @return The length of the result string, if successful.
143 * When the result would be longer than destCapacity,
144 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
145 *
146 * @see u_strToTitle
147 * @see ucasemap_toTitle
148 * @stable ICU 59
149 */
150 static int32_t toTitle(
151 const char *locale, uint32_t options, BreakIterator *iter,
152 const char16_t *src, int32_t srcLength,
153 char16_t *dest, int32_t destCapacity, Edits *edits,
154 UErrorCode &errorCode);
155
156 #endif // UCONFIG_NO_BREAK_ITERATION
157
158 /**
159 * Case-folds a UTF-16 string and optionally records edits.
160 *
161 * Case folding is locale-independent and not context-sensitive,
162 * but there is an option for whether to include or exclude mappings for dotted I
163 * and dotless i that are marked with 'T' in CaseFolding.txt.
164 *
165 * The result may be longer or shorter than the original.
166 * The source string and the destination buffer must not overlap.
167 *
168 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
169 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
170 * @param src The original UTF-16 string.
171 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
172 * @param dest A buffer for the result string. The result will be NUL-terminated if
173 * the buffer is large enough.
174 * The contents are undefined in case of failure.
175 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
176 * dest may be NULL and the function will only return the length of the result
177 * without writing any of the result string.
178 * @param edits Records edits for index mapping, working with styled text,
179 * and getting only changes (if any).
180 * The Edits contents are undefined if any error occurs.
181 * This function calls edits->reset() first unless
182 * options includes U_EDITS_NO_RESET. edits can be NULL.
183 * @param errorCode Reference to an in/out error code value
184 * which must not indicate a failure before the function call.
185 * @return The length of the result string, if successful.
186 * When the result would be longer than destCapacity,
187 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
188 *
189 * @see u_strFoldCase
190 * @stable ICU 59
191 */
192 static int32_t fold(
193 uint32_t options,
194 const char16_t *src, int32_t srcLength,
195 char16_t *dest, int32_t destCapacity, Edits *edits,
196 UErrorCode &errorCode);
197
198 #ifndef U_HIDE_DRAFT_API
199 /**
200 * Lowercases a UTF-8 string and optionally records edits.
201 * Casing is locale-dependent and context-sensitive.
202 * The result may be longer or shorter than the original.
203 *
204 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
205 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
206 * @param src The original UTF-8 string.
207 * @param sink A ByteSink to which the result string is written.
208 * sink.Flush() is called at the end.
209 * @param edits Records edits for index mapping, working with styled text,
210 * and getting only changes (if any).
211 * The Edits contents are undefined if any error occurs.
212 * This function calls edits->reset() first unless
213 * options includes U_EDITS_NO_RESET. edits can be NULL.
214 * @param errorCode Reference to an in/out error code value
215 * which must not indicate a failure before the function call.
216 *
217 * @see ucasemap_utf8ToLower
218 * @draft ICU 60
219 */
220 static void utf8ToLower(
221 const char *locale, uint32_t options,
222 StringPiece src, ByteSink &sink, Edits *edits,
223 UErrorCode &errorCode);
224
225 /**
226 * Uppercases a UTF-8 string and optionally records edits.
227 * Casing is locale-dependent and context-sensitive.
228 * The result may be longer or shorter than the original.
229 *
230 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
231 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
232 * @param src The original UTF-8 string.
233 * @param sink A ByteSink to which the result string is written.
234 * sink.Flush() is called at the end.
235 * @param edits Records edits for index mapping, working with styled text,
236 * and getting only changes (if any).
237 * The Edits contents are undefined if any error occurs.
238 * This function calls edits->reset() first unless
239 * options includes U_EDITS_NO_RESET. edits can be NULL.
240 * @param errorCode Reference to an in/out error code value
241 * which must not indicate a failure before the function call.
242 *
243 * @see ucasemap_utf8ToUpper
244 * @draft ICU 60
245 */
246 static void utf8ToUpper(
247 const char *locale, uint32_t options,
248 StringPiece src, ByteSink &sink, Edits *edits,
249 UErrorCode &errorCode);
250
251 #if !UCONFIG_NO_BREAK_ITERATION
252
253 /**
254 * Titlecases a UTF-8 string and optionally records edits.
255 * Casing is locale-dependent and context-sensitive.
256 * The result may be longer or shorter than the original.
257 *
258 * Titlecasing uses a break iterator to find the first characters of words
259 * that are to be titlecased. It titlecases those characters and lowercases
260 * all others. (This can be modified with options bits.)
261 *
262 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
263 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
264 * U_TITLECASE_NO_LOWERCASE,
265 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
266 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
267 * @param iter A break iterator to find the first characters of words that are to be titlecased.
268 * It is set to the source string (setUText())
269 * and used one or more times for iteration (first() and next()).
270 * If NULL, then a word break iterator for the locale is used
271 * (or something equivalent).
272 * @param src The original UTF-8 string.
273 * @param sink A ByteSink to which the result string is written.
274 * sink.Flush() is called at the end.
275 * @param edits Records edits for index mapping, working with styled text,
276 * and getting only changes (if any).
277 * The Edits contents are undefined if any error occurs.
278 * This function calls edits->reset() first unless
279 * options includes U_EDITS_NO_RESET. edits can be NULL.
280 * @param errorCode Reference to an in/out error code value
281 * which must not indicate a failure before the function call.
282 *
283 * @see ucasemap_utf8ToTitle
284 * @draft ICU 60
285 */
286 static void utf8ToTitle(
287 const char *locale, uint32_t options, BreakIterator *iter,
288 StringPiece src, ByteSink &sink, Edits *edits,
289 UErrorCode &errorCode);
290
291 #endif // UCONFIG_NO_BREAK_ITERATION
292
293 /**
294 * Case-folds a UTF-8 string and optionally records edits.
295 *
296 * Case folding is locale-independent and not context-sensitive,
297 * but there is an option for whether to include or exclude mappings for dotted I
298 * and dotless i that are marked with 'T' in CaseFolding.txt.
299 *
300 * The result may be longer or shorter than the original.
301 *
302 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
303 * @param src The original UTF-8 string.
304 * @param sink A ByteSink to which the result string is written.
305 * sink.Flush() is called at the end.
306 * @param edits Records edits for index mapping, working with styled text,
307 * and getting only changes (if any).
308 * The Edits contents are undefined if any error occurs.
309 * This function calls edits->reset() first unless
310 * options includes U_EDITS_NO_RESET. edits can be NULL.
311 * @param errorCode Reference to an in/out error code value
312 * which must not indicate a failure before the function call.
313 *
314 * @see ucasemap_utf8FoldCase
315 * @draft ICU 60
316 */
317 static void utf8Fold(
318 uint32_t options,
319 StringPiece src, ByteSink &sink, Edits *edits,
320 UErrorCode &errorCode);
321 #endif // U_HIDE_DRAFT_API
322
323 /**
324 * Lowercases a UTF-8 string and optionally records edits.
325 * Casing is locale-dependent and context-sensitive.
326 * The result may be longer or shorter than the original.
327 * The source string and the destination buffer must not overlap.
328 *
329 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
330 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
331 * @param src The original UTF-8 string.
332 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
333 * @param dest A buffer for the result string. The result will be NUL-terminated if
334 * the buffer is large enough.
335 * The contents are undefined in case of failure.
336 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
337 * dest may be NULL and the function will only return the length of the result
338 * without writing any of the result string.
339 * @param edits Records edits for index mapping, working with styled text,
340 * and getting only changes (if any).
341 * The Edits contents are undefined if any error occurs.
342 * This function calls edits->reset() first unless
343 * options includes U_EDITS_NO_RESET. edits can be NULL.
344 * @param errorCode Reference to an in/out error code value
345 * which must not indicate a failure before the function call.
346 * @return The length of the result string, if successful.
347 * When the result would be longer than destCapacity,
348 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
349 *
350 * @see ucasemap_utf8ToLower
351 * @stable ICU 59
352 */
353 static int32_t utf8ToLower(
354 const char *locale, uint32_t options,
355 const char *src, int32_t srcLength,
356 char *dest, int32_t destCapacity, Edits *edits,
357 UErrorCode &errorCode);
358
359 /**
360 * Uppercases a UTF-8 string and optionally records edits.
361 * Casing is locale-dependent and context-sensitive.
362 * The result may be longer or shorter than the original.
363 * The source string and the destination buffer must not overlap.
364 *
365 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
366 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
367 * @param src The original UTF-8 string.
368 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
369 * @param dest A buffer for the result string. The result will be NUL-terminated if
370 * the buffer is large enough.
371 * The contents are undefined in case of failure.
372 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
373 * dest may be NULL and the function will only return the length of the result
374 * without writing any of the result string.
375 * @param edits Records edits for index mapping, working with styled text,
376 * and getting only changes (if any).
377 * The Edits contents are undefined if any error occurs.
378 * This function calls edits->reset() first unless
379 * options includes U_EDITS_NO_RESET. edits can be NULL.
380 * @param errorCode Reference to an in/out error code value
381 * which must not indicate a failure before the function call.
382 * @return The length of the result string, if successful.
383 * When the result would be longer than destCapacity,
384 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
385 *
386 * @see ucasemap_utf8ToUpper
387 * @stable ICU 59
388 */
389 static int32_t utf8ToUpper(
390 const char *locale, uint32_t options,
391 const char *src, int32_t srcLength,
392 char *dest, int32_t destCapacity, Edits *edits,
393 UErrorCode &errorCode);
394
395 #if !UCONFIG_NO_BREAK_ITERATION
396
397 /**
398 * Titlecases a UTF-8 string and optionally records edits.
399 * Casing is locale-dependent and context-sensitive.
400 * The result may be longer or shorter than the original.
401 * The source string and the destination buffer must not overlap.
402 *
403 * Titlecasing uses a break iterator to find the first characters of words
404 * that are to be titlecased. It titlecases those characters and lowercases
405 * all others. (This can be modified with options bits.)
406 *
407 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
408 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
409 * U_TITLECASE_NO_LOWERCASE,
410 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
411 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
412 * @param iter A break iterator to find the first characters of words that are to be titlecased.
413 * It is set to the source string (setUText())
414 * and used one or more times for iteration (first() and next()).
415 * If NULL, then a word break iterator for the locale is used
416 * (or something equivalent).
417 * @param src The original UTF-8 string.
418 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
419 * @param dest A buffer for the result string. The result will be NUL-terminated if
420 * the buffer is large enough.
421 * The contents are undefined in case of failure.
422 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
423 * dest may be NULL and the function will only return the length of the result
424 * without writing any of the result string.
425 * @param edits Records edits for index mapping, working with styled text,
426 * and getting only changes (if any).
427 * The Edits contents are undefined if any error occurs.
428 * This function calls edits->reset() first unless
429 * options includes U_EDITS_NO_RESET. edits can be NULL.
430 * @param errorCode Reference to an in/out error code value
431 * which must not indicate a failure before the function call.
432 * @return The length of the result string, if successful.
433 * When the result would be longer than destCapacity,
434 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
435 *
436 * @see ucasemap_utf8ToTitle
437 * @stable ICU 59
438 */
439 static int32_t utf8ToTitle(
440 const char *locale, uint32_t options, BreakIterator *iter,
441 const char *src, int32_t srcLength,
442 char *dest, int32_t destCapacity, Edits *edits,
443 UErrorCode &errorCode);
444
445 #endif // UCONFIG_NO_BREAK_ITERATION
446
447 /**
448 * Case-folds a UTF-8 string and optionally records edits.
449 *
450 * Case folding is locale-independent and not context-sensitive,
451 * but there is an option for whether to include or exclude mappings for dotted I
452 * and dotless i that are marked with 'T' in CaseFolding.txt.
453 *
454 * The result may be longer or shorter than the original.
455 * The source string and the destination buffer must not overlap.
456 *
457 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
458 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
459 * @param src The original UTF-8 string.
460 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
461 * @param dest A buffer for the result string. The result will be NUL-terminated if
462 * the buffer is large enough.
463 * The contents are undefined in case of failure.
464 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
465 * dest may be NULL and the function will only return the length of the result
466 * without writing any of the result string.
467 * @param edits Records edits for index mapping, working with styled text,
468 * and getting only changes (if any).
469 * The Edits contents are undefined if any error occurs.
470 * This function calls edits->reset() first unless
471 * options includes U_EDITS_NO_RESET. edits can be NULL.
472 * @param errorCode Reference to an in/out error code value
473 * which must not indicate a failure before the function call.
474 * @return The length of the result string, if successful.
475 * When the result would be longer than destCapacity,
476 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
477 *
478 * @see ucasemap_utf8FoldCase
479 * @stable ICU 59
480 */
481 static int32_t utf8Fold(
482 uint32_t options,
483 const char *src, int32_t srcLength,
484 char *dest, int32_t destCapacity, Edits *edits,
485 UErrorCode &errorCode);
486
487 private:
488 CaseMap() = delete; // no instances: every member function declared above is static
489 CaseMap(const CaseMap &other) = delete; // not copy-constructible
490 CaseMap &operator=(const CaseMap &other) = delete; // not copy-assignable
491 };
492
493 U_NAMESPACE_END
494 #endif // U_SHOW_CPLUSPLUS_API
495
496 #endif // __CASEMAP_H__