1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 // created: 2017jan12 Markus W. Scherer
10 #include "unicode/utypes.h"
12 #if U_SHOW_CPLUSPLUS_API
14 #include "unicode/stringpiece.h"
15 #include "unicode/uobject.h"
/**
 * \file
 * \brief C++ API: Low-level C++ case mapping functions.
 */
29 * Low-level C++ case mapping functions.
33 class U_COMMON_API CaseMap U_FINAL
: public UMemory
{
36 * Lowercases a UTF-16 string and optionally records edits.
37 * Casing is locale-dependent and context-sensitive.
38 * The result may be longer or shorter than the original.
39 * The source string and the destination buffer must not overlap.
41 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
42 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
43 * @param src The original string.
44 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
45 * @param dest A buffer for the result string. The result will be NUL-terminated if
46 * the buffer is large enough.
47 * The contents is undefined in case of failure.
48 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
49 * dest may be NULL and the function will only return the length of the result
50 * without writing any of the result string.
51 * @param edits Records edits for index mapping, working with styled text,
52 * and getting only changes (if any).
53 * The Edits contents is undefined if any error occurs.
54 * This function calls edits->reset() first unless
55 * options includes U_EDITS_NO_RESET. edits can be NULL.
56 * @param errorCode Reference to an in/out error code value
57 * which must not indicate a failure before the function call.
58 * @return The length of the result string, if successful.
59 * When the result would be longer than destCapacity,
60 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
65 static int32_t toLower(
66 const char *locale
, uint32_t options
,
67 const char16_t *src
, int32_t srcLength
,
68 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
69 UErrorCode
&errorCode
);
72 * Uppercases a UTF-16 string and optionally records edits.
73 * Casing is locale-dependent and context-sensitive.
74 * The result may be longer or shorter than the original.
75 * The source string and the destination buffer must not overlap.
77 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
78 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
79 * @param src The original string.
80 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
81 * @param dest A buffer for the result string. The result will be NUL-terminated if
82 * the buffer is large enough.
83 * The contents is undefined in case of failure.
84 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
85 * dest may be NULL and the function will only return the length of the result
86 * without writing any of the result string.
87 * @param edits Records edits for index mapping, working with styled text,
88 * and getting only changes (if any).
89 * The Edits contents is undefined if any error occurs.
90 * This function calls edits->reset() first unless
91 * options includes U_EDITS_NO_RESET. edits can be NULL.
92 * @param errorCode Reference to an in/out error code value
93 * which must not indicate a failure before the function call.
94 * @return The length of the result string, if successful.
95 * When the result would be longer than destCapacity,
96 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
101 static int32_t toUpper(
102 const char *locale
, uint32_t options
,
103 const char16_t *src
, int32_t srcLength
,
104 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
105 UErrorCode
&errorCode
);
107 #if !UCONFIG_NO_BREAK_ITERATION
110 * Titlecases a UTF-16 string and optionally records edits.
111 * Casing is locale-dependent and context-sensitive.
112 * The result may be longer or shorter than the original.
113 * The source string and the destination buffer must not overlap.
115 * Titlecasing uses a break iterator to find the first characters of words
116 * that are to be titlecased. It titlecases those characters and lowercases
117 * all others. (This can be modified with options bits.)
119 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
120 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
121 * U_TITLECASE_NO_LOWERCASE,
122 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
123 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
124 * @param iter A break iterator to find the first characters of words that are to be titlecased.
125 * It is set to the source string (setText())
126 * and used one or more times for iteration (first() and next()).
127 * If NULL, then a word break iterator for the locale is used
128 * (or something equivalent).
129 * @param src The original string.
130 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
131 * @param dest A buffer for the result string. The result will be NUL-terminated if
132 * the buffer is large enough.
133 * The contents is undefined in case of failure.
134 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
135 * dest may be NULL and the function will only return the length of the result
136 * without writing any of the result string.
137 * @param edits Records edits for index mapping, working with styled text,
138 * and getting only changes (if any).
139 * The Edits contents is undefined if any error occurs.
140 * This function calls edits->reset() first unless
141 * options includes U_EDITS_NO_RESET. edits can be NULL.
142 * @param errorCode Reference to an in/out error code value
143 * which must not indicate a failure before the function call.
144 * @return The length of the result string, if successful.
145 * When the result would be longer than destCapacity,
146 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
149 * @see ucasemap_toTitle
152 static int32_t toTitle(
153 const char *locale
, uint32_t options
, BreakIterator
*iter
,
154 const char16_t *src
, int32_t srcLength
,
155 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
156 UErrorCode
&errorCode
);
158 #endif // UCONFIG_NO_BREAK_ITERATION
161 * Case-folds a UTF-16 string and optionally records edits.
163 * Case folding is locale-independent and not context-sensitive,
164 * but there is an option for whether to include or exclude mappings for dotted I
165 * and dotless i that are marked with 'T' in CaseFolding.txt.
167 * The result may be longer or shorter than the original.
168 * The source string and the destination buffer must not overlap.
170 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
171 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
172 * @param src The original string.
173 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
174 * @param dest A buffer for the result string. The result will be NUL-terminated if
175 * the buffer is large enough.
176 * The contents is undefined in case of failure.
177 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
178 * dest may be NULL and the function will only return the length of the result
179 * without writing any of the result string.
180 * @param edits Records edits for index mapping, working with styled text,
181 * and getting only changes (if any).
182 * The Edits contents is undefined if any error occurs.
183 * This function calls edits->reset() first unless
184 * options includes U_EDITS_NO_RESET. edits can be NULL.
185 * @param errorCode Reference to an in/out error code value
186 * which must not indicate a failure before the function call.
187 * @return The length of the result string, if successful.
188 * When the result would be longer than destCapacity,
189 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
196 const char16_t *src
, int32_t srcLength
,
197 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
198 UErrorCode
&errorCode
);
201 * Lowercases a UTF-8 string and optionally records edits.
202 * Casing is locale-dependent and context-sensitive.
203 * The result may be longer or shorter than the original.
205 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
206 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
207 * @param src The original string.
208 * @param sink A ByteSink to which the result string is written.
209 * sink.Flush() is called at the end.
210 * @param edits Records edits for index mapping, working with styled text,
211 * and getting only changes (if any).
212 * The Edits contents is undefined if any error occurs.
213 * This function calls edits->reset() first unless
214 * options includes U_EDITS_NO_RESET. edits can be NULL.
215 * @param errorCode Reference to an in/out error code value
216 * which must not indicate a failure before the function call.
218 * @see ucasemap_utf8ToLower
221 static void utf8ToLower(
222 const char *locale
, uint32_t options
,
223 StringPiece src
, ByteSink
&sink
, Edits
*edits
,
224 UErrorCode
&errorCode
);
227 * Uppercases a UTF-8 string and optionally records edits.
228 * Casing is locale-dependent and context-sensitive.
229 * The result may be longer or shorter than the original.
231 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
232 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
233 * @param src The original string.
234 * @param sink A ByteSink to which the result string is written.
235 * sink.Flush() is called at the end.
236 * @param edits Records edits for index mapping, working with styled text,
237 * and getting only changes (if any).
238 * The Edits contents is undefined if any error occurs.
239 * This function calls edits->reset() first unless
240 * options includes U_EDITS_NO_RESET. edits can be NULL.
241 * @param errorCode Reference to an in/out error code value
242 * which must not indicate a failure before the function call.
244 * @see ucasemap_utf8ToUpper
247 static void utf8ToUpper(
248 const char *locale
, uint32_t options
,
249 StringPiece src
, ByteSink
&sink
, Edits
*edits
,
250 UErrorCode
&errorCode
);
252 #if !UCONFIG_NO_BREAK_ITERATION
255 * Titlecases a UTF-8 string and optionally records edits.
256 * Casing is locale-dependent and context-sensitive.
257 * The result may be longer or shorter than the original.
259 * Titlecasing uses a break iterator to find the first characters of words
260 * that are to be titlecased. It titlecases those characters and lowercases
261 * all others. (This can be modified with options bits.)
263 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
264 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
265 * U_TITLECASE_NO_LOWERCASE,
266 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
267 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
268 * @param iter A break iterator to find the first characters of words that are to be titlecased.
269 * It is set to the source string (setUText())
270 * and used one or more times for iteration (first() and next()).
271 * If NULL, then a word break iterator for the locale is used
272 * (or something equivalent).
273 * @param src The original string.
274 * @param sink A ByteSink to which the result string is written.
275 * sink.Flush() is called at the end.
276 * @param edits Records edits for index mapping, working with styled text,
277 * and getting only changes (if any).
278 * The Edits contents is undefined if any error occurs.
279 * This function calls edits->reset() first unless
280 * options includes U_EDITS_NO_RESET. edits can be NULL.
281 * @param errorCode Reference to an in/out error code value
282 * which must not indicate a failure before the function call.
284 * @see ucasemap_utf8ToTitle
287 static void utf8ToTitle(
288 const char *locale
, uint32_t options
, BreakIterator
*iter
,
289 StringPiece src
, ByteSink
&sink
, Edits
*edits
,
290 UErrorCode
&errorCode
);
292 #endif // UCONFIG_NO_BREAK_ITERATION
295 * Case-folds a UTF-8 string and optionally records edits.
297 * Case folding is locale-independent and not context-sensitive,
298 * but there is an option for whether to include or exclude mappings for dotted I
299 * and dotless i that are marked with 'T' in CaseFolding.txt.
301 * The result may be longer or shorter than the original.
303 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
304 * @param src The original string.
305 * @param sink A ByteSink to which the result string is written.
306 * sink.Flush() is called at the end.
307 * @param edits Records edits for index mapping, working with styled text,
308 * and getting only changes (if any).
309 * The Edits contents is undefined if any error occurs.
310 * This function calls edits->reset() first unless
311 * options includes U_EDITS_NO_RESET. edits can be NULL.
312 * @param errorCode Reference to an in/out error code value
313 * which must not indicate a failure before the function call.
315 * @see ucasemap_utf8FoldCase
318 static void utf8Fold(
320 StringPiece src
, ByteSink
&sink
, Edits
*edits
,
321 UErrorCode
&errorCode
);
324 * Lowercases a UTF-8 string and optionally records edits.
325 * Casing is locale-dependent and context-sensitive.
326 * The result may be longer or shorter than the original.
327 * The source string and the destination buffer must not overlap.
329 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
330 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
331 * @param src The original string.
332 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
333 * @param dest A buffer for the result string. The result will be NUL-terminated if
334 * the buffer is large enough.
335 * The contents is undefined in case of failure.
336 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
337 * dest may be NULL and the function will only return the length of the result
338 * without writing any of the result string.
339 * @param edits Records edits for index mapping, working with styled text,
340 * and getting only changes (if any).
341 * The Edits contents is undefined if any error occurs.
342 * This function calls edits->reset() first unless
343 * options includes U_EDITS_NO_RESET. edits can be NULL.
344 * @param errorCode Reference to an in/out error code value
345 * which must not indicate a failure before the function call.
346 * @return The length of the result string, if successful.
347 * When the result would be longer than destCapacity,
348 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
350 * @see ucasemap_utf8ToLower
353 static int32_t utf8ToLower(
354 const char *locale
, uint32_t options
,
355 const char *src
, int32_t srcLength
,
356 char *dest
, int32_t destCapacity
, Edits
*edits
,
357 UErrorCode
&errorCode
);
360 * Uppercases a UTF-8 string and optionally records edits.
361 * Casing is locale-dependent and context-sensitive.
362 * The result may be longer or shorter than the original.
363 * The source string and the destination buffer must not overlap.
365 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
366 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
367 * @param src The original string.
368 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
369 * @param dest A buffer for the result string. The result will be NUL-terminated if
370 * the buffer is large enough.
371 * The contents is undefined in case of failure.
372 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
373 * dest may be NULL and the function will only return the length of the result
374 * without writing any of the result string.
375 * @param edits Records edits for index mapping, working with styled text,
376 * and getting only changes (if any).
377 * The Edits contents is undefined if any error occurs.
378 * This function calls edits->reset() first unless
379 * options includes U_EDITS_NO_RESET. edits can be NULL.
380 * @param errorCode Reference to an in/out error code value
381 * which must not indicate a failure before the function call.
382 * @return The length of the result string, if successful.
383 * When the result would be longer than destCapacity,
384 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
386 * @see ucasemap_utf8ToUpper
389 static int32_t utf8ToUpper(
390 const char *locale
, uint32_t options
,
391 const char *src
, int32_t srcLength
,
392 char *dest
, int32_t destCapacity
, Edits
*edits
,
393 UErrorCode
&errorCode
);
395 #if !UCONFIG_NO_BREAK_ITERATION
398 * Titlecases a UTF-8 string and optionally records edits.
399 * Casing is locale-dependent and context-sensitive.
400 * The result may be longer or shorter than the original.
401 * The source string and the destination buffer must not overlap.
403 * Titlecasing uses a break iterator to find the first characters of words
404 * that are to be titlecased. It titlecases those characters and lowercases
405 * all others. (This can be modified with options bits.)
407 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
408 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
409 * U_TITLECASE_NO_LOWERCASE,
410 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
411 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
412 * @param iter A break iterator to find the first characters of words that are to be titlecased.
413 * It is set to the source string (setUText())
414 * and used one or more times for iteration (first() and next()).
415 * If NULL, then a word break iterator for the locale is used
416 * (or something equivalent).
417 * @param src The original string.
418 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
419 * @param dest A buffer for the result string. The result will be NUL-terminated if
420 * the buffer is large enough.
421 * The contents is undefined in case of failure.
422 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
423 * dest may be NULL and the function will only return the length of the result
424 * without writing any of the result string.
425 * @param edits Records edits for index mapping, working with styled text,
426 * and getting only changes (if any).
427 * The Edits contents is undefined if any error occurs.
428 * This function calls edits->reset() first unless
429 * options includes U_EDITS_NO_RESET. edits can be NULL.
430 * @param errorCode Reference to an in/out error code value
431 * which must not indicate a failure before the function call.
432 * @return The length of the result string, if successful.
433 * When the result would be longer than destCapacity,
434 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
436 * @see ucasemap_utf8ToTitle
439 static int32_t utf8ToTitle(
440 const char *locale
, uint32_t options
, BreakIterator
*iter
,
441 const char *src
, int32_t srcLength
,
442 char *dest
, int32_t destCapacity
, Edits
*edits
,
443 UErrorCode
&errorCode
);
445 #endif // UCONFIG_NO_BREAK_ITERATION
448 * Case-folds a UTF-8 string and optionally records edits.
450 * Case folding is locale-independent and not context-sensitive,
451 * but there is an option for whether to include or exclude mappings for dotted I
452 * and dotless i that are marked with 'T' in CaseFolding.txt.
454 * The result may be longer or shorter than the original.
455 * The source string and the destination buffer must not overlap.
457 * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
458 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
459 * @param src The original string.
460 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
461 * @param dest A buffer for the result string. The result will be NUL-terminated if
462 * the buffer is large enough.
463 * The contents is undefined in case of failure.
464 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
465 * dest may be NULL and the function will only return the length of the result
466 * without writing any of the result string.
467 * @param edits Records edits for index mapping, working with styled text,
468 * and getting only changes (if any).
469 * The Edits contents is undefined if any error occurs.
470 * This function calls edits->reset() first unless
471 * options includes U_EDITS_NO_RESET. edits can be NULL.
472 * @param errorCode Reference to an in/out error code value
473 * which must not indicate a failure before the function call.
474 * @return The length of the result string, if successful.
475 * When the result would be longer than destCapacity,
476 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
478 * @see ucasemap_utf8FoldCase
481 static int32_t utf8Fold(
483 const char *src
, int32_t srcLength
,
484 char *dest
, int32_t destCapacity
, Edits
*edits
,
485 UErrorCode
&errorCode
);
489 CaseMap(const CaseMap
&other
) = delete;
490 CaseMap
&operator=(const CaseMap
&other
) = delete;
495 #endif /* U_SHOW_CPLUSPLUS_API */
497 #endif // __CASEMAP_H__