1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 // created: 2017jan12 Markus W. Scherer
10 #include "unicode/utypes.h"
11 #include "unicode/uobject.h"
/**
 * \file
 * \brief C++ API: Low-level C++ case mapping functions.
 */
18 #if U_SHOW_CPLUSPLUS_API
21 #ifndef U_HIDE_DRAFT_API
27 * Low-level C++ case mapping functions.
31 class U_COMMON_API CaseMap U_FINAL
: public UMemory
{
34 * Lowercases a UTF-16 string and optionally records edits.
35 * Casing is locale-dependent and context-sensitive.
36 * The result may be longer or shorter than the original.
37 * The source string and the destination buffer must not overlap.
39 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
40 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
41 * @param src The original string.
42 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
43 * @param dest A buffer for the result string. The result will be NUL-terminated if
44 * the buffer is large enough.
45 * The contents is undefined in case of failure.
46 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
47 * dest may be NULL and the function will only return the length of the result
48 * without writing any of the result string.
49 * @param edits Records edits for index mapping, working with styled text,
50 * and getting only changes (if any).
51 * The Edits contents is undefined if any error occurs.
52 * This function calls edits->reset() first. edits can be NULL.
53 * @param errorCode Reference to an in/out error code value
54 * which must not indicate a failure before the function call.
55 * @return The length of the result string, if successful.
56 * When the result would be longer than destCapacity,
57 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
62 static int32_t toLower(
63 const char *locale
, uint32_t options
,
64 const char16_t *src
, int32_t srcLength
,
65 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
66 UErrorCode
&errorCode
);
69 * Uppercases a UTF-16 string and optionally records edits.
70 * Casing is locale-dependent and context-sensitive.
71 * The result may be longer or shorter than the original.
72 * The source string and the destination buffer must not overlap.
74 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
75 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
76 * @param src The original string.
77 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
78 * @param dest A buffer for the result string. The result will be NUL-terminated if
79 * the buffer is large enough.
80 * The contents is undefined in case of failure.
81 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
82 * dest may be NULL and the function will only return the length of the result
83 * without writing any of the result string.
84 * @param edits Records edits for index mapping, working with styled text,
85 * and getting only changes (if any).
86 * The Edits contents is undefined if any error occurs.
87 * This function calls edits->reset() first. edits can be NULL.
88 * @param errorCode Reference to an in/out error code value
89 * which must not indicate a failure before the function call.
90 * @return The length of the result string, if successful.
91 * When the result would be longer than destCapacity,
92 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
97 static int32_t toUpper(
98 const char *locale
, uint32_t options
,
99 const char16_t *src
, int32_t srcLength
,
100 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
101 UErrorCode
&errorCode
);
103 #if !UCONFIG_NO_BREAK_ITERATION
106 * Titlecases a UTF-16 string and optionally records edits.
107 * Casing is locale-dependent and context-sensitive.
108 * The result may be longer or shorter than the original.
109 * The source string and the destination buffer must not overlap.
111 * Titlecasing uses a break iterator to find the first characters of words
112 * that are to be titlecased. It titlecases those characters and lowercases
113 * all others. (This can be modified with options bits.)
115 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
116 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
117 * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
118 * @param iter A break iterator to find the first characters of words that are to be titlecased.
119 * It is set to the source string (setText())
120 * and used one or more times for iteration (first() and next()).
121 * If NULL, then a word break iterator for the locale is used
122 * (or something equivalent).
123 * @param src The original string.
124 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
125 * @param dest A buffer for the result string. The result will be NUL-terminated if
126 * the buffer is large enough.
127 * The contents is undefined in case of failure.
128 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
129 * dest may be NULL and the function will only return the length of the result
130 * without writing any of the result string.
131 * @param edits Records edits for index mapping, working with styled text,
132 * and getting only changes (if any).
133 * The Edits contents is undefined if any error occurs.
134 * This function calls edits->reset() first. edits can be NULL.
135 * @param errorCode Reference to an in/out error code value
136 * which must not indicate a failure before the function call.
137 * @return The length of the result string, if successful.
138 * When the result would be longer than destCapacity,
139 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
142 * @see ucasemap_toTitle
145 static int32_t toTitle(
146 const char *locale
, uint32_t options
, BreakIterator
*iter
,
147 const char16_t *src
, int32_t srcLength
,
148 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
149 UErrorCode
&errorCode
);
151 #endif // UCONFIG_NO_BREAK_ITERATION
154 * Case-folds a UTF-16 string and optionally records edits.
156 * Case folding is locale-independent and not context-sensitive,
157 * but there is an option for whether to include or exclude mappings for dotted I
158 * and dotless i that are marked with 'T' in CaseFolding.txt.
160 * The result may be longer or shorter than the original.
161 * The source string and the destination buffer must not overlap.
163 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
164 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
165 * @param src The original string.
166 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
167 * @param dest A buffer for the result string. The result will be NUL-terminated if
168 * the buffer is large enough.
169 * The contents is undefined in case of failure.
170 * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
171 * dest may be NULL and the function will only return the length of the result
172 * without writing any of the result string.
173 * @param edits Records edits for index mapping, working with styled text,
174 * and getting only changes (if any).
175 * The Edits contents is undefined if any error occurs.
176 * This function calls edits->reset() first. edits can be NULL.
177 * @param errorCode Reference to an in/out error code value
178 * which must not indicate a failure before the function call.
179 * @return The length of the result string, if successful.
180 * When the result would be longer than destCapacity,
181 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
188 const char16_t *src
, int32_t srcLength
,
189 char16_t *dest
, int32_t destCapacity
, Edits
*edits
,
190 UErrorCode
&errorCode
);
193 * Lowercases a UTF-8 string and optionally records edits.
194 * Casing is locale-dependent and context-sensitive.
195 * The result may be longer or shorter than the original.
196 * The source string and the destination buffer must not overlap.
198 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
199 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
200 * @param src The original string.
201 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
202 * @param dest A buffer for the result string. The result will be NUL-terminated if
203 * the buffer is large enough.
204 * The contents is undefined in case of failure.
205 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
206 * dest may be NULL and the function will only return the length of the result
207 * without writing any of the result string.
208 * @param edits Records edits for index mapping, working with styled text,
209 * and getting only changes (if any).
210 * The Edits contents is undefined if any error occurs.
211 * This function calls edits->reset() first. edits can be NULL.
212 * @param errorCode Reference to an in/out error code value
213 * which must not indicate a failure before the function call.
214 * @return The length of the result string, if successful.
215 * When the result would be longer than destCapacity,
216 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
218 * @see ucasemap_utf8ToLower
221 static int32_t utf8ToLower(
222 const char *locale
, uint32_t options
,
223 const char *src
, int32_t srcLength
,
224 char *dest
, int32_t destCapacity
, Edits
*edits
,
225 UErrorCode
&errorCode
);
228 * Uppercases a UTF-8 string and optionally records edits.
229 * Casing is locale-dependent and context-sensitive.
230 * The result may be longer or shorter than the original.
231 * The source string and the destination buffer must not overlap.
233 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
234 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
235 * @param src The original string.
236 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
237 * @param dest A buffer for the result string. The result will be NUL-terminated if
238 * the buffer is large enough.
239 * The contents is undefined in case of failure.
240 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
241 * dest may be NULL and the function will only return the length of the result
242 * without writing any of the result string.
243 * @param edits Records edits for index mapping, working with styled text,
244 * and getting only changes (if any).
245 * The Edits contents is undefined if any error occurs.
246 * This function calls edits->reset() first. edits can be NULL.
247 * @param errorCode Reference to an in/out error code value
248 * which must not indicate a failure before the function call.
249 * @return The length of the result string, if successful.
250 * When the result would be longer than destCapacity,
251 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
253 * @see ucasemap_utf8ToUpper
256 static int32_t utf8ToUpper(
257 const char *locale
, uint32_t options
,
258 const char *src
, int32_t srcLength
,
259 char *dest
, int32_t destCapacity
, Edits
*edits
,
260 UErrorCode
&errorCode
);
262 #if !UCONFIG_NO_BREAK_ITERATION
265 * Titlecases a UTF-8 string and optionally records edits.
266 * Casing is locale-dependent and context-sensitive.
267 * The result may be longer or shorter than the original.
268 * The source string and the destination buffer must not overlap.
270 * Titlecasing uses a break iterator to find the first characters of words
271 * that are to be titlecased. It titlecases those characters and lowercases
272 * all others. (This can be modified with options bits.)
274 * @param locale The locale ID. ("" = root locale, NULL = default locale.)
275 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
276 * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
277 * @param iter A break iterator to find the first characters of words that are to be titlecased.
278 * It is set to the source string (setText())
279 * and used one or more times for iteration (first() and next()).
280 * If NULL, then a word break iterator for the locale is used
281 * (or something equivalent).
282 * @param src The original string.
283 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
284 * @param dest A buffer for the result string. The result will be NUL-terminated if
285 * the buffer is large enough.
286 * The contents is undefined in case of failure.
287 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
288 * dest may be NULL and the function will only return the length of the result
289 * without writing any of the result string.
290 * @param edits Records edits for index mapping, working with styled text,
291 * and getting only changes (if any).
292 * The Edits contents is undefined if any error occurs.
293 * This function calls edits->reset() first. edits can be NULL.
294 * @param errorCode Reference to an in/out error code value
295 * which must not indicate a failure before the function call.
296 * @return The length of the result string, if successful.
297 * When the result would be longer than destCapacity,
298 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
300 * @see ucasemap_utf8ToTitle
303 static int32_t utf8ToTitle(
304 const char *locale
, uint32_t options
, BreakIterator
*iter
,
305 const char *src
, int32_t srcLength
,
306 char *dest
, int32_t destCapacity
, Edits
*edits
,
307 UErrorCode
&errorCode
);
309 #endif // UCONFIG_NO_BREAK_ITERATION
312 * Case-folds a UTF-8 string and optionally records edits.
314 * Case folding is locale-independent and not context-sensitive,
315 * but there is an option for whether to include or exclude mappings for dotted I
316 * and dotless i that are marked with 'T' in CaseFolding.txt.
318 * The result may be longer or shorter than the original.
319 * The source string and the destination buffer must not overlap.
321 * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
322 * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
323 * @param src The original string.
324 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
325 * @param dest A buffer for the result string. The result will be NUL-terminated if
326 * the buffer is large enough.
327 * The contents is undefined in case of failure.
328 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
329 * dest may be NULL and the function will only return the length of the result
330 * without writing any of the result string.
331 * @param edits Records edits for index mapping, working with styled text,
332 * and getting only changes (if any).
333 * The Edits contents is undefined if any error occurs.
334 * This function calls edits->reset() first. edits can be NULL.
335 * @param errorCode Reference to an in/out error code value
336 * which must not indicate a failure before the function call.
337 * @return The length of the result string, if successful.
338 * When the result would be longer than destCapacity,
339 * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
341 * @see ucasemap_utf8FoldCase
344 static int32_t utf8Fold(
346 const char *src
, int32_t srcLength
,
347 char *dest
, int32_t destCapacity
, Edits
*edits
,
348 UErrorCode
&errorCode
);
352 CaseMap(const CaseMap
&other
) = delete;
353 CaseMap
&operator=(const CaseMap
&other
) = delete;
356 #endif // U_HIDE_DRAFT_API
359 #endif // U_SHOW_CPLUSPLUS_API
361 #endif // __CASEMAP_H__