]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // casemap.h | |
5 | // created: 2017jan12 Markus W. Scherer | |
6 | ||
7 | #ifndef __CASEMAP_H__ | |
8 | #define __CASEMAP_H__ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | #include "unicode/uobject.h" | |
12 | ||
13 | /** | |
14 | * \file | |
15 | * \brief C++ API: Low-level C++ case mapping functions. | |
16 | */ | |
17 | ||
18 | #if U_SHOW_CPLUSPLUS_API | |
19 | U_NAMESPACE_BEGIN | |
20 | ||
21 | #ifndef U_HIDE_DRAFT_API | |
22 | ||
23 | class BreakIterator; /* forward declaration: passed by pointer to toTitle() and utf8ToTitle() */ | |
24 | class Edits; /* forward declaration: passed by pointer to every CaseMap function; may be NULL */ | |
25 | ||
26 | /** | |
27 | * Low-level C++ case mapping functions. All are static; the class cannot be instantiated or copied. | |
28 | * | |
29 | * @draft ICU 59 | |
30 | */ | |
31 | class U_COMMON_API CaseMap U_FINAL : public UMemory { | |
32 | public: | |
33 | /** | |
34 | * Lowercases a UTF-16 string and optionally records edits. | |
35 | * Casing is locale-dependent and context-sensitive. | |
36 | * The result may be longer or shorter than the original. | |
37 | * The source string and the destination buffer must not overlap. | |
38 | * | |
39 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
40 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. | |
41 | * @param src The original string. | |
42 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
43 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
44 | * the buffer is large enough. | |
45 | * The contents are undefined in case of failure. | |
46 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
47 | * dest may be NULL and the function will only return the length of the result | |
48 | * without writing any of the result string. | |
49 | * @param edits Records edits for index mapping, working with styled text, | |
50 | * and getting only changes (if any). | |
51 | * The Edits contents are undefined if any error occurs. | |
52 | * This function calls edits->reset() first. edits can be NULL. | |
53 | * @param errorCode Reference to an in/out error code value | |
54 | * which must not indicate a failure before the function call. | |
55 | * @return The length of the result string, if successful. | |
56 | * When the result would be longer than destCapacity, | |
57 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
58 | * | |
59 | * @see u_strToLower | |
60 | * @draft ICU 59 | |
61 | */ | |
62 | static int32_t toLower( | |
63 | const char *locale, uint32_t options, | |
64 | const char16_t *src, int32_t srcLength, | |
65 | char16_t *dest, int32_t destCapacity, Edits *edits, | |
66 | UErrorCode &errorCode); | |
67 | ||
68 | /** | |
69 | * Uppercases a UTF-16 string and optionally records edits. | |
70 | * Casing is locale-dependent and context-sensitive. | |
71 | * The result may be longer or shorter than the original. | |
72 | * The source string and the destination buffer must not overlap. | |
73 | * | |
74 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
75 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. | |
76 | * @param src The original string. | |
77 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
78 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
79 | * the buffer is large enough. | |
80 | * The contents are undefined in case of failure. | |
81 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
82 | * dest may be NULL and the function will only return the length of the result | |
83 | * without writing any of the result string. | |
84 | * @param edits Records edits for index mapping, working with styled text, | |
85 | * and getting only changes (if any). | |
86 | * The Edits contents are undefined if any error occurs. | |
87 | * This function calls edits->reset() first. edits can be NULL. | |
88 | * @param errorCode Reference to an in/out error code value | |
89 | * which must not indicate a failure before the function call. | |
90 | * @return The length of the result string, if successful. | |
91 | * When the result would be longer than destCapacity, | |
92 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
93 | * | |
94 | * @see u_strToUpper | |
95 | * @draft ICU 59 | |
96 | */ | |
97 | static int32_t toUpper( | |
98 | const char *locale, uint32_t options, | |
99 | const char16_t *src, int32_t srcLength, | |
100 | char16_t *dest, int32_t destCapacity, Edits *edits, | |
101 | UErrorCode &errorCode); | |
102 | ||
103 | #if !UCONFIG_NO_BREAK_ITERATION | |
104 | ||
105 | /** | |
106 | * Titlecases a UTF-16 string and optionally records edits. | |
107 | * Casing is locale-dependent and context-sensitive. | |
108 | * The result may be longer or shorter than the original. | |
109 | * The source string and the destination buffer must not overlap. | |
110 | * | |
111 | * Titlecasing uses a break iterator to find the first characters of words | |
112 | * that are to be titlecased. It titlecases those characters and lowercases | |
113 | * all others. (This can be modified with options bits.) | |
114 | * | |
115 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
116 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, | |
117 | * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. | |
118 | * @param iter A break iterator to find the first characters of words that are to be titlecased. | |
119 | * It is set to the source string (setText()) | |
120 | * and used one or more times for iteration (first() and next()). | |
121 | * If NULL, then a word break iterator for the locale is used | |
122 | * (or something equivalent). | |
123 | * @param src The original string. | |
124 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
125 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
126 | * the buffer is large enough. | |
127 | * The contents are undefined in case of failure. | |
128 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
129 | * dest may be NULL and the function will only return the length of the result | |
130 | * without writing any of the result string. | |
131 | * @param edits Records edits for index mapping, working with styled text, | |
132 | * and getting only changes (if any). | |
133 | * The Edits contents are undefined if any error occurs. | |
134 | * This function calls edits->reset() first. edits can be NULL. | |
135 | * @param errorCode Reference to an in/out error code value | |
136 | * which must not indicate a failure before the function call. | |
137 | * @return The length of the result string, if successful. | |
138 | * When the result would be longer than destCapacity, | |
139 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
140 | * | |
141 | * @see u_strToTitle | |
142 | * @see ucasemap_toTitle | |
143 | * @draft ICU 59 | |
144 | */ | |
145 | static int32_t toTitle( | |
146 | const char *locale, uint32_t options, BreakIterator *iter, | |
147 | const char16_t *src, int32_t srcLength, | |
148 | char16_t *dest, int32_t destCapacity, Edits *edits, | |
149 | UErrorCode &errorCode); | |
150 | ||
151 | #endif // !UCONFIG_NO_BREAK_ITERATION | |
152 | ||
153 | /** | |
154 | * Case-folds a UTF-16 string and optionally records edits. | |
155 | * | |
156 | * Case folding is locale-independent and not context-sensitive, | |
157 | * but there is an option for whether to include or exclude mappings for dotted I | |
158 | * and dotless i that are marked with 'T' in CaseFolding.txt. | |
159 | * | |
160 | * The result may be longer or shorter than the original. | |
161 | * The source string and the destination buffer must not overlap. | |
162 | * | |
163 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, | |
164 | * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
165 | * @param src The original string. | |
166 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
167 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
168 | * the buffer is large enough. | |
169 | * The contents are undefined in case of failure. | |
170 | * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then | |
171 | * dest may be NULL and the function will only return the length of the result | |
172 | * without writing any of the result string. | |
173 | * @param edits Records edits for index mapping, working with styled text, | |
174 | * and getting only changes (if any). | |
175 | * The Edits contents are undefined if any error occurs. | |
176 | * This function calls edits->reset() first. edits can be NULL. | |
177 | * @param errorCode Reference to an in/out error code value | |
178 | * which must not indicate a failure before the function call. | |
179 | * @return The length of the result string, if successful. | |
180 | * When the result would be longer than destCapacity, | |
181 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
182 | * | |
183 | * @see u_strFoldCase | |
184 | * @draft ICU 59 | |
185 | */ | |
186 | static int32_t fold( | |
187 | uint32_t options, | |
188 | const char16_t *src, int32_t srcLength, | |
189 | char16_t *dest, int32_t destCapacity, Edits *edits, | |
190 | UErrorCode &errorCode); | |
191 | ||
192 | /** | |
193 | * Lowercases a UTF-8 string and optionally records edits. | |
194 | * Casing is locale-dependent and context-sensitive. | |
195 | * The result may be longer or shorter than the original. | |
196 | * The source string and the destination buffer must not overlap. | |
197 | * | |
198 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
199 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. | |
200 | * @param src The original string. | |
201 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
202 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
203 | * the buffer is large enough. | |
204 | * The contents are undefined in case of failure. | |
205 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
206 | * dest may be NULL and the function will only return the length of the result | |
207 | * without writing any of the result string. | |
208 | * @param edits Records edits for index mapping, working with styled text, | |
209 | * and getting only changes (if any). | |
210 | * The Edits contents are undefined if any error occurs. | |
211 | * This function calls edits->reset() first. edits can be NULL. | |
212 | * @param errorCode Reference to an in/out error code value | |
213 | * which must not indicate a failure before the function call. | |
214 | * @return The length of the result string, if successful. | |
215 | * When the result would be longer than destCapacity, | |
216 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
217 | * | |
218 | * @see ucasemap_utf8ToLower | |
219 | * @draft ICU 59 | |
220 | */ | |
221 | static int32_t utf8ToLower( | |
222 | const char *locale, uint32_t options, | |
223 | const char *src, int32_t srcLength, | |
224 | char *dest, int32_t destCapacity, Edits *edits, | |
225 | UErrorCode &errorCode); | |
226 | ||
227 | /** | |
228 | * Uppercases a UTF-8 string and optionally records edits. | |
229 | * Casing is locale-dependent and context-sensitive. | |
230 | * The result may be longer or shorter than the original. | |
231 | * The source string and the destination buffer must not overlap. | |
232 | * | |
233 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
234 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. | |
235 | * @param src The original string. | |
236 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
237 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
238 | * the buffer is large enough. | |
239 | * The contents are undefined in case of failure. | |
240 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
241 | * dest may be NULL and the function will only return the length of the result | |
242 | * without writing any of the result string. | |
243 | * @param edits Records edits for index mapping, working with styled text, | |
244 | * and getting only changes (if any). | |
245 | * The Edits contents are undefined if any error occurs. | |
246 | * This function calls edits->reset() first. edits can be NULL. | |
247 | * @param errorCode Reference to an in/out error code value | |
248 | * which must not indicate a failure before the function call. | |
249 | * @return The length of the result string, if successful. | |
250 | * When the result would be longer than destCapacity, | |
251 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
252 | * | |
253 | * @see ucasemap_utf8ToUpper | |
254 | * @draft ICU 59 | |
255 | */ | |
256 | static int32_t utf8ToUpper( | |
257 | const char *locale, uint32_t options, | |
258 | const char *src, int32_t srcLength, | |
259 | char *dest, int32_t destCapacity, Edits *edits, | |
260 | UErrorCode &errorCode); | |
261 | ||
262 | #if !UCONFIG_NO_BREAK_ITERATION | |
263 | ||
264 | /** | |
265 | * Titlecases a UTF-8 string and optionally records edits. | |
266 | * Casing is locale-dependent and context-sensitive. | |
267 | * The result may be longer or shorter than the original. | |
268 | * The source string and the destination buffer must not overlap. | |
269 | * | |
270 | * Titlecasing uses a break iterator to find the first characters of words | |
271 | * that are to be titlecased. It titlecases those characters and lowercases | |
272 | * all others. (This can be modified with options bits.) | |
273 | * | |
274 | * @param locale The locale ID. ("" = root locale, NULL = default locale.) | |
275 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, | |
276 | * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. | |
277 | * @param iter A break iterator to find the first characters of words that are to be titlecased. | |
278 | * It is set to the source string (setText()) | |
279 | * and used one or more times for iteration (first() and next()). | |
280 | * If NULL, then a word break iterator for the locale is used | |
281 | * (or something equivalent). | |
282 | * @param src The original string. | |
283 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
284 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
285 | * the buffer is large enough. | |
286 | * The contents are undefined in case of failure. | |
287 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
288 | * dest may be NULL and the function will only return the length of the result | |
289 | * without writing any of the result string. | |
290 | * @param edits Records edits for index mapping, working with styled text, | |
291 | * and getting only changes (if any). | |
292 | * The Edits contents are undefined if any error occurs. | |
293 | * This function calls edits->reset() first. edits can be NULL. | |
294 | * @param errorCode Reference to an in/out error code value | |
295 | * which must not indicate a failure before the function call. | |
296 | * @return The length of the result string, if successful. | |
297 | * When the result would be longer than destCapacity, | |
298 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
299 | * | |
300 | * @see ucasemap_utf8ToTitle | |
301 | * @draft ICU 59 | |
302 | */ | |
303 | static int32_t utf8ToTitle( | |
304 | const char *locale, uint32_t options, BreakIterator *iter, | |
305 | const char *src, int32_t srcLength, | |
306 | char *dest, int32_t destCapacity, Edits *edits, | |
307 | UErrorCode &errorCode); | |
308 | ||
309 | #endif // !UCONFIG_NO_BREAK_ITERATION | |
310 | ||
311 | /** | |
312 | * Case-folds a UTF-8 string and optionally records edits. | |
313 | * | |
314 | * Case folding is locale-independent and not context-sensitive, | |
315 | * but there is an option for whether to include or exclude mappings for dotted I | |
316 | * and dotless i that are marked with 'T' in CaseFolding.txt. | |
317 | * | |
318 | * The result may be longer or shorter than the original. | |
319 | * The source string and the destination buffer must not overlap. | |
320 | * | |
321 | * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, | |
322 | * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. | |
323 | * @param src The original string. | |
324 | * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
325 | * @param dest A buffer for the result string. The result will be NUL-terminated if | |
326 | * the buffer is large enough. | |
327 | * The contents are undefined in case of failure. | |
328 | * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
329 | * dest may be NULL and the function will only return the length of the result | |
330 | * without writing any of the result string. | |
331 | * @param edits Records edits for index mapping, working with styled text, | |
332 | * and getting only changes (if any). | |
333 | * The Edits contents are undefined if any error occurs. | |
334 | * This function calls edits->reset() first. edits can be NULL. | |
335 | * @param errorCode Reference to an in/out error code value | |
336 | * which must not indicate a failure before the function call. | |
337 | * @return The length of the result string, if successful. | |
338 | * When the result would be longer than destCapacity, | |
339 | * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. | |
340 | * | |
341 | * @see ucasemap_utf8FoldCase | |
342 | * @draft ICU 59 | |
343 | */ | |
344 | static int32_t utf8Fold( | |
345 | uint32_t options, | |
346 | const char *src, int32_t srcLength, | |
347 | char *dest, int32_t destCapacity, Edits *edits, | |
348 | UErrorCode &errorCode); | |
349 | ||
350 | private: /* Construction, copying and assignment are deleted: the class only groups static functions. */ | |
351 | CaseMap() = delete; | |
352 | CaseMap(const CaseMap &other) = delete; | |
353 | CaseMap &operator=(const CaseMap &other) = delete; | |
354 | }; | |
355 | ||
356 | #endif // U_HIDE_DRAFT_API | |
357 | ||
358 | U_NAMESPACE_END | |
359 | #endif // U_SHOW_CPLUSPLUS_API | |
360 | ||
361 | #endif // __CASEMAP_H__ |