]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/stringoptions.h
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / stringoptions.h
CommitLineData
0f5d89e8
A
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// stringoptions.h
5// created: 2017jun08 Markus W. Scherer
6
7#ifndef __STRINGOPTIONS_H__
8#define __STRINGOPTIONS_H__
9
10#include "unicode/utypes.h"
11
12/**
13 * \file
14 * \brief C API: Bit set option bit constants for various string and character processing functions.
15 */
16
17/**
18 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
19 *
20 * @stable ICU 2.0
21 */
22#define U_FOLD_CASE_DEFAULT 0
23
24/**
25 * Option value for case folding:
26 *
27 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
28 * and dotless i appropriately for Turkic languages (tr, az).
29 *
30 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
31 * were to be included for default mappings and
32 * excluded for the Turkic-specific mappings.
33 *
34 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
35 * are to be excluded for default mappings and
36 * included for the Turkic-specific mappings.
37 *
38 * @stable ICU 2.0
39 */
40#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
41
42#ifndef U_HIDE_DRAFT_API
43
44/**
45 * Titlecase the string as a whole rather than each word.
46 * (Titlecase only the character at index 0, possibly adjusted.)
47 * Option bits value for titlecasing APIs that take an options bit set.
48 *
49 * It is an error to specify multiple titlecasing iterator options together,
50 * including both an options bit and an explicit BreakIterator.
51 *
52 * @see U_TITLECASE_ADJUST_TO_CASED
53 * @draft ICU 60
54 */
55#define U_TITLECASE_WHOLE_STRING 0x20
56
57/**
58 * Titlecase sentences rather than words.
59 * (Titlecase only the first character of each sentence, possibly adjusted.)
60 * Option bits value for titlecasing APIs that take an options bit set.
61 *
62 * It is an error to specify multiple titlecasing iterator options together,
63 * including both an options bit and an explicit BreakIterator.
64 *
65 * @see U_TITLECASE_ADJUST_TO_CASED
66 * @draft ICU 60
67 */
68#define U_TITLECASE_SENTENCES 0x40
69
70#endif // U_HIDE_DRAFT_API
71
72/**
73 * Do not lowercase non-initial parts of words when titlecasing.
74 * Option bit for titlecasing APIs that take an options bit set.
75 *
76 * By default, titlecasing will titlecase the character at each
77 * (possibly adjusted) BreakIterator index and
78 * lowercase all other characters up to the next iterator index.
79 * With this option, the other characters will not be modified.
80 *
81 * @see U_TITLECASE_ADJUST_TO_CASED
82 * @see UnicodeString::toTitle
83 * @see CaseMap::toTitle
84 * @see ucasemap_setOptions
85 * @see ucasemap_toTitle
86 * @see ucasemap_utf8ToTitle
87 * @stable ICU 3.8
88 */
89#define U_TITLECASE_NO_LOWERCASE 0x100
90
91/**
92 * Do not adjust the titlecasing BreakIterator indexes;
93 * titlecase exactly the characters at breaks from the iterator.
94 * Option bit for titlecasing APIs that take an options bit set.
95 *
96 * By default, titlecasing will take each break iterator index,
97 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
98 * and titlecase that one.
99 *
100 * Other characters are lowercased (unless U_TITLECASE_NO_LOWERCASE is also set).
101 *
102 * It is an error to specify multiple titlecasing adjustment options together.
103 *
104 * @see U_TITLECASE_ADJUST_TO_CASED
105 * @see U_TITLECASE_NO_LOWERCASE
106 * @see UnicodeString::toTitle
107 * @see CaseMap::toTitle
108 * @see ucasemap_setOptions
109 * @see ucasemap_toTitle
110 * @see ucasemap_utf8ToTitle
111 * @stable ICU 3.8
112 */
113#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
114
115#ifndef U_HIDE_DRAFT_API
116
117/**
118 * Adjust each titlecasing BreakIterator index to the next cased character.
119 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
120 * Option bit for titlecasing APIs that take an options bit set.
121 *
122 * This used to be the default index adjustment in ICU.
123 * Since ICU 60, the default index adjustment is to the next character that is
124 * a letter, number, symbol, or private use code point.
125 * (Uncased modifier letters are skipped.)
126 * The difference in behavior is small for word titlecasing,
127 * but the new adjustment is much better for whole-string and sentence titlecasing:
128 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
129 *
130 * It is an error to specify multiple titlecasing adjustment options together.
131 *
132 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
133 * @draft ICU 60
134 */
135#define U_TITLECASE_ADJUST_TO_CASED 0x400
136
137/**
138 * Option for string transformation functions to not first reset the Edits object.
139 * Used for example in some case-mapping and normalization functions.
140 *
141 * @see CaseMap
142 * @see Edits
143 * @see Normalizer2
144 * @draft ICU 60
145 */
146#define U_EDITS_NO_RESET 0x2000
147
148/**
149 * Omit unchanged text when recording how source substrings
150 * relate to changed and unchanged result substrings.
151 * Used for example in some case-mapping and normalization functions.
152 *
153 * @see CaseMap
154 * @see Edits
155 * @see Normalizer2
156 * @draft ICU 60
157 */
158#define U_OMIT_UNCHANGED_TEXT 0x4000
159
160#endif // U_HIDE_DRAFT_API
161
162/**
163 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
164 * Compare strings in code point order instead of code unit order.
165 * @stable ICU 2.2
166 */
167#define U_COMPARE_CODE_POINT_ORDER 0x8000
168
169/**
170 * Option bit for unorm_compare:
171 * Perform case-insensitive comparison.
172 * @stable ICU 2.2
173 */
174#define U_COMPARE_IGNORE_CASE 0x10000
175
176/**
177 * Option bit for unorm_compare:
178 * Both input strings are assumed to fulfill FCD conditions.
179 * @stable ICU 2.2
180 */
181#define UNORM_INPUT_IS_FCD 0x20000
182
183// Related definitions elsewhere.
184// Options that are not meaningful in the same functions
185// can share the same bits.
186//
187// Public:
188// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
189//
190// Internal: (may change or be removed)
191// ucase.h         #define _STRCASECMP_OPTIONS_MASK    0xffff
192// ucase.h         #define _FOLD_CASE_OPTIONS_MASK     7
193// ucasemap_imp.h  #define U_TITLECASE_ITERATOR_MASK   0xe0
194// ucasemap_imp.h  #define U_TITLECASE_ADJUSTMENT_MASK 0x600
195// ustr_imp.h      #define _STRNCMP_STYLE              0x1000
196// unormcmp.cpp    #define _COMPARE_EQUIV              0x80000
197
198#endif // __STRINGOPTIONS_H__