]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // stringoptions.h | |
5 | // created: 2017jun08 Markus W. Scherer | |
6 | ||
7 | #ifndef __STRINGOPTIONS_H__ | |
8 | #define __STRINGOPTIONS_H__ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | ||
12 | /** | |
13 | * \file | |
14 | * \brief C API: Bit-set option constants for various string and character processing functions. | |
15 | */ | |
16 | ||
17 | /** | |
18 | * Option value for case folding: Use the default mappings defined in CaseFolding.txt. | |
19 | * (Value 0, i.e. no case folding option is set; compare U_FOLD_CASE_EXCLUDE_SPECIAL_I.) | |
20 | * @stable ICU 2.0 | |
21 | */ | |
22 | #define U_FOLD_CASE_DEFAULT 0 | |
23 | ||
24 | /** | |
25 | * Option value for case folding: | |
26 | * | |
27 | * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I | |
28 | * and dotless i appropriately for Turkic languages (tr, az). | |
29 | * | |
30 | * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that | |
31 | * were to be included for default mappings and | |
32 | * excluded for the Turkic-specific mappings. | |
33 | * | |
34 | * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that | |
35 | * are to be excluded for default mappings and | |
36 | * included for the Turkic-specific mappings. | |
37 | * | |
38 | * @stable ICU 2.0 | |
39 | */ | |
40 | #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 | |
41 | ||
42 | #ifndef U_HIDE_DRAFT_API | |
43 | ||
44 | /** | |
45 | * Titlecase the string as a whole rather than each word. | |
46 | * (Titlecase only the character at index 0, possibly adjusted.) | |
47 | * Option bits value for titlecasing APIs that take an options bit set. | |
48 | * | |
49 | * It is an error to specify multiple titlecasing iterator options together; | |
50 | * this includes combining an iterator options bit with an explicit BreakIterator. | |
51 | * | |
52 | * @see U_TITLECASE_ADJUST_TO_CASED | |
53 | * @draft ICU 60 | |
54 | */ | |
55 | #define U_TITLECASE_WHOLE_STRING 0x20 | |
56 | ||
57 | /** | |
58 | * Titlecase sentences rather than words. | |
59 | * (Titlecase only the first character of each sentence, possibly adjusted.) | |
60 | * Option bits value for titlecasing APIs that take an options bit set. | |
61 | * | |
62 | * It is an error to specify multiple titlecasing iterator options together; | |
63 | * this includes combining an iterator options bit with an explicit BreakIterator. | |
64 | * | |
65 | * @see U_TITLECASE_ADJUST_TO_CASED | |
66 | * @draft ICU 60 | |
67 | */ | |
68 | #define U_TITLECASE_SENTENCES 0x40 | |
69 | ||
70 | #endif // U_HIDE_DRAFT_API | |
71 | ||
72 | /** | |
73 | * Do not lowercase non-initial parts of words when titlecasing. | |
74 | * Option bit for titlecasing APIs that take an options bit set. | |
75 | * | |
76 | * By default, titlecasing will titlecase the character at each | |
77 | * (possibly adjusted) BreakIterator index and | |
78 | * lowercase all other characters up to the next iterator index. | |
79 | * With this option, those other characters are left unmodified. | |
80 | * | |
81 | * @see U_TITLECASE_ADJUST_TO_CASED | |
82 | * @see UnicodeString::toTitle | |
83 | * @see CaseMap::toTitle | |
84 | * @see ucasemap_setOptions | |
85 | * @see ucasemap_toTitle | |
86 | * @see ucasemap_utf8ToTitle | |
87 | * @stable ICU 3.8 | |
88 | */ | |
89 | #define U_TITLECASE_NO_LOWERCASE 0x100 | |
90 | ||
91 | /** | |
92 | * Do not adjust the titlecasing BreakIterator indexes; | |
93 | * titlecase exactly the characters at breaks from the iterator. | |
94 | * Option bit for titlecasing APIs that take an options bit set. | |
95 | * | |
96 | * By default, titlecasing will take each break iterator index, | |
97 | * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), | |
98 | * and titlecase that one. | |
99 | * | |
100 | * Other characters are lowercased (unless U_TITLECASE_NO_LOWERCASE is set). | |
101 | * | |
102 | * It is an error to specify multiple titlecasing adjustment options together. | |
103 | * | |
104 | * @see U_TITLECASE_ADJUST_TO_CASED | |
105 | * @see U_TITLECASE_NO_LOWERCASE | |
106 | * @see UnicodeString::toTitle | |
107 | * @see CaseMap::toTitle | |
108 | * @see ucasemap_setOptions | |
109 | * @see ucasemap_toTitle | |
110 | * @see ucasemap_utf8ToTitle | |
111 | * @stable ICU 3.8 | |
112 | */ | |
113 | #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 | |
114 | ||
115 | #ifndef U_HIDE_DRAFT_API | |
116 | ||
117 | /** | |
118 | * Adjust each titlecasing BreakIterator index to the next cased character. | |
119 | * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) | |
120 | * Option bit for titlecasing APIs that take an options bit set. | |
121 | * | |
122 | * Before ICU 60, this was the default index adjustment in ICU. | |
123 | * Since ICU 60, the default index adjustment is to the next character that is | |
124 | * a letter, number, symbol, or private use code point. | |
125 | * (Uncased modifier letters are skipped.) | |
126 | * The difference in behavior is small for word titlecasing, | |
127 | * but the new adjustment is much better for whole-string and sentence titlecasing: | |
128 | * The new default yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". | |
129 | * | |
130 | * It is an error to specify multiple titlecasing adjustment options together. | |
131 | * | |
132 | * @see U_TITLECASE_NO_BREAK_ADJUSTMENT | |
133 | * @draft ICU 60 | |
134 | */ | |
135 | #define U_TITLECASE_ADJUST_TO_CASED 0x400 | |
136 | ||
137 | /** | |
138 | * Option bit for string transformation functions: Do not reset the Edits object first. | |
139 | * Used, for example, in some case-mapping and normalization functions. | |
140 | * | |
141 | * @see CaseMap | |
142 | * @see Edits | |
143 | * @see Normalizer2 | |
144 | * @draft ICU 60 | |
145 | */ | |
146 | #define U_EDITS_NO_RESET 0x2000 | |
147 | ||
148 | /** | |
149 | * Option bit: Omit unchanged text when recording how source substrings | |
150 | * relate to changed and unchanged result substrings. | |
151 | * Used, for example, in some case-mapping and normalization functions. | |
152 | * | |
153 | * @see CaseMap | |
154 | * @see Edits | |
155 | * @see Normalizer2 | |
156 | * @draft ICU 60 | |
157 | */ | |
158 | #define U_OMIT_UNCHANGED_TEXT 0x4000 | |
159 | ||
160 | #endif // U_HIDE_DRAFT_API | |
161 | ||
162 | /** | |
163 | * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.: | |
164 | * Compare strings in code point order instead of code unit order. | |
165 | * @stable ICU 2.2 | |
166 | */ | |
167 | #define U_COMPARE_CODE_POINT_ORDER 0x8000 | |
168 | ||
169 | /** | |
170 | * Option bit for unorm_compare: | |
171 | * Perform a case-insensitive comparison. | |
172 | * @stable ICU 2.2 | |
173 | */ | |
174 | #define U_COMPARE_IGNORE_CASE 0x10000 | |
175 | ||
176 | /** | |
177 | * Option bit for unorm_compare: | |
178 | * Assume that both input strings fulfill the FCD conditions. | |
179 | * @stable ICU 2.2 | |
180 | */ | |
181 | #define UNORM_INPUT_IS_FCD 0x20000 | |
182 | ||
183 | // Related definitions elsewhere. | |
184 | // Options that are not meaningful in the same functions | |
185 | // can share the same bits. | |
186 | // | |
187 | // Public: | |
188 | // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 | |
189 | // | |
190 | // Internal: (may change or be removed) | |
191 | // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff | |
192 | // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 | |
193 | // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 | |
194 | // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 | |
195 | // ustr_imp.h #define _STRNCMP_STYLE 0x1000 | |
196 | // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 | |
197 | ||
198 | #endif // __STRINGOPTIONS_H__ |