]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/stringoptions.h
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 // created: 2017jun08 Markus W. Scherer
7 #ifndef __STRINGOPTIONS_H__
8 #define __STRINGOPTIONS_H__
10 #include "unicode/utypes.h"
14 * \brief C API: Bit set option bit constants for various string and character processing functions.
18 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
22 #define U_FOLD_CASE_DEFAULT 0
25 * Option value for case folding:
27 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
28 * and dotless i appropriately for Turkic languages (tr, az).
30 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
31 * were to be included for default mappings and
32 * excluded for the Turkic-specific mappings.
34 * Since Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that
35 * are to be excluded for default mappings and
36 * included for the Turkic-specific mappings.
40 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
43 * Titlecase the string as a whole rather than each word.
44 * (Titlecase only the character at index 0, possibly adjusted.)
45 * Option bits value for titlecasing APIs that take an options bit set.
47 * It is an error to specify multiple titlecasing iterator options together,
48 * including both an options bit and an explicit BreakIterator.
50 * @see U_TITLECASE_ADJUST_TO_CASED
53 #define U_TITLECASE_WHOLE_STRING 0x20
56 * Titlecase sentences rather than words.
57 * (Titlecase only the first character of each sentence, possibly adjusted.)
58 * Option bits value for titlecasing APIs that take an options bit set.
60 * It is an error to specify multiple titlecasing iterator options together,
61 * including both an options bit and an explicit BreakIterator.
63 * @see U_TITLECASE_ADJUST_TO_CASED
66 #define U_TITLECASE_SENTENCES 0x40
69 * Do not lowercase non-initial parts of words when titlecasing.
70 * Option bit for titlecasing APIs that take an options bit set.
72 * By default, titlecasing will titlecase the character at each
73 * (possibly adjusted) BreakIterator index and
74 * lowercase all other characters up to the next iterator index.
75 * With this option, the other characters will not be modified.
77 * @see U_TITLECASE_ADJUST_TO_CASED
78 * @see UnicodeString::toTitle
79 * @see CaseMap::toTitle
80 * @see ucasemap_setOptions
81 * @see ucasemap_toTitle
82 * @see ucasemap_utf8ToTitle
85 #define U_TITLECASE_NO_LOWERCASE 0x100
88 * Do not adjust the titlecasing BreakIterator indexes;
89 * titlecase exactly the characters at breaks from the iterator.
90 * Option bit for titlecasing APIs that take an options bit set.
92 * By default, titlecasing will take each break iterator index,
93 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
94 * and titlecase that one.
96 * Other characters are lowercased.
98 * It is an error to specify multiple titlecasing adjustment options together.
100 * @see U_TITLECASE_ADJUST_TO_CASED
101 * @see U_TITLECASE_NO_LOWERCASE
102 * @see UnicodeString::toTitle
103 * @see CaseMap::toTitle
104 * @see ucasemap_setOptions
105 * @see ucasemap_toTitle
106 * @see ucasemap_utf8ToTitle
109 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
112 * Adjust each titlecasing BreakIterator index to the next cased character.
113 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
114 * Option bit for titlecasing APIs that take an options bit set.
116 * This used to be the default index adjustment in ICU.
117 * Since ICU 60, the default index adjustment is to the next character that is
118 * a letter, number, symbol, or private use code point.
119 * (Uncased modifier letters are skipped.)
120 * The difference in behavior is small for word titlecasing,
121 * but the new adjustment is much better for whole-string and sentence titlecasing:
122 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
124 * It is an error to specify multiple titlecasing adjustment options together.
126 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
129 #define U_TITLECASE_ADJUST_TO_CASED 0x400
132 * Option bit telling string transformation functions not to reset the Edits object first.
133 * Used for example in some case-mapping and normalization functions.
140 #define U_EDITS_NO_RESET 0x2000
143 * Omit unchanged text when recording how source substrings
144 * relate to changed and unchanged result substrings.
145 * Used for example in some case-mapping and normalization functions.
152 #define U_OMIT_UNCHANGED_TEXT 0x4000
155 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
156 * Compare strings in code point order instead of code unit order.
159 #define U_COMPARE_CODE_POINT_ORDER 0x8000
162 * Option bit for unorm_compare:
163 * Perform case-insensitive comparison.
166 #define U_COMPARE_IGNORE_CASE 0x10000
169 * Option bit for unorm_compare:
170 * Both input strings are assumed to fulfill FCD conditions.
173 #define UNORM_INPUT_IS_FCD 0x20000
175 // Related definitions elsewhere.
176 // Options that are not meaningful in the same functions
177 // can share the same bits.
180 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
182 // Internal: (may change or be removed)
183 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
184 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
185 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
186 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
187 // ustr_imp.h #define _STRNCMP_STYLE 0x1000
188 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000
190 #endif // __STRINGOPTIONS_H__