// [apple/icu.git] / icuSources / common / unicode / stringoptions.h

// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// stringoptions.h
// created: 2017jun08 Markus W. Scherer

#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__

#include "unicode/utypes.h"

/**
 * \file
 * \brief C API: Option bit constants for the options bit sets of various string and character processing functions.
 */

/**
 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
 *
 * The value is 0, that is, no option bit is set.
 *
 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_DEFAULT 0

/**
 * Option value for case folding:
 *
 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
 * and dotless i appropriately for Turkic languages (tr, az).
 *
 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
 * were to be included for default mappings and
 * excluded for the Turkic-specific mappings.
 *
 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
 * are to be excluded for default mappings and
 * included for the Turkic-specific mappings.
 *
 * @see U_FOLD_CASE_DEFAULT
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1

#ifndef U_HIDE_DRAFT_API

/**
 * Titlecase the string as a whole rather than each word.
 * (Titlecase only the character at index 0, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 * U_TITLECASE_SENTENCES is the other iterator option defined in this file.
 *
 * "Possibly adjusted" refers to the index adjustment selected via
 * U_TITLECASE_NO_BREAK_ADJUSTMENT or U_TITLECASE_ADJUST_TO_CASED.
 *
 * @see U_TITLECASE_SENTENCES
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @draft ICU 60
 */
#define U_TITLECASE_WHOLE_STRING 0x20

/**
 * Titlecase sentences rather than words.
 * (Titlecase only the first character of each sentence, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 * U_TITLECASE_WHOLE_STRING is the other iterator option defined in this file.
 *
 * "Possibly adjusted" refers to the index adjustment selected via
 * U_TITLECASE_NO_BREAK_ADJUSTMENT or U_TITLECASE_ADJUST_TO_CASED.
 *
 * @see U_TITLECASE_WHOLE_STRING
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @draft ICU 60
 */
#define U_TITLECASE_SENTENCES 0x40

#endif  // U_HIDE_DRAFT_API

/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the character at each
 * (possibly adjusted) BreakIterator index and
 * lowercase all other characters up to the next iterator index.
 * With this option, the other characters will not be modified.
 *
 * This bit lies outside both the titlecasing iterator mask (0xe0) and the
 * titlecasing adjustment mask (0x600) listed at the end of this file.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_LOWERCASE 0x100

/**
 * Do not adjust the titlecasing BreakIterator indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
 * and titlecase that one.
 *
 * Other characters are lowercased
 * (or left unmodified if U_TITLECASE_NO_LOWERCASE is also specified).
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 * U_TITLECASE_ADJUST_TO_CASED is the other adjustment option defined in this file.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see U_TITLECASE_NO_LOWERCASE
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200

#ifndef U_HIDE_DRAFT_API

/**
 * Adjust each titlecasing BreakIterator index to the next cased character.
 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * This used to be the default index adjustment in ICU.
 * Since ICU 60, the default index adjustment is to the next character that is
 * a letter, number, symbol, or private use code point.
 * (Uncased modifier letters are skipped.)
 * The difference in behavior is small for word titlecasing,
 * but the new adjustment is much better for whole-string and sentence titlecasing:
 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 * U_TITLECASE_NO_BREAK_ADJUSTMENT is the other adjustment option defined in this file.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @see U_TITLECASE_WHOLE_STRING
 * @see U_TITLECASE_SENTENCES
 * @draft ICU 60
 */
#define U_TITLECASE_ADJUST_TO_CASED 0x400

/**
 * Option for string transformation functions to not first reset the Edits object.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see U_OMIT_UNCHANGED_TEXT
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @draft ICU 60
 */
#define U_EDITS_NO_RESET 0x2000

/**
 * Omit unchanged text when recording how source substrings
 * relate to changed and unchanged result substrings.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see U_EDITS_NO_RESET
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @draft ICU 60
 */
#define U_OMIT_UNCHANGED_TEXT 0x4000

#endif  // U_HIDE_DRAFT_API

/**
 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
 * Compare strings in code point order instead of code unit order.
 *
 * This is the highest bit within the internal _STRCASECMP_OPTIONS_MASK (0xffff)
 * listed at the end of this file.
 *
 * @stable ICU 2.2
 */
#define U_COMPARE_CODE_POINT_ORDER  0x8000

/**
 * Option bit for unorm_compare:
 * Perform case-insensitive comparison.
 *
 * This bit (bit 16) lies above the internal _STRCASECMP_OPTIONS_MASK (0xffff)
 * and below UNORM_COMPARE_NORM_OPTIONS_SHIFT (20), both listed at the end of this file.
 *
 * @see UNORM_INPUT_IS_FCD
 * @stable ICU 2.2
 */
#define U_COMPARE_IGNORE_CASE       0x10000

/**
 * Option bit for unorm_compare:
 * Both input strings are assumed to fulfill FCD conditions.
 *
 * This bit (bit 17) lies above the internal _STRCASECMP_OPTIONS_MASK (0xffff)
 * and below UNORM_COMPARE_NORM_OPTIONS_SHIFT (20), both listed at the end of this file.
 *
 * @see U_COMPARE_IGNORE_CASE
 * @stable ICU 2.2
 */
#define UNORM_INPUT_IS_FCD          0x20000

// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000

#endif  // __STRINGOPTIONS_H__
Commit	Line	Data
0f5d89e8 A	1	// © 2017 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	// stringoptions.h
	5	// created: 2017jun08 Markus W. Scherer
	6
	7	#ifndef __STRINGOPTIONS_H__
	8	#define __STRINGOPTIONS_H__
	9
	10	#include "unicode/utypes.h"
	11
	12	/**
	13	* \file
	14	* \brief C API: Bit set option bit constants for various string and character processing functions.
	15	*/
	16
	17	/**
	18	* Option value for case folding: Use default mappings defined in CaseFolding.txt.
	19	*
	20	* @stable ICU 2.0
	21	*/
	22	#define U_FOLD_CASE_DEFAULT 0
	23
	24	/**
	25	* Option value for case folding:
	26	*
	27	* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
	28	* and dotless i appropriately for Turkic languages (tr, az).
	29	*
	30	* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
	31	* are to be included for default mappings and
	32	* excluded for the Turkic-specific mappings.
	33	*
	34	* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
	35	* are to be excluded for default mappings and
	36	* included for the Turkic-specific mappings.
	37	*
	38	* @stable ICU 2.0
	39	*/
	40	#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
	41
	42	#ifndef U_HIDE_DRAFT_API
	43
	44	/**
	45	* Titlecase the string as a whole rather than each word.
	46	* (Titlecase only the character at index 0, possibly adjusted.)
	47	* Option bits value for titlecasing APIs that take an options bit set.
	48	*
	49	* It is an error to specify multiple titlecasing iterator options together,
	50	* including both an options bit and an explicit BreakIterator.
	51	*
	52	* @see U_TITLECASE_ADJUST_TO_CASED
	53	* @draft ICU 60
	54	*/
	55	#define U_TITLECASE_WHOLE_STRING 0x20
	56
	57	/**
	58	* Titlecase sentences rather than words.
	59	* (Titlecase only the first character of each sentence, possibly adjusted.)
	60	* Option bits value for titlecasing APIs that take an options bit set.
	61	*
	62	* It is an error to specify multiple titlecasing iterator options together,
	63	* including both an options bit and an explicit BreakIterator.
	64	*
65	* @see U_TITLECASE_ADJUST_TO_CASED
66	* @draft ICU 60
67	*/
68	#define U_TITLECASE_SENTENCES 0x40
69
70	#endif // U_HIDE_DRAFT_API
71
72	/**
73	* Do not lowercase non-initial parts of words when titlecasing.
74	* Option bit for titlecasing APIs that take an options bit set.
75	*
76	* By default, titlecasing will titlecase the character at each
77	* (possibly adjusted) BreakIterator index and
78	* lowercase all other characters up to the next iterator index.
79	* With this option, the other characters will not be modified.
80	*
81	* @see U_TITLECASE_ADJUST_TO_CASED
82	* @see UnicodeString::toTitle
83	* @see CaseMap::toTitle
84	* @see ucasemap_setOptions
85	* @see ucasemap_toTitle
86	* @see ucasemap_utf8ToTitle
87	* @stable ICU 3.8
88	*/
89	#define U_TITLECASE_NO_LOWERCASE 0x100
90
91	/**
92	* Do not adjust the titlecasing BreakIterator indexes;
93	* titlecase exactly the characters at breaks from the iterator.
94	* Option bit for titlecasing APIs that take an options bit set.
95	*
96	* By default, titlecasing will take each break iterator index,
97	* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
98	* and titlecase that one.
99	*
100	* Other characters are lowercased.
101	*
102	* It is an error to specify multiple titlecasing adjustment options together.
103	*
104	* @see U_TITLECASE_ADJUST_TO_CASED
105	* @see U_TITLECASE_NO_LOWERCASE
106	* @see UnicodeString::toTitle
107	* @see CaseMap::toTitle
108	* @see ucasemap_setOptions
109	* @see ucasemap_toTitle
110	* @see ucasemap_utf8ToTitle
111	* @stable ICU 3.8
112	*/
113	#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
114
115	#ifndef U_HIDE_DRAFT_API
116
117	/**
118	* Adjust each titlecasing BreakIterator index to the next cased character.
119	* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
120	* Option bit for titlecasing APIs that take an options bit set.
121	*
122	* This used to be the default index adjustment in ICU.
123	* Since ICU 60, the default index adjustment is to the next character that is
124	* a letter, number, symbol, or private use code point.
125	* (Uncased modifier letters are skipped.)
126	* The difference in behavior is small for word titlecasing,
127	* but the new adjustment is much better for whole-string and sentence titlecasing:
128	* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
129	*
130	* It is an error to specify multiple titlecasing adjustment options together.
131	*
132	* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
133	* @draft ICU 60
134	*/
135	#define U_TITLECASE_ADJUST_TO_CASED 0x400
136
137	/**
138	* Option for string transformation functions to not first reset the Edits object.
139	* Used for example in some case-mapping and normalization functions.
140	*
141	* @see CaseMap
142	* @see Edits
143	* @see Normalizer2
144	* @draft ICU 60
145	*/
146	#define U_EDITS_NO_RESET 0x2000
147
148	/**
149	* Omit unchanged text when recording how source substrings
150	* relate to changed and unchanged result substrings.
151	* Used for example in some case-mapping and normalization functions.
152	*
153	* @see CaseMap
154	* @see Edits
155	* @see Normalizer2
156	* @draft ICU 60
157	*/
158	#define U_OMIT_UNCHANGED_TEXT 0x4000
159
160	#endif // U_HIDE_DRAFT_API
161
162	/**
163	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
164	* Compare strings in code point order instead of code unit order.
165	* @stable ICU 2.2
166	*/
167	#define U_COMPARE_CODE_POINT_ORDER 0x8000
168
169	/**
170	* Option bit for unorm_compare:
171	* Perform case-insensitive comparison.
172	* @stable ICU 2.2
173	*/
174	#define U_COMPARE_IGNORE_CASE 0x10000
175
176	/**
177	* Option bit for unorm_compare:
178	* Both input strings are assumed to fulfill FCD conditions.
179	* @stable ICU 2.2
180	*/
181	#define UNORM_INPUT_IS_FCD 0x20000
182
183	// Related definitions elsewhere.
184	// Options that are not meaningful in the same functions
185	// can share the same bits.
186	//
187	// Public:
188	// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
189	//
190	// Internal: (may change or be removed)
191	// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
192	// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
193	// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
194	// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
195	// ustr_imp.h #define _STRNCMP_STYLE 0x1000
196	// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
197
198	#endif // __STRINGOPTIONS_H__