]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucasemap_imp.h
ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / common / ucasemap_imp.h
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // ucasemap_imp.h
5 // created: 2017feb08 Markus W. Scherer
6
7 #ifndef __UCASEMAP_IMP_H__
8 #define __UCASEMAP_IMP_H__
9
10 #include "unicode/utypes.h"
11 #include "unicode/ucasemap.h"
12 #include "ucase.h"
13
14 #ifndef U_COMPARE_IGNORE_CASE
15 /* see also unorm.h */
16 /**
17 * Option bit for unorm_compare:
18 * Perform case-insensitive comparison.
 *
 * Defined here only when no earlier header has already defined it
 * (see the enclosing #ifndef guard). The value is presumably kept
 * identical to the definition in unorm.h -- verify before changing it.
19 */
20 #define U_COMPARE_IGNORE_CASE 0x10000
21 #endif
22
23 /**
24 * Internal API, used by u_strcasecmp() etc.
25 * Compare strings case-insensitively,
26 * in code point order or code unit order.
 *
 * NOTE(review): the parameter conventions are not spelled out in this header.
 * Presumably they match u_caseInsensitivePrefixMatch() (a length of -1 means
 * NUL-terminated; options is a set of compare option bits) -- confirm
 * against the implementation.
 * @param pErrorCode error status, passed by pointer
 * @return int32_t comparison result; sign convention presumably as for
 *         strcmp()-style functions -- confirm
27 */
28 U_CFUNC int32_t
29 u_strcmpFold(const UChar *s1, int32_t length1,
30 const UChar *s2, int32_t length2,
31 uint32_t options,
32 UErrorCode *pErrorCode);
33
34 /**
35 * Internal API, used for detecting the length of the
36 * shared prefix case-insensitively.
 * The function itself returns nothing; the result is delivered only
 * through the matchLen1 and matchLen2 output parameters.
37 * @param s1 input string 1
38 * @param length1 length of string 1, or -1 (NUL-terminated)
39 * @param s2 input string 2
40 * @param length2 length of string 2, or -1 (NUL-terminated)
41 * @param options compare options
42 * @param matchLen1 (output) length of partial prefix match in s1
43 * @param matchLen2 (output) length of partial prefix match in s2
44 * @param pErrorCode receives error status
45 */
46 U_CAPI void
47 u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
48 const UChar *s2, int32_t length2,
49 uint32_t options,
50 int32_t *matchLen1, int32_t *matchLen2,
51 UErrorCode *pErrorCode);
52
53 /**
54 * Are the Unicode properties loaded?
55 * This must be used before internal functions are called that do
56 * not perform this check.
57 * Generate a debug assertion failure if data is not loaded.
 * @param pErrorCode error status, passed by pointer
 * @return UBool answering the question above; presumably TRUE when the
 *         properties data is available -- confirm in the implementation
58 */
59 U_CFUNC UBool
60 uprv_haveProperties(UErrorCode *pErrorCode);
61
62 #ifdef __cplusplus
63
64 #include "unicode/unistr.h" // for UStringCaseMapper
65
66 /*
67 * Internal string casing functions implementing
68 * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
69 */
70
/**
 * Object behind the UCaseMap type: holds a locale, option bits and, when
 * break iteration is compiled in, an owned break iterator.
 * Derives from icu::UMemory.
 */
71 struct UCaseMap : public icu::UMemory {
72 /** Implements most of ucasemap_open(). */
73 UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
74 ~UCaseMap();
75
/* The iterator member exists only when UCONFIG_NO_BREAK_ITERATION is off. */
76 #if !UCONFIG_NO_BREAK_ITERATION
77 icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
78 #endif
/* Locale ID storage with a fixed capacity of 32 chars. */
79 char locale[32];
/* Integer case-locale code; presumably the value produced by
 * ustrcase_getCaseLocale() for the locale above -- confirm. */
80 int32_t caseLocale;
/* Option bits; presumably initialized from the constructor's opts -- confirm. */
81 uint32_t options;
82 };
83
/*
 * Helper macros for optionally threading a break iterator through
 * function signatures and calls.
 * When UCONFIG_NO_BREAK_ITERATION is set, all four expand to nothing.
 * Otherwise each expands to the text shown, including a trailing comma so
 * that it can be written directly in front of the next parameter/argument:
 *   UCASEMAP_BREAK_ITERATOR_PARAM  - named parameter declaration
 *   UCASEMAP_BREAK_ITERATOR_UNUSED - unnamed parameter declaration
 *   UCASEMAP_BREAK_ITERATOR        - the argument "iter"
 *   UCASEMAP_BREAK_ITERATOR_NULL   - a NULL argument
 */
84 #if UCONFIG_NO_BREAK_ITERATION
85 # define UCASEMAP_BREAK_ITERATOR_PARAM
86 # define UCASEMAP_BREAK_ITERATOR_UNUSED
87 # define UCASEMAP_BREAK_ITERATOR
88 # define UCASEMAP_BREAK_ITERATOR_NULL
89 #else
90 # define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
91 # define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
92 # define UCASEMAP_BREAK_ITERATOR iter,
93 # define UCASEMAP_BREAK_ITERATOR_NULL NULL,
94 #endif
95
/**
 * Maps a locale ID string to an int32_t case-locale code.
 * NOTE(review): presumably this is the value expected as the caseLocale
 * argument of the ustrcase_internal*() / ustrcase_map() functions declared
 * in this header -- confirm against the implementation.
 * @param locale locale ID string
 * @return case-locale code
 */
96 U_CFUNC int32_t
97 ustrcase_getCaseLocale(const char *locale);
98
99 // TODO: swap src / dest if approved for new public api
100 /** Implements UStringCaseMapper.
 * Lowercasing variant, going by the function name.
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * The error code is taken by C++ reference, unlike the pointer-based
 * C-style functions declared earlier in this header.
 * NOTE(review): whether edits may be NULL is not visible here -- confirm.
 */
101 U_CFUNC int32_t U_CALLCONV
102 ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
103 UChar *dest, int32_t destCapacity,
104 const UChar *src, int32_t srcLength,
105 icu::Edits *edits,
106 UErrorCode &errorCode);
107
108 /** Implements UStringCaseMapper.
 * Uppercasing variant, going by the function name.
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * The error code is taken by C++ reference.
 * NOTE(review): whether edits may be NULL is not visible here -- confirm.
 */
109 U_CFUNC int32_t U_CALLCONV
110 ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
111 UChar *dest, int32_t destCapacity,
112 const UChar *src, int32_t srcLength,
113 icu::Edits *edits,
114 UErrorCode &errorCode);
115
116 #if !UCONFIG_NO_BREAK_ITERATION
117
118 /** Implements UStringCaseMapper.
 * Titlecasing variant, going by the function name.
 * Declared only when UCONFIG_NO_BREAK_ITERATION is off (enclosing #if), so
 * the break iterator is an explicit parameter here instead of coming from
 * UCASEMAP_BREAK_ITERATOR_PARAM.
 * The error code is taken by C++ reference.
 */
119 U_CFUNC int32_t U_CALLCONV
120 ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
121 icu::BreakIterator *iter,
122 UChar *dest, int32_t destCapacity,
123 const UChar *src, int32_t srcLength,
124 icu::Edits *edits,
125 UErrorCode &errorCode);
126
127 #endif
128
129 /** Implements UStringCaseMapper.
 * Case-folding variant, going by the function name.
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * The error code is taken by C++ reference.
 * NOTE(review): whether edits may be NULL is not visible here -- confirm.
 */
130 U_CFUNC int32_t U_CALLCONV
131 ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
132 UChar *dest, int32_t destCapacity,
133 const UChar *src, int32_t srcLength,
134 icu::Edits *edits,
135 UErrorCode &errorCode);
136
137 /**
138 * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
139 * Implements argument checking.
 *
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * @param stringCaseMapper pointer to a UStringCaseMapper function; presumably
 *        the mapper invoked once the arguments have been checked -- confirm
 * @param edits passed as icu::Edits *; NOTE(review): NULL handling is not
 *        visible in this header
 * @param errorCode error status, taken by C++ reference
140 */
141 U_CFUNC int32_t
142 ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
143 UChar *dest, int32_t destCapacity,
144 const UChar *src, int32_t srcLength,
145 UStringCaseMapper *stringCaseMapper,
146 icu::Edits *edits,
147 UErrorCode &errorCode);
148
149 /**
150 * Common string case mapping implementation for old-fashioned u_strToXyz() functions
151 * that allow the source string to overlap the destination buffer.
152 * Implements argument checking and internally works with an intermediate buffer if necessary.
 *
 * Unlike ustrcase_map(), this signature has no icu::Edits parameter.
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * @param stringCaseMapper pointer to a UStringCaseMapper function
 * @param errorCode error status, taken by C++ reference
153 */
154 U_CFUNC int32_t
155 ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
156 UChar *dest, int32_t destCapacity,
157 const UChar *src, int32_t srcLength,
158 UStringCaseMapper *stringCaseMapper,
159 UErrorCode &errorCode);
160
161 /**
162 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
163 * UTF-8 version of UStringCaseMapper.
164 * All error checking must be done.
165 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
166 * src and dest must not overlap.
 *
 * This typedef names a function type, not a pointer type; ucasemap_mapUTF8()
 * accepts it as UTF8CaseMapper *.
 * The iter parameter is part of the signature only when
 * UCONFIG_NO_BREAK_ITERATION is off.
167 */
168 typedef int32_t U_CALLCONV
169 UTF8CaseMapper(int32_t caseLocale, uint32_t options,
170 #if !UCONFIG_NO_BREAK_ITERATION
171 icu::BreakIterator *iter,
172 #endif
173 uint8_t *dest, int32_t destCapacity,
174 const uint8_t *src, int32_t srcLength,
175 icu::Edits *edits,
176 UErrorCode &errorCode);
177
178 #if !UCONFIG_NO_BREAK_ITERATION
179
180 /** Implements UTF8CaseMapper.
 * Titlecasing variant for UTF-8 (uint8_t) buffers, going by the function name.
 * Declared only when UCONFIG_NO_BREAK_ITERATION is off (enclosing #if), so
 * the break iterator is always an explicit parameter here.
 * The error code is taken by C++ reference.
 */
181 U_CFUNC int32_t U_CALLCONV
182 ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
183 icu::BreakIterator *iter,
184 uint8_t *dest, int32_t destCapacity,
185 const uint8_t *src, int32_t srcLength,
186 icu::Edits *edits,
187 UErrorCode &errorCode);
188
189 #endif
190
191 /**
192 * Implements argument checking and buffer handling
193 * for UTF-8 string case mapping as a common function.
 *
 * UCASEMAP_BREAK_ITERATOR_PARAM adds an icu::BreakIterator *iter parameter
 * after options unless UCONFIG_NO_BREAK_ITERATION is set.
 * @param stringCaseMapper pointer to a UTF8CaseMapper function; presumably
 *        the mapper invoked once the arguments have been checked -- confirm
 * @param edits passed as icu::Edits *; NOTE(review): NULL handling is not
 *        visible in this header
 * @param errorCode error status, taken by C++ reference
194 */
195 U_CFUNC int32_t
196 ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
197 uint8_t *dest, int32_t destCapacity,
198 const uint8_t *src, int32_t srcLength,
199 UTF8CaseMapper *stringCaseMapper,
200 icu::Edits *edits,
201 UErrorCode &errorCode);
202
203 U_NAMESPACE_BEGIN
/*
 * Constants and lookup functions for Greek uppercasing (going by the
 * namespace name). All constants are uint32_t bit masks/flags.
 */
204 namespace GreekUpper {
205
206 // Data bits.
// Mask for the low 10 bits (0x3ff); presumably the uppercase mapping
// stored in the per-letter data -- confirm in the implementation.
207 static const uint32_t UPPER_MASK = 0x3ff;
// Single-bit property flags, bits 12..15.
208 static const uint32_t HAS_VOWEL = 0x1000;
209 static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
210 static const uint32_t HAS_ACCENT = 0x4000;
211 static const uint32_t HAS_DIALYTIKA = 0x8000;
212 // Further bits during data building and processing, not stored in the data map.
213 static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
214 static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
215
// Convenience combinations of the single-bit flags above.
216 static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
217 static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
218 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
219 static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
220
221 // State bits.
// Two distinct bits (1 and 2), so both states can be set at once.
222 static const uint32_t AFTER_CASED = 1;
223 static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
224
// Returns a uint32_t for code point c; presumably a combination of the
// "Data bits" above (0 meaning no data) -- confirm in the implementation.
225 uint32_t getLetterData(UChar32 c);
226
227 /**
228 * Returns a non-zero value for each of the Greek combining diacritics
229 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
230 * plus some perispomeni look-alikes.
231 */
232 uint32_t getDiacriticData(UChar32 c);
233
234 } // namespace GreekUpper
235 U_NAMESPACE_END
236
237 #endif // __cplusplus
238
239 #endif // __UCASEMAP_IMP_H__