1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 // created: 2017feb08 Markus W. Scherer
7 #ifndef __UCASEMAP_IMP_H__
8 #define __UCASEMAP_IMP_H__
10 #include "unicode/utypes.h"
11 #include "unicode/ucasemap.h"
14 #ifndef U_COMPARE_IGNORE_CASE
15 /* see also unorm.h */
/**
 * Option bit for unorm_compare:
 * Perform case-insensitive comparison.
 */
20 #define U_COMPARE_IGNORE_CASE 0x10000
/**
 * Internal API, used by u_strcasecmp() etc.
 * Compares strings case-insensitively,
 * in code point order or code unit order.
 */
// Declaration of u_strcmpFold(): takes two UChar strings, each paired with an
// int32_t length, followed by a UErrorCode pointer.
// NOTE(review): the line carrying the return type is not present in this listing,
// and the embedded listing numbers skip from 30 to 32 inside the parameter list,
// so a parameter line appears to be missing — confirm against the upstream header.
29 u_strcmpFold(const UChar
*s1
, int32_t length1
,
30 const UChar
*s2
, int32_t length2
,
32 UErrorCode
*pErrorCode
);
/**
 * Internal API, used for detecting the length of the
 * shared prefix of two strings, case-insensitively.
 * @param s1            input string 1
 * @param length1       length of string 1, or -1 (NUL-terminated)
 * @param s2            input string 2
 * @param length2       length of string 2, or -1 (NUL-terminated)
 * @param options       compare options
 * @param matchLen1     (output) length of partial prefix match in s1
 * @param matchLen2     (output) length of partial prefix match in s2
 * @param pErrorCode    receives error status
 */
// Declaration of u_caseInsensitivePrefixMatch(): two UChar strings with their
// int32_t lengths, two int32_t output pointers (matchLen1, matchLen2) and a
// UErrorCode pointer.
// NOTE(review): the return-type line is not present in this listing. The comment
// above documents an `options` parameter that is not visible here, and the
// embedded listing numbers skip from 48 to 50 at that position — the parameter
// line appears to have been dropped; confirm against the upstream header.
47 u_caseInsensitivePrefixMatch(const UChar
*s1
, int32_t length1
,
48 const UChar
*s2
, int32_t length2
,
50 int32_t *matchLen1
, int32_t *matchLen2
,
51 UErrorCode
*pErrorCode
);
/**
 * Are the Unicode properties loaded?
 * This must be used before internal functions are called that do
 * not perform this check.
 * Generates a debug assertion failure if data is not loaded.
 */
// Declaration of uprv_haveProperties(): the only parameter is a UErrorCode pointer.
// NOTE(review): the line carrying the return type is not present in this listing
// (the embedded numbering jumps straight to 60) — confirm against the upstream header.
60 uprv_haveProperties(UErrorCode
*pErrorCode
);
64 #include "unicode/unistr.h" // for UStringCaseMapper
/*
 * Internal string casing functions implementing
 * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
 */
// UCaseMap: struct derived from icu::UMemory. Visible members are a constructor
// taking (localeID, opts, pErrorCode) and, only when break iteration is compiled
// in, an owned icu::BreakIterator pointer `iter`.
// NOTE(review): the listing stops after `iter` — the remaining members, the
// matching #endif and the closing brace are not visible here; do not treat this
// as the complete definition.
71 struct UCaseMap
: public icu::UMemory
{
72 /** Implements most of ucasemap_open(). */
73 UCaseMap(const char *localeID
, uint32_t opts
, UErrorCode
*pErrorCode
);
76 #if !UCONFIG_NO_BREAK_ITERATION
77 icu::BreakIterator
*iter
; /* We adopt the iterator, so we own it. */
/*
 * Helper macros for passing an optional break iterator through the internal
 * case-mapping signatures and calls.
 *
 * When UCONFIG_NO_BREAK_ITERATION is set, all four expand to nothing, so the
 * iterator parameter/argument disappears entirely. Otherwise each expands to
 * one list element INCLUDING its trailing comma:
 *   UCASEMAP_BREAK_ITERATOR_PARAM   named parameter in a declaration/definition
 *   UCASEMAP_BREAK_ITERATOR_UNUSED  unnamed parameter (iterator not used)
 *   UCASEMAP_BREAK_ITERATOR         forwards the local `iter` in a call
 *   UCASEMAP_BREAK_ITERATOR_NULL    passes NULL in a call
 * Because the comma is part of the expansion, write the macro directly in
 * front of the next parameter/argument without adding another comma.
 */
#if UCONFIG_NO_BREAK_ITERATION
#   define UCASEMAP_BREAK_ITERATOR_PARAM
#   define UCASEMAP_BREAK_ITERATOR_UNUSED
#   define UCASEMAP_BREAK_ITERATOR
#   define UCASEMAP_BREAK_ITERATOR_NULL
#else
#   define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
#   define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
#   define UCASEMAP_BREAK_ITERATOR iter,
#   define UCASEMAP_BREAK_ITERATOR_NULL NULL,
#endif
// Declaration of ustrcase_getCaseLocale(): takes a locale as a C string.
// NOTE(review): the line carrying the return type is not present in this listing.
// The other declarations here pass an `int32_t caseLocale`, which is presumably
// what this returns — confirm against the upstream header.
97 ustrcase_getCaseLocale(const char *locale
);
99 // TODO: swap src / dest if approved for new public api
100 /** Implements UStringCaseMapper. */
// Lower-casing entry point: returns int32_t; declared U_CFUNC with U_CALLCONV.
// Parameters: caseLocale and options, the optional break-iterator parameter
// supplied by UCASEMAP_BREAK_ITERATOR_PARAM, the destination buffer and its
// capacity, the source string and its length, and errorCode by reference.
// NOTE(review): the embedded listing numbers skip from 104 to 106 inside the
// parameter list, so a parameter line appears to be missing between
// src/srcLength and errorCode — confirm against the upstream header.
101 U_CFUNC
int32_t U_CALLCONV
102 ustrcase_internalToLower(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
103 UChar
*dest
, int32_t destCapacity
,
104 const UChar
*src
, int32_t srcLength
,
106 UErrorCode
&errorCode
);
108 /** Implements UStringCaseMapper. */
// Upper-casing entry point: returns int32_t; declared U_CFUNC with U_CALLCONV.
// Parameter list mirrors ustrcase_internalToLower: caseLocale, options, the
// optional break-iterator parameter from UCASEMAP_BREAK_ITERATOR_PARAM,
// dest/destCapacity, src/srcLength, and errorCode by reference.
// NOTE(review): the embedded listing numbers skip from 112 to 114 inside the
// parameter list, so a parameter line appears to be missing between
// src/srcLength and errorCode — confirm against the upstream header.
109 U_CFUNC
int32_t U_CALLCONV
110 ustrcase_internalToUpper(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
111 UChar
*dest
, int32_t destCapacity
,
112 const UChar
*src
, int32_t srcLength
,
114 UErrorCode
&errorCode
);
116 #if !UCONFIG_NO_BREAK_ITERATION
118 /** Implements UStringCaseMapper. */
// Title-casing entry point: returns int32_t; declared U_CFUNC with U_CALLCONV.
// Unlike the lower/upper/fold variants it names the icu::BreakIterator *iter
// parameter directly instead of going through UCASEMAP_BREAK_ITERATOR_PARAM;
// the declaration sits after an `#if !UCONFIG_NO_BREAK_ITERATION` line.
// NOTE(review): the embedded listing numbers skip from 123 to 125 inside the
// parameter list, so a parameter line appears to be missing, and the #endif
// closing the conditional is not visible — confirm against the upstream header.
119 U_CFUNC
int32_t U_CALLCONV
120 ustrcase_internalToTitle(int32_t caseLocale
, uint32_t options
,
121 icu::BreakIterator
*iter
,
122 UChar
*dest
, int32_t destCapacity
,
123 const UChar
*src
, int32_t srcLength
,
125 UErrorCode
&errorCode
);
129 /** Implements UStringCaseMapper. */
// Case-folding entry point: returns int32_t; declared U_CFUNC with U_CALLCONV.
// Parameter list mirrors ustrcase_internalToLower: caseLocale, options, the
// optional break-iterator parameter from UCASEMAP_BREAK_ITERATOR_PARAM,
// dest/destCapacity, src/srcLength, and errorCode by reference.
// NOTE(review): the embedded listing numbers skip from 133 to 135 inside the
// parameter list, so a parameter line appears to be missing between
// src/srcLength and errorCode — confirm against the upstream header.
130 U_CFUNC
int32_t U_CALLCONV
131 ustrcase_internalFold(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
132 UChar
*dest
, int32_t destCapacity
,
133 const UChar
*src
, int32_t srcLength
,
135 UErrorCode
&errorCode
);
/**
 * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
 * Implements argument checking.
 */
// Declaration of ustrcase_map(): caseLocale, options, the optional break-iterator
// parameter from UCASEMAP_BREAK_ITERATOR_PARAM, dest/destCapacity, src/srcLength,
// a pointer to the UStringCaseMapper to apply, and errorCode by reference.
// The ustrcase_internalToXyz/Fold declarations in this file are commented as
// implementing UStringCaseMapper.
// NOTE(review): the return-type line is not present in this listing, and the
// embedded listing numbers skip from 145 to 147 inside the parameter list, so a
// parameter line appears to be missing — confirm against the upstream header.
142 ustrcase_map(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
143 UChar
*dest
, int32_t destCapacity
,
144 const UChar
*src
, int32_t srcLength
,
145 UStringCaseMapper
*stringCaseMapper
,
147 UErrorCode
&errorCode
);
/**
 * Common string case mapping implementation for old-fashioned u_strToXyz() functions
 * that allow the source string to overlap the destination buffer.
 * Implements argument checking and internally works with an intermediate buffer if necessary.
 */
// Declaration of ustrcase_mapWithOverlap(): caseLocale, options, the optional
// break-iterator parameter from UCASEMAP_BREAK_ITERATOR_PARAM, dest/destCapacity,
// src/srcLength, a pointer to the UStringCaseMapper to apply, and errorCode by
// reference.
// NOTE(review): the line carrying the return type is not present in this listing
// (the embedded numbering starts at 155) — confirm against the upstream header.
155 ustrcase_mapWithOverlap(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
156 UChar
*dest
, int32_t destCapacity
,
157 const UChar
*src
, int32_t srcLength
,
158 UStringCaseMapper
*stringCaseMapper
,
159 UErrorCode
&errorCode
);
/**
 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
 * UTF-8 version of UStringCaseMapper.
 * All error checking must already have been done before a function of this type is called.
 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
 * src and dest must not overlap.
 */
// UTF8CaseMapper: function type returning int32_t with U_CALLCONV. Parameters are
// caseLocale, options, an icu::BreakIterator *iter that is only present when
// break iteration is compiled in (!UCONFIG_NO_BREAK_ITERATION), the uint8_t
// destination buffer and its capacity, the uint8_t source and its length, and
// errorCode by reference.
// NOTE(review): no #endif is visible for the #if inside the parameter list (the
// embedded listing numbers skip 171 -> 173), and they also skip 174 -> 176, so a
// parameter line appears to be missing as well — confirm against the upstream header.
168 typedef int32_t U_CALLCONV
169 UTF8CaseMapper(int32_t caseLocale
, uint32_t options
,
170 #if !UCONFIG_NO_BREAK_ITERATION
171 icu::BreakIterator
*iter
,
173 uint8_t *dest
, int32_t destCapacity
,
174 const uint8_t *src
, int32_t srcLength
,
176 UErrorCode
&errorCode
);
178 #if !UCONFIG_NO_BREAK_ITERATION
180 /** Implements UTF8CaseMapper. */
// UTF-8 title-casing entry point: returns int32_t; declared U_CFUNC with
// U_CALLCONV. Takes caseLocale, options, an explicit icu::BreakIterator *iter,
// the uint8_t destination buffer and capacity, the uint8_t source and length,
// and errorCode by reference. The declaration sits after an
// `#if !UCONFIG_NO_BREAK_ITERATION` line.
// NOTE(review): the embedded listing numbers skip from 185 to 187 inside the
// parameter list, so a parameter line appears to be missing, and the #endif
// closing the conditional is not visible — confirm against the upstream header.
181 U_CFUNC
int32_t U_CALLCONV
182 ucasemap_internalUTF8ToTitle(int32_t caseLocale
, uint32_t options
,
183 icu::BreakIterator
*iter
,
184 uint8_t *dest
, int32_t destCapacity
,
185 const uint8_t *src
, int32_t srcLength
,
187 UErrorCode
&errorCode
);
/**
 * Implements argument checking and buffer handling
 * for UTF-8 string case mapping as a common function.
 */
// Declaration of ucasemap_mapUTF8(): caseLocale, options, the optional
// break-iterator parameter from UCASEMAP_BREAK_ITERATOR_PARAM, the uint8_t
// destination buffer and capacity, the uint8_t source and length, a pointer to
// the UTF8CaseMapper to apply, and errorCode by reference.
// NOTE(review): the return-type line is not present in this listing, and the
// embedded listing numbers skip from 199 to 201 inside the parameter list, so a
// parameter line appears to be missing — confirm against the upstream header.
196 ucasemap_mapUTF8(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
197 uint8_t *dest
, int32_t destCapacity
,
198 const uint8_t *src
, int32_t srcLength
,
199 UTF8CaseMapper
*stringCaseMapper
,
201 UErrorCode
&errorCode
);
204 namespace GreekUpper
{
207 static const uint32_t UPPER_MASK
= 0x3ff;
208 static const uint32_t HAS_VOWEL
= 0x1000;
209 static const uint32_t HAS_YPOGEGRAMMENI
= 0x2000;
210 static const uint32_t HAS_ACCENT
= 0x4000;
211 static const uint32_t HAS_DIALYTIKA
= 0x8000;
212 // Further bits during data building and processing, not stored in the data map.
213 static const uint32_t HAS_COMBINING_DIALYTIKA
= 0x10000;
214 static const uint32_t HAS_OTHER_GREEK_DIACRITIC
= 0x20000;
216 static const uint32_t HAS_VOWEL_AND_ACCENT
= HAS_VOWEL
| HAS_ACCENT
;
217 static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA
=
218 HAS_VOWEL_AND_ACCENT
| HAS_DIALYTIKA
;
219 static const uint32_t HAS_EITHER_DIALYTIKA
= HAS_DIALYTIKA
| HAS_COMBINING_DIALYTIKA
;
222 static const uint32_t AFTER_CASED
= 1;
223 static const uint32_t AFTER_VOWEL_WITH_ACCENT
= 2;
225 uint32_t getLetterData(UChar32 c
);
228 * Returns a non-zero value for each of the Greek combining diacritics
229 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
230 * plus some perispomeni look-alikes.
232 uint32_t getDiacriticData(UChar32 c
);
234 } // namespace GreekUpper
237 #endif // __cplusplus
239 #endif // __UCASEMAP_IMP_H__