]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucasemap_imp.h
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / ucasemap_imp.h
CommitLineData
f3c0d7a5
A
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// ucasemap_imp.h
5// created: 2017feb08 Markus W. Scherer
6
7#ifndef __UCASEMAP_IMP_H__
8#define __UCASEMAP_IMP_H__
9
10#include "unicode/utypes.h"
11#include "unicode/ucasemap.h"
12#include "ucase.h"
13
14#ifndef U_COMPARE_IGNORE_CASE
15/* see also unorm.h */
16/**
17 * Option bit for unorm_compare:
18 * Perform case-insensitive comparison.
 *
 * The surrounding #ifndef means this header only supplies the definition
 * when no previously included header has already defined the macro.
19 */
20#define U_COMPARE_IGNORE_CASE 0x10000
21#endif
22
23/**
24 * Internal API, used by u_strcasecmp() etc.
25 * Compare strings case-insensitively,
26 * in code point order or code unit order.
 *
 * @param s1 first input string
 * @param length1 length of s1 (NOTE(review): presumably -1 is accepted for a
 *        NUL-terminated string, as documented for
 *        u_caseInsensitivePrefixMatch() -- confirm against the implementation)
 * @param s2 second input string
 * @param length2 length of s2 (same caveat as length1)
 * @param options comparison option bits (NOTE(review): presumably these select
 *        between code point order and code unit order -- confirm)
 * @param pErrorCode in/out ICU error code
 * @return int32_t comparison result (NOTE(review): the sign convention is not
 *         visible in this header -- confirm against the implementation)
27 */
28U_CFUNC int32_t
29u_strcmpFold(const UChar *s1, int32_t length1,
30 const UChar *s2, int32_t length2,
31 uint32_t options,
32 UErrorCode *pErrorCode);
33
34/**
35 * Internal API, used for detecting the length of
36 * a shared prefix case-insensitively.
 * The function itself returns void; results are delivered through
 * matchLen1 and matchLen2.
37 * @param s1 input string 1
38 * @param length1 length of string 1, or -1 (NUL-terminated)
39 * @param s2 input string 2
40 * @param length2 length of string 2, or -1 (NUL-terminated)
41 * @param options compare options
42 * @param matchLen1 (output) length of partial prefix match in s1
43 * @param matchLen2 (output) length of partial prefix match in s2
44 * @param pErrorCode receives error status
45 */
46U_CAPI void
47u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
48 const UChar *s2, int32_t length2,
49 uint32_t options,
50 int32_t *matchLen1, int32_t *matchLen2,
51 UErrorCode *pErrorCode);
52
53/**
54 * Are the Unicode properties loaded?
55 * This must be used before internal functions are called that do
56 * not perform this check.
57 * Generate a debug assertion failure if data is not loaded.
 *
 * @param pErrorCode in/out ICU error code
 * @return UBool answer to the question above (NOTE(review): presumably TRUE
 *         when the properties data is available -- confirm against the
 *         implementation)
58 */
59U_CFUNC UBool
60uprv_haveProperties(UErrorCode *pErrorCode);
61
62#ifdef __cplusplus
63
64#include "unicode/unistr.h" // for UStringCaseMapper
65
66/*
67 * Internal string casing functions implementing
68 * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
69 */
70
// Definition of the UCaseMap service object (C++ only).
// Holds a locale string, an integer case-locale code, option bits and,
// when break iteration is compiled in, an owned BreakIterator.
71struct UCaseMap : public icu::UMemory {
72 /** Implements most of ucasemap_open(). */
73 UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
74 ~UCaseMap();
75
// The iterator member exists only when break iteration is not configured out.
76#if !UCONFIG_NO_BREAK_ITERATION
77 icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
78#endif
79 char locale[32]; // fixed 32-char buffer (presumably filled from localeID -- confirm)
80 int32_t caseLocale; // NOTE(review): presumably a code as returned by ustrcase_getCaseLocale() -- confirm
81 uint32_t options; // option bits (presumably initialized from the constructor's opts -- confirm)
82};
83
// Helper macros for splicing an optional BreakIterator into parameter and
// argument lists. With UCONFIG_NO_BREAK_ITERATION set they all expand to
// nothing; otherwise each expands to one list element INCLUDING its trailing
// comma, so it can be written directly in front of the next parameter/argument:
//   _PARAM  - named parameter declaration ("icu::BreakIterator *iter,")
//   _UNUSED - unnamed parameter declaration ("icu::BreakIterator *,")
//   (plain) - argument forwarding the variable named iter ("iter,")
//   _NULL   - argument passing no iterator ("NULL,")
84#if UCONFIG_NO_BREAK_ITERATION
85# define UCASEMAP_BREAK_ITERATOR_PARAM
86# define UCASEMAP_BREAK_ITERATOR_UNUSED
87# define UCASEMAP_BREAK_ITERATOR
88# define UCASEMAP_BREAK_ITERATOR_NULL
89#else
90# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
91# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
92# define UCASEMAP_BREAK_ITERATOR iter,
93# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
94#endif
95
/**
 * Takes a locale ID string and returns an int32_t.
 * NOTE(review): presumably the result is the caseLocale code that the
 * ustrcase_internalToXyz() functions declared in this header accept --
 * confirm against the implementation.
 */
96U_CFUNC int32_t
97ustrcase_getCaseLocale(const char *locale);
98
99// TODO: swap src / dest if approved for new public api
100/**
 * Implements UStringCaseMapper.
 * UCASEMAP_BREAK_ITERATOR_PARAM contributes a BreakIterator parameter only
 * when break iteration is compiled in (!UCONFIG_NO_BREAK_ITERATION).
 * NOTE(review): judging by the name this is the lowercasing mapper; the
 * implementation is not visible in this header.
 */
101U_CFUNC int32_t U_CALLCONV
102ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
103 UChar *dest, int32_t destCapacity,
104 const UChar *src, int32_t srcLength,
105 icu::Edits *edits,
106 UErrorCode &errorCode);
107
108/**
 * Implements UStringCaseMapper.
 * UCASEMAP_BREAK_ITERATOR_PARAM contributes a BreakIterator parameter only
 * when break iteration is compiled in (!UCONFIG_NO_BREAK_ITERATION).
 * NOTE(review): judging by the name this is the uppercasing mapper; the
 * implementation is not visible in this header.
 */
109U_CFUNC int32_t U_CALLCONV
110ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
111 UChar *dest, int32_t destCapacity,
112 const UChar *src, int32_t srcLength,
113 icu::Edits *edits,
114 UErrorCode &errorCode);
115
116#if !UCONFIG_NO_BREAK_ITERATION
117
118/**
 * Implements UStringCaseMapper.
 * Declared only when break iteration is compiled in (see the enclosing
 * #if !UCONFIG_NO_BREAK_ITERATION), and therefore takes the BreakIterator
 * as an explicit parameter rather than via UCASEMAP_BREAK_ITERATOR_PARAM.
 */
119U_CFUNC int32_t U_CALLCONV
120ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
121 icu::BreakIterator *iter,
122 UChar *dest, int32_t destCapacity,
123 const UChar *src, int32_t srcLength,
124 icu::Edits *edits,
125 UErrorCode &errorCode);
126
127#endif
128
129/**
 * Implements UStringCaseMapper.
 * UCASEMAP_BREAK_ITERATOR_PARAM contributes a BreakIterator parameter only
 * when break iteration is compiled in (!UCONFIG_NO_BREAK_ITERATION).
 * NOTE(review): judging by the name this is the case-folding mapper; the
 * implementation is not visible in this header.
 */
130U_CFUNC int32_t U_CALLCONV
131ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
132 UChar *dest, int32_t destCapacity,
133 const UChar *src, int32_t srcLength,
134 icu::Edits *edits,
135 UErrorCode &errorCode);
136
137/**
138 * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
139 * Implements argument checking.
 *
 * The mapping to apply is supplied by the caller as a UStringCaseMapper
 * function pointer (stringCaseMapper).
 * NOTE(review): presumably one of the ustrcase_internalToXyz() functions
 * declared in this header is passed here -- confirm at the call sites.
140 */
141U_CFUNC int32_t
142ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
143 UChar *dest, int32_t destCapacity,
144 const UChar *src, int32_t srcLength,
145 UStringCaseMapper *stringCaseMapper,
146 icu::Edits *edits,
147 UErrorCode &errorCode);
148
149/**
150 * Common string case mapping implementation for old-fashioned u_strToXyz() functions
151 * that allow the source string to overlap the destination buffer.
152 * Implements argument checking and internally works with an intermediate buffer if necessary.
 *
 * Unlike ustrcase_map(), this function has no icu::Edits parameter.
153 */
154U_CFUNC int32_t
155ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
156 UChar *dest, int32_t destCapacity,
157 const UChar *src, int32_t srcLength,
158 UStringCaseMapper *stringCaseMapper,
159 UErrorCode &errorCode);
160
161/**
162 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
163 * UTF-8 version of UStringCaseMapper.
164 * All error checking must be done.
165 * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
166 * src and dest must not overlap.
 *
 * NOTE(review): the signature below takes caseLocale, options and (when
 * break iteration is compiled in) iter directly and has no UCaseMap
 * parameter, so the sentence about the UCaseMap above may be stale -- confirm.
167 */
168typedef int32_t U_CALLCONV
169UTF8CaseMapper(int32_t caseLocale, uint32_t options,
170#if !UCONFIG_NO_BREAK_ITERATION
171 icu::BreakIterator *iter,
172#endif
173 uint8_t *dest, int32_t destCapacity,
174 const uint8_t *src, int32_t srcLength,
175 icu::Edits *edits,
176 UErrorCode &errorCode);
177
178#if !UCONFIG_NO_BREAK_ITERATION
179
180/**
 * Implements UTF8CaseMapper.
 * Declared only when break iteration is compiled in (see the enclosing
 * #if !UCONFIG_NO_BREAK_ITERATION); the BreakIterator is an explicit parameter.
 */
181U_CFUNC int32_t U_CALLCONV
182ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
183 icu::BreakIterator *iter,
184 uint8_t *dest, int32_t destCapacity,
185 const uint8_t *src, int32_t srcLength,
186 icu::Edits *edits,
187 UErrorCode &errorCode);
188
189#endif
190
191/**
192 * Implements argument checking and buffer handling
193 * for UTF-8 string case mapping as a common function.
 *
 * The mapping to apply is supplied by the caller as a UTF8CaseMapper
 * function pointer (the parameter is named stringCaseMapper here too).
 * UCASEMAP_BREAK_ITERATOR_PARAM contributes a BreakIterator parameter only
 * when break iteration is compiled in (!UCONFIG_NO_BREAK_ITERATION).
194 */
195U_CFUNC int32_t
196ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
197 uint8_t *dest, int32_t destCapacity,
198 const uint8_t *src, int32_t srcLength,
199 UTF8CaseMapper *stringCaseMapper,
200 icu::Edits *edits,
201 UErrorCode &errorCode);
202
203U_NAMESPACE_BEGIN
// Constants and lookup functions for Greek uppercasing.
204namespace GreekUpper {
205
206// Data bits.
// UPPER_MASK covers the low 10 bits; each HAS_* flag below is a single bit
// (0x1000 = bit 12 ... 0x8000 = bit 15).
207static const uint32_t UPPER_MASK = 0x3ff;
208static const uint32_t HAS_VOWEL = 0x1000;
209static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
210static const uint32_t HAS_ACCENT = 0x4000;
211static const uint32_t HAS_DIALYTIKA = 0x8000;
212// Further bits during data building and processing, not stored in the data map.
// These two occupy bits 16 and 17, above the stored data bits.
213static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
214static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
215
// Convenience combinations of the single-bit flags above.
216static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
217static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
218 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
219static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
220
221// State bits.
222static const uint32_t AFTER_CASED = 1;
223static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
224
// Returns a uint32_t data word for code point c.
// NOTE(review): presumably composed of the "Data bits" above (UPPER_MASK
// plus HAS_* flags) -- confirm against the implementation.
225uint32_t getLetterData(UChar32 c);
226
227/**
228 * Returns a non-zero value for each of the Greek combining diacritics
229 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
230 * plus some perispomeni look-alikes.
231 */
232uint32_t getDiacriticData(UChar32 c);
233
234} // namespace GreekUpper
235U_NAMESPACE_END
236
237#endif // __cplusplus
238
239#endif // __UCASEMAP_IMP_H__