]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/norm2allmodes.h
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / norm2allmodes.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * norm2allmodes.h
9 *
10 * created on: 2014sep07
11 * created by: Markus W. Scherer
12 */
13
14 #ifndef __NORM2ALLMODES_H__
15 #define __NORM2ALLMODES_H__
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/normalizer2.h"
22 #include "unicode/unistr.h"
23 #include "cpputils.h"
24 #include "normalizer2impl.h"
25
26 U_NAMESPACE_BEGIN
27
28 // Intermediate class:
29 // Has Normalizer2Impl and does boilerplate argument checking and setup.
30 class Normalizer2WithImpl : public Normalizer2 {
31 public:
32 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
33 virtual ~Normalizer2WithImpl();
34
35 // normalize
36 virtual UnicodeString &
37 normalize(const UnicodeString &src,
38 UnicodeString &dest,
39 UErrorCode &errorCode) const {
40 if(U_FAILURE(errorCode)) {
41 dest.setToBogus();
42 return dest;
43 }
44 const UChar *sArray=src.getBuffer();
45 if(&dest==&src || sArray==NULL) {
46 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47 dest.setToBogus();
48 return dest;
49 }
50 dest.remove();
51 ReorderingBuffer buffer(impl, dest);
52 if(buffer.init(src.length(), errorCode)) {
53 normalize(sArray, sArray+src.length(), buffer, errorCode);
54 }
55 return dest;
56 }
57 virtual void
58 normalize(const UChar *src, const UChar *limit,
59 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
60
61 // normalize and append
62 virtual UnicodeString &
63 normalizeSecondAndAppend(UnicodeString &first,
64 const UnicodeString &second,
65 UErrorCode &errorCode) const {
66 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
67 }
68 virtual UnicodeString &
69 append(UnicodeString &first,
70 const UnicodeString &second,
71 UErrorCode &errorCode) const {
72 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
73 }
74 UnicodeString &
75 normalizeSecondAndAppend(UnicodeString &first,
76 const UnicodeString &second,
77 UBool doNormalize,
78 UErrorCode &errorCode) const {
79 uprv_checkCanGetBuffer(first, errorCode);
80 if(U_FAILURE(errorCode)) {
81 return first;
82 }
83 const UChar *secondArray=second.getBuffer();
84 if(&first==&second || secondArray==NULL) {
85 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
86 return first;
87 }
88 int32_t firstLength=first.length();
89 UnicodeString safeMiddle;
90 {
91 ReorderingBuffer buffer(impl, first);
92 if(buffer.init(firstLength+second.length(), errorCode)) {
93 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
94 safeMiddle, buffer, errorCode);
95 }
96 } // The ReorderingBuffer destructor finalizes the first string.
97 if(U_FAILURE(errorCode)) {
98 // Restore the modified suffix of the first string.
99 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
100 }
101 return first;
102 }
103 virtual void
104 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
105 UnicodeString &safeMiddle,
106 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
107 virtual UBool
108 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
109 UChar buffer[4];
110 int32_t length;
111 const UChar *d=impl.getDecomposition(c, buffer, length);
112 if(d==NULL) {
113 return FALSE;
114 }
115 if(d==buffer) {
116 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
117 } else {
118 decomposition.setTo(FALSE, d, length); // read-only alias
119 }
120 return TRUE;
121 }
122 virtual UBool
123 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
124 UChar buffer[30];
125 int32_t length;
126 const UChar *d=impl.getRawDecomposition(c, buffer, length);
127 if(d==NULL) {
128 return FALSE;
129 }
130 if(d==buffer) {
131 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
132 } else {
133 decomposition.setTo(FALSE, d, length); // read-only alias
134 }
135 return TRUE;
136 }
137 virtual UChar32
138 composePair(UChar32 a, UChar32 b) const {
139 return impl.composePair(a, b);
140 }
141
142 virtual uint8_t
143 getCombiningClass(UChar32 c) const {
144 return impl.getCC(impl.getNorm16(c));
145 }
146
147 // quick checks
148 virtual UBool
149 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
150 if(U_FAILURE(errorCode)) {
151 return FALSE;
152 }
153 const UChar *sArray=s.getBuffer();
154 if(sArray==NULL) {
155 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156 return FALSE;
157 }
158 const UChar *sLimit=sArray+s.length();
159 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
160 }
161 virtual UNormalizationCheckResult
162 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
163 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
164 }
165 virtual int32_t
166 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
167 if(U_FAILURE(errorCode)) {
168 return 0;
169 }
170 const UChar *sArray=s.getBuffer();
171 if(sArray==NULL) {
172 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
173 return 0;
174 }
175 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
176 }
177 virtual const UChar *
178 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
179
180 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
181 return UNORM_YES;
182 }
183
184 const Normalizer2Impl &impl;
185 };
186
187 class DecomposeNormalizer2 : public Normalizer2WithImpl {
188 public:
189 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
190 virtual ~DecomposeNormalizer2();
191
192 private:
193 virtual void
194 normalize(const UChar *src, const UChar *limit,
195 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
196 impl.decompose(src, limit, &buffer, errorCode);
197 }
198 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
199 virtual void
200 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
201 UnicodeString &safeMiddle,
202 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
203 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
204 }
205 virtual const UChar *
206 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
207 return impl.decompose(src, limit, NULL, errorCode);
208 }
209 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
210 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
211 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
212 }
213 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
214 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
215 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
216 };
217
218 class ComposeNormalizer2 : public Normalizer2WithImpl {
219 public:
220 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
221 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
222 virtual ~ComposeNormalizer2();
223
224 private:
225 virtual void
226 normalize(const UChar *src, const UChar *limit,
227 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
229 }
230 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
231 virtual void
232 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
233 UnicodeString &safeMiddle,
234 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
235 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
236 }
237
238 virtual UBool
239 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
240 if(U_FAILURE(errorCode)) {
241 return FALSE;
242 }
243 const UChar *sArray=s.getBuffer();
244 if(sArray==NULL) {
245 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
246 return FALSE;
247 }
248 UnicodeString temp;
249 ReorderingBuffer buffer(impl, temp);
250 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
251 return FALSE;
252 }
253 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
254 }
255 virtual UNormalizationCheckResult
256 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
257 if(U_FAILURE(errorCode)) {
258 return UNORM_MAYBE;
259 }
260 const UChar *sArray=s.getBuffer();
261 if(sArray==NULL) {
262 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
263 return UNORM_MAYBE;
264 }
265 UNormalizationCheckResult qcResult=UNORM_YES;
266 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
267 return qcResult;
268 }
269 virtual const UChar *
270 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
271 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
272 }
273 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
274 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
275 return impl.getCompQuickCheck(impl.getNorm16(c));
276 }
277 virtual UBool hasBoundaryBefore(UChar32 c) const {
278 return impl.hasCompBoundaryBefore(c);
279 }
280 virtual UBool hasBoundaryAfter(UChar32 c) const {
281 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
282 }
283 virtual UBool isInert(UChar32 c) const {
284 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
285 }
286
287 const UBool onlyContiguous;
288 };
289
290 class FCDNormalizer2 : public Normalizer2WithImpl {
291 public:
292 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
293 virtual ~FCDNormalizer2();
294
295 private:
296 virtual void
297 normalize(const UChar *src, const UChar *limit,
298 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
299 impl.makeFCD(src, limit, &buffer, errorCode);
300 }
301 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
302 virtual void
303 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
304 UnicodeString &safeMiddle,
305 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
306 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
307 }
308 virtual const UChar *
309 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
310 return impl.makeFCD(src, limit, NULL, errorCode);
311 }
312 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
313 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
314 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
315 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
316 };
317
318 struct Norm2AllModes : public UMemory {
319 Norm2AllModes(Normalizer2Impl *i)
320 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
321 ~Norm2AllModes();
322
323 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
324 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
325 static Norm2AllModes *createInstance(const char *packageName,
326 const char *name,
327 UErrorCode &errorCode);
328
329 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
330 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
331 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
332
333 Normalizer2Impl *impl;
334 ComposeNormalizer2 comp;
335 DecomposeNormalizer2 decomp;
336 FCDNormalizer2 fcd;
337 ComposeNormalizer2 fcc;
338 };
339
340 U_NAMESPACE_END
341
342 #endif // !UCONFIG_NO_NORMALIZATION
343 #endif // __NORM2ALLMODES_H__