]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/norm2allmodes.h
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / norm2allmodes.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * norm2allmodes.h
9 *
10 * created on: 2014sep07
11 * created by: Markus W. Scherer
12 */
13
14 #ifndef __NORM2ALLMODES_H__
15 #define __NORM2ALLMODES_H__
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/edits.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/stringoptions.h"
24 #include "unicode/unistr.h"
25 #include "cpputils.h"
26 #include "normalizer2impl.h"
27
28 U_NAMESPACE_BEGIN
29
30 // Intermediate class:
31 // Has Normalizer2Impl and does boilerplate argument checking and setup.
32 class Normalizer2WithImpl : public Normalizer2 {
33 public:
34 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
35 virtual ~Normalizer2WithImpl();
36
37 // normalize
38 virtual UnicodeString &
39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_FAILURE(errorCode)) {
43 dest.setToBogus();
44 return dest;
45 }
46 const UChar *sArray=src.getBuffer();
47 if(&dest==&src || sArray==NULL) {
48 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
49 dest.setToBogus();
50 return dest;
51 }
52 dest.remove();
53 ReorderingBuffer buffer(impl, dest);
54 if(buffer.init(src.length(), errorCode)) {
55 normalize(sArray, sArray+src.length(), buffer, errorCode);
56 }
57 return dest;
58 }
59 virtual void
60 normalize(const UChar *src, const UChar *limit,
61 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
62
63 // normalize and append
64 virtual UnicodeString &
65 normalizeSecondAndAppend(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
68 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
69 }
70 virtual UnicodeString &
71 append(UnicodeString &first,
72 const UnicodeString &second,
73 UErrorCode &errorCode) const {
74 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
75 }
76 UnicodeString &
77 normalizeSecondAndAppend(UnicodeString &first,
78 const UnicodeString &second,
79 UBool doNormalize,
80 UErrorCode &errorCode) const {
81 uprv_checkCanGetBuffer(first, errorCode);
82 if(U_FAILURE(errorCode)) {
83 return first;
84 }
85 const UChar *secondArray=second.getBuffer();
86 if(&first==&second || secondArray==NULL) {
87 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
88 return first;
89 }
90 int32_t firstLength=first.length();
91 UnicodeString safeMiddle;
92 {
93 ReorderingBuffer buffer(impl, first);
94 if(buffer.init(firstLength+second.length(), errorCode)) {
95 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
96 safeMiddle, buffer, errorCode);
97 }
98 } // The ReorderingBuffer destructor finalizes the first string.
99 if(U_FAILURE(errorCode)) {
100 // Restore the modified suffix of the first string.
101 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
102 }
103 return first;
104 }
105 virtual void
106 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
107 UnicodeString &safeMiddle,
108 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
109 virtual UBool
110 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
111 UChar buffer[4];
112 int32_t length;
113 const UChar *d=impl.getDecomposition(c, buffer, length);
114 if(d==NULL) {
115 return FALSE;
116 }
117 if(d==buffer) {
118 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
119 } else {
120 decomposition.setTo(FALSE, d, length); // read-only alias
121 }
122 return TRUE;
123 }
124 virtual UBool
125 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
126 UChar buffer[30];
127 int32_t length;
128 const UChar *d=impl.getRawDecomposition(c, buffer, length);
129 if(d==NULL) {
130 return FALSE;
131 }
132 if(d==buffer) {
133 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
134 } else {
135 decomposition.setTo(FALSE, d, length); // read-only alias
136 }
137 return TRUE;
138 }
139 virtual UChar32
140 composePair(UChar32 a, UChar32 b) const {
141 return impl.composePair(a, b);
142 }
143
144 virtual uint8_t
145 getCombiningClass(UChar32 c) const {
146 return impl.getCC(impl.getNorm16(c));
147 }
148
149 // quick checks
150 virtual UBool
151 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
152 if(U_FAILURE(errorCode)) {
153 return FALSE;
154 }
155 const UChar *sArray=s.getBuffer();
156 if(sArray==NULL) {
157 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
158 return FALSE;
159 }
160 const UChar *sLimit=sArray+s.length();
161 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
162 }
163 virtual UNormalizationCheckResult
164 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
165 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
166 }
167 virtual int32_t
168 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
169 if(U_FAILURE(errorCode)) {
170 return 0;
171 }
172 const UChar *sArray=s.getBuffer();
173 if(sArray==NULL) {
174 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
175 return 0;
176 }
177 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
178 }
179 virtual const UChar *
180 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
181
182 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
183 return UNORM_YES;
184 }
185
186 const Normalizer2Impl &impl;
187 };
188
189 class DecomposeNormalizer2 : public Normalizer2WithImpl {
190 public:
191 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
192 virtual ~DecomposeNormalizer2();
193
194 private:
195 virtual void
196 normalize(const UChar *src, const UChar *limit,
197 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
198 impl.decompose(src, limit, &buffer, errorCode);
199 }
200 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
201 virtual void
202 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
203 UnicodeString &safeMiddle,
204 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
205 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
206 }
207 virtual const UChar *
208 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
209 return impl.decompose(src, limit, NULL, errorCode);
210 }
211 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
212 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
213 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
214 }
215 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
216 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
217 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
218 };
219
220 class ComposeNormalizer2 : public Normalizer2WithImpl {
221 public:
222 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
223 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
224 virtual ~ComposeNormalizer2();
225
226 private:
227 virtual void
228 normalize(const UChar *src, const UChar *limit,
229 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
230 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
231 }
232 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
233
234 void
235 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
236 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
237 if (U_FAILURE(errorCode)) {
238 return;
239 }
240 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
241 edits->reset();
242 }
243 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
244 impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
245 &sink, edits, errorCode);
246 sink.Flush();
247 }
248
249 virtual void
250 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
251 UnicodeString &safeMiddle,
252 ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
253 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
254 }
255
256 virtual UBool
257 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
258 if(U_FAILURE(errorCode)) {
259 return FALSE;
260 }
261 const UChar *sArray=s.getBuffer();
262 if(sArray==NULL) {
263 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
264 return FALSE;
265 }
266 UnicodeString temp;
267 ReorderingBuffer buffer(impl, temp);
268 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
269 return FALSE;
270 }
271 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
272 }
273 virtual UBool
274 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
275 if(U_FAILURE(errorCode)) {
276 return FALSE;
277 }
278 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
279 return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
280 }
281 virtual UNormalizationCheckResult
282 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
283 if(U_FAILURE(errorCode)) {
284 return UNORM_MAYBE;
285 }
286 const UChar *sArray=s.getBuffer();
287 if(sArray==NULL) {
288 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
289 return UNORM_MAYBE;
290 }
291 UNormalizationCheckResult qcResult=UNORM_YES;
292 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
293 return qcResult;
294 }
295 virtual const UChar *
296 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
297 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
298 }
299 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
300 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
301 return impl.getCompQuickCheck(impl.getNorm16(c));
302 }
303 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
304 return impl.hasCompBoundaryBefore(c);
305 }
306 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
307 return impl.hasCompBoundaryAfter(c, onlyContiguous);
308 }
309 virtual UBool isInert(UChar32 c) const U_OVERRIDE {
310 return impl.isCompInert(c, onlyContiguous);
311 }
312
313 const UBool onlyContiguous;
314 };
315
316 class FCDNormalizer2 : public Normalizer2WithImpl {
317 public:
318 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
319 virtual ~FCDNormalizer2();
320
321 private:
322 virtual void
323 normalize(const UChar *src, const UChar *limit,
324 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
325 impl.makeFCD(src, limit, &buffer, errorCode);
326 }
327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
328 virtual void
329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
330 UnicodeString &safeMiddle,
331 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
332 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
333 }
334 virtual const UChar *
335 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
336 return impl.makeFCD(src, limit, NULL, errorCode);
337 }
338 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
339 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
340 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
341 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
342 };
343
344 struct Norm2AllModes : public UMemory {
345 Norm2AllModes(Normalizer2Impl *i)
346 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
347 ~Norm2AllModes();
348
349 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
350 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
351 static Norm2AllModes *createInstance(const char *packageName,
352 const char *name,
353 UErrorCode &errorCode);
354
355 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
356 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
357 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
358
359 Normalizer2Impl *impl;
360 ComposeNormalizer2 comp;
361 DecomposeNormalizer2 decomp;
362 FCDNormalizer2 fcd;
363 ComposeNormalizer2 fcc;
364 };
365
366 U_NAMESPACE_END
367
368 #endif // !UCONFIG_NO_NORMALIZATION
369 #endif // __NORM2ALLMODES_H__