]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/norm2allmodes.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / norm2allmodes.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * norm2allmodes.h
7 *
8 * created on: 2014sep07
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __NORM2ALLMODES_H__
13 #define __NORM2ALLMODES_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_NORMALIZATION
18
19 #include "unicode/normalizer2.h"
20 #include "unicode/unistr.h"
21 #include "cpputils.h"
22 #include "normalizer2impl.h"
23
24 U_NAMESPACE_BEGIN
25
26 // Intermediate class:
27 // Has Normalizer2Impl and does boilerplate argument checking and setup.
28 class Normalizer2WithImpl : public Normalizer2 {
29 public:
30 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
31 virtual ~Normalizer2WithImpl();
32
33 // normalize
34 virtual UnicodeString &
35 normalize(const UnicodeString &src,
36 UnicodeString &dest,
37 UErrorCode &errorCode) const {
38 if(U_FAILURE(errorCode)) {
39 dest.setToBogus();
40 return dest;
41 }
42 const UChar *sArray=src.getBuffer();
43 if(&dest==&src || sArray==NULL) {
44 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
45 dest.setToBogus();
46 return dest;
47 }
48 dest.remove();
49 ReorderingBuffer buffer(impl, dest);
50 if(buffer.init(src.length(), errorCode)) {
51 normalize(sArray, sArray+src.length(), buffer, errorCode);
52 }
53 return dest;
54 }
55 virtual void
56 normalize(const UChar *src, const UChar *limit,
57 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
58
59 // normalize and append
60 virtual UnicodeString &
61 normalizeSecondAndAppend(UnicodeString &first,
62 const UnicodeString &second,
63 UErrorCode &errorCode) const {
64 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
65 }
66 virtual UnicodeString &
67 append(UnicodeString &first,
68 const UnicodeString &second,
69 UErrorCode &errorCode) const {
70 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
71 }
72 UnicodeString &
73 normalizeSecondAndAppend(UnicodeString &first,
74 const UnicodeString &second,
75 UBool doNormalize,
76 UErrorCode &errorCode) const {
77 uprv_checkCanGetBuffer(first, errorCode);
78 if(U_FAILURE(errorCode)) {
79 return first;
80 }
81 const UChar *secondArray=second.getBuffer();
82 if(&first==&second || secondArray==NULL) {
83 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
84 return first;
85 }
86 int32_t firstLength=first.length();
87 UnicodeString safeMiddle;
88 {
89 ReorderingBuffer buffer(impl, first);
90 if(buffer.init(firstLength+second.length(), errorCode)) {
91 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
92 safeMiddle, buffer, errorCode);
93 }
94 } // The ReorderingBuffer destructor finalizes the first string.
95 if(U_FAILURE(errorCode)) {
96 // Restore the modified suffix of the first string.
97 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
98 }
99 return first;
100 }
101 virtual void
102 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
103 UnicodeString &safeMiddle,
104 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
105 virtual UBool
106 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
107 UChar buffer[4];
108 int32_t length;
109 const UChar *d=impl.getDecomposition(c, buffer, length);
110 if(d==NULL) {
111 return FALSE;
112 }
113 if(d==buffer) {
114 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
115 } else {
116 decomposition.setTo(FALSE, d, length); // read-only alias
117 }
118 return TRUE;
119 }
120 virtual UBool
121 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
122 UChar buffer[30];
123 int32_t length;
124 const UChar *d=impl.getRawDecomposition(c, buffer, length);
125 if(d==NULL) {
126 return FALSE;
127 }
128 if(d==buffer) {
129 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
130 } else {
131 decomposition.setTo(FALSE, d, length); // read-only alias
132 }
133 return TRUE;
134 }
135 virtual UChar32
136 composePair(UChar32 a, UChar32 b) const {
137 return impl.composePair(a, b);
138 }
139
140 virtual uint8_t
141 getCombiningClass(UChar32 c) const {
142 return impl.getCC(impl.getNorm16(c));
143 }
144
145 // quick checks
146 virtual UBool
147 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
148 if(U_FAILURE(errorCode)) {
149 return FALSE;
150 }
151 const UChar *sArray=s.getBuffer();
152 if(sArray==NULL) {
153 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
154 return FALSE;
155 }
156 const UChar *sLimit=sArray+s.length();
157 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
158 }
159 virtual UNormalizationCheckResult
160 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
161 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
162 }
163 virtual int32_t
164 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
165 if(U_FAILURE(errorCode)) {
166 return 0;
167 }
168 const UChar *sArray=s.getBuffer();
169 if(sArray==NULL) {
170 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
171 return 0;
172 }
173 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
174 }
175 virtual const UChar *
176 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
177
178 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
179 return UNORM_YES;
180 }
181
182 const Normalizer2Impl &impl;
183 };
184
185 class DecomposeNormalizer2 : public Normalizer2WithImpl {
186 public:
187 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
188 virtual ~DecomposeNormalizer2();
189
190 private:
191 virtual void
192 normalize(const UChar *src, const UChar *limit,
193 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
194 impl.decompose(src, limit, &buffer, errorCode);
195 }
196 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
197 virtual void
198 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
199 UnicodeString &safeMiddle,
200 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
201 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
202 }
203 virtual const UChar *
204 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
205 return impl.decompose(src, limit, NULL, errorCode);
206 }
207 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
208 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
209 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
210 }
211 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
212 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
213 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
214 };
215
216 class ComposeNormalizer2 : public Normalizer2WithImpl {
217 public:
218 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
219 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
220 virtual ~ComposeNormalizer2();
221
222 private:
223 virtual void
224 normalize(const UChar *src, const UChar *limit,
225 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
226 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
227 }
228 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
229 virtual void
230 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
231 UnicodeString &safeMiddle,
232 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
233 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
234 }
235
236 virtual UBool
237 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
238 if(U_FAILURE(errorCode)) {
239 return FALSE;
240 }
241 const UChar *sArray=s.getBuffer();
242 if(sArray==NULL) {
243 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
244 return FALSE;
245 }
246 UnicodeString temp;
247 ReorderingBuffer buffer(impl, temp);
248 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
249 return FALSE;
250 }
251 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
252 }
253 virtual UNormalizationCheckResult
254 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
255 if(U_FAILURE(errorCode)) {
256 return UNORM_MAYBE;
257 }
258 const UChar *sArray=s.getBuffer();
259 if(sArray==NULL) {
260 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
261 return UNORM_MAYBE;
262 }
263 UNormalizationCheckResult qcResult=UNORM_YES;
264 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
265 return qcResult;
266 }
267 virtual const UChar *
268 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
269 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
270 }
271 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
272 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
273 return impl.getCompQuickCheck(impl.getNorm16(c));
274 }
275 virtual UBool hasBoundaryBefore(UChar32 c) const {
276 return impl.hasCompBoundaryBefore(c);
277 }
278 virtual UBool hasBoundaryAfter(UChar32 c) const {
279 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
280 }
281 virtual UBool isInert(UChar32 c) const {
282 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
283 }
284
285 const UBool onlyContiguous;
286 };
287
288 class FCDNormalizer2 : public Normalizer2WithImpl {
289 public:
290 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
291 virtual ~FCDNormalizer2();
292
293 private:
294 virtual void
295 normalize(const UChar *src, const UChar *limit,
296 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
297 impl.makeFCD(src, limit, &buffer, errorCode);
298 }
299 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
300 virtual void
301 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
302 UnicodeString &safeMiddle,
303 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
304 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
305 }
306 virtual const UChar *
307 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
308 return impl.makeFCD(src, limit, NULL, errorCode);
309 }
310 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
311 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
312 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
313 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
314 };
315
316 struct Norm2AllModes : public UMemory {
317 Norm2AllModes(Normalizer2Impl *i)
318 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
319 ~Norm2AllModes();
320
321 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
322 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
323 static Norm2AllModes *createInstance(const char *packageName,
324 const char *name,
325 UErrorCode &errorCode);
326
327 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
328 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
329 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
330
331 Normalizer2Impl *impl;
332 ComposeNormalizer2 comp;
333 DecomposeNormalizer2 decomp;
334 FCDNormalizer2 fcd;
335 ComposeNormalizer2 fcc;
336 };
337
338 U_NAMESPACE_END
339
340 #endif // !UCONFIG_NO_NORMALIZATION
341 #endif // __NORM2ALLMODES_H__